paddlex 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/configs/modules/text_recognition/eslav_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/korean_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/latin_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/pipelines/PP-DocTranslation.yaml +261 -0
- paddlex/inference/common/batch_sampler/__init__.py +1 -0
- paddlex/inference/common/batch_sampler/markdown_batch_sampler.py +116 -0
- paddlex/inference/common/result/base_cv_result.py +2 -3
- paddlex/inference/common/result/mixin.py +3 -1
- paddlex/inference/models/base/predictor/base_predictor.py +2 -0
- paddlex/inference/models/common/static_infer.py +2 -0
- paddlex/inference/models/common/vlm/generation/utils.py +2 -2
- paddlex/inference/models/formula_recognition/result.py +2 -2
- paddlex/inference/models/image_classification/result.py +3 -5
- paddlex/inference/models/image_multilabel_classification/result.py +2 -2
- paddlex/inference/models/object_detection/result.py +2 -2
- paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +3 -0
- paddlex/inference/models/text_recognition/predictor.py +51 -1
- paddlex/inference/models/text_recognition/result.py +5 -2
- paddlex/inference/models/video_classification/result.py +3 -3
- paddlex/inference/models/video_detection/result.py +2 -4
- paddlex/inference/pipelines/__init__.py +1 -0
- paddlex/inference/pipelines/attribute_recognition/result.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/__init__.py +1 -0
- paddlex/inference/pipelines/components/prompt_engineering/generate_translate_prompt.py +179 -0
- paddlex/inference/pipelines/doc_preprocessor/result.py +2 -2
- paddlex/inference/pipelines/formula_recognition/result.py +2 -2
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +2 -0
- paddlex/inference/pipelines/layout_parsing/result_v2.py +11 -4
- paddlex/inference/pipelines/ocr/pipeline.py +2 -0
- paddlex/inference/pipelines/ocr/result.py +11 -7
- paddlex/inference/pipelines/pp_doctranslation/__init__.py +15 -0
- paddlex/inference/pipelines/pp_doctranslation/pipeline.py +523 -0
- paddlex/inference/pipelines/pp_doctranslation/result.py +39 -0
- paddlex/inference/pipelines/pp_doctranslation/utils.py +260 -0
- paddlex/inference/pipelines/pp_shitu_v2/result.py +2 -2
- paddlex/inference/serving/basic_serving/_app.py +1 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +5 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -24
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +16 -26
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py +203 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +4 -2
- paddlex/inference/serving/infra/utils.py +22 -17
- paddlex/inference/serving/schemas/anomaly_detection.py +1 -0
- paddlex/inference/serving/schemas/doc_preprocessor.py +1 -0
- paddlex/inference/serving/schemas/face_recognition.py +1 -0
- paddlex/inference/serving/schemas/formula_recognition.py +1 -0
- paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -0
- paddlex/inference/serving/schemas/image_classification.py +1 -0
- paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -0
- paddlex/inference/serving/schemas/instance_segmentation.py +1 -0
- paddlex/inference/serving/schemas/layout_parsing.py +1 -0
- paddlex/inference/serving/schemas/object_detection.py +1 -0
- paddlex/inference/serving/schemas/ocr.py +1 -0
- paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -0
- paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -0
- paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -0
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +5 -4
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +6 -5
- paddlex/inference/serving/schemas/pp_doctranslation.py +115 -0
- paddlex/inference/serving/schemas/pp_shituv2.py +1 -0
- paddlex/inference/serving/schemas/pp_structurev3.py +2 -9
- paddlex/inference/serving/schemas/rotated_object_detection.py +1 -0
- paddlex/inference/serving/schemas/seal_recognition.py +1 -0
- paddlex/inference/serving/schemas/semantic_segmentation.py +1 -0
- paddlex/inference/serving/schemas/shared/ocr.py +8 -1
- paddlex/inference/serving/schemas/small_object_detection.py +1 -0
- paddlex/inference/serving/schemas/table_recognition.py +1 -0
- paddlex/inference/serving/schemas/table_recognition_v2.py +1 -0
- paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -0
- paddlex/inference/serving/schemas/ts_classification.py +1 -0
- paddlex/inference/serving/schemas/ts_forecast.py +1 -0
- paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -0
- paddlex/inference/utils/hpi.py +42 -14
- paddlex/inference/utils/hpi_model_info_collection.json +0 -2
- paddlex/inference/utils/io/__init__.py +1 -0
- paddlex/inference/utils/io/readers.py +46 -0
- paddlex/inference/utils/io/writers.py +2 -0
- paddlex/inference/utils/official_models.py +7 -0
- paddlex/inference/utils/pp_option.py +34 -18
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/m_3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/text_recognition/model_list.py +3 -0
- paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +27 -0
- paddlex/repo_manager/meta.py +3 -3
- paddlex/utils/device.py +4 -1
- paddlex/utils/download.py +10 -7
- paddlex/utils/{fonts/__init__.py → fonts.py} +45 -26
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/METADATA +25 -1
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/RECORD +134 -122
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/LICENSE +0 -0
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/WHEEL +0 -0
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/top_level.txt +0 -0
@@ -35,6 +35,7 @@ INFER_ENDPOINT: Final[str] = "/pedestrian-attribute-recognition"
|
|
35
35
|
class InferRequest(BaseModel):
|
36
36
|
image: str
|
37
37
|
detThreshold: Optional[float] = None
|
38
|
+
visualize: Optional[bool] = None
|
38
39
|
clsThreshold: Optional[
|
39
40
|
Union[float, Dict[Union[Literal["default"], int], float], List[float]]
|
40
41
|
] = None
|
@@ -57,6 +57,7 @@ class AnalyzeImagesRequest(ocr.BaseInferRequest):
|
|
57
57
|
sealDetBoxThresh: Optional[float] = None
|
58
58
|
sealDetUnclipRatio: Optional[float] = None
|
59
59
|
sealRecScoreThresh: Optional[float] = None
|
60
|
+
visualize: Optional[bool] = None
|
60
61
|
|
61
62
|
|
62
63
|
class LayoutParsingResult(BaseModel):
|
@@ -78,8 +79,8 @@ BUILD_VECTOR_STORE_ENDPOINT: Final[str] = "/chatocr-vector"
|
|
78
79
|
|
79
80
|
class BuildVectorStoreRequest(BaseModel):
|
80
81
|
visualInfo: List[dict]
|
81
|
-
minCharacters:
|
82
|
-
blockSize:
|
82
|
+
minCharacters: int = 3500
|
83
|
+
blockSize: int = 300
|
83
84
|
retrieverConfig: Optional[dict] = None
|
84
85
|
|
85
86
|
|
@@ -93,9 +94,9 @@ CHAT_ENDPOINT: Final[str] = "/chatocr-chat"
|
|
93
94
|
class ChatRequest(BaseModel):
|
94
95
|
keyList: List[str]
|
95
96
|
visualInfo: List[dict]
|
96
|
-
useVectorRetrieval:
|
97
|
+
useVectorRetrieval: bool = True
|
97
98
|
vectorInfo: Optional[dict] = None
|
98
|
-
minCharacters:
|
99
|
+
minCharacters: int = 3500
|
99
100
|
textTaskDescription: Optional[str] = None
|
100
101
|
textOutputFormat: Optional[str] = None
|
101
102
|
# Is the "Str" in the name unnecessary? Keep the names consistent with the
|
@@ -61,6 +61,7 @@ class AnalyzeImagesRequest(ocr.BaseInferRequest):
|
|
61
61
|
sealDetBoxThresh: Optional[float] = None
|
62
62
|
sealDetUnclipRatio: Optional[float] = None
|
63
63
|
sealRecScoreThresh: Optional[float] = None
|
64
|
+
visualize: Optional[bool] = None
|
64
65
|
|
65
66
|
|
66
67
|
class LayoutParsingResult(BaseModel):
|
@@ -80,8 +81,8 @@ BUILD_VECTOR_STORE_ENDPOINT: Final[str] = "/chatocr-vector"
|
|
80
81
|
|
81
82
|
class BuildVectorStoreRequest(BaseModel):
|
82
83
|
visualInfo: List[dict]
|
83
|
-
minCharacters:
|
84
|
-
blockSize:
|
84
|
+
minCharacters: int = 3500
|
85
|
+
blockSize: int = 300
|
85
86
|
retrieverConfig: Optional[dict] = None
|
86
87
|
|
87
88
|
|
@@ -108,9 +109,9 @@ CHAT_ENDPOINT: Final[str] = "/chatocr-chat"
|
|
108
109
|
class ChatRequest(BaseModel):
|
109
110
|
keyList: List[str]
|
110
111
|
visualInfo: List[dict]
|
111
|
-
useVectorRetrieval:
|
112
|
+
useVectorRetrieval: bool = True
|
112
113
|
vectorInfo: Optional[dict] = None
|
113
|
-
minCharacters:
|
114
|
+
minCharacters: int = 3500
|
114
115
|
textTaskDescription: Optional[str] = None
|
115
116
|
textOutputFormat: Optional[str] = None
|
116
117
|
textRulesStr: Optional[str] = None
|
@@ -122,7 +123,7 @@ class ChatRequest(BaseModel):
|
|
122
123
|
tableFewShotDemoTextContent: Optional[str] = None
|
123
124
|
tableFewShotDemoKeyValueList: Optional[str] = None
|
124
125
|
mllmPredictInfo: Optional[dict] = None
|
125
|
-
mllmIntegrationStrategy:
|
126
|
+
mllmIntegrationStrategy: str = "integration"
|
126
127
|
chatBotConfig: Optional[dict] = None
|
127
128
|
retrieverConfig: Optional[dict] = None
|
128
129
|
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from typing import Dict, Final, List, Optional, Tuple, Union
|
16
|
+
|
17
|
+
from pydantic import BaseModel
|
18
|
+
|
19
|
+
from ..infra.models import DataInfo, PrimaryOperations
|
20
|
+
from .shared import ocr
|
21
|
+
|
22
|
+
__all__ = [
|
23
|
+
"ANALYZE_IMAGES_ENDPOINT",
|
24
|
+
"AnalyzeImagesRequest",
|
25
|
+
"LayoutParsingResult",
|
26
|
+
"AnalyzeImagesResult",
|
27
|
+
"TRANSLATE_ENDPOINT",
|
28
|
+
"TranslateRequest",
|
29
|
+
"TranslationResult",
|
30
|
+
"TranslateResult",
|
31
|
+
"PRIMARY_OPERATIONS",
|
32
|
+
]
|
33
|
+
|
34
|
+
ANALYZE_IMAGES_ENDPOINT: Final[str] = "/doctrans-visual"
|
35
|
+
|
36
|
+
|
37
|
+
class AnalyzeImagesRequest(ocr.BaseInferRequest):
|
38
|
+
useDocOrientationClassify: Optional[bool] = False
|
39
|
+
useDocUnwarping: Optional[bool] = False
|
40
|
+
useTextlineOrientation: Optional[bool] = None
|
41
|
+
useSealRecognition: Optional[bool] = None
|
42
|
+
useTableRecognition: Optional[bool] = None
|
43
|
+
useFormulaRecognition: Optional[bool] = None
|
44
|
+
useChartRecognition: Optional[bool] = False
|
45
|
+
useRegionDetection: Optional[bool] = None
|
46
|
+
layoutThreshold: Optional[Union[float, dict]] = None
|
47
|
+
layoutNms: Optional[bool] = None
|
48
|
+
layoutUnclipRatio: Optional[Union[float, Tuple[float, float], dict]] = None
|
49
|
+
layoutMergeBboxesMode: Optional[Union[str, dict]] = None
|
50
|
+
textDetLimitSideLen: Optional[int] = None
|
51
|
+
textDetLimitType: Optional[str] = None
|
52
|
+
textDetThresh: Optional[float] = None
|
53
|
+
textDetBoxThresh: Optional[float] = None
|
54
|
+
textDetUnclipRatio: Optional[float] = None
|
55
|
+
textRecScoreThresh: Optional[float] = None
|
56
|
+
sealDetLimitSideLen: Optional[int] = None
|
57
|
+
sealDetLimitType: Optional[str] = None
|
58
|
+
sealDetThresh: Optional[float] = None
|
59
|
+
sealDetBoxThresh: Optional[float] = None
|
60
|
+
sealDetUnclipRatio: Optional[float] = None
|
61
|
+
sealRecScoreThresh: Optional[float] = None
|
62
|
+
useWiredTableCellsTransToHtml: bool = False
|
63
|
+
useWirelessTableCellsTransToHtml: bool = False
|
64
|
+
useTableOrientationClassify: bool = True
|
65
|
+
useOcrResultsWithTableCells: bool = True
|
66
|
+
useE2eWiredTableRecModel: bool = False
|
67
|
+
useE2eWirelessTableRecModel: bool = True
|
68
|
+
visualize: Optional[bool] = None
|
69
|
+
|
70
|
+
|
71
|
+
class LayoutParsingResult(BaseModel):
|
72
|
+
prunedResult: dict
|
73
|
+
markdown: ocr.MarkdownData
|
74
|
+
outputImages: Optional[Dict[str, str]] = None
|
75
|
+
inputImage: Optional[str] = None
|
76
|
+
|
77
|
+
|
78
|
+
class AnalyzeImagesResult(BaseModel):
|
79
|
+
layoutParsingResults: List[LayoutParsingResult]
|
80
|
+
dataInfo: DataInfo
|
81
|
+
|
82
|
+
|
83
|
+
TRANSLATE_ENDPOINT: Final[str] = "/doctrans-translate"
|
84
|
+
|
85
|
+
|
86
|
+
class TranslateRequest(BaseModel):
|
87
|
+
markdownList: List[ocr.MarkdownData]
|
88
|
+
targetLanguage: str = "zh"
|
89
|
+
chunkSize: int = 5000
|
90
|
+
taskDescription: Optional[str] = None
|
91
|
+
outputFormat: Optional[str] = None
|
92
|
+
rulesStr: Optional[str] = None
|
93
|
+
fewShotDemoTextContent: Optional[str] = None
|
94
|
+
fewShotDemoKeyValueList: Optional[str] = None
|
95
|
+
chatBotConfig: Optional[dict] = None
|
96
|
+
sleepInterval: float = 0
|
97
|
+
|
98
|
+
|
99
|
+
class TranslationResult(BaseModel):
|
100
|
+
language: str
|
101
|
+
markdown: ocr.MarkdownData
|
102
|
+
|
103
|
+
|
104
|
+
class TranslateResult(BaseModel):
|
105
|
+
translationResults: List[TranslationResult]
|
106
|
+
|
107
|
+
|
108
|
+
PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
|
109
|
+
"analyzeImages": (
|
110
|
+
ANALYZE_IMAGES_ENDPOINT,
|
111
|
+
AnalyzeImagesRequest,
|
112
|
+
AnalyzeImagesResult,
|
113
|
+
),
|
114
|
+
"translate": (TRANSLATE_ENDPOINT, TranslateRequest, TranslateResult),
|
115
|
+
}
|
@@ -22,7 +22,6 @@ from .shared import ocr
|
|
22
22
|
__all__ = [
|
23
23
|
"INFER_ENDPOINT",
|
24
24
|
"InferRequest",
|
25
|
-
"MarkdownData",
|
26
25
|
"LayoutParsingResult",
|
27
26
|
"InferResult",
|
28
27
|
"PRIMARY_OPERATIONS",
|
@@ -62,18 +61,12 @@ class InferRequest(ocr.BaseInferRequest):
|
|
62
61
|
useOcrResultsWithTableCells: bool = True
|
63
62
|
useE2eWiredTableRecModel: bool = False
|
64
63
|
useE2eWirelessTableRecModel: bool = True
|
65
|
-
|
66
|
-
|
67
|
-
class MarkdownData(BaseModel):
|
68
|
-
text: str
|
69
|
-
images: Dict[str, str]
|
70
|
-
isStart: bool
|
71
|
-
isEnd: bool
|
64
|
+
visualize: Optional[bool] = None
|
72
65
|
|
73
66
|
|
74
67
|
class LayoutParsingResult(BaseModel):
|
75
68
|
prunedResult: dict
|
76
|
-
markdown: MarkdownData
|
69
|
+
markdown: ocr.MarkdownData
|
77
70
|
outputImages: Optional[Dict[str, str]] = None
|
78
71
|
inputImage: Optional[str] = None
|
79
72
|
|
@@ -12,7 +12,7 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
|
-
from typing import Optional
|
15
|
+
from typing import Dict, Optional
|
16
16
|
|
17
17
|
from pydantic import BaseModel
|
18
18
|
from typing_extensions import Literal, TypeAlias
|
@@ -23,3 +23,10 @@ FileType: TypeAlias = Literal[0, 1]
|
|
23
23
|
class BaseInferRequest(BaseModel):
|
24
24
|
file: str
|
25
25
|
fileType: Optional[FileType] = None
|
26
|
+
|
27
|
+
|
28
|
+
class MarkdownData(BaseModel):
|
29
|
+
text: str
|
30
|
+
isStart: bool
|
31
|
+
isEnd: bool
|
32
|
+
images: Optional[Dict[str, str]] = None
|
@@ -35,6 +35,7 @@ INFER_ENDPOINT: Final[str] = "/vehicle-attribute-recognition"
|
|
35
35
|
class InferRequest(BaseModel):
|
36
36
|
image: str
|
37
37
|
detThreshold: Optional[float] = None
|
38
|
+
visualize: Optional[bool] = None
|
38
39
|
clsThreshold: Optional[
|
39
40
|
Union[float, Dict[Union[Literal["default"], int], float], List[float]]
|
40
41
|
] = None
|
paddlex/inference/utils/hpi.py
CHANGED
@@ -132,13 +132,25 @@ def suggest_inference_backend_and_config(
|
|
132
132
|
available_backends = []
|
133
133
|
if "paddle" in model_paths:
|
134
134
|
available_backends.append("paddle")
|
135
|
-
if
|
135
|
+
if (
|
136
|
+
is_built_with_openvino()
|
137
|
+
and is_onnx_model_available
|
138
|
+
and hpi_config.device_type == "cpu"
|
139
|
+
):
|
136
140
|
available_backends.append("openvino")
|
137
|
-
if
|
141
|
+
if (
|
142
|
+
is_built_with_ort()
|
143
|
+
and is_onnx_model_available
|
144
|
+
and hpi_config.device_type in ("cpu", "gpu")
|
145
|
+
):
|
138
146
|
available_backends.append("onnxruntime")
|
139
|
-
if
|
147
|
+
if (
|
148
|
+
is_built_with_trt()
|
149
|
+
and is_onnx_model_available
|
150
|
+
and hpi_config.device_type == "gpu"
|
151
|
+
):
|
140
152
|
available_backends.append("tensorrt")
|
141
|
-
if is_built_with_om() and "om" in model_paths:
|
153
|
+
if is_built_with_om() and "om" in model_paths and hpi_config.device_type == "npu":
|
142
154
|
available_backends.append("om")
|
143
155
|
|
144
156
|
if not available_backends:
|
@@ -188,20 +200,21 @@ def suggest_inference_backend_and_config(
|
|
188
200
|
hpi_config.pdx_model_name
|
189
201
|
].copy()
|
190
202
|
|
191
|
-
if not is_mkldnn_available():
|
192
|
-
|
193
|
-
|
203
|
+
if not (is_mkldnn_available() and hpi_config.device_type == "cpu"):
|
204
|
+
for pb in supported_pseudo_backends[:]:
|
205
|
+
if pb.startswith("paddle_mkldnn"):
|
206
|
+
supported_pseudo_backends.remove(pb)
|
194
207
|
|
195
208
|
# XXX
|
196
209
|
if not (
|
197
210
|
USE_PIR_TRT
|
198
211
|
and importlib.util.find_spec("tensorrt")
|
199
212
|
and ctypes.util.find_library("nvinfer")
|
213
|
+
and hpi_config.device_type == "gpu"
|
200
214
|
):
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
supported_pseudo_backends.remove("paddle_tensorrt_fp16")
|
215
|
+
for pb in supported_pseudo_backends[:]:
|
216
|
+
if pb.startswith("paddle_tensorrt"):
|
217
|
+
supported_pseudo_backends.remove(pb)
|
205
218
|
|
206
219
|
supported_backends = []
|
207
220
|
backend_to_pseudo_backends = defaultdict(list)
|
@@ -227,12 +240,27 @@ def suggest_inference_backend_and_config(
|
|
227
240
|
f"{repr(hpi_config.backend)} is not a supported inference backend.",
|
228
241
|
)
|
229
242
|
suggested_backend = hpi_config.backend
|
230
|
-
pseudo_backends = backend_to_pseudo_backends[suggested_backend]
|
231
|
-
pseudo_backend = pseudo_backends[0]
|
232
243
|
else:
|
233
244
|
# Prefer the first one.
|
234
245
|
suggested_backend = supported_backends[0]
|
235
|
-
|
246
|
+
|
247
|
+
pseudo_backends = backend_to_pseudo_backends[suggested_backend]
|
248
|
+
|
249
|
+
if hpi_config.backend_config is not None:
|
250
|
+
requested_base_pseudo_backend = None
|
251
|
+
if suggested_backend == "paddle":
|
252
|
+
if "run_mode" in hpi_config.backend_config:
|
253
|
+
if hpi_config.backend_config["run_mode"].startswith("mkldnn"):
|
254
|
+
requested_base_pseudo_backend = "paddle_mkldnn"
|
255
|
+
elif hpi_config.backend_config["run_mode"].startswith("trt"):
|
256
|
+
requested_base_pseudo_backend = "paddle_tensorrt"
|
257
|
+
if requested_base_pseudo_backend:
|
258
|
+
for pb in pseudo_backends:
|
259
|
+
if pb.startswith(requested_base_pseudo_backend):
|
260
|
+
break
|
261
|
+
else:
|
262
|
+
return None, "Unsupported backend configuration."
|
263
|
+
pseudo_backend = pseudo_backends[0]
|
236
264
|
|
237
265
|
suggested_backend_config = {}
|
238
266
|
if suggested_backend == "paddle":
|
@@ -1992,7 +1992,6 @@
|
|
1992
1992
|
"onnxruntime"
|
1993
1993
|
],
|
1994
1994
|
"PP-OCRv4_server_seal_det": [
|
1995
|
-
"paddle_tensorrt",
|
1996
1995
|
"tensorrt",
|
1997
1996
|
"onnxruntime",
|
1998
1997
|
"paddle"
|
@@ -2094,7 +2093,6 @@
|
|
2094
2093
|
"onnxruntime"
|
2095
2094
|
],
|
2096
2095
|
"PP-OCRv4_server_det": [
|
2097
|
-
"paddle_tensorrt_fp16",
|
2098
2096
|
"tensorrt",
|
2099
2097
|
"onnxruntime",
|
2100
2098
|
"paddle"
|
@@ -52,6 +52,8 @@ class ReaderType(enum.Enum):
|
|
52
52
|
TS = 5
|
53
53
|
PDF = 6
|
54
54
|
YAML = 8
|
55
|
+
MARKDOWN = 9
|
56
|
+
TXT = 10
|
55
57
|
|
56
58
|
|
57
59
|
class _BaseReader(object):
|
@@ -206,6 +208,41 @@ class YAMLReader(_BaseReader):
|
|
206
208
|
return ReaderType.YAML
|
207
209
|
|
208
210
|
|
211
|
+
class MarkDownReader(_BaseReader):
|
212
|
+
|
213
|
+
def __init__(self, backend="Markdown", **bk_args):
|
214
|
+
super().__init__(backend, **bk_args)
|
215
|
+
|
216
|
+
def read(self, in_path):
|
217
|
+
return self._backend.read_file(str(in_path))
|
218
|
+
|
219
|
+
def _init_backend(self, bk_type, bk_args):
|
220
|
+
if bk_type == "Markdown":
|
221
|
+
return TXTReaderBackend(**bk_args)
|
222
|
+
else:
|
223
|
+
raise ValueError("Unsupported backend type")
|
224
|
+
|
225
|
+
def get_type(self):
|
226
|
+
return ReaderType.MARKDOWN
|
227
|
+
|
228
|
+
|
229
|
+
class TXTReader(_BaseReader):
|
230
|
+
"""TXTReader"""
|
231
|
+
|
232
|
+
def __init__(self, backend="txt", **bk_args):
|
233
|
+
super().__init__(backend, **bk_args)
|
234
|
+
|
235
|
+
def read(self, in_path):
|
236
|
+
return self._backend.read_file(str(in_path))
|
237
|
+
|
238
|
+
def _init_backend(self, bk_type, bk_args):
|
239
|
+
if bk_type == "txt":
|
240
|
+
return TXTReaderBackend(**bk_args)
|
241
|
+
|
242
|
+
def get_type(self):
|
243
|
+
return ReaderType.TXT
|
244
|
+
|
245
|
+
|
209
246
|
class _BaseReaderBackend(object):
|
210
247
|
"""_BaseReaderBackend"""
|
211
248
|
|
@@ -261,6 +298,15 @@ class PDFReaderBackend(_BaseReaderBackend):
|
|
261
298
|
yield img_cv
|
262
299
|
|
263
300
|
|
301
|
+
class TXTReaderBackend(_BaseReaderBackend):
|
302
|
+
"""TXTReaderBackend"""
|
303
|
+
|
304
|
+
def read_file(self, in_path):
|
305
|
+
with open(in_path, "r") as f:
|
306
|
+
data = f.read()
|
307
|
+
return data
|
308
|
+
|
309
|
+
|
264
310
|
class _VideoReaderBackend(_BaseReaderBackend):
|
265
311
|
"""_VideoReaderBackend"""
|
266
312
|
|
@@ -359,7 +359,11 @@ PP-LCNet_x1_0_vehicle_attribute_infer.tar",
|
|
359
359
|
PP-OCRv5_server_rec_infer.tar",
|
360
360
|
"PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/\
|
361
361
|
PP-OCRv5_mobile_rec_infer.tar",
|
362
|
+
"eslav_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/\
|
363
|
+
eslav_PP-OCRv5_mobile_rec_infer.tar",
|
362
364
|
"PP-DocBee2-3B": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-DocBee2-3B_infer.tar",
|
365
|
+
"latin_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/latin_PP-OCRv5_mobile_rec_infer.tar",
|
366
|
+
"korean_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/korean_PP-OCRv5_mobile_rec_infer.tar",
|
363
367
|
}
|
364
368
|
|
365
369
|
|
@@ -384,6 +388,7 @@ HUGGINGFACE_MODELS = [
|
|
384
388
|
"PicoDet-S_layout_17cls",
|
385
389
|
"PicoDet-S_layout_3cls",
|
386
390
|
"PP-DocBee2-3B",
|
391
|
+
"PP-Chart2Table",
|
387
392
|
"PP-DocBee-2B",
|
388
393
|
"PP-DocBee-7B",
|
389
394
|
"PP-DocBlockLayout",
|
@@ -396,8 +401,10 @@ HUGGINGFACE_MODELS = [
|
|
396
401
|
"PP-FormulaNet_plus-M",
|
397
402
|
"PP-FormulaNet_plus-S",
|
398
403
|
"PP-FormulaNet-S",
|
404
|
+
"PP-LCNet_x0_25_textline_ori",
|
399
405
|
"PP-LCNet_x1_0_doc_ori",
|
400
406
|
"PP-LCNet_x1_0_table_cls",
|
407
|
+
"PP-LCNet_x1_0_textline_ori",
|
401
408
|
"PP-OCRv3_mobile_det",
|
402
409
|
"PP-OCRv3_mobile_rec",
|
403
410
|
"PP-OCRv3_server_det",
|