paddlex 3.0.0rc1__py3-none-any.whl → 3.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/__init__.py +1 -1
- paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
- paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
- paddlex/configs/pipelines/OCR.yaml +7 -6
- paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
- paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
- paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
- paddlex/configs/pipelines/doc_understanding.yaml +1 -1
- paddlex/configs/pipelines/formula_recognition.yaml +2 -2
- paddlex/configs/pipelines/layout_parsing.yaml +3 -2
- paddlex/configs/pipelines/seal_recognition.yaml +1 -0
- paddlex/configs/pipelines/table_recognition.yaml +2 -1
- paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
- paddlex/hpip_links.html +20 -20
- paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
- paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
- paddlex/inference/common/result/mixin.py +19 -12
- paddlex/inference/models/base/predictor/base_predictor.py +2 -8
- paddlex/inference/models/common/static_infer.py +29 -73
- paddlex/inference/models/common/tokenizer/__init__.py +2 -0
- paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
- paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
- paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
- paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
- paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
- paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
- paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
- paddlex/inference/models/common/tokenizer/vocab.py +7 -7
- paddlex/inference/models/common/ts/funcs.py +19 -8
- paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
- paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
- paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
- paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
- paddlex/inference/models/common/vlm/generation/utils.py +1 -1
- paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
- paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
- paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
- paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
- paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
- paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
- paddlex/inference/models/doc_vlm/predictor.py +79 -24
- paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
- paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
- paddlex/inference/models/doc_vlm/processors/common.py +189 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
- paddlex/inference/models/formula_recognition/predictor.py +8 -2
- paddlex/inference/models/formula_recognition/processors.py +90 -77
- paddlex/inference/models/formula_recognition/result.py +28 -27
- paddlex/inference/models/image_feature/processors.py +3 -4
- paddlex/inference/models/keypoint_detection/predictor.py +3 -0
- paddlex/inference/models/object_detection/predictor.py +2 -0
- paddlex/inference/models/object_detection/processors.py +28 -3
- paddlex/inference/models/object_detection/utils.py +2 -0
- paddlex/inference/models/table_structure_recognition/result.py +0 -10
- paddlex/inference/models/text_detection/predictor.py +8 -0
- paddlex/inference/models/text_detection/processors.py +44 -10
- paddlex/inference/models/text_detection/result.py +0 -10
- paddlex/inference/models/text_recognition/result.py +1 -1
- paddlex/inference/pipelines/__init__.py +9 -5
- paddlex/inference/pipelines/_parallel.py +172 -0
- paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
- paddlex/inference/pipelines/base.py +14 -4
- paddlex/inference/pipelines/components/faisser.py +1 -1
- paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
- paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
- paddlex/inference/pipelines/formula_recognition/result.py +1 -11
- paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
- paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
- paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +832 -260
- paddlex/inference/pipelines/layout_parsing/result.py +4 -17
- paddlex/inference/pipelines/layout_parsing/result_v2.py +259 -245
- paddlex/inference/pipelines/layout_parsing/setting.py +88 -0
- paddlex/inference/pipelines/layout_parsing/utils.py +391 -2028
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1199 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +615 -0
- paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
- paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/ocr/pipeline.py +127 -70
- paddlex/inference/pipelines/ocr/result.py +21 -18
- paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +6 -6
- paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
- paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
- paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
- paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
- paddlex/inference/pipelines/table_recognition/result.py +1 -1
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
- paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
- paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
- paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
- paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
- paddlex/inference/serving/basic_serving/_app.py +46 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
- paddlex/inference/serving/infra/utils.py +20 -22
- paddlex/inference/serving/schemas/formula_recognition.py +1 -1
- paddlex/inference/serving/schemas/layout_parsing.py +1 -2
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
- paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
- paddlex/inference/serving/schemas/seal_recognition.py +1 -1
- paddlex/inference/serving/schemas/table_recognition.py +2 -6
- paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
- paddlex/inference/utils/hpi.py +30 -16
- paddlex/inference/utils/hpi_model_info_collection.json +666 -162
- paddlex/inference/utils/io/readers.py +12 -12
- paddlex/inference/utils/misc.py +20 -0
- paddlex/inference/utils/mkldnn_blocklist.py +59 -0
- paddlex/inference/utils/official_models.py +140 -5
- paddlex/inference/utils/pp_option.py +74 -9
- paddlex/model.py +2 -2
- paddlex/modules/__init__.py +1 -1
- paddlex/modules/anomaly_detection/evaluator.py +2 -2
- paddlex/modules/base/__init__.py +1 -1
- paddlex/modules/base/evaluator.py +5 -5
- paddlex/modules/base/trainer.py +1 -1
- paddlex/modules/doc_vlm/dataset_checker.py +2 -2
- paddlex/modules/doc_vlm/evaluator.py +2 -2
- paddlex/modules/doc_vlm/exportor.py +2 -2
- paddlex/modules/doc_vlm/model_list.py +1 -1
- paddlex/modules/doc_vlm/trainer.py +2 -2
- paddlex/modules/face_recognition/evaluator.py +2 -2
- paddlex/modules/formula_recognition/evaluator.py +5 -2
- paddlex/modules/formula_recognition/model_list.py +3 -0
- paddlex/modules/formula_recognition/trainer.py +3 -0
- paddlex/modules/general_recognition/evaluator.py +1 -1
- paddlex/modules/image_classification/evaluator.py +2 -2
- paddlex/modules/image_classification/model_list.py +1 -0
- paddlex/modules/instance_segmentation/evaluator.py +1 -1
- paddlex/modules/keypoint_detection/evaluator.py +1 -1
- paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
- paddlex/modules/multilabel_classification/evaluator.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
- paddlex/modules/object_detection/evaluator.py +2 -2
- paddlex/modules/object_detection/model_list.py +2 -0
- paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
- paddlex/modules/semantic_segmentation/evaluator.py +2 -2
- paddlex/modules/table_recognition/evaluator.py +2 -2
- paddlex/modules/text_detection/evaluator.py +2 -2
- paddlex/modules/text_detection/model_list.py +2 -0
- paddlex/modules/text_recognition/evaluator.py +2 -2
- paddlex/modules/text_recognition/model_list.py +2 -0
- paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/ts_classification/evaluator.py +2 -2
- paddlex/modules/ts_forecast/evaluator.py +2 -2
- paddlex/modules/video_classification/evaluator.py +2 -2
- paddlex/modules/video_detection/evaluator.py +2 -2
- paddlex/ops/__init__.py +8 -5
- paddlex/paddlex_cli.py +19 -13
- paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
- paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
- paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
- paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
- paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
- paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
- paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
- paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
- paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
- paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
- paddlex/repo_apis/base/config.py +1 -1
- paddlex/repo_manager/core.py +3 -3
- paddlex/repo_manager/meta.py +6 -2
- paddlex/repo_manager/repo.py +17 -16
- paddlex/utils/custom_device_list.py +26 -2
- paddlex/utils/deps.py +3 -3
- paddlex/utils/device.py +5 -13
- paddlex/utils/env.py +4 -0
- paddlex/utils/flags.py +11 -4
- paddlex/utils/fonts/__init__.py +34 -4
- paddlex/utils/misc.py +1 -1
- paddlex/utils/subclass_register.py +2 -2
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/METADATA +349 -208
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/RECORD +240 -211
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/WHEEL +1 -1
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/entry_points.txt +1 -0
- {paddlex-3.0.0rc1.dist-info/licenses → paddlex-3.0.2.dist-info}/LICENSE +0 -0
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/top_level.txt +0 -0
@@ -23,17 +23,15 @@ from ...common.reader import ReadImage
|
|
23
23
|
from ...models.object_detection.result import DetResult
|
24
24
|
from ...utils.hpi import HPIConfig
|
25
25
|
from ...utils.pp_option import PaddlePredictorOption
|
26
|
+
from .._parallel import AutoParallelImageSimpleInferencePipeline
|
26
27
|
from ..base import BasePipeline
|
27
28
|
from ..components import CropByBoxes
|
28
29
|
from .result import SealRecognitionResult
|
29
30
|
|
30
31
|
|
31
|
-
|
32
|
-
class SealRecognitionPipeline(BasePipeline):
|
32
|
+
class _SealRecognitionPipeline(BasePipeline):
|
33
33
|
"""Seal Recognition Pipeline"""
|
34
34
|
|
35
|
-
entities = ["seal_recognition"]
|
36
|
-
|
37
35
|
def __init__(
|
38
36
|
self,
|
39
37
|
config: Dict,
|
@@ -49,9 +47,9 @@ class SealRecognitionPipeline(BasePipeline):
|
|
49
47
|
device (str, optional): Device to run the predictions on. Defaults to None.
|
50
48
|
pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
|
51
49
|
use_hpip (bool, optional): Whether to use the high-performance
|
52
|
-
inference plugin (HPIP). Defaults to False.
|
50
|
+
inference plugin (HPIP) by default. Defaults to False.
|
53
51
|
hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
|
54
|
-
The high-performance inference configuration dictionary.
|
52
|
+
The default high-performance inference configuration dictionary.
|
55
53
|
Defaults to None.
|
56
54
|
"""
|
57
55
|
|
@@ -104,7 +102,7 @@ class SealRecognitionPipeline(BasePipeline):
|
|
104
102
|
|
105
103
|
self._crop_by_boxes = CropByBoxes()
|
106
104
|
|
107
|
-
self.batch_sampler = ImageBatchSampler(batch_size=1)
|
105
|
+
self.batch_sampler = ImageBatchSampler(batch_size=config.get("batch_size", 1))
|
108
106
|
|
109
107
|
self.img_reader = ReadImage(format="BGR")
|
110
108
|
|
@@ -180,7 +178,7 @@ class SealRecognitionPipeline(BasePipeline):
|
|
180
178
|
use_doc_orientation_classify: Optional[bool] = None,
|
181
179
|
use_doc_unwarping: Optional[bool] = None,
|
182
180
|
use_layout_detection: Optional[bool] = None,
|
183
|
-
layout_det_res: Optional[DetResult] = None,
|
181
|
+
layout_det_res: Optional[Union[DetResult, List[DetResult]]] = None,
|
184
182
|
layout_threshold: Optional[Union[float, dict]] = None,
|
185
183
|
layout_nms: Optional[bool] = None,
|
186
184
|
layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None,
|
@@ -201,29 +199,38 @@ class SealRecognitionPipeline(BasePipeline):
|
|
201
199
|
if not self.check_model_settings_valid(model_settings, layout_det_res):
|
202
200
|
yield {"error": "the input params for model settings are invalid!"}
|
203
201
|
|
204
|
-
|
205
|
-
|
202
|
+
external_layout_det_results = layout_det_res
|
203
|
+
if external_layout_det_results is not None:
|
204
|
+
if not isinstance(external_layout_det_results, list):
|
205
|
+
external_layout_det_results = [external_layout_det_results]
|
206
|
+
external_layout_det_results = iter(external_layout_det_results)
|
207
|
+
|
208
|
+
for _, batch_data in enumerate(self.batch_sampler(input)):
|
209
|
+
image_arrays = self.img_reader(batch_data.instances)
|
206
210
|
|
207
211
|
if model_settings["use_doc_preprocessor"]:
|
208
|
-
|
212
|
+
doc_preprocessor_results = list(
|
209
213
|
self.doc_preprocessor_pipeline(
|
210
|
-
|
214
|
+
image_arrays,
|
211
215
|
use_doc_orientation_classify=use_doc_orientation_classify,
|
212
216
|
use_doc_unwarping=use_doc_unwarping,
|
213
217
|
)
|
214
218
|
)
|
215
219
|
else:
|
216
|
-
|
220
|
+
doc_preprocessor_results = [{"output_img": arr} for arr in image_arrays]
|
217
221
|
|
218
|
-
|
222
|
+
doc_preprocessor_images = [
|
223
|
+
item["output_img"] for item in doc_preprocessor_results
|
224
|
+
]
|
219
225
|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
226
|
+
if (
|
227
|
+
not model_settings["use_layout_detection"]
|
228
|
+
and external_layout_det_results is None
|
229
|
+
):
|
230
|
+
layout_det_results = [{} for _ in doc_preprocessor_images]
|
231
|
+
flat_seal_results = list(
|
225
232
|
self.seal_ocr_pipeline(
|
226
|
-
|
233
|
+
doc_preprocessor_images,
|
227
234
|
text_det_limit_side_len=seal_det_limit_side_len,
|
228
235
|
text_det_limit_type=seal_det_limit_type,
|
229
236
|
text_det_thresh=seal_det_thresh,
|
@@ -232,48 +239,97 @@ class SealRecognitionPipeline(BasePipeline):
|
|
232
239
|
text_rec_score_thresh=seal_rec_score_thresh,
|
233
240
|
)
|
234
241
|
)
|
235
|
-
|
236
|
-
|
237
|
-
|
242
|
+
for seal_res in flat_seal_results:
|
243
|
+
seal_res["seal_region_id"] = 1
|
244
|
+
seal_results = [[item] for item in flat_seal_results]
|
238
245
|
else:
|
239
246
|
if model_settings["use_layout_detection"]:
|
240
|
-
|
247
|
+
layout_det_results = list(
|
241
248
|
self.layout_det_model(
|
242
|
-
|
249
|
+
doc_preprocessor_images,
|
243
250
|
threshold=layout_threshold,
|
244
251
|
layout_nms=layout_nms,
|
245
252
|
layout_unclip_ratio=layout_unclip_ratio,
|
246
253
|
layout_merge_bboxes_mode=layout_merge_bboxes_mode,
|
247
254
|
)
|
248
255
|
)
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
256
|
+
else:
|
257
|
+
layout_det_results = []
|
258
|
+
for _ in doc_preprocessor_images:
|
259
|
+
try:
|
260
|
+
layout_det_res = next(external_layout_det_results)
|
261
|
+
except StopIteration:
|
262
|
+
raise ValueError("No more layout det results")
|
263
|
+
layout_det_results.append(layout_det_res)
|
264
|
+
|
265
|
+
cropped_imgs = []
|
266
|
+
chunk_indices = [0]
|
267
|
+
for doc_preprocessor_image, layout_det_res in zip(
|
268
|
+
doc_preprocessor_images, layout_det_results
|
269
|
+
):
|
270
|
+
for box_info in layout_det_res["boxes"]:
|
271
|
+
if box_info["label"].lower() in ["seal"]:
|
272
|
+
crop_img_info = self._crop_by_boxes(
|
273
|
+
doc_preprocessor_image, [box_info]
|
265
274
|
)
|
266
|
-
|
267
|
-
|
268
|
-
|
275
|
+
crop_img_info = crop_img_info[0]
|
276
|
+
cropped_imgs.append(crop_img_info["img"])
|
277
|
+
chunk_indices.append(len(cropped_imgs))
|
278
|
+
|
279
|
+
flat_seal_results = list(
|
280
|
+
self.seal_ocr_pipeline(
|
281
|
+
cropped_imgs,
|
282
|
+
text_det_limit_side_len=seal_det_limit_side_len,
|
283
|
+
text_det_limit_type=seal_det_limit_type,
|
284
|
+
text_det_thresh=seal_det_thresh,
|
285
|
+
text_det_box_thresh=seal_det_box_thresh,
|
286
|
+
text_det_unclip_ratio=seal_det_unclip_ratio,
|
287
|
+
text_rec_score_thresh=seal_rec_score_thresh,
|
288
|
+
)
|
289
|
+
)
|
290
|
+
|
291
|
+
seal_results = [
|
292
|
+
flat_seal_results[i:j]
|
293
|
+
for i, j in zip(chunk_indices[:-1], chunk_indices[1:])
|
294
|
+
]
|
295
|
+
|
296
|
+
for seal_results_for_img in seal_results:
|
297
|
+
seal_region_id = 1
|
298
|
+
for seal_res in seal_results_for_img:
|
299
|
+
seal_res["seal_region_id"] = seal_region_id
|
269
300
|
seal_region_id += 1
|
270
301
|
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
302
|
+
for (
|
303
|
+
input_path,
|
304
|
+
page_index,
|
305
|
+
doc_preprocessor_res,
|
306
|
+
layout_det_res,
|
307
|
+
seal_results_for_img,
|
308
|
+
) in zip(
|
309
|
+
batch_data.input_paths,
|
310
|
+
batch_data.page_indexes,
|
311
|
+
doc_preprocessor_results,
|
312
|
+
layout_det_results,
|
313
|
+
seal_results,
|
314
|
+
):
|
315
|
+
single_img_res = {
|
316
|
+
"input_path": input_path,
|
317
|
+
"page_index": page_index,
|
318
|
+
"doc_preprocessor_res": doc_preprocessor_res,
|
319
|
+
"layout_det_res": layout_det_res,
|
320
|
+
"seal_res_list": seal_results_for_img,
|
321
|
+
"model_settings": model_settings,
|
322
|
+
}
|
323
|
+
yield SealRecognitionResult(single_img_res)
|
324
|
+
|
325
|
+
|
326
|
+
@pipeline_requires_extra("ocr")
|
327
|
+
class SealRecognitionPipeline(AutoParallelImageSimpleInferencePipeline):
|
328
|
+
entities = ["seal_recognition"]
|
329
|
+
|
330
|
+
@property
|
331
|
+
def _pipeline_cls(self):
|
332
|
+
return _SealRecognitionPipeline
|
333
|
+
|
334
|
+
def _get_batch_size(self, config):
|
335
|
+
return config.get("batch_size", 1)
|
@@ -20,15 +20,13 @@ from ....utils.deps import pipeline_requires_extra
|
|
20
20
|
from ...models.semantic_segmentation.result import SegResult
|
21
21
|
from ...utils.hpi import HPIConfig
|
22
22
|
from ...utils.pp_option import PaddlePredictorOption
|
23
|
+
from .._parallel import AutoParallelImageSimpleInferencePipeline
|
23
24
|
from ..base import BasePipeline
|
24
25
|
|
25
26
|
|
26
|
-
|
27
|
-
class SemanticSegmentationPipeline(BasePipeline):
|
27
|
+
class _SemanticSegmentationPipeline(BasePipeline):
|
28
28
|
"""Semantic Segmentation Pipeline"""
|
29
29
|
|
30
|
-
entities = "semantic_segmentation"
|
31
|
-
|
32
30
|
def __init__(
|
33
31
|
self,
|
34
32
|
config: Dict,
|
@@ -45,9 +43,9 @@ class SemanticSegmentationPipeline(BasePipeline):
|
|
45
43
|
device (str): The device to run the prediction on. Default is None.
|
46
44
|
pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
|
47
45
|
use_hpip (bool, optional): Whether to use the high-performance
|
48
|
-
inference plugin (HPIP). Defaults to False.
|
46
|
+
inference plugin (HPIP) by default. Defaults to False.
|
49
47
|
hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
|
50
|
-
The high-performance inference configuration dictionary.
|
48
|
+
The default high-performance inference configuration dictionary.
|
51
49
|
Defaults to None.
|
52
50
|
"""
|
53
51
|
super().__init__(
|
@@ -83,3 +81,15 @@ class SemanticSegmentationPipeline(BasePipeline):
|
|
83
81
|
SegResult: The predicted segmentation results.
|
84
82
|
"""
|
85
83
|
yield from self.semantic_segmentation_model(input, target_size=target_size)
|
84
|
+
|
85
|
+
|
86
|
+
@pipeline_requires_extra("cv")
|
87
|
+
class SemanticSegmentationPipeline(AutoParallelImageSimpleInferencePipeline):
|
88
|
+
entities = "semantic_segmentation"
|
89
|
+
|
90
|
+
@property
|
91
|
+
def _pipeline_cls(self):
|
92
|
+
return _SemanticSegmentationPipeline
|
93
|
+
|
94
|
+
def _get_batch_size(self, config):
|
95
|
+
return config["SubModules"]["SemanticSegmentation"].get("batch_size", 1)
|
@@ -20,15 +20,13 @@ from ....utils.deps import pipeline_requires_extra
|
|
20
20
|
from ...models.object_detection.result import DetResult
|
21
21
|
from ...utils.hpi import HPIConfig
|
22
22
|
from ...utils.pp_option import PaddlePredictorOption
|
23
|
+
from .._parallel import AutoParallelImageSimpleInferencePipeline
|
23
24
|
from ..base import BasePipeline
|
24
25
|
|
25
26
|
|
26
|
-
|
27
|
-
class SmallObjectDetectionPipeline(BasePipeline):
|
27
|
+
class _SmallObjectDetectionPipeline(BasePipeline):
|
28
28
|
"""Small Object Detection Pipeline"""
|
29
29
|
|
30
|
-
entities = "small_object_detection"
|
31
|
-
|
32
30
|
def __init__(
|
33
31
|
self,
|
34
32
|
config: Dict,
|
@@ -45,9 +43,9 @@ class SmallObjectDetectionPipeline(BasePipeline):
|
|
45
43
|
device (str): The device to run the prediction on. Default is None.
|
46
44
|
pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
|
47
45
|
use_hpip (bool, optional): Whether to use the high-performance
|
48
|
-
inference plugin (HPIP). Defaults to False.
|
46
|
+
inference plugin (HPIP) by default. Defaults to False.
|
49
47
|
hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
|
50
|
-
The high-performance inference configuration dictionary.
|
48
|
+
The default high-performance inference configuration dictionary.
|
51
49
|
Defaults to None.
|
52
50
|
"""
|
53
51
|
super().__init__(
|
@@ -83,3 +81,15 @@ class SmallObjectDetectionPipeline(BasePipeline):
|
|
83
81
|
DetResult: The predicted small object detection results.
|
84
82
|
"""
|
85
83
|
yield from self.small_object_detection_model(input, threshold=threshold)
|
84
|
+
|
85
|
+
|
86
|
+
@pipeline_requires_extra("cv")
|
87
|
+
class SmallObjectDetectionPipeline(AutoParallelImageSimpleInferencePipeline):
|
88
|
+
entities = "small_object_detection"
|
89
|
+
|
90
|
+
@property
|
91
|
+
def _pipeline_cls(self):
|
92
|
+
return _SmallObjectDetectionPipeline
|
93
|
+
|
94
|
+
def _get_batch_size(self, config):
|
95
|
+
return config["SubModules"]["SmallObjectDetection"].get("batch_size", 1)
|
@@ -24,6 +24,7 @@ from ...common.reader import ReadImage
|
|
24
24
|
from ...models.object_detection.result import DetResult
|
25
25
|
from ...utils.hpi import HPIConfig
|
26
26
|
from ...utils.pp_option import PaddlePredictorOption
|
27
|
+
from .._parallel import AutoParallelImageSimpleInferencePipeline
|
27
28
|
from ..base import BasePipeline
|
28
29
|
from ..components import CropByBoxes
|
29
30
|
from ..doc_preprocessor.result import DocPreprocessorResult
|
@@ -33,12 +34,9 @@ from .table_recognition_post_processing import get_table_recognition_res
|
|
33
34
|
from .utils import get_neighbor_boxes_idx
|
34
35
|
|
35
36
|
|
36
|
-
|
37
|
-
class TableRecognitionPipeline(BasePipeline):
|
37
|
+
class _TableRecognitionPipeline(BasePipeline):
|
38
38
|
"""Table Recognition Pipeline"""
|
39
39
|
|
40
|
-
entities = ["table_recognition"]
|
41
|
-
|
42
40
|
def __init__(
|
43
41
|
self,
|
44
42
|
config: Dict,
|
@@ -54,9 +52,9 @@ class TableRecognitionPipeline(BasePipeline):
|
|
54
52
|
device (str, optional): Device to run the predictions on. Defaults to None.
|
55
53
|
pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
|
56
54
|
use_hpip (bool, optional): Whether to use the high-performance
|
57
|
-
inference plugin (HPIP). Defaults to False.
|
55
|
+
inference plugin (HPIP) by default. Defaults to False.
|
58
56
|
hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
|
59
|
-
The high-performance inference configuration dictionary.
|
57
|
+
The default high-performance inference configuration dictionary.
|
60
58
|
Defaults to None.
|
61
59
|
"""
|
62
60
|
|
@@ -290,7 +288,7 @@ class TableRecognitionPipeline(BasePipeline):
|
|
290
288
|
image_array: np.ndarray,
|
291
289
|
overall_ocr_res: OCRResult,
|
292
290
|
table_box: list,
|
293
|
-
|
291
|
+
use_ocr_results_with_table_cells: bool = False,
|
294
292
|
flag_find_nei_text: bool = True,
|
295
293
|
cell_sort_by_y_projection: bool = False,
|
296
294
|
) -> SingleTableRecognitionResult:
|
@@ -302,17 +300,15 @@ class TableRecognitionPipeline(BasePipeline):
|
|
302
300
|
overall_ocr_res (OCRResult): Overall OCR result obtained after running the OCR pipeline.
|
303
301
|
The overall OCR results containing text recognition information.
|
304
302
|
table_box (list): The table box coordinates.
|
305
|
-
|
303
|
+
use_ocr_results_with_table_cells (bool): whether to use OCR results with cells.
|
306
304
|
flag_find_nei_text (bool): Whether to find neighboring text.
|
307
305
|
cell_sort_by_y_projection (bool): Whether to sort the matched OCR boxes by y-projection.
|
308
306
|
Returns:
|
309
307
|
SingleTableRecognitionResult: single table recognition result.
|
310
308
|
"""
|
311
309
|
table_structure_pred = next(self.table_structure_model(image_array))
|
312
|
-
if
|
313
|
-
table_cells_result =
|
314
|
-
map(lambda arr: arr.tolist(), table_structure_pred["bbox"])
|
315
|
-
)
|
310
|
+
if use_ocr_results_with_table_cells == True:
|
311
|
+
table_cells_result = table_structure_pred["bbox"]
|
316
312
|
table_cells_result = [
|
317
313
|
[rect[0], rect[1], rect[4], rect[5]] for rect in table_cells_result
|
318
314
|
]
|
@@ -326,7 +322,7 @@ class TableRecognitionPipeline(BasePipeline):
|
|
326
322
|
table_structure_pred,
|
327
323
|
overall_ocr_res,
|
328
324
|
cells_texts_list,
|
329
|
-
|
325
|
+
use_ocr_results_with_table_cells,
|
330
326
|
cell_sort_by_y_projection=cell_sort_by_y_projection,
|
331
327
|
)
|
332
328
|
neighbor_text = ""
|
@@ -355,7 +351,7 @@ class TableRecognitionPipeline(BasePipeline):
|
|
355
351
|
text_det_box_thresh: Optional[float] = None,
|
356
352
|
text_det_unclip_ratio: Optional[float] = None,
|
357
353
|
text_rec_score_thresh: Optional[float] = None,
|
358
|
-
|
354
|
+
use_ocr_results_with_table_cells: bool = False,
|
359
355
|
cell_sort_by_y_projection: Optional[bool] = None,
|
360
356
|
**kwargs,
|
361
357
|
) -> TableRecognitionResult:
|
@@ -371,7 +367,7 @@ class TableRecognitionPipeline(BasePipeline):
|
|
371
367
|
It will be used if it is not None and use_ocr_model is False.
|
372
368
|
layout_det_res (DetResult): The layout detection result.
|
373
369
|
It will be used if it is not None and use_layout_detection is False.
|
374
|
-
|
370
|
+
use_ocr_results_with_table_cells (bool): whether to use OCR results with cells.
|
375
371
|
cell_sort_by_y_projection (bool): Whether to sort the matched OCR boxes by y-projection.
|
376
372
|
**kwargs: Additional keyword arguments.
|
377
373
|
|
@@ -421,7 +417,7 @@ class TableRecognitionPipeline(BasePipeline):
|
|
421
417
|
text_rec_score_thresh=text_rec_score_thresh,
|
422
418
|
)
|
423
419
|
)
|
424
|
-
elif
|
420
|
+
elif use_ocr_results_with_table_cells == True:
|
425
421
|
assert self.general_ocr_config_bak != None
|
426
422
|
self.general_ocr_pipeline = self.create_pipeline(
|
427
423
|
self.general_ocr_config_bak
|
@@ -437,7 +433,7 @@ class TableRecognitionPipeline(BasePipeline):
|
|
437
433
|
doc_preprocessor_image,
|
438
434
|
overall_ocr_res,
|
439
435
|
table_box,
|
440
|
-
|
436
|
+
use_ocr_results_with_table_cells,
|
441
437
|
flag_find_nei_text=False,
|
442
438
|
cell_sort_by_y_projection=cell_sort_by_y_projection,
|
443
439
|
)
|
@@ -458,7 +454,7 @@ class TableRecognitionPipeline(BasePipeline):
|
|
458
454
|
crop_img_info["img"],
|
459
455
|
overall_ocr_res,
|
460
456
|
table_box,
|
461
|
-
|
457
|
+
use_ocr_results_with_table_cells,
|
462
458
|
cell_sort_by_y_projection=cell_sort_by_y_projection,
|
463
459
|
)
|
464
460
|
)
|
@@ -476,3 +472,15 @@ class TableRecognitionPipeline(BasePipeline):
|
|
476
472
|
"model_settings": model_settings,
|
477
473
|
}
|
478
474
|
yield TableRecognitionResult(single_img_res)
|
475
|
+
|
476
|
+
|
477
|
+
@pipeline_requires_extra("ocr")
|
478
|
+
class TableRecognitionPipeline(AutoParallelImageSimpleInferencePipeline):
|
479
|
+
entities = ["table_recognition"]
|
480
|
+
|
481
|
+
@property
|
482
|
+
def _pipeline_cls(self):
|
483
|
+
return _TableRecognitionPipeline
|
484
|
+
|
485
|
+
def _get_batch_size(self, config):
|
486
|
+
return 1
|