paddlex 3.0.0rc1__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +1 -1
  3. paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
  4. paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
  5. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
  6. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
  7. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
  8. paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
  9. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
  10. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
  11. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
  12. paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
  13. paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
  14. paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
  15. paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
  16. paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
  17. paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
  18. paddlex/configs/pipelines/OCR.yaml +7 -6
  19. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
  20. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
  21. paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
  22. paddlex/configs/pipelines/doc_understanding.yaml +1 -1
  23. paddlex/configs/pipelines/formula_recognition.yaml +2 -2
  24. paddlex/configs/pipelines/layout_parsing.yaml +3 -2
  25. paddlex/configs/pipelines/seal_recognition.yaml +1 -0
  26. paddlex/configs/pipelines/table_recognition.yaml +2 -1
  27. paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
  28. paddlex/hpip_links.html +20 -20
  29. paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
  30. paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
  31. paddlex/inference/common/result/mixin.py +19 -12
  32. paddlex/inference/models/base/predictor/base_predictor.py +2 -8
  33. paddlex/inference/models/common/static_infer.py +11 -59
  34. paddlex/inference/models/common/tokenizer/__init__.py +2 -0
  35. paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
  36. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
  37. paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
  38. paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
  39. paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
  40. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
  41. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
  42. paddlex/inference/models/common/tokenizer/vocab.py +7 -7
  43. paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
  44. paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
  45. paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
  46. paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
  47. paddlex/inference/models/common/vlm/generation/utils.py +1 -1
  48. paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
  49. paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
  50. paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
  51. paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
  52. paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
  53. paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
  54. paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
  55. paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
  56. paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
  57. paddlex/inference/models/doc_vlm/predictor.py +79 -24
  58. paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
  59. paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
  60. paddlex/inference/models/doc_vlm/processors/common.py +189 -0
  61. paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
  62. paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
  63. paddlex/inference/models/formula_recognition/predictor.py +7 -1
  64. paddlex/inference/models/formula_recognition/processors.py +92 -79
  65. paddlex/inference/models/formula_recognition/result.py +28 -27
  66. paddlex/inference/models/image_feature/processors.py +3 -4
  67. paddlex/inference/models/keypoint_detection/predictor.py +3 -0
  68. paddlex/inference/models/object_detection/predictor.py +2 -0
  69. paddlex/inference/models/object_detection/processors.py +28 -3
  70. paddlex/inference/models/object_detection/utils.py +2 -0
  71. paddlex/inference/models/table_structure_recognition/result.py +0 -10
  72. paddlex/inference/models/text_detection/predictor.py +8 -0
  73. paddlex/inference/models/text_detection/processors.py +44 -10
  74. paddlex/inference/models/text_detection/result.py +0 -10
  75. paddlex/inference/pipelines/__init__.py +9 -5
  76. paddlex/inference/pipelines/_parallel.py +172 -0
  77. paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
  78. paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
  79. paddlex/inference/pipelines/base.py +14 -4
  80. paddlex/inference/pipelines/components/faisser.py +1 -1
  81. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
  82. paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
  83. paddlex/inference/pipelines/formula_recognition/result.py +1 -11
  84. paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
  85. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
  86. paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
  87. paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
  88. paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
  89. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +893 -260
  90. paddlex/inference/pipelines/layout_parsing/result.py +4 -17
  91. paddlex/inference/pipelines/layout_parsing/result_v2.py +523 -245
  92. paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
  93. paddlex/inference/pipelines/layout_parsing/utils.py +565 -1998
  94. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
  95. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
  96. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
  97. paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
  98. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
  99. paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
  100. paddlex/inference/pipelines/ocr/pipeline.py +127 -70
  101. paddlex/inference/pipelines/ocr/result.py +19 -16
  102. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
  103. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
  104. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
  105. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
  106. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +5 -5
  107. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
  108. paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
  109. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
  110. paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
  111. paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
  112. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
  113. paddlex/inference/pipelines/table_recognition/result.py +1 -1
  114. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
  115. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
  116. paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
  117. paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
  118. paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
  119. paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
  120. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
  121. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
  122. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
  123. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
  124. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
  125. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
  126. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
  127. paddlex/inference/serving/infra/utils.py +20 -22
  128. paddlex/inference/serving/schemas/formula_recognition.py +1 -1
  129. paddlex/inference/serving/schemas/layout_parsing.py +1 -2
  130. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
  131. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
  132. paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
  133. paddlex/inference/serving/schemas/seal_recognition.py +1 -1
  134. paddlex/inference/serving/schemas/table_recognition.py +2 -6
  135. paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
  136. paddlex/inference/utils/hpi.py +8 -1
  137. paddlex/inference/utils/hpi_model_info_collection.json +81 -2
  138. paddlex/inference/utils/io/readers.py +12 -12
  139. paddlex/inference/utils/mkldnn_blocklist.py +25 -0
  140. paddlex/inference/utils/official_models.py +14 -0
  141. paddlex/inference/utils/pp_option.py +29 -8
  142. paddlex/model.py +2 -2
  143. paddlex/modules/__init__.py +1 -1
  144. paddlex/modules/anomaly_detection/evaluator.py +2 -2
  145. paddlex/modules/base/__init__.py +1 -1
  146. paddlex/modules/base/evaluator.py +5 -5
  147. paddlex/modules/base/trainer.py +1 -1
  148. paddlex/modules/doc_vlm/dataset_checker.py +2 -2
  149. paddlex/modules/doc_vlm/evaluator.py +2 -2
  150. paddlex/modules/doc_vlm/exportor.py +2 -2
  151. paddlex/modules/doc_vlm/model_list.py +1 -1
  152. paddlex/modules/doc_vlm/trainer.py +2 -2
  153. paddlex/modules/face_recognition/evaluator.py +2 -2
  154. paddlex/modules/formula_recognition/evaluator.py +5 -2
  155. paddlex/modules/formula_recognition/model_list.py +3 -0
  156. paddlex/modules/formula_recognition/trainer.py +3 -0
  157. paddlex/modules/general_recognition/evaluator.py +1 -1
  158. paddlex/modules/image_classification/evaluator.py +2 -2
  159. paddlex/modules/image_classification/model_list.py +1 -0
  160. paddlex/modules/instance_segmentation/evaluator.py +1 -1
  161. paddlex/modules/keypoint_detection/evaluator.py +1 -1
  162. paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
  163. paddlex/modules/multilabel_classification/evaluator.py +2 -2
  164. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
  165. paddlex/modules/object_detection/evaluator.py +2 -2
  166. paddlex/modules/object_detection/model_list.py +2 -0
  167. paddlex/modules/semantic_segmentation/evaluator.py +2 -2
  168. paddlex/modules/table_recognition/evaluator.py +2 -2
  169. paddlex/modules/text_detection/evaluator.py +2 -2
  170. paddlex/modules/text_detection/model_list.py +2 -0
  171. paddlex/modules/text_recognition/evaluator.py +2 -2
  172. paddlex/modules/text_recognition/model_list.py +2 -0
  173. paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
  174. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  175. paddlex/modules/ts_classification/evaluator.py +2 -2
  176. paddlex/modules/ts_forecast/evaluator.py +2 -2
  177. paddlex/modules/video_classification/evaluator.py +2 -2
  178. paddlex/modules/video_detection/evaluator.py +2 -2
  179. paddlex/ops/__init__.py +2 -2
  180. paddlex/paddlex_cli.py +19 -13
  181. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
  182. paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
  183. paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
  184. paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
  185. paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
  186. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
  187. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
  188. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
  189. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
  190. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
  191. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
  192. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
  193. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
  194. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
  195. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
  196. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
  197. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
  198. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
  199. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
  200. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
  201. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
  202. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
  203. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
  204. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
  205. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
  206. paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
  207. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
  208. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
  209. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
  210. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
  211. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
  212. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
  213. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
  214. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
  215. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
  216. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
  217. paddlex/repo_apis/base/config.py +1 -1
  218. paddlex/repo_manager/core.py +3 -3
  219. paddlex/repo_manager/meta.py +6 -2
  220. paddlex/repo_manager/repo.py +17 -16
  221. paddlex/utils/custom_device_list.py +26 -2
  222. paddlex/utils/deps.py +1 -1
  223. paddlex/utils/device.py +15 -8
  224. paddlex/utils/env.py +4 -0
  225. paddlex/utils/flags.py +2 -4
  226. paddlex/utils/fonts/__init__.py +34 -4
  227. paddlex/utils/misc.py +1 -1
  228. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/METADATA +52 -56
  229. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/RECORD +233 -206
  230. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
  231. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
  232. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/licenses/LICENSE +0 -0
  233. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
@@ -23,17 +23,15 @@ from ...common.reader import ReadImage
23
23
  from ...models.object_detection.result import DetResult
24
24
  from ...utils.hpi import HPIConfig
25
25
  from ...utils.pp_option import PaddlePredictorOption
26
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
26
27
  from ..base import BasePipeline
27
28
  from ..components import CropByBoxes
28
29
  from .result import SealRecognitionResult
29
30
 
30
31
 
31
- @pipeline_requires_extra("ocr")
32
- class SealRecognitionPipeline(BasePipeline):
32
+ class _SealRecognitionPipeline(BasePipeline):
33
33
  """Seal Recognition Pipeline"""
34
34
 
35
- entities = ["seal_recognition"]
36
-
37
35
  def __init__(
38
36
  self,
39
37
  config: Dict,
@@ -49,9 +47,9 @@ class SealRecognitionPipeline(BasePipeline):
49
47
  device (str, optional): Device to run the predictions on. Defaults to None.
50
48
  pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
51
49
  use_hpip (bool, optional): Whether to use the high-performance
52
- inference plugin (HPIP). Defaults to False.
50
+ inference plugin (HPIP) by default. Defaults to False.
53
51
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
54
- The high-performance inference configuration dictionary.
52
+ The default high-performance inference configuration dictionary.
55
53
  Defaults to None.
56
54
  """
57
55
 
@@ -104,7 +102,7 @@ class SealRecognitionPipeline(BasePipeline):
104
102
 
105
103
  self._crop_by_boxes = CropByBoxes()
106
104
 
107
- self.batch_sampler = ImageBatchSampler(batch_size=1)
105
+ self.batch_sampler = ImageBatchSampler(batch_size=config.get("batch_size", 1))
108
106
 
109
107
  self.img_reader = ReadImage(format="BGR")
110
108
 
@@ -180,7 +178,7 @@ class SealRecognitionPipeline(BasePipeline):
180
178
  use_doc_orientation_classify: Optional[bool] = None,
181
179
  use_doc_unwarping: Optional[bool] = None,
182
180
  use_layout_detection: Optional[bool] = None,
183
- layout_det_res: Optional[DetResult] = None,
181
+ layout_det_res: Optional[Union[DetResult, List[DetResult]]] = None,
184
182
  layout_threshold: Optional[Union[float, dict]] = None,
185
183
  layout_nms: Optional[bool] = None,
186
184
  layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None,
@@ -201,29 +199,38 @@ class SealRecognitionPipeline(BasePipeline):
201
199
  if not self.check_model_settings_valid(model_settings, layout_det_res):
202
200
  yield {"error": "the input params for model settings are invalid!"}
203
201
 
204
- for img_id, batch_data in enumerate(self.batch_sampler(input)):
205
- image_array = self.img_reader(batch_data.instances)[0]
202
+ external_layout_det_results = layout_det_res
203
+ if external_layout_det_results is not None:
204
+ if not isinstance(external_layout_det_results, list):
205
+ external_layout_det_results = [external_layout_det_results]
206
+ external_layout_det_results = iter(external_layout_det_results)
207
+
208
+ for _, batch_data in enumerate(self.batch_sampler(input)):
209
+ image_arrays = self.img_reader(batch_data.instances)
206
210
 
207
211
  if model_settings["use_doc_preprocessor"]:
208
- doc_preprocessor_res = next(
212
+ doc_preprocessor_results = list(
209
213
  self.doc_preprocessor_pipeline(
210
- image_array,
214
+ image_arrays,
211
215
  use_doc_orientation_classify=use_doc_orientation_classify,
212
216
  use_doc_unwarping=use_doc_unwarping,
213
217
  )
214
218
  )
215
219
  else:
216
- doc_preprocessor_res = {"output_img": image_array}
220
+ doc_preprocessor_results = [{"output_img": arr} for arr in image_arrays]
217
221
 
218
- doc_preprocessor_image = doc_preprocessor_res["output_img"]
222
+ doc_preprocessor_images = [
223
+ item["output_img"] for item in doc_preprocessor_results
224
+ ]
219
225
 
220
- seal_res_list = []
221
- seal_region_id = 1
222
- if not model_settings["use_layout_detection"] and layout_det_res is None:
223
- layout_det_res = {}
224
- seal_ocr_res = next(
226
+ if (
227
+ not model_settings["use_layout_detection"]
228
+ and external_layout_det_results is None
229
+ ):
230
+ layout_det_results = [{} for _ in doc_preprocessor_images]
231
+ flat_seal_results = list(
225
232
  self.seal_ocr_pipeline(
226
- doc_preprocessor_image,
233
+ doc_preprocessor_images,
227
234
  text_det_limit_side_len=seal_det_limit_side_len,
228
235
  text_det_limit_type=seal_det_limit_type,
229
236
  text_det_thresh=seal_det_thresh,
@@ -232,48 +239,97 @@ class SealRecognitionPipeline(BasePipeline):
232
239
  text_rec_score_thresh=seal_rec_score_thresh,
233
240
  )
234
241
  )
235
- seal_ocr_res["seal_region_id"] = seal_region_id
236
- seal_res_list.append(seal_ocr_res)
237
- seal_region_id += 1
242
+ for seal_res in flat_seal_results:
243
+ seal_res["seal_region_id"] = 1
244
+ seal_results = [[item] for item in flat_seal_results]
238
245
  else:
239
246
  if model_settings["use_layout_detection"]:
240
- layout_det_res = next(
247
+ layout_det_results = list(
241
248
  self.layout_det_model(
242
- doc_preprocessor_image,
249
+ doc_preprocessor_images,
243
250
  threshold=layout_threshold,
244
251
  layout_nms=layout_nms,
245
252
  layout_unclip_ratio=layout_unclip_ratio,
246
253
  layout_merge_bboxes_mode=layout_merge_bboxes_mode,
247
254
  )
248
255
  )
249
-
250
- for box_info in layout_det_res["boxes"]:
251
- if box_info["label"].lower() in ["seal"]:
252
- crop_img_info = self._crop_by_boxes(
253
- doc_preprocessor_image, [box_info]
254
- )
255
- crop_img_info = crop_img_info[0]
256
- seal_ocr_res = next(
257
- self.seal_ocr_pipeline(
258
- crop_img_info["img"],
259
- text_det_limit_side_len=seal_det_limit_side_len,
260
- text_det_limit_type=seal_det_limit_type,
261
- text_det_thresh=seal_det_thresh,
262
- text_det_box_thresh=seal_det_box_thresh,
263
- text_det_unclip_ratio=seal_det_unclip_ratio,
264
- text_rec_score_thresh=seal_rec_score_thresh,
256
+ else:
257
+ layout_det_results = []
258
+ for _ in doc_preprocessor_images:
259
+ try:
260
+ layout_det_res = next(external_layout_det_results)
261
+ except StopIteration:
262
+ raise ValueError("No more layout det results")
263
+ layout_det_results.append(layout_det_res)
264
+
265
+ cropped_imgs = []
266
+ chunk_indices = [0]
267
+ for doc_preprocessor_image, layout_det_res in zip(
268
+ doc_preprocessor_images, layout_det_results
269
+ ):
270
+ for box_info in layout_det_res["boxes"]:
271
+ if box_info["label"].lower() in ["seal"]:
272
+ crop_img_info = self._crop_by_boxes(
273
+ doc_preprocessor_image, [box_info]
265
274
  )
266
- )
267
- seal_ocr_res["seal_region_id"] = seal_region_id
268
- seal_res_list.append(seal_ocr_res)
275
+ crop_img_info = crop_img_info[0]
276
+ cropped_imgs.append(crop_img_info["img"])
277
+ chunk_indices.append(len(cropped_imgs))
278
+
279
+ flat_seal_results = list(
280
+ self.seal_ocr_pipeline(
281
+ cropped_imgs,
282
+ text_det_limit_side_len=seal_det_limit_side_len,
283
+ text_det_limit_type=seal_det_limit_type,
284
+ text_det_thresh=seal_det_thresh,
285
+ text_det_box_thresh=seal_det_box_thresh,
286
+ text_det_unclip_ratio=seal_det_unclip_ratio,
287
+ text_rec_score_thresh=seal_rec_score_thresh,
288
+ )
289
+ )
290
+
291
+ seal_results = [
292
+ flat_seal_results[i:j]
293
+ for i, j in zip(chunk_indices[:-1], chunk_indices[1:])
294
+ ]
295
+
296
+ for seal_results_for_img in seal_results:
297
+ seal_region_id = 1
298
+ for seal_res in seal_results_for_img:
299
+ seal_res["seal_region_id"] = seal_region_id
269
300
  seal_region_id += 1
270
301
 
271
- single_img_res = {
272
- "input_path": batch_data.input_paths[0],
273
- "page_index": batch_data.page_indexes[0],
274
- "doc_preprocessor_res": doc_preprocessor_res,
275
- "layout_det_res": layout_det_res,
276
- "seal_res_list": seal_res_list,
277
- "model_settings": model_settings,
278
- }
279
- yield SealRecognitionResult(single_img_res)
302
+ for (
303
+ input_path,
304
+ page_index,
305
+ doc_preprocessor_res,
306
+ layout_det_res,
307
+ seal_results_for_img,
308
+ ) in zip(
309
+ batch_data.input_paths,
310
+ batch_data.page_indexes,
311
+ doc_preprocessor_results,
312
+ layout_det_results,
313
+ seal_results,
314
+ ):
315
+ single_img_res = {
316
+ "input_path": input_path,
317
+ "page_index": page_index,
318
+ "doc_preprocessor_res": doc_preprocessor_res,
319
+ "layout_det_res": layout_det_res,
320
+ "seal_res_list": seal_results_for_img,
321
+ "model_settings": model_settings,
322
+ }
323
+ yield SealRecognitionResult(single_img_res)
324
+
325
+
326
+ @pipeline_requires_extra("ocr")
327
+ class SealRecognitionPipeline(AutoParallelImageSimpleInferencePipeline):
328
+ entities = ["seal_recognition"]
329
+
330
+ @property
331
+ def _pipeline_cls(self):
332
+ return _SealRecognitionPipeline
333
+
334
+ def _get_batch_size(self, config):
335
+ return config.get("batch_size", 1)
@@ -20,15 +20,13 @@ from ....utils.deps import pipeline_requires_extra
20
20
  from ...models.semantic_segmentation.result import SegResult
21
21
  from ...utils.hpi import HPIConfig
22
22
  from ...utils.pp_option import PaddlePredictorOption
23
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
23
24
  from ..base import BasePipeline
24
25
 
25
26
 
26
- @pipeline_requires_extra("cv")
27
- class SemanticSegmentationPipeline(BasePipeline):
27
+ class _SemanticSegmentationPipeline(BasePipeline):
28
28
  """Semantic Segmentation Pipeline"""
29
29
 
30
- entities = "semantic_segmentation"
31
-
32
30
  def __init__(
33
31
  self,
34
32
  config: Dict,
@@ -45,9 +43,9 @@ class SemanticSegmentationPipeline(BasePipeline):
45
43
  device (str): The device to run the prediction on. Default is None.
46
44
  pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
47
45
  use_hpip (bool, optional): Whether to use the high-performance
48
- inference plugin (HPIP). Defaults to False.
46
+ inference plugin (HPIP) by default. Defaults to False.
49
47
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
50
- The high-performance inference configuration dictionary.
48
+ The default high-performance inference configuration dictionary.
51
49
  Defaults to None.
52
50
  """
53
51
  super().__init__(
@@ -83,3 +81,15 @@ class SemanticSegmentationPipeline(BasePipeline):
83
81
  SegResult: The predicted segmentation results.
84
82
  """
85
83
  yield from self.semantic_segmentation_model(input, target_size=target_size)
84
+
85
+
86
+ @pipeline_requires_extra("cv")
87
+ class SemanticSegmentationPipeline(AutoParallelImageSimpleInferencePipeline):
88
+ entities = "semantic_segmentation"
89
+
90
+ @property
91
+ def _pipeline_cls(self):
92
+ return _SemanticSegmentationPipeline
93
+
94
+ def _get_batch_size(self, config):
95
+ return config["SubModules"]["SemanticSegmentation"].get("batch_size", 1)
@@ -20,15 +20,13 @@ from ....utils.deps import pipeline_requires_extra
20
20
  from ...models.object_detection.result import DetResult
21
21
  from ...utils.hpi import HPIConfig
22
22
  from ...utils.pp_option import PaddlePredictorOption
23
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
23
24
  from ..base import BasePipeline
24
25
 
25
26
 
26
- @pipeline_requires_extra("cv")
27
- class SmallObjectDetectionPipeline(BasePipeline):
27
+ class _SmallObjectDetectionPipeline(BasePipeline):
28
28
  """Small Object Detection Pipeline"""
29
29
 
30
- entities = "small_object_detection"
31
-
32
30
  def __init__(
33
31
  self,
34
32
  config: Dict,
@@ -45,9 +43,9 @@ class SmallObjectDetectionPipeline(BasePipeline):
45
43
  device (str): The device to run the prediction on. Default is None.
46
44
  pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
47
45
  use_hpip (bool, optional): Whether to use the high-performance
48
- inference plugin (HPIP). Defaults to False.
46
+ inference plugin (HPIP) by default. Defaults to False.
49
47
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
50
- The high-performance inference configuration dictionary.
48
+ The default high-performance inference configuration dictionary.
51
49
  Defaults to None.
52
50
  """
53
51
  super().__init__(
@@ -83,3 +81,15 @@ class SmallObjectDetectionPipeline(BasePipeline):
83
81
  DetResult: The predicted small object detection results.
84
82
  """
85
83
  yield from self.small_object_detection_model(input, threshold=threshold)
84
+
85
+
86
+ @pipeline_requires_extra("cv")
87
+ class SmallObjectDetectionPipeline(AutoParallelImageSimpleInferencePipeline):
88
+ entities = "small_object_detection"
89
+
90
+ @property
91
+ def _pipeline_cls(self):
92
+ return _SmallObjectDetectionPipeline
93
+
94
+ def _get_batch_size(self, config):
95
+ return config["SubModules"]["SmallObjectDetection"].get("batch_size", 1)
@@ -24,6 +24,7 @@ from ...common.reader import ReadImage
24
24
  from ...models.object_detection.result import DetResult
25
25
  from ...utils.hpi import HPIConfig
26
26
  from ...utils.pp_option import PaddlePredictorOption
27
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
27
28
  from ..base import BasePipeline
28
29
  from ..components import CropByBoxes
29
30
  from ..doc_preprocessor.result import DocPreprocessorResult
@@ -33,12 +34,9 @@ from .table_recognition_post_processing import get_table_recognition_res
33
34
  from .utils import get_neighbor_boxes_idx
34
35
 
35
36
 
36
- @pipeline_requires_extra("ocr")
37
- class TableRecognitionPipeline(BasePipeline):
37
+ class _TableRecognitionPipeline(BasePipeline):
38
38
  """Table Recognition Pipeline"""
39
39
 
40
- entities = ["table_recognition"]
41
-
42
40
  def __init__(
43
41
  self,
44
42
  config: Dict,
@@ -54,9 +52,9 @@ class TableRecognitionPipeline(BasePipeline):
54
52
  device (str, optional): Device to run the predictions on. Defaults to None.
55
53
  pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
56
54
  use_hpip (bool, optional): Whether to use the high-performance
57
- inference plugin (HPIP). Defaults to False.
55
+ inference plugin (HPIP) by default. Defaults to False.
58
56
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
59
- The high-performance inference configuration dictionary.
57
+ The default high-performance inference configuration dictionary.
60
58
  Defaults to None.
61
59
  """
62
60
 
@@ -290,7 +288,7 @@ class TableRecognitionPipeline(BasePipeline):
290
288
  image_array: np.ndarray,
291
289
  overall_ocr_res: OCRResult,
292
290
  table_box: list,
293
- use_table_cells_ocr_results: bool = False,
291
+ use_ocr_results_with_table_cells: bool = False,
294
292
  flag_find_nei_text: bool = True,
295
293
  cell_sort_by_y_projection: bool = False,
296
294
  ) -> SingleTableRecognitionResult:
@@ -302,17 +300,15 @@ class TableRecognitionPipeline(BasePipeline):
302
300
  overall_ocr_res (OCRResult): Overall OCR result obtained after running the OCR pipeline.
303
301
  The overall OCR results containing text recognition information.
304
302
  table_box (list): The table box coordinates.
305
- use_table_cells_ocr_results (bool): whether to use OCR results with cells.
303
+ use_ocr_results_with_table_cells (bool): whether to use OCR results with cells.
306
304
  flag_find_nei_text (bool): Whether to find neighboring text.
307
305
  cell_sort_by_y_projection (bool): Whether to sort the matched OCR boxes by y-projection.
308
306
  Returns:
309
307
  SingleTableRecognitionResult: single table recognition result.
310
308
  """
311
309
  table_structure_pred = next(self.table_structure_model(image_array))
312
- if use_table_cells_ocr_results == True:
313
- table_cells_result = list(
314
- map(lambda arr: arr.tolist(), table_structure_pred["bbox"])
315
- )
310
+ if use_ocr_results_with_table_cells == True:
311
+ table_cells_result = table_structure_pred["bbox"]
316
312
  table_cells_result = [
317
313
  [rect[0], rect[1], rect[4], rect[5]] for rect in table_cells_result
318
314
  ]
@@ -326,7 +322,7 @@ class TableRecognitionPipeline(BasePipeline):
326
322
  table_structure_pred,
327
323
  overall_ocr_res,
328
324
  cells_texts_list,
329
- use_table_cells_ocr_results,
325
+ use_ocr_results_with_table_cells,
330
326
  cell_sort_by_y_projection=cell_sort_by_y_projection,
331
327
  )
332
328
  neighbor_text = ""
@@ -355,7 +351,7 @@ class TableRecognitionPipeline(BasePipeline):
355
351
  text_det_box_thresh: Optional[float] = None,
356
352
  text_det_unclip_ratio: Optional[float] = None,
357
353
  text_rec_score_thresh: Optional[float] = None,
358
- use_table_cells_ocr_results: bool = False,
354
+ use_ocr_results_with_table_cells: bool = False,
359
355
  cell_sort_by_y_projection: Optional[bool] = None,
360
356
  **kwargs,
361
357
  ) -> TableRecognitionResult:
@@ -371,7 +367,7 @@ class TableRecognitionPipeline(BasePipeline):
371
367
  It will be used if it is not None and use_ocr_model is False.
372
368
  layout_det_res (DetResult): The layout detection result.
373
369
  It will be used if it is not None and use_layout_detection is False.
374
- use_table_cells_ocr_results (bool): whether to use OCR results with cells.
370
+ use_ocr_results_with_table_cells (bool): whether to use OCR results with cells.
375
371
  cell_sort_by_y_projection (bool): Whether to sort the matched OCR boxes by y-projection.
376
372
  **kwargs: Additional keyword arguments.
377
373
 
@@ -421,7 +417,7 @@ class TableRecognitionPipeline(BasePipeline):
421
417
  text_rec_score_thresh=text_rec_score_thresh,
422
418
  )
423
419
  )
424
- elif use_table_cells_ocr_results == True:
420
+ elif use_ocr_results_with_table_cells == True:
425
421
  assert self.general_ocr_config_bak != None
426
422
  self.general_ocr_pipeline = self.create_pipeline(
427
423
  self.general_ocr_config_bak
@@ -437,7 +433,7 @@ class TableRecognitionPipeline(BasePipeline):
437
433
  doc_preprocessor_image,
438
434
  overall_ocr_res,
439
435
  table_box,
440
- use_table_cells_ocr_results,
436
+ use_ocr_results_with_table_cells,
441
437
  flag_find_nei_text=False,
442
438
  cell_sort_by_y_projection=cell_sort_by_y_projection,
443
439
  )
@@ -458,7 +454,7 @@ class TableRecognitionPipeline(BasePipeline):
458
454
  crop_img_info["img"],
459
455
  overall_ocr_res,
460
456
  table_box,
461
- use_table_cells_ocr_results,
457
+ use_ocr_results_with_table_cells,
462
458
  cell_sort_by_y_projection=cell_sort_by_y_projection,
463
459
  )
464
460
  )
@@ -476,3 +472,15 @@ class TableRecognitionPipeline(BasePipeline):
476
472
  "model_settings": model_settings,
477
473
  }
478
474
  yield TableRecognitionResult(single_img_res)
475
+
476
+
477
+ @pipeline_requires_extra("ocr")
478
+ class TableRecognitionPipeline(AutoParallelImageSimpleInferencePipeline):
479
+ entities = ["table_recognition"]
480
+
481
+ @property
482
+ def _pipeline_cls(self):
483
+ return _TableRecognitionPipeline
484
+
485
+ def _get_batch_size(self, config):
486
+ return 1