paddlex 3.0.0rc1__py3-none-any.whl → 3.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240)
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +1 -1
  3. paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
  4. paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
  5. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
  6. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
  7. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
  8. paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
  9. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
  10. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
  11. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
  12. paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
  13. paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
  14. paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
  15. paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
  16. paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
  17. paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
  18. paddlex/configs/pipelines/OCR.yaml +7 -6
  19. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
  20. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
  21. paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
  22. paddlex/configs/pipelines/doc_understanding.yaml +1 -1
  23. paddlex/configs/pipelines/formula_recognition.yaml +2 -2
  24. paddlex/configs/pipelines/layout_parsing.yaml +3 -2
  25. paddlex/configs/pipelines/seal_recognition.yaml +1 -0
  26. paddlex/configs/pipelines/table_recognition.yaml +2 -1
  27. paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
  28. paddlex/hpip_links.html +20 -20
  29. paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
  30. paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
  31. paddlex/inference/common/result/mixin.py +19 -12
  32. paddlex/inference/models/base/predictor/base_predictor.py +2 -8
  33. paddlex/inference/models/common/static_infer.py +29 -73
  34. paddlex/inference/models/common/tokenizer/__init__.py +2 -0
  35. paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
  36. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
  37. paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
  38. paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
  39. paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
  40. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
  41. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
  42. paddlex/inference/models/common/tokenizer/vocab.py +7 -7
  43. paddlex/inference/models/common/ts/funcs.py +19 -8
  44. paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
  45. paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
  46. paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
  47. paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
  48. paddlex/inference/models/common/vlm/generation/utils.py +1 -1
  49. paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
  50. paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
  51. paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
  52. paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
  53. paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
  54. paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
  55. paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
  56. paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
  57. paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
  58. paddlex/inference/models/doc_vlm/predictor.py +79 -24
  59. paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
  60. paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
  61. paddlex/inference/models/doc_vlm/processors/common.py +189 -0
  62. paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
  63. paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
  64. paddlex/inference/models/formula_recognition/predictor.py +8 -2
  65. paddlex/inference/models/formula_recognition/processors.py +90 -77
  66. paddlex/inference/models/formula_recognition/result.py +28 -27
  67. paddlex/inference/models/image_feature/processors.py +3 -4
  68. paddlex/inference/models/keypoint_detection/predictor.py +3 -0
  69. paddlex/inference/models/object_detection/predictor.py +2 -0
  70. paddlex/inference/models/object_detection/processors.py +28 -3
  71. paddlex/inference/models/object_detection/utils.py +2 -0
  72. paddlex/inference/models/table_structure_recognition/result.py +0 -10
  73. paddlex/inference/models/text_detection/predictor.py +8 -0
  74. paddlex/inference/models/text_detection/processors.py +44 -10
  75. paddlex/inference/models/text_detection/result.py +0 -10
  76. paddlex/inference/models/text_recognition/result.py +1 -1
  77. paddlex/inference/pipelines/__init__.py +9 -5
  78. paddlex/inference/pipelines/_parallel.py +172 -0
  79. paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
  80. paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
  81. paddlex/inference/pipelines/base.py +14 -4
  82. paddlex/inference/pipelines/components/faisser.py +1 -1
  83. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
  84. paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
  85. paddlex/inference/pipelines/formula_recognition/result.py +1 -11
  86. paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
  87. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
  88. paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
  89. paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
  90. paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
  91. paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
  92. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +832 -260
  93. paddlex/inference/pipelines/layout_parsing/result.py +4 -17
  94. paddlex/inference/pipelines/layout_parsing/result_v2.py +259 -245
  95. paddlex/inference/pipelines/layout_parsing/setting.py +88 -0
  96. paddlex/inference/pipelines/layout_parsing/utils.py +391 -2028
  97. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
  98. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1199 -0
  99. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +615 -0
  100. paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
  101. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
  102. paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
  103. paddlex/inference/pipelines/ocr/pipeline.py +127 -70
  104. paddlex/inference/pipelines/ocr/result.py +21 -18
  105. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
  106. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
  107. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
  108. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
  109. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +6 -6
  110. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
  111. paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
  112. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
  113. paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
  114. paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
  115. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
  116. paddlex/inference/pipelines/table_recognition/result.py +1 -1
  117. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
  118. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
  119. paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
  120. paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
  121. paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
  122. paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
  123. paddlex/inference/serving/basic_serving/_app.py +46 -13
  124. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
  125. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
  126. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
  127. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
  128. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
  129. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
  130. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
  131. paddlex/inference/serving/infra/utils.py +20 -22
  132. paddlex/inference/serving/schemas/formula_recognition.py +1 -1
  133. paddlex/inference/serving/schemas/layout_parsing.py +1 -2
  134. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
  135. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
  136. paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
  137. paddlex/inference/serving/schemas/seal_recognition.py +1 -1
  138. paddlex/inference/serving/schemas/table_recognition.py +2 -6
  139. paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
  140. paddlex/inference/utils/hpi.py +30 -16
  141. paddlex/inference/utils/hpi_model_info_collection.json +666 -162
  142. paddlex/inference/utils/io/readers.py +12 -12
  143. paddlex/inference/utils/misc.py +20 -0
  144. paddlex/inference/utils/mkldnn_blocklist.py +59 -0
  145. paddlex/inference/utils/official_models.py +140 -5
  146. paddlex/inference/utils/pp_option.py +74 -9
  147. paddlex/model.py +2 -2
  148. paddlex/modules/__init__.py +1 -1
  149. paddlex/modules/anomaly_detection/evaluator.py +2 -2
  150. paddlex/modules/base/__init__.py +1 -1
  151. paddlex/modules/base/evaluator.py +5 -5
  152. paddlex/modules/base/trainer.py +1 -1
  153. paddlex/modules/doc_vlm/dataset_checker.py +2 -2
  154. paddlex/modules/doc_vlm/evaluator.py +2 -2
  155. paddlex/modules/doc_vlm/exportor.py +2 -2
  156. paddlex/modules/doc_vlm/model_list.py +1 -1
  157. paddlex/modules/doc_vlm/trainer.py +2 -2
  158. paddlex/modules/face_recognition/evaluator.py +2 -2
  159. paddlex/modules/formula_recognition/evaluator.py +5 -2
  160. paddlex/modules/formula_recognition/model_list.py +3 -0
  161. paddlex/modules/formula_recognition/trainer.py +3 -0
  162. paddlex/modules/general_recognition/evaluator.py +1 -1
  163. paddlex/modules/image_classification/evaluator.py +2 -2
  164. paddlex/modules/image_classification/model_list.py +1 -0
  165. paddlex/modules/instance_segmentation/evaluator.py +1 -1
  166. paddlex/modules/keypoint_detection/evaluator.py +1 -1
  167. paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
  168. paddlex/modules/multilabel_classification/evaluator.py +2 -2
  169. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
  170. paddlex/modules/object_detection/evaluator.py +2 -2
  171. paddlex/modules/object_detection/model_list.py +2 -0
  172. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
  173. paddlex/modules/semantic_segmentation/evaluator.py +2 -2
  174. paddlex/modules/table_recognition/evaluator.py +2 -2
  175. paddlex/modules/text_detection/evaluator.py +2 -2
  176. paddlex/modules/text_detection/model_list.py +2 -0
  177. paddlex/modules/text_recognition/evaluator.py +2 -2
  178. paddlex/modules/text_recognition/model_list.py +2 -0
  179. paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
  180. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  181. paddlex/modules/ts_classification/evaluator.py +2 -2
  182. paddlex/modules/ts_forecast/evaluator.py +2 -2
  183. paddlex/modules/video_classification/evaluator.py +2 -2
  184. paddlex/modules/video_detection/evaluator.py +2 -2
  185. paddlex/ops/__init__.py +8 -5
  186. paddlex/paddlex_cli.py +19 -13
  187. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
  188. paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
  189. paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
  190. paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
  191. paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
  192. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
  193. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
  194. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
  195. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
  196. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
  197. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
  198. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
  199. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
  200. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
  201. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
  202. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
  203. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
  204. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
  205. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
  206. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
  207. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
  208. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
  209. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
  210. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
  211. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
  212. paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
  213. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
  214. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
  215. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
  216. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
  217. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
  218. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
  219. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
  220. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
  221. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
  222. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
  223. paddlex/repo_apis/base/config.py +1 -1
  224. paddlex/repo_manager/core.py +3 -3
  225. paddlex/repo_manager/meta.py +6 -2
  226. paddlex/repo_manager/repo.py +17 -16
  227. paddlex/utils/custom_device_list.py +26 -2
  228. paddlex/utils/deps.py +3 -3
  229. paddlex/utils/device.py +5 -13
  230. paddlex/utils/env.py +4 -0
  231. paddlex/utils/flags.py +11 -4
  232. paddlex/utils/fonts/__init__.py +34 -4
  233. paddlex/utils/misc.py +1 -1
  234. paddlex/utils/subclass_register.py +2 -2
  235. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/METADATA +349 -208
  236. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/RECORD +240 -211
  237. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/WHEEL +1 -1
  238. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/entry_points.txt +1 -0
  239. {paddlex-3.0.0rc1.dist-info/licenses → paddlex-3.0.2.dist-info}/LICENSE +0 -0
  240. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/top_level.txt +0 -0
@@ -22,17 +22,15 @@ from ...common.batch_sampler import ImageBatchSampler
22
22
  from ...common.reader import ReadImage
23
23
  from ...utils.hpi import HPIConfig
24
24
  from ...utils.pp_option import PaddlePredictorOption
25
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
25
26
  from ..base import BasePipeline
26
27
  from ..components import rotate_image
27
28
  from .result import DocPreprocessorResult
28
29
 
29
30
 
30
- @pipeline_requires_extra("ocr")
31
- class DocPreprocessorPipeline(BasePipeline):
31
+ class _DocPreprocessorPipeline(BasePipeline):
32
32
  """Doc Preprocessor Pipeline"""
33
33
 
34
- entities = "doc_preprocessor"
35
-
36
34
  def __init__(
37
35
  self,
38
36
  config: Dict,
@@ -48,9 +46,9 @@ class DocPreprocessorPipeline(BasePipeline):
48
46
  device (str, optional): Device to run the predictions on. Defaults to None.
49
47
  pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
50
48
  use_hpip (bool, optional): Whether to use the high-performance
51
- inference plugin (HPIP). Defaults to False.
49
+ inference plugin (HPIP) by default. Defaults to False.
52
50
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
53
- The high-performance inference configuration dictionary.
51
+ The default high-performance inference configuration dictionary.
54
52
  Defaults to None.
55
53
  """
56
54
 
@@ -76,7 +74,7 @@ class DocPreprocessorPipeline(BasePipeline):
76
74
  )
77
75
  self.doc_unwarping_model = self.create_model(doc_unwarping_config)
78
76
 
79
- self.batch_sampler = ImageBatchSampler(batch_size=1)
77
+ self.batch_sampler = ImageBatchSampler(batch_size=config.get("batch_size", 1))
80
78
  self.img_reader = ReadImage(format="BGR")
81
79
 
82
80
  def check_model_settings_valid(self, model_settings: Dict) -> bool:
@@ -155,29 +153,57 @@ class DocPreprocessorPipeline(BasePipeline):
155
153
  if not self.check_model_settings_valid(model_settings):
156
154
  yield {"error": "the input params for model settings are invalid!"}
157
155
 
158
- for img_id, batch_data in enumerate(self.batch_sampler(input)):
159
- image_array = self.img_reader(batch_data.instances)[0]
156
+ for _, batch_data in enumerate(self.batch_sampler(input)):
157
+ image_arrays = self.img_reader(batch_data.instances)
160
158
 
161
159
  if model_settings["use_doc_orientation_classify"]:
162
- pred = next(self.doc_ori_classify_model(image_array))
163
- angle = int(pred["label_names"][0])
164
- rot_img = rotate_image(image_array, angle)
160
+ preds = list(self.doc_ori_classify_model(image_arrays))
161
+ angles = []
162
+ rot_imgs = []
163
+ for img, pred in zip(image_arrays, preds):
164
+ angle = int(pred["label_names"][0])
165
+ angles.append(angle)
166
+ rot_img = rotate_image(img, angle)
167
+ rot_imgs.append(rot_img)
165
168
  else:
166
- angle = -1
167
- rot_img = image_array
169
+ angles = [-1 for _ in range(len(image_arrays))]
170
+ rot_imgs = image_arrays
168
171
 
169
172
  if model_settings["use_doc_unwarping"]:
170
- output_img = next(self.doc_unwarping_model(rot_img))["doctr_img"]
173
+ output_imgs = [
174
+ item["doctr_img"][:, :, ::-1]
175
+ for item in self.doc_unwarping_model(rot_imgs)
176
+ ]
171
177
  else:
172
- output_img = rot_img
173
-
174
- single_img_res = {
175
- "input_path": batch_data.input_paths[0],
176
- "page_index": batch_data.page_indexes[0],
177
- "input_img": image_array,
178
- "model_settings": model_settings,
179
- "angle": angle,
180
- "rot_img": rot_img,
181
- "output_img": output_img,
182
- }
183
- yield DocPreprocessorResult(single_img_res)
178
+ output_imgs = rot_imgs
179
+
180
+ for input_path, page_index, image_array, angle, rot_img, output_img in zip(
181
+ batch_data.input_paths,
182
+ batch_data.page_indexes,
183
+ image_arrays,
184
+ angles,
185
+ rot_imgs,
186
+ output_imgs,
187
+ ):
188
+ single_img_res = {
189
+ "input_path": input_path,
190
+ "page_index": page_index,
191
+ "input_img": image_array,
192
+ "model_settings": model_settings,
193
+ "angle": angle,
194
+ "rot_img": rot_img,
195
+ "output_img": output_img,
196
+ }
197
+ yield DocPreprocessorResult(single_img_res)
198
+
199
+
200
+ @pipeline_requires_extra("ocr")
201
+ class DocPreprocessorPipeline(AutoParallelImageSimpleInferencePipeline):
202
+ entities = "doc_preprocessor"
203
+
204
+ @property
205
+ def _pipeline_cls(self):
206
+ return _DocPreprocessorPipeline
207
+
208
+ def _get_batch_size(self, config):
209
+ return config.get("batch_size", 1)
@@ -20,23 +20,18 @@ from ....utils import logging
20
20
  from ....utils.deps import pipeline_requires_extra
21
21
  from ...common.batch_sampler import ImageBatchSampler
22
22
  from ...common.reader import ReadImage
23
- from ...models.formula_recognition.result import (
24
- FormulaRecResult as SingleFormulaRecognitionResult,
25
- )
26
23
  from ...models.object_detection.result import DetResult
27
24
  from ...utils.hpi import HPIConfig
28
25
  from ...utils.pp_option import PaddlePredictorOption
26
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
29
27
  from ..base import BasePipeline
30
28
  from ..components import CropByBoxes
31
29
  from .result import FormulaRecognitionResult
32
30
 
33
31
 
34
- @pipeline_requires_extra("ocr")
35
- class FormulaRecognitionPipeline(BasePipeline):
32
+ class _FormulaRecognitionPipeline(BasePipeline):
36
33
  """Formula Recognition Pipeline"""
37
34
 
38
- entities = ["formula_recognition"]
39
-
40
35
  def __init__(
41
36
  self,
42
37
  config: Dict,
@@ -52,9 +47,9 @@ class FormulaRecognitionPipeline(BasePipeline):
52
47
  device (str, optional): Device to run the predictions on. Defaults to None.
53
48
  pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
54
49
  use_hpip (bool, optional): Whether to use the high-performance
55
- inference plugin (HPIP). Defaults to False.
50
+ inference plugin (HPIP) by default. Defaults to False.
56
51
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
57
- The high-performance inference configuration dictionary.
52
+ The default high-performance inference configuration dictionary.
58
53
  Defaults to None.
59
54
  """
60
55
 
@@ -110,7 +105,7 @@ class FormulaRecognitionPipeline(BasePipeline):
110
105
 
111
106
  self._crop_by_boxes = CropByBoxes()
112
107
 
113
- self.batch_sampler = ImageBatchSampler(batch_size=1)
108
+ self.batch_sampler = ImageBatchSampler(batch_size=config.get("batch_size", 1))
114
109
  self.img_reader = ReadImage(format="BGR")
115
110
 
116
111
  def get_model_settings(
@@ -147,14 +142,14 @@ class FormulaRecognitionPipeline(BasePipeline):
147
142
  )
148
143
 
149
144
  def check_model_settings_valid(
150
- self, model_settings: Dict, layout_det_res: DetResult
145
+ self, model_settings: Dict, layout_det_res: Union[DetResult, List[DetResult]]
151
146
  ) -> bool:
152
147
  """
153
148
  Check if the input parameters are valid based on the initialized models.
154
149
 
155
150
  Args:
156
151
  model_settings (Dict): A dictionary containing input parameters.
157
- layout_det_res (DetResult): The layout detection result.
152
+ layout_det_res (Union[DetResult, List[DetResult]]): The layout detection result(s).
158
153
  Returns:
159
154
  bool: True if all required models are initialized according to input parameters, False otherwise.
160
155
  """
@@ -180,32 +175,13 @@ class FormulaRecognitionPipeline(BasePipeline):
180
175
 
181
176
  return True
182
177
 
183
- def predict_single_formula_recognition_res(
184
- self,
185
- image_array: np.ndarray,
186
- ) -> SingleFormulaRecognitionResult:
187
- """
188
- Predict formula recognition results from an image array, layout detection results.
189
-
190
- Args:
191
- image_array (np.ndarray): The input image represented as a numpy array.
192
- formula_box (list): The formula box coordinates.
193
- flag_find_nei_text (bool): Whether to find neighboring text.
194
- Returns:
195
- SingleFormulaRecognitionResult: single formula recognition result.
196
- """
197
-
198
- formula_recognition_pred = next(self.formula_recognition_model(image_array))
199
-
200
- return formula_recognition_pred
201
-
202
178
  def predict(
203
179
  self,
204
180
  input: Union[str, List[str], np.ndarray, List[np.ndarray]],
205
181
  use_layout_detection: Optional[bool] = None,
206
182
  use_doc_orientation_classify: Optional[bool] = None,
207
183
  use_doc_unwarping: Optional[bool] = None,
208
- layout_det_res: Optional[DetResult] = None,
184
+ layout_det_res: Optional[Union[DetResult, List[DetResult]]] = None,
209
185
  layout_threshold: Optional[Union[float, dict]] = None,
210
186
  layout_nms: Optional[bool] = None,
211
187
  layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None,
@@ -220,14 +196,13 @@ class FormulaRecognitionPipeline(BasePipeline):
220
196
  use_layout_detection (Optional[bool]): Whether to use layout detection.
221
197
  use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
222
198
  use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
223
- layout_det_res (Optional[DetResult]): The layout detection result.
199
+ layout_det_res (Optional[Union[DetResult, List[DetResult]]]): The layout detection result(s).
224
200
  It will be used if it is not None and use_layout_detection is False.
225
201
  **kwargs: Additional keyword arguments.
226
202
 
227
203
  Returns:
228
204
  formulaRecognitionResult: The predicted formula recognition result.
229
205
  """
230
-
231
206
  model_settings = self.get_model_settings(
232
207
  use_doc_orientation_classify,
233
208
  use_doc_unwarping,
@@ -237,73 +212,136 @@ class FormulaRecognitionPipeline(BasePipeline):
237
212
  if not self.check_model_settings_valid(model_settings, layout_det_res):
238
213
  yield {"error": "the input params for model settings are invalid!"}
239
214
 
240
- for img_id, batch_data in enumerate(self.batch_sampler(input)):
241
- image_array = self.img_reader(batch_data.instances)[0]
215
+ external_layout_det_results = layout_det_res
216
+ if external_layout_det_results is not None:
217
+ if not isinstance(external_layout_det_results, list):
218
+ external_layout_det_results = [external_layout_det_results]
219
+ external_layout_det_results = iter(external_layout_det_results)
220
+
221
+ for _, batch_data in enumerate(self.batch_sampler(input)):
222
+ image_arrays = self.img_reader(batch_data.instances)
242
223
 
243
224
  if model_settings["use_doc_preprocessor"]:
244
- doc_preprocessor_res = next(
225
+ doc_preprocessor_results = list(
245
226
  self.doc_preprocessor_pipeline(
246
- image_array,
227
+ image_arrays,
247
228
  use_doc_orientation_classify=use_doc_orientation_classify,
248
229
  use_doc_unwarping=use_doc_unwarping,
249
230
  )
250
231
  )
251
232
  else:
252
- doc_preprocessor_res = {"output_img": image_array}
233
+ doc_preprocessor_results = [{"output_img": arr} for arr in image_arrays]
253
234
 
254
- doc_preprocessor_image = doc_preprocessor_res["output_img"]
235
+ doc_preprocessor_images = [
236
+ item["output_img"] for item in doc_preprocessor_results
237
+ ]
255
238
 
256
- formula_res_list = []
257
- formula_region_id = 1
239
+ formula_results = []
258
240
 
259
- if not model_settings["use_layout_detection"] and layout_det_res is None:
260
- layout_det_res = {}
261
- img_height, img_width = doc_preprocessor_image.shape[:2]
262
- single_formula_rec_res = self.predict_single_formula_recognition_res(
263
- doc_preprocessor_image,
241
+ if (
242
+ not model_settings["use_layout_detection"]
243
+ and external_layout_det_results is None
244
+ ):
245
+ layout_det_results = [{} for _ in doc_preprocessor_images]
246
+ formula_rec_results = list(
247
+ self.formula_recognition_model(doc_preprocessor_images)
264
248
  )
265
- single_formula_rec_res["formula_region_id"] = formula_region_id
266
- formula_res_list.append(single_formula_rec_res)
267
- formula_region_id += 1
249
+ for formula_rec_res in formula_rec_results:
250
+ formula_results_for_img = []
251
+ formula_rec_res["formula_region_id"] = 1
252
+ formula_results_for_img.append(formula_rec_res)
253
+ formula_results.append(formula_results_for_img)
268
254
  else:
269
255
  if model_settings["use_layout_detection"]:
270
- layout_det_res = next(
256
+ layout_det_results = list(
271
257
  self.layout_det_model(
272
- doc_preprocessor_image,
258
+ doc_preprocessor_images,
273
259
  threshold=layout_threshold,
274
260
  layout_nms=layout_nms,
275
261
  layout_unclip_ratio=layout_unclip_ratio,
276
262
  layout_merge_bboxes_mode=layout_merge_bboxes_mode,
277
263
  )
278
264
  )
279
- formula_crop_img = []
280
- for box_info in layout_det_res["boxes"]:
281
- if box_info["label"].lower() in ["formula"]:
282
- crop_img_info = self._crop_by_boxes(
283
- doc_preprocessor_image, [box_info]
284
- )
285
- crop_img_info = crop_img_info[0]
286
- formula_crop_img.append(crop_img_info["img"])
287
- single_formula_rec_res = {}
288
- single_formula_rec_res["formula_region_id"] = formula_region_id
289
- single_formula_rec_res["dt_polys"] = box_info["coordinate"]
290
- formula_res_list.append(single_formula_rec_res)
291
- formula_region_id += 1
292
- for idx, formula_rec_res in enumerate(
293
- self.formula_recognition_model(formula_crop_img)
265
+ else:
266
+ layout_det_results = []
267
+ for _ in doc_preprocessor_images:
268
+ try:
269
+ layout_det_res = next(external_layout_det_results)
270
+ except StopIteration:
271
+ raise ValueError("No more layout det results")
272
+ layout_det_results.append(layout_det_res)
273
+
274
+ formula_crop_imgs = []
275
+ formula_det_results = []
276
+ chunk_indices = [0]
277
+ for doc_preprocessor_image, layout_det_res in zip(
278
+ doc_preprocessor_images, layout_det_results
294
279
  ):
295
- formula_region_id = formula_res_list[idx]["formula_region_id"]
296
- dt_polys = formula_res_list[idx]["dt_polys"]
297
- formula_rec_res["formula_region_id"] = formula_region_id
298
- formula_rec_res["dt_polys"] = dt_polys
299
- formula_res_list[idx] = formula_rec_res
300
-
301
- single_img_res = {
302
- "input_path": batch_data.input_paths[0],
303
- "page_index": batch_data.page_indexes[0],
304
- "layout_det_res": layout_det_res,
305
- "doc_preprocessor_res": doc_preprocessor_res,
306
- "formula_res_list": formula_res_list,
307
- "model_settings": model_settings,
308
- }
309
- yield FormulaRecognitionResult(single_img_res)
280
+ formula_region_id = 1
281
+ for box_info in layout_det_res["boxes"]:
282
+ if box_info["label"].lower() in ["formula"]:
283
+ crop_img_info = self._crop_by_boxes(
284
+ doc_preprocessor_image, [box_info]
285
+ )
286
+ crop_img_info = crop_img_info[0]
287
+ formula_crop_imgs.append(crop_img_info["img"])
288
+ res = {}
289
+ res["formula_region_id"] = formula_region_id
290
+ res["dt_polys"] = box_info["coordinate"]
291
+ formula_det_results.append(res)
292
+ formula_region_id += 1
293
+ chunk_indices.append(len(formula_crop_imgs))
294
+
295
+ formula_rec_results = list(
296
+ self.formula_recognition_model(formula_crop_imgs)
297
+ )
298
+ for idx in range(len(chunk_indices) - 1):
299
+ formula_det_results_for_idx = formula_det_results[
300
+ chunk_indices[idx] : chunk_indices[idx + 1]
301
+ ]
302
+ formula_rec_results_for_idx = formula_rec_results[
303
+ chunk_indices[idx] : chunk_indices[idx + 1]
304
+ ]
305
+ for formula_det_res, formula_rec_res in zip(
306
+ formula_det_results_for_idx, formula_rec_results_for_idx
307
+ ):
308
+ formula_region_id = formula_det_res["formula_region_id"]
309
+ dt_polys = formula_det_res["dt_polys"]
310
+ formula_rec_res["formula_region_id"] = formula_region_id
311
+ formula_rec_res["dt_polys"] = dt_polys
312
+ formula_results.append(formula_rec_results_for_idx)
313
+
314
+ for (
315
+ input_path,
316
+ page_index,
317
+ layout_det_res,
318
+ doc_preprocessor_res,
319
+ formula_results_for_img,
320
+ ) in zip(
321
+ batch_data.input_paths,
322
+ batch_data.page_indexes,
323
+ layout_det_results,
324
+ doc_preprocessor_results,
325
+ formula_results,
326
+ ):
327
+ single_img_res = {
328
+ "input_path": input_path,
329
+ "page_index": page_index,
330
+ "layout_det_res": layout_det_res,
331
+ "doc_preprocessor_res": doc_preprocessor_res,
332
+ "formula_res_list": formula_results_for_img,
333
+ "model_settings": model_settings,
334
+ }
335
+ yield FormulaRecognitionResult(single_img_res)
336
+
337
+
338
+ @pipeline_requires_extra("ocr")
339
+ class FormulaRecognitionPipeline(AutoParallelImageSimpleInferencePipeline):
340
+ entities = ["formula_recognition"]
341
+
342
+ @property
343
+ def _pipeline_cls(self):
344
+ return _FormulaRecognitionPipeline
345
+
346
+ def _get_batch_size(self, config):
347
+ return config.get("batch_size", 1)
@@ -17,7 +17,6 @@ import os
17
17
  import random
18
18
  import subprocess
19
19
  import tempfile
20
- from pathlib import Path
21
20
  from typing import Dict, Tuple
22
21
 
23
22
  import numpy as np
@@ -45,15 +44,6 @@ if is_dep_available("opencv-contrib-python"):
45
44
  class FormulaRecognitionResult(BaseCVResult):
46
45
  """Formula Recognition Result"""
47
46
 
48
- def _get_input_fn(self):
49
- fn = super()._get_input_fn()
50
- if (page_idx := self["page_index"]) is not None:
51
- fp = Path(fn)
52
- stem, suffix = fp.stem, fp.suffix
53
- return f"{stem}_{page_idx}{suffix}"
54
- else:
55
- return fn
56
-
57
47
  def _to_img(self) -> Dict[str, Image.Image]:
58
48
  """
59
49
  Converts the internal data to a PIL Image with detection and recognition results.
@@ -61,7 +51,7 @@ class FormulaRecognitionResult(BaseCVResult):
61
51
  Returns:
62
52
  Dict[str, Image.Image]: An image with detection boxes, texts, and scores blended on it.
63
53
  """
64
- image = Image.fromarray(self["doc_preprocessor_res"]["output_img"])
54
+ image = Image.fromarray(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
65
55
  res_img_dict = {}
66
56
  model_settings = self["model_settings"]
67
57
  if model_settings["use_doc_preprocessor"]:
@@ -20,15 +20,13 @@ from ....utils.deps import pipeline_requires_extra
20
20
  from ...models.image_classification.result import TopkResult
21
21
  from ...utils.hpi import HPIConfig
22
22
  from ...utils.pp_option import PaddlePredictorOption
23
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
23
24
  from ..base import BasePipeline
24
25
 
25
26
 
26
- @pipeline_requires_extra("cv")
27
- class ImageClassificationPipeline(BasePipeline):
27
+ class _ImageClassificationPipeline(BasePipeline):
28
28
  """Image Classification Pipeline"""
29
29
 
30
- entities = "image_classification"
31
-
32
30
  def __init__(
33
31
  self,
34
32
  config: Dict,
@@ -45,9 +43,9 @@ class ImageClassificationPipeline(BasePipeline):
45
43
  device (str): The device to run the prediction on. Default is None.
46
44
  pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
47
45
  use_hpip (bool, optional): Whether to use the high-performance
48
- inference plugin (HPIP). Defaults to False.
46
+ inference plugin (HPIP) by default. Defaults to False.
49
47
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
50
- The high-performance inference configuration dictionary.
48
+ The default high-performance inference configuration dictionary.
51
49
  Defaults to None.
52
50
  """
53
51
  super().__init__(
@@ -78,3 +76,15 @@ class ImageClassificationPipeline(BasePipeline):
78
76
 
79
77
  topk = kwargs.pop("topk", self.topk)
80
78
  yield from self.image_classification_model(input, topk=topk)
79
+
80
+
81
+ @pipeline_requires_extra("cv")
82
+ class ImageClassificationPipeline(AutoParallelImageSimpleInferencePipeline):
83
+ entities = "image_classification"
84
+
85
+ @property
86
+ def _pipeline_cls(self):
87
+ return _ImageClassificationPipeline
88
+
89
+ def _get_batch_size(self, config):
90
+ return config["SubModules"]["ImageClassification"].get("batch_size", 1)
@@ -20,15 +20,13 @@ from ....utils.deps import pipeline_requires_extra
20
20
  from ...models.image_multilabel_classification.result import MLClassResult
21
21
  from ...utils.hpi import HPIConfig
22
22
  from ...utils.pp_option import PaddlePredictorOption
23
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
23
24
  from ..base import BasePipeline
24
25
 
25
26
 
26
- @pipeline_requires_extra("cv")
27
- class ImageMultiLabelClassificationPipeline(BasePipeline):
27
+ class _ImageMultiLabelClassificationPipeline(BasePipeline):
28
28
  """Image Multi Label Classification Pipeline"""
29
29
 
30
- entities = "image_multilabel_classification"
31
-
32
30
  def __init__(
33
31
  self,
34
32
  config: Dict,
@@ -44,8 +42,8 @@ class ImageMultiLabelClassificationPipeline(BasePipeline):
44
42
  config (Dict): Configuration dictionary containing model and other parameters.
45
43
  device (str): The device to run the prediction on. Default is None.
46
44
  pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
47
- use_hpip (bool, optional): Whether to use the high-performance
48
- inference plugin (HPIP). Defaults to False.
45
+ use_hpip (Optional[bool], optional): Whether to use the
46
+ high-performance inference plugin (HPIP) by default. Defaults to None.
49
47
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
50
48
  The high-performance inference configuration dictionary.
51
49
  Defaults to None.
@@ -85,3 +83,15 @@ class ImageMultiLabelClassificationPipeline(BasePipeline):
85
83
  input=input,
86
84
  threshold=self.threshold if threshold is None else threshold,
87
85
  )
86
+
87
+
88
+ @pipeline_requires_extra("cv")
89
+ class ImageMultiLabelClassificationPipeline(AutoParallelImageSimpleInferencePipeline):
90
+ entities = "image_multilabel_classification"
91
+
92
+ @property
93
+ def _pipeline_cls(self):
94
+ return _ImageMultiLabelClassificationPipeline
95
+
96
+ def _get_batch_size(self, config):
97
+ return config["SubModules"]["ImageMultiLabelClassification"]["batch_size"]
@@ -20,15 +20,13 @@ from ....utils.deps import pipeline_requires_extra
20
20
  from ...models.instance_segmentation.result import InstanceSegResult
21
21
  from ...utils.hpi import HPIConfig
22
22
  from ...utils.pp_option import PaddlePredictorOption
23
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
23
24
  from ..base import BasePipeline
24
25
 
25
26
 
26
- @pipeline_requires_extra("cv")
27
- class InstanceSegmentationPipeline(BasePipeline):
27
+ class _InstanceSegmentationPipeline(BasePipeline):
28
28
  """Instance Segmentation Pipeline"""
29
29
 
30
- entities = "instance_segmentation"
31
-
32
30
  def __init__(
33
31
  self,
34
32
  config: Dict,
@@ -45,9 +43,9 @@ class InstanceSegmentationPipeline(BasePipeline):
45
43
  device (str): The device to run the prediction on. Default is None.
46
44
  pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
47
45
  use_hpip (bool, optional): Whether to use the high-performance
48
- inference plugin (HPIP). Defaults to False.
46
+ inference plugin (HPIP) by default. Defaults to False.
49
47
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
50
- The high-performance inference configuration dictionary.
48
+ The default high-performance inference configuration dictionary.
51
49
  Defaults to None.
52
50
  """
53
51
  super().__init__(
@@ -79,3 +77,15 @@ class InstanceSegmentationPipeline(BasePipeline):
79
77
  InstanceSegResult: The predicted instance segmentation results.
80
78
  """
81
79
  yield from self.instance_segmentation_model(input, threshold=threshold)
80
+
81
+
82
+ @pipeline_requires_extra("cv")
83
+ class InstanceSegmentationPipeline(AutoParallelImageSimpleInferencePipeline):
84
+ entities = "instance_segmentation"
85
+
86
+ @property
87
+ def _pipeline_cls(self):
88
+ return _InstanceSegmentationPipeline
89
+
90
+ def _get_batch_size(self, config):
91
+ return config["SubModules"]["InstanceSegmentation"].get("batch_size", 1)
@@ -20,17 +20,15 @@ from ....utils.deps import pipeline_requires_extra
20
20
  from ...models.keypoint_detection.result import KptResult
21
21
  from ...utils.hpi import HPIConfig
22
22
  from ...utils.pp_option import PaddlePredictorOption
23
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
23
24
  from ..base import BasePipeline
24
25
 
25
26
  Number = Union[int, float]
26
27
 
27
28
 
28
- @pipeline_requires_extra("cv")
29
- class KeypointDetectionPipeline(BasePipeline):
29
+ class _KeypointDetectionPipeline(BasePipeline):
30
30
  """Keypoint Detection pipeline"""
31
31
 
32
- entities = "human_keypoint_detection"
33
-
34
32
  def __init__(
35
33
  self,
36
34
  config: Dict,
@@ -47,9 +45,9 @@ class KeypointDetectionPipeline(BasePipeline):
47
45
  device (str): The device to run the prediction on. Default is None.
48
46
  pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
49
47
  use_hpip (bool, optional): Whether to use the high-performance
50
- inference plugin (HPIP). Defaults to False.
48
+ inference plugin (HPIP) by default. Defaults to False.
51
49
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
52
- The high-performance inference configuration dictionary.
50
+ The default high-performance inference configuration dictionary.
53
51
  Defaults to None.
54
52
  """
55
53
  super().__init__(
@@ -146,3 +144,15 @@ class KeypointDetectionPipeline(BasePipeline):
146
144
  }
147
145
  )
148
146
  yield KptResult(single_img_res)
147
+
148
+
149
+ @pipeline_requires_extra("cv")
150
+ class KeypointDetectionPipeline(AutoParallelImageSimpleInferencePipeline):
151
+ entities = "human_keypoint_detection"
152
+
153
+ @property
154
+ def _pipeline_cls(self):
155
+ return _KeypointDetectionPipeline
156
+
157
+ def _get_batch_size(self, config):
158
+ return config["SubModules"]["ObjectDetection"].get("batch_size", 1)