paddlex 3.0.0rc1__py3-none-any.whl → 3.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240):
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +1 -1
  3. paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
  4. paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
  5. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
  6. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
  7. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
  8. paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
  9. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
  10. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
  11. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
  12. paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
  13. paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
  14. paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
  15. paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
  16. paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
  17. paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
  18. paddlex/configs/pipelines/OCR.yaml +7 -6
  19. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
  20. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
  21. paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
  22. paddlex/configs/pipelines/doc_understanding.yaml +1 -1
  23. paddlex/configs/pipelines/formula_recognition.yaml +2 -2
  24. paddlex/configs/pipelines/layout_parsing.yaml +3 -2
  25. paddlex/configs/pipelines/seal_recognition.yaml +1 -0
  26. paddlex/configs/pipelines/table_recognition.yaml +2 -1
  27. paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
  28. paddlex/hpip_links.html +20 -20
  29. paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
  30. paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
  31. paddlex/inference/common/result/mixin.py +19 -12
  32. paddlex/inference/models/base/predictor/base_predictor.py +2 -8
  33. paddlex/inference/models/common/static_infer.py +29 -73
  34. paddlex/inference/models/common/tokenizer/__init__.py +2 -0
  35. paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
  36. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
  37. paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
  38. paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
  39. paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
  40. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
  41. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
  42. paddlex/inference/models/common/tokenizer/vocab.py +7 -7
  43. paddlex/inference/models/common/ts/funcs.py +19 -8
  44. paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
  45. paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
  46. paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
  47. paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
  48. paddlex/inference/models/common/vlm/generation/utils.py +1 -1
  49. paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
  50. paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
  51. paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
  52. paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
  53. paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
  54. paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
  55. paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
  56. paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
  57. paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
  58. paddlex/inference/models/doc_vlm/predictor.py +79 -24
  59. paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
  60. paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
  61. paddlex/inference/models/doc_vlm/processors/common.py +189 -0
  62. paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
  63. paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
  64. paddlex/inference/models/formula_recognition/predictor.py +8 -2
  65. paddlex/inference/models/formula_recognition/processors.py +90 -77
  66. paddlex/inference/models/formula_recognition/result.py +28 -27
  67. paddlex/inference/models/image_feature/processors.py +3 -4
  68. paddlex/inference/models/keypoint_detection/predictor.py +3 -0
  69. paddlex/inference/models/object_detection/predictor.py +2 -0
  70. paddlex/inference/models/object_detection/processors.py +28 -3
  71. paddlex/inference/models/object_detection/utils.py +2 -0
  72. paddlex/inference/models/table_structure_recognition/result.py +0 -10
  73. paddlex/inference/models/text_detection/predictor.py +8 -0
  74. paddlex/inference/models/text_detection/processors.py +44 -10
  75. paddlex/inference/models/text_detection/result.py +0 -10
  76. paddlex/inference/models/text_recognition/result.py +1 -1
  77. paddlex/inference/pipelines/__init__.py +9 -5
  78. paddlex/inference/pipelines/_parallel.py +172 -0
  79. paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
  80. paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
  81. paddlex/inference/pipelines/base.py +14 -4
  82. paddlex/inference/pipelines/components/faisser.py +1 -1
  83. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
  84. paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
  85. paddlex/inference/pipelines/formula_recognition/result.py +1 -11
  86. paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
  87. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
  88. paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
  89. paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
  90. paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
  91. paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
  92. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +832 -260
  93. paddlex/inference/pipelines/layout_parsing/result.py +4 -17
  94. paddlex/inference/pipelines/layout_parsing/result_v2.py +259 -245
  95. paddlex/inference/pipelines/layout_parsing/setting.py +88 -0
  96. paddlex/inference/pipelines/layout_parsing/utils.py +391 -2028
  97. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
  98. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1199 -0
  99. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +615 -0
  100. paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
  101. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
  102. paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
  103. paddlex/inference/pipelines/ocr/pipeline.py +127 -70
  104. paddlex/inference/pipelines/ocr/result.py +21 -18
  105. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
  106. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
  107. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
  108. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
  109. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +6 -6
  110. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
  111. paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
  112. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
  113. paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
  114. paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
  115. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
  116. paddlex/inference/pipelines/table_recognition/result.py +1 -1
  117. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
  118. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
  119. paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
  120. paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
  121. paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
  122. paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
  123. paddlex/inference/serving/basic_serving/_app.py +46 -13
  124. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
  125. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
  126. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
  127. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
  128. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
  129. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
  130. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
  131. paddlex/inference/serving/infra/utils.py +20 -22
  132. paddlex/inference/serving/schemas/formula_recognition.py +1 -1
  133. paddlex/inference/serving/schemas/layout_parsing.py +1 -2
  134. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
  135. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
  136. paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
  137. paddlex/inference/serving/schemas/seal_recognition.py +1 -1
  138. paddlex/inference/serving/schemas/table_recognition.py +2 -6
  139. paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
  140. paddlex/inference/utils/hpi.py +30 -16
  141. paddlex/inference/utils/hpi_model_info_collection.json +666 -162
  142. paddlex/inference/utils/io/readers.py +12 -12
  143. paddlex/inference/utils/misc.py +20 -0
  144. paddlex/inference/utils/mkldnn_blocklist.py +59 -0
  145. paddlex/inference/utils/official_models.py +140 -5
  146. paddlex/inference/utils/pp_option.py +74 -9
  147. paddlex/model.py +2 -2
  148. paddlex/modules/__init__.py +1 -1
  149. paddlex/modules/anomaly_detection/evaluator.py +2 -2
  150. paddlex/modules/base/__init__.py +1 -1
  151. paddlex/modules/base/evaluator.py +5 -5
  152. paddlex/modules/base/trainer.py +1 -1
  153. paddlex/modules/doc_vlm/dataset_checker.py +2 -2
  154. paddlex/modules/doc_vlm/evaluator.py +2 -2
  155. paddlex/modules/doc_vlm/exportor.py +2 -2
  156. paddlex/modules/doc_vlm/model_list.py +1 -1
  157. paddlex/modules/doc_vlm/trainer.py +2 -2
  158. paddlex/modules/face_recognition/evaluator.py +2 -2
  159. paddlex/modules/formula_recognition/evaluator.py +5 -2
  160. paddlex/modules/formula_recognition/model_list.py +3 -0
  161. paddlex/modules/formula_recognition/trainer.py +3 -0
  162. paddlex/modules/general_recognition/evaluator.py +1 -1
  163. paddlex/modules/image_classification/evaluator.py +2 -2
  164. paddlex/modules/image_classification/model_list.py +1 -0
  165. paddlex/modules/instance_segmentation/evaluator.py +1 -1
  166. paddlex/modules/keypoint_detection/evaluator.py +1 -1
  167. paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
  168. paddlex/modules/multilabel_classification/evaluator.py +2 -2
  169. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
  170. paddlex/modules/object_detection/evaluator.py +2 -2
  171. paddlex/modules/object_detection/model_list.py +2 -0
  172. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
  173. paddlex/modules/semantic_segmentation/evaluator.py +2 -2
  174. paddlex/modules/table_recognition/evaluator.py +2 -2
  175. paddlex/modules/text_detection/evaluator.py +2 -2
  176. paddlex/modules/text_detection/model_list.py +2 -0
  177. paddlex/modules/text_recognition/evaluator.py +2 -2
  178. paddlex/modules/text_recognition/model_list.py +2 -0
  179. paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
  180. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  181. paddlex/modules/ts_classification/evaluator.py +2 -2
  182. paddlex/modules/ts_forecast/evaluator.py +2 -2
  183. paddlex/modules/video_classification/evaluator.py +2 -2
  184. paddlex/modules/video_detection/evaluator.py +2 -2
  185. paddlex/ops/__init__.py +8 -5
  186. paddlex/paddlex_cli.py +19 -13
  187. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
  188. paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
  189. paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
  190. paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
  191. paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
  192. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
  193. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
  194. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
  195. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
  196. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
  197. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
  198. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
  199. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
  200. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
  201. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
  202. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
  203. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
  204. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
  205. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
  206. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
  207. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
  208. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
  209. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
  210. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
  211. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
  212. paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
  213. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
  214. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
  215. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
  216. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
  217. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
  218. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
  219. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
  220. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
  221. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
  222. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
  223. paddlex/repo_apis/base/config.py +1 -1
  224. paddlex/repo_manager/core.py +3 -3
  225. paddlex/repo_manager/meta.py +6 -2
  226. paddlex/repo_manager/repo.py +17 -16
  227. paddlex/utils/custom_device_list.py +26 -2
  228. paddlex/utils/deps.py +3 -3
  229. paddlex/utils/device.py +5 -13
  230. paddlex/utils/env.py +4 -0
  231. paddlex/utils/flags.py +11 -4
  232. paddlex/utils/fonts/__init__.py +34 -4
  233. paddlex/utils/misc.py +1 -1
  234. paddlex/utils/subclass_register.py +2 -2
  235. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/METADATA +349 -208
  236. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/RECORD +240 -211
  237. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/WHEEL +1 -1
  238. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/entry_points.txt +1 -0
  239. {paddlex-3.0.0rc1.dist-info/licenses → paddlex-3.0.2.dist-info}/LICENSE +0 -0
  240. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/top_level.txt +0 -0
@@ -120,7 +120,7 @@ class TableRecognitionResult(BaseCVResult, HtmlMixin, XlsxMixin):
120
120
 
121
121
  if len(self["table_res_list"]) > 0:
122
122
  table_cell_img = Image.fromarray(
123
- copy.deepcopy(self["doc_preprocessor_res"]["output_img"])
123
+ copy.deepcopy(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
124
124
  )
125
125
  table_draw = ImageDraw.Draw(table_cell_img)
126
126
  rectangle_color = (255, 0, 0)
@@ -131,8 +131,8 @@ def compute_inter(rec1, rec2):
131
131
  Returns:
132
132
  float: Intersection over rec2_area
133
133
  """
134
- x1_1, y1_1, x2_1, y2_1 = rec1
135
- x1_2, y1_2, x2_2, y2_2 = rec2
134
+ x1_1, y1_1, x2_1, y2_1 = map(float, rec1)
135
+ x1_2, y1_2, x2_2, y2_2 = map(float, rec2)
136
136
  x_left = max(x1_1, x1_2)
137
137
  y_top = max(y1_1, y1_2)
138
138
  x_right = min(x2_1, x2_2)
@@ -413,8 +413,10 @@ def get_table_recognition_res(
413
413
  table_structure_result: list,
414
414
  table_cells_result: list,
415
415
  overall_ocr_res: OCRResult,
416
+ table_ocr_pred: dict,
416
417
  cells_texts_list: list,
417
418
  use_table_cells_ocr_results: bool,
419
+ use_table_cells_split_ocr: bool,
418
420
  ) -> SingleTableRecognitionResult:
419
421
  """
420
422
  Retrieve table recognition result from cropped image info, table structure prediction, and overall OCR result.
@@ -424,6 +426,7 @@ def get_table_recognition_res(
424
426
  table_structure_result (list): Predicted table structure.
425
427
  table_cells_result (list): Predicted table cells.
426
428
  overall_ocr_res (OCRResult): Overall OCR result from the input image.
429
+ table_ocr_pred (dict): Table OCR result from the input image.
427
430
  cells_texts_list (list): OCR results with cells.
428
431
  use_table_cells_ocr_results (bool): whether to use OCR results with cells.
429
432
 
@@ -432,9 +435,10 @@ def get_table_recognition_res(
432
435
  """
433
436
 
434
437
  table_cells_result = convert_to_four_point_coordinates(table_cells_result)
435
-
436
438
  table_box = np.array([table_box])
437
- table_ocr_pred = get_sub_regions_ocr_res(overall_ocr_res, table_box)
439
+
440
+ if not (use_table_cells_ocr_results == True and use_table_cells_split_ocr == True):
441
+ table_ocr_pred = get_sub_regions_ocr_res(overall_ocr_res, table_box)
438
442
 
439
443
  crop_start_point = [table_box[0][0], table_box[0][1]]
440
444
  img_shape = overall_ocr_res["doc_preprocessor_res"]["output_img"].shape[0:2]
@@ -456,7 +460,7 @@ def get_table_recognition_res(
456
460
  table_cells_result, crop_start_point, img_shape
457
461
  )
458
462
 
459
- if use_table_cells_ocr_results == True:
463
+ if use_table_cells_ocr_results == True and use_table_cells_split_ocr == False:
460
464
  ocr_dt_boxes = table_cells_result
461
465
  ocr_texts_res = cells_texts_list
462
466
  else:
@@ -44,9 +44,9 @@ class TSAnomalyDetPipeline(BasePipeline):
44
44
  device (str, optional): Device to run the predictions on. Defaults to None.
45
45
  pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
46
46
  use_hpip (bool, optional): Whether to use the high-performance
47
- inference plugin (HPIP). Defaults to False.
47
+ inference plugin (HPIP) by default. Defaults to False.
48
48
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
49
- The high-performance inference configuration dictionary.
49
+ The default high-performance inference configuration dictionary.
50
50
  Defaults to None.
51
51
  """
52
52
 
@@ -44,9 +44,9 @@ class TSClsPipeline(BasePipeline):
44
44
  device (str, optional): Device to run the predictions on. Defaults to None.
45
45
  pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
46
46
  use_hpip (bool, optional): Whether to use the high-performance
47
- inference plugin (HPIP). Defaults to False.
47
+ inference plugin (HPIP) by default. Defaults to False.
48
48
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
49
- The high-performance inference configuration dictionary.
49
+ The default high-performance inference configuration dictionary.
50
50
  Defaults to None.
51
51
  """
52
52
 
@@ -44,9 +44,9 @@ class TSFcPipeline(BasePipeline):
44
44
  device (str, optional): Device to run the predictions on. Defaults to None.
45
45
  pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
46
46
  use_hpip (bool, optional): Whether to use the high-performance
47
- inference plugin (HPIP). Defaults to False.
47
+ inference plugin (HPIP) by default. Defaults to False.
48
48
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
49
- The high-performance inference configuration dictionary.
49
+ The default high-performance inference configuration dictionary.
50
50
  Defaults to None.
51
51
  """
52
52
 
@@ -45,9 +45,9 @@ class VideoClassificationPipeline(BasePipeline):
45
45
  device (str): The device to run the prediction on. Default is None.
46
46
  pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
47
47
  use_hpip (bool, optional): Whether to use the high-performance
48
- inference plugin (HPIP). Defaults to False.
48
+ inference plugin (HPIP) by default. Defaults to False.
49
49
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
50
- The high-performance inference configuration dictionary.
50
+ The default high-performance inference configuration dictionary.
51
51
  Defaults to None.
52
52
  """
53
53
  super().__init__(
@@ -45,9 +45,9 @@ class VideoDetectionPipeline(BasePipeline):
45
45
  device (str): The device to run the prediction on. Default is None.
46
46
  pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
47
47
  use_hpip (bool, optional): Whether to use the high-performance
48
- inference plugin (HPIP). Defaults to False.
48
+ inference plugin (HPIP) by default. Defaults to False.
49
49
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
50
- The high-performance inference configuration dictionary.
50
+ The default high-performance inference configuration dictionary.
51
51
  Defaults to None.
52
52
  """
53
53
  super().__init__(
@@ -15,6 +15,8 @@
15
15
  import asyncio
16
16
  import contextlib
17
17
  import json
18
+ from queue import Queue
19
+ from threading import Thread
18
20
  from typing import (
19
21
  Any,
20
22
  AsyncGenerator,
@@ -74,16 +76,22 @@ class PipelineWrapper(Generic[PipelineT]):
74
76
  def __init__(self, pipeline: PipelineT) -> None:
75
77
  super().__init__()
76
78
  self._pipeline = pipeline
77
- self._lock = asyncio.Lock()
79
+ # HACK: We work around a bug in Paddle Inference by performing all
80
+ # inference in the same thread.
81
+ self._queue = Queue()
82
+ self._closed = False
83
+ self._loop = asyncio.get_running_loop()
84
+ self._thread = Thread(target=self._worker, daemon=False)
85
+ self._thread.start()
78
86
 
79
87
  @property
80
88
  def pipeline(self) -> PipelineT:
81
89
  return self._pipeline
82
90
 
83
91
  async def infer(self, *args: Any, **kwargs: Any) -> List[Any]:
84
- def _infer() -> List[Any]:
92
+ def _infer(*args, **kwargs) -> List[Any]:
85
93
  output: list = []
86
- with contextlib.closing(self._pipeline(*args, **kwargs)) as it:
94
+ with contextlib.closing(self._pipeline.predict(*args, **kwargs)) as it:
87
95
  for item in it:
88
96
  if _is_error(item):
89
97
  raise fastapi.HTTPException(
@@ -93,11 +101,33 @@ class PipelineWrapper(Generic[PipelineT]):
93
101
 
94
102
  return output
95
103
 
96
- return await self.call(_infer)
104
+ return await self.call(_infer, *args, **kwargs)
97
105
 
98
106
  async def call(self, func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
99
- async with self._lock:
100
- return await call_async(func, *args, **kwargs)
107
+ if self._closed:
108
+ raise RuntimeError("`PipelineWrapper` has already been closed")
109
+ fut = self._loop.create_future()
110
+ self._queue.put((func, args, kwargs, fut))
111
+ return await fut
112
+
113
+ async def close(self):
114
+ if not self._closed:
115
+ self._queue.put(None)
116
+ await call_async(self._thread.join)
117
+
118
+ def _worker(self):
119
+ while not self._closed:
120
+ item = self._queue.get()
121
+ if item is None:
122
+ break
123
+ func, args, kwargs, fut = item
124
+ try:
125
+ result = func(*args, **kwargs)
126
+ self._loop.call_soon_threadsafe(fut.set_result, result)
127
+ except Exception as e:
128
+ self._loop.call_soon_threadsafe(fut.set_exception, e)
129
+ finally:
130
+ self._queue.task_done()
101
131
 
102
132
 
103
133
  @class_requires_deps("aiohttp")
@@ -141,14 +171,17 @@ def create_app(
141
171
  @contextlib.asynccontextmanager
142
172
  async def _app_lifespan(app: "fastapi.FastAPI") -> AsyncGenerator[None, None]:
143
173
  ctx.pipeline = PipelineWrapper[PipelineT](pipeline)
144
- if app_aiohttp_session:
145
- async with aiohttp.ClientSession(
146
- cookie_jar=aiohttp.DummyCookieJar()
147
- ) as aiohttp_session:
148
- ctx.aiohttp_session = aiohttp_session
174
+ try:
175
+ if app_aiohttp_session:
176
+ async with aiohttp.ClientSession(
177
+ cookie_jar=aiohttp.DummyCookieJar()
178
+ ) as aiohttp_session:
179
+ ctx.aiohttp_session = aiohttp_session
180
+ yield
181
+ else:
149
182
  yield
150
- else:
151
- yield
183
+ finally:
184
+ await ctx.pipeline.close()
152
185
 
153
186
  # Should we control API versions?
154
187
  app = fastapi.FastAPI(lifespan=_app_lifespan)
@@ -90,7 +90,11 @@ def postprocess_images(
90
90
  output_images: Dict[str, str] = {}
91
91
  for key, img in images.items():
92
92
  output_images[key] = postprocess_image(
93
- np.array(img) if isinstance(img, Image) else img,
93
+ (
94
+ cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2BGR)
95
+ if isinstance(img, Image)
96
+ else img
97
+ ),
94
98
  log_id=log_id,
95
99
  filename=filename_template.format(key=key),
96
100
  file_storage=file_storage,
@@ -54,7 +54,6 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
54
54
  use_doc_orientation_classify=request.useDocOrientationClassify,
55
55
  use_doc_unwarping=request.useDocUnwarping,
56
56
  use_textline_orientation=request.useTextlineOrientation,
57
- use_general_ocr=request.useGeneralOcr,
58
57
  use_seal_recognition=request.useSealRecognition,
59
58
  use_table_recognition=request.useTableRecognition,
60
59
  use_formula_recognition=request.useFormulaRecognition,
@@ -54,7 +54,6 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
54
54
  images,
55
55
  use_doc_orientation_classify=request.useDocOrientationClassify,
56
56
  use_doc_unwarping=request.useDocUnwarping,
57
- use_general_ocr=request.useGeneralOcr,
58
57
  use_seal_recognition=request.useSealRecognition,
59
58
  use_table_recognition=request.useTableRecognition,
60
59
  layout_threshold=request.layoutThreshold,
@@ -54,7 +54,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
54
54
  images,
55
55
  use_doc_orientation_classify=request.useDocOrientationClassify,
56
56
  use_doc_unwarping=request.useDocUnwarping,
57
- use_general_ocr=request.useGeneralOcr,
57
+ use_textline_orientation=request.useTextlineOrientation,
58
58
  use_seal_recognition=request.useSealRecognition,
59
59
  use_table_recognition=request.useTableRecognition,
60
60
  layout_threshold=request.layoutThreshold,
@@ -54,10 +54,11 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
54
54
  use_doc_orientation_classify=request.useDocOrientationClassify,
55
55
  use_doc_unwarping=request.useDocUnwarping,
56
56
  use_textline_orientation=request.useTextlineOrientation,
57
- use_general_ocr=request.useGeneralOcr,
58
57
  use_seal_recognition=request.useSealRecognition,
59
58
  use_table_recognition=request.useTableRecognition,
60
59
  use_formula_recognition=request.useFormulaRecognition,
60
+ use_chart_recognition=request.useChartRecognition,
61
+ use_region_detection=request.useRegionDetection,
61
62
  layout_threshold=request.layoutThreshold,
62
63
  layout_nms=request.layoutNms,
63
64
  layout_unclip_ratio=request.layoutUnclipRatio,
@@ -74,7 +75,10 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
74
75
  seal_det_box_thresh=request.sealDetBoxThresh,
75
76
  seal_det_unclip_ratio=request.sealDetUnclipRatio,
76
77
  seal_rec_score_thresh=request.sealRecScoreThresh,
77
- use_table_cells_ocr_results=request.useTableCellsOcrResults,
78
+ use_wired_table_cells_trans_to_html=request.useWiredTableCellsTransToHtml,
79
+ use_wireless_table_cells_trans_to_html=request.useWirelessTableCellsTransToHtml,
80
+ use_table_orientation_classify=request.useTableOrientationClassify,
81
+ use_ocr_results_with_table_cells=request.useOcrResultsWithTableCells,
78
82
  use_e2e_wired_table_rec_model=request.useE2eWiredTableRecModel,
79
83
  use_e2e_wireless_table_rec_model=request.useE2eWirelessTableRecModel,
80
84
  )
@@ -53,17 +53,13 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
53
53
  use_doc_unwarping=request.useDocUnwarping,
54
54
  use_layout_detection=request.useLayoutDetection,
55
55
  use_ocr_model=request.useOcrModel,
56
- layout_threshold=request.layoutThreshold,
57
- layout_nms=request.layoutNms,
58
- layout_unclip_ratio=request.layoutUnclipRatio,
59
- layout_merge_bboxes_mode=request.layoutMergeBboxesMode,
60
56
  text_det_limit_side_len=request.textDetLimitSideLen,
61
57
  text_det_limit_type=request.textDetLimitType,
62
58
  text_det_thresh=request.textDetThresh,
63
59
  text_det_box_thresh=request.textDetBoxThresh,
64
60
  text_det_unclip_ratio=request.textDetUnclipRatio,
65
61
  text_rec_score_thresh=request.textRecScoreThresh,
66
- use_table_cells_ocr_results=request.useTableCellsOcrResults,
62
+ use_ocr_results_with_table_cells=request.useOcrResultsWithTableCells,
67
63
  )
68
64
 
69
65
  table_rec_results: List[Dict[str, Any]] = []
@@ -53,19 +53,18 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
53
53
  use_doc_unwarping=request.useDocUnwarping,
54
54
  use_layout_detection=request.useLayoutDetection,
55
55
  use_ocr_model=request.useOcrModel,
56
- layout_threshold=request.layoutThreshold,
57
- layout_nms=request.layoutNms,
58
- layout_unclip_ratio=request.layoutUnclipRatio,
59
- layout_merge_bboxes_mode=request.layoutMergeBboxesMode,
60
56
  text_det_limit_side_len=request.textDetLimitSideLen,
61
57
  text_det_limit_type=request.textDetLimitType,
62
58
  text_det_thresh=request.textDetThresh,
63
59
  text_det_box_thresh=request.textDetBoxThresh,
64
60
  text_det_unclip_ratio=request.textDetUnclipRatio,
65
61
  text_rec_score_thresh=request.textRecScoreThresh,
66
- use_table_cells_ocr_results=request.useTableCellsOcrResults,
67
62
  use_e2e_wired_table_rec_model=request.useE2eWiredTableRecModel,
68
63
  use_e2e_wireless_table_rec_model=request.useE2eWirelessTableRecModel,
64
+ use_wired_table_cells_trans_to_html=request.useWiredTableCellsTransToHtml,
65
+ use_wireless_table_cells_trans_to_html=request.useWirelessTableCellsTransToHtml,
66
+ use_table_orientation_classify=request.useTableOrientationClassify,
67
+ use_ocr_results_with_table_cells=request.useOcrResultsWithTableCells,
69
68
  )
70
69
 
71
70
  table_rec_results: List[Dict[str, Any]] = []
@@ -38,8 +38,8 @@ if is_dep_available("opencv-contrib-python"):
38
38
  import cv2
39
39
  if is_dep_available("filetype"):
40
40
  import filetype
41
- if is_dep_available("PyMuPDF"):
42
- import fitz
41
+ if is_dep_available("pypdfium2"):
42
+ import pypdfium2 as pdfium
43
43
  if is_dep_available("yarl"):
44
44
  import yarl
45
45
 
@@ -176,31 +176,29 @@ def base64_encode(data: bytes) -> str:
176
176
  return base64.b64encode(data).decode("ascii")
177
177
 
178
178
 
179
- @function_requires_deps("PyMuPDF", "opencv-contrib-python")
179
+ @function_requires_deps("pypdfium2", "opencv-contrib-python")
180
180
  def read_pdf(
181
181
  bytes_: bytes, max_num_imgs: Optional[int] = None
182
182
  ) -> Tuple[List[np.ndarray], PDFInfo]:
183
183
  images: List[np.ndarray] = []
184
184
  page_info_list: List[PDFPageInfo] = []
185
- with fitz.open("pdf", bytes_) as doc:
186
- for page in doc:
187
- if max_num_imgs is not None and len(images) >= max_num_imgs:
188
- break
189
- # TODO: Do not always use zoom=2.0
190
- zoom = 2.0
191
- deg = 0
192
- mat = fitz.Matrix(zoom, zoom).prerotate(deg)
193
- pixmap = page.get_pixmap(matrix=mat, alpha=False)
194
- image = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(
195
- pixmap.h, pixmap.w, pixmap.n
196
- )
197
- image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
198
- images.append(image)
199
- page_info = PDFPageInfo(
200
- width=pixmap.w,
201
- height=pixmap.h,
202
- )
203
- page_info_list.append(page_info)
185
+ doc = pdfium.PdfDocument(bytes_)
186
+ for page in doc:
187
+ if max_num_imgs is not None and len(images) >= max_num_imgs:
188
+ break
189
+ # TODO: Do not always use zoom=2.0
190
+ zoom = 2.0
191
+ deg = 0
192
+ image = page.render(scale=zoom, rotation=deg).to_pil()
193
+ image = image.convert("RGB")
194
+ image = np.array(image)
195
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
196
+ images.append(image)
197
+ page_info = PDFPageInfo(
198
+ width=image.shape[1],
199
+ height=image.shape[0],
200
+ )
201
+ page_info_list.append(page_info)
204
202
  pdf_info = PDFInfo(
205
203
  numPages=len(page_info_list),
206
204
  pages=page_info_list,
@@ -34,7 +34,7 @@ class InferRequest(ocr.BaseInferRequest):
34
34
  useLayoutDetection: Optional[bool] = None
35
35
  useDocOrientationClassify: Optional[bool] = None
36
36
  useDocUnwarping: Optional[bool] = None
37
- layoutThreshold: Optional[float] = None
37
+ layoutThreshold: Optional[Union[float, dict]] = None
38
38
  layoutNms: Optional[bool] = None
39
39
  layoutUnclipRatio: Optional[Union[float, Tuple[float, float]]] = None
40
40
  layoutMergeBboxesMode: Optional[str] = None
@@ -34,11 +34,10 @@ class InferRequest(ocr.BaseInferRequest):
34
34
  useDocOrientationClassify: Optional[bool] = None
35
35
  useDocUnwarping: Optional[bool] = None
36
36
  useTextlineOrientation: Optional[bool] = None
37
- useGeneralOcr: Optional[bool] = None
38
37
  useSealRecognition: Optional[bool] = None
39
38
  useTableRecognition: Optional[bool] = None
40
39
  useFormulaRecognition: Optional[bool] = None
41
- layoutThreshold: Optional[float] = None
40
+ layoutThreshold: Optional[Union[float, dict]] = None
42
41
  layoutNms: Optional[bool] = None
43
42
  layoutUnclipRatio: Optional[Union[float, Tuple[float, float]]] = None
44
43
  layoutMergeBboxesMode: Optional[str] = None
@@ -39,10 +39,9 @@ ANALYZE_IMAGES_ENDPOINT: Final[str] = "/chatocr-visual"
39
39
  class AnalyzeImagesRequest(ocr.BaseInferRequest):
40
40
  useDocOrientationClassify: Optional[bool] = None
41
41
  useDocUnwarping: Optional[bool] = None
42
- useGeneralOcr: Optional[bool] = None
43
42
  useSealRecognition: Optional[bool] = None
44
43
  useTableRecognition: Optional[bool] = None
45
- layoutThreshold: Optional[float] = None
44
+ layoutThreshold: Optional[Union[float, dict]] = None
46
45
  layoutNms: Optional[bool] = None
47
46
  layoutUnclipRatio: Optional[Union[float, Tuple[float, float], dict]] = None
48
47
  layoutMergeBboxesMode: Optional[Union[str, dict]] = None
@@ -42,10 +42,10 @@ ANALYZE_IMAGES_ENDPOINT: Final[str] = "/chatocr-visual"
42
42
  class AnalyzeImagesRequest(ocr.BaseInferRequest):
43
43
  useDocOrientationClassify: Optional[bool] = None
44
44
  useDocUnwarping: Optional[bool] = None
45
- useGeneralOcr: Optional[bool] = None
45
+ useTextlineOrientation: Optional[bool] = None
46
46
  useSealRecognition: Optional[bool] = None
47
47
  useTableRecognition: Optional[bool] = None
48
- layoutThreshold: Optional[float] = None
48
+ layoutThreshold: Optional[Union[float, dict]] = None
49
49
  layoutNms: Optional[bool] = None
50
50
  layoutUnclipRatio: Optional[Union[float, Tuple[float, float], dict]] = None
51
51
  layoutMergeBboxesMode: Optional[Union[str, dict]] = None
@@ -32,14 +32,15 @@ INFER_ENDPOINT: Final[str] = "/layout-parsing"
32
32
 
33
33
 
34
34
  class InferRequest(ocr.BaseInferRequest):
35
- useDocOrientationClassify: Optional[bool] = None
36
- useDocUnwarping: Optional[bool] = None
35
+ useDocOrientationClassify: Optional[bool] = False
36
+ useDocUnwarping: Optional[bool] = False
37
37
  useTextlineOrientation: Optional[bool] = None
38
- useGeneralOcr: Optional[bool] = None
39
38
  useSealRecognition: Optional[bool] = None
40
39
  useTableRecognition: Optional[bool] = None
41
40
  useFormulaRecognition: Optional[bool] = None
42
- layoutThreshold: Optional[float] = None
41
+ useChartRecognition: Optional[bool] = False
42
+ useRegionDetection: Optional[bool] = None
43
+ layoutThreshold: Optional[Union[float, dict]] = None
43
44
  layoutNms: Optional[bool] = None
44
45
  layoutUnclipRatio: Optional[Union[float, Tuple[float, float], dict]] = None
45
46
  layoutMergeBboxesMode: Optional[Union[str, dict]] = None
@@ -55,9 +56,12 @@ class InferRequest(ocr.BaseInferRequest):
55
56
  sealDetBoxThresh: Optional[float] = None
56
57
  sealDetUnclipRatio: Optional[float] = None
57
58
  sealRecScoreThresh: Optional[float] = None
58
- useTableCellsOcrResults: bool = False
59
+ useWiredTableCellsTransToHtml: bool = False
60
+ useWirelessTableCellsTransToHtml: bool = False
61
+ useTableOrientationClassify: bool = True
62
+ useOcrResultsWithTableCells: bool = True
59
63
  useE2eWiredTableRecModel: bool = False
60
- useE2eWirelessTableRecModel: bool = False
64
+ useE2eWirelessTableRecModel: bool = True
61
65
 
62
66
 
63
67
  class MarkdownData(BaseModel):
@@ -34,7 +34,7 @@ class InferRequest(ocr.BaseInferRequest):
34
34
  useDocOrientationClassify: Optional[bool] = None
35
35
  useDocUnwarping: Optional[bool] = None
36
36
  useLayoutDetection: Optional[bool] = None
37
- layoutThreshold: Optional[float] = None
37
+ layoutThreshold: Optional[Union[float, dict]] = None
38
38
  layoutNms: Optional[bool] = None
39
39
  layoutUnclipRatio: Optional[Union[float, Tuple[float, float]]] = None
40
40
  layoutMergeBboxesMode: Optional[str] = None
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Dict, Final, List, Optional, Tuple, Union
15
+ from typing import Dict, Final, List, Optional
16
16
 
17
17
  from pydantic import BaseModel
18
18
 
@@ -35,17 +35,13 @@ class InferRequest(ocr.BaseInferRequest):
35
35
  useDocUnwarping: Optional[bool] = None
36
36
  useLayoutDetection: Optional[bool] = None
37
37
  useOcrModel: Optional[bool] = None
38
- layoutThreshold: Optional[float] = None
39
- layoutNms: Optional[bool] = None
40
- layoutUnclipRatio: Optional[Union[float, Tuple[float, float]]] = None
41
- layoutMergeBboxesMode: Optional[str] = None
42
38
  textDetLimitSideLen: Optional[int] = None
43
39
  textDetLimitType: Optional[str] = None
44
40
  textDetThresh: Optional[float] = None
45
41
  textDetBoxThresh: Optional[float] = None
46
42
  textDetUnclipRatio: Optional[float] = None
47
43
  textRecScoreThresh: Optional[float] = None
48
- useTableCellsOcrResults: bool = False
44
+ useOcrResultsWithTableCells: bool = False
49
45
 
50
46
 
51
47
  class TableRecResult(BaseModel):
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Dict, Final, List, Optional, Tuple, Union
15
+ from typing import Dict, Final, List, Optional
16
16
 
17
17
  from pydantic import BaseModel
18
18
 
@@ -35,19 +35,18 @@ class InferRequest(ocr.BaseInferRequest):
35
35
  useDocUnwarping: Optional[bool] = None
36
36
  useLayoutDetection: Optional[bool] = None
37
37
  useOcrModel: Optional[bool] = None
38
- layoutThreshold: Optional[float] = None
39
- layoutNms: Optional[bool] = None
40
- layoutUnclipRatio: Optional[Union[float, Tuple[float, float], dict]] = None
41
- layoutMergeBboxesMode: Optional[Union[str, dict]] = None
42
38
  textDetLimitSideLen: Optional[int] = None
43
39
  textDetLimitType: Optional[str] = None
44
40
  textDetThresh: Optional[float] = None
45
41
  textDetBoxThresh: Optional[float] = None
46
42
  textDetUnclipRatio: Optional[float] = None
47
43
  textRecScoreThresh: Optional[float] = None
48
- useTableCellsOcrResults: bool = False
49
44
  useE2eWiredTableRecModel: bool = False
50
45
  useE2eWirelessTableRecModel: bool = False
46
+ useWiredTableCellsTransToHtml: bool = False
47
+ useWirelessTableCellsTransToHtml: bool = False
48
+ useTableOrientationClassify: bool = True
49
+ useOcrResultsWithTableCells: bool = True
51
50
 
52
51
 
53
52
  class TableRecResult(BaseModel):