paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (824):
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +17 -34
  3. paddlex/__main__.py +1 -1
  4. paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
  5. paddlex/configs/modules/doc_vlm/PP-DocBee-2B.yaml +14 -0
  6. paddlex/configs/modules/doc_vlm/PP-DocBee-7B.yaml +14 -0
  7. paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
  8. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
  9. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
  10. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
  11. paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
  12. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
  13. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
  14. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
  15. paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
  16. paddlex/configs/modules/open_vocabulary_detection/YOLO-Worldv2-L.yaml +13 -0
  17. paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
  18. paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
  19. paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
  20. paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
  21. paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
  22. paddlex/configs/pipelines/OCR.yaml +7 -6
  23. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
  24. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
  25. paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
  26. paddlex/configs/pipelines/anomaly_detection.yaml +1 -1
  27. paddlex/configs/pipelines/doc_understanding.yaml +9 -0
  28. paddlex/configs/pipelines/formula_recognition.yaml +2 -2
  29. paddlex/configs/pipelines/layout_parsing.yaml +3 -2
  30. paddlex/configs/pipelines/seal_recognition.yaml +1 -0
  31. paddlex/configs/pipelines/table_recognition.yaml +2 -1
  32. paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
  33. paddlex/configs/pipelines/ts_anomaly_detection.yaml +1 -1
  34. paddlex/configs/pipelines/ts_classification.yaml +1 -1
  35. paddlex/configs/pipelines/ts_forecast.yaml +1 -1
  36. paddlex/constants.py +17 -0
  37. paddlex/engine.py +7 -5
  38. paddlex/hpip_links.html +23 -11
  39. paddlex/inference/__init__.py +3 -3
  40. paddlex/inference/common/__init__.py +1 -1
  41. paddlex/inference/common/batch_sampler/__init__.py +5 -4
  42. paddlex/inference/common/batch_sampler/audio_batch_sampler.py +5 -6
  43. paddlex/inference/common/batch_sampler/base_batch_sampler.py +20 -16
  44. paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +4 -7
  45. paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +87 -0
  46. paddlex/inference/common/batch_sampler/image_batch_sampler.py +45 -60
  47. paddlex/inference/common/batch_sampler/ts_batch_sampler.py +9 -10
  48. paddlex/inference/common/batch_sampler/video_batch_sampler.py +2 -22
  49. paddlex/inference/common/reader/__init__.py +4 -4
  50. paddlex/inference/common/reader/audio_reader.py +3 -3
  51. paddlex/inference/common/reader/det_3d_reader.py +7 -5
  52. paddlex/inference/common/reader/image_reader.py +16 -12
  53. paddlex/inference/common/reader/ts_reader.py +3 -2
  54. paddlex/inference/common/reader/video_reader.py +3 -3
  55. paddlex/inference/common/result/__init__.py +7 -7
  56. paddlex/inference/common/result/base_cv_result.py +12 -2
  57. paddlex/inference/common/result/base_result.py +7 -5
  58. paddlex/inference/common/result/base_ts_result.py +1 -2
  59. paddlex/inference/common/result/base_video_result.py +2 -2
  60. paddlex/inference/common/result/mixin.py +31 -25
  61. paddlex/inference/models/__init__.py +41 -85
  62. paddlex/inference/models/anomaly_detection/__init__.py +1 -1
  63. paddlex/inference/models/anomaly_detection/predictor.py +9 -19
  64. paddlex/inference/models/anomaly_detection/processors.py +9 -2
  65. paddlex/inference/models/anomaly_detection/result.py +3 -2
  66. paddlex/inference/models/base/__init__.py +2 -2
  67. paddlex/inference/models/base/predictor/__init__.py +1 -2
  68. paddlex/inference/models/base/predictor/base_predictor.py +278 -39
  69. paddlex/inference/models/common/__init__.py +6 -15
  70. paddlex/inference/models/common/static_infer.py +724 -251
  71. paddlex/inference/models/common/tokenizer/__init__.py +7 -3
  72. paddlex/inference/models/common/tokenizer/bert_tokenizer.py +1 -1
  73. paddlex/inference/models/common/tokenizer/clip_tokenizer.py +609 -0
  74. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +9 -7
  75. paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
  76. paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +438 -0
  77. paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
  78. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +85 -77
  79. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +339 -123
  80. paddlex/inference/models/common/tokenizer/utils.py +1 -1
  81. paddlex/inference/models/common/tokenizer/vocab.py +8 -8
  82. paddlex/inference/models/common/ts/__init__.py +1 -1
  83. paddlex/inference/models/common/ts/funcs.py +13 -6
  84. paddlex/inference/models/common/ts/processors.py +14 -5
  85. paddlex/inference/models/common/vision/__init__.py +3 -3
  86. paddlex/inference/models/common/vision/funcs.py +17 -12
  87. paddlex/inference/models/common/vision/processors.py +61 -46
  88. paddlex/inference/models/common/vlm/__init__.py +13 -0
  89. paddlex/inference/models/common/vlm/activations.py +189 -0
  90. paddlex/inference/models/common/vlm/bert_padding.py +127 -0
  91. paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
  92. paddlex/inference/models/common/vlm/distributed.py +229 -0
  93. paddlex/inference/models/common/vlm/flash_attn_utils.py +119 -0
  94. paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
  95. paddlex/inference/models/common/vlm/generation/__init__.py +34 -0
  96. paddlex/inference/models/common/vlm/generation/configuration_utils.py +533 -0
  97. paddlex/inference/models/common/vlm/generation/logits_process.py +730 -0
  98. paddlex/inference/models/common/vlm/generation/stopping_criteria.py +106 -0
  99. paddlex/inference/models/common/vlm/generation/utils.py +2162 -0
  100. paddlex/inference/models/common/vlm/transformers/__init__.py +16 -0
  101. paddlex/inference/models/common/vlm/transformers/configuration_utils.py +1037 -0
  102. paddlex/inference/models/common/vlm/transformers/conversion_utils.py +408 -0
  103. paddlex/inference/models/common/vlm/transformers/model_outputs.py +1612 -0
  104. paddlex/inference/models/common/vlm/transformers/model_utils.py +2014 -0
  105. paddlex/inference/models/common/vlm/transformers/utils.py +178 -0
  106. paddlex/inference/models/common/vlm/utils.py +109 -0
  107. paddlex/inference/models/doc_vlm/__init__.py +15 -0
  108. paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
  109. paddlex/inference/models/doc_vlm/modeling/__init__.py +17 -0
  110. paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
  111. paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
  112. paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +2495 -0
  113. paddlex/inference/models/doc_vlm/predictor.py +253 -0
  114. paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
  115. paddlex/inference/models/doc_vlm/processors/__init__.py +17 -0
  116. paddlex/inference/models/doc_vlm/processors/common.py +561 -0
  117. paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
  118. paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +543 -0
  119. paddlex/inference/models/doc_vlm/result.py +21 -0
  120. paddlex/inference/models/face_feature/__init__.py +1 -1
  121. paddlex/inference/models/face_feature/predictor.py +2 -1
  122. paddlex/inference/models/formula_recognition/__init__.py +1 -1
  123. paddlex/inference/models/formula_recognition/predictor.py +18 -28
  124. paddlex/inference/models/formula_recognition/processors.py +126 -97
  125. paddlex/inference/models/formula_recognition/result.py +43 -35
  126. paddlex/inference/models/image_classification/__init__.py +1 -1
  127. paddlex/inference/models/image_classification/predictor.py +9 -19
  128. paddlex/inference/models/image_classification/processors.py +4 -2
  129. paddlex/inference/models/image_classification/result.py +4 -3
  130. paddlex/inference/models/image_feature/__init__.py +1 -1
  131. paddlex/inference/models/image_feature/predictor.py +9 -19
  132. paddlex/inference/models/image_feature/processors.py +7 -5
  133. paddlex/inference/models/image_feature/result.py +2 -3
  134. paddlex/inference/models/image_multilabel_classification/__init__.py +1 -1
  135. paddlex/inference/models/image_multilabel_classification/predictor.py +7 -6
  136. paddlex/inference/models/image_multilabel_classification/processors.py +6 -2
  137. paddlex/inference/models/image_multilabel_classification/result.py +4 -3
  138. paddlex/inference/models/image_unwarping/__init__.py +1 -1
  139. paddlex/inference/models/image_unwarping/predictor.py +8 -16
  140. paddlex/inference/models/image_unwarping/processors.py +6 -2
  141. paddlex/inference/models/image_unwarping/result.py +4 -2
  142. paddlex/inference/models/instance_segmentation/__init__.py +1 -1
  143. paddlex/inference/models/instance_segmentation/predictor.py +7 -15
  144. paddlex/inference/models/instance_segmentation/processors.py +4 -7
  145. paddlex/inference/models/instance_segmentation/result.py +11 -10
  146. paddlex/inference/models/keypoint_detection/__init__.py +1 -1
  147. paddlex/inference/models/keypoint_detection/predictor.py +5 -3
  148. paddlex/inference/models/keypoint_detection/processors.py +11 -3
  149. paddlex/inference/models/keypoint_detection/result.py +9 -4
  150. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
  151. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/predictor.py +15 -26
  152. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/processors.py +26 -14
  153. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/result.py +15 -12
  154. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/visualizer_3d.py +77 -39
  155. paddlex/inference/models/multilingual_speech_recognition/__init__.py +1 -1
  156. paddlex/inference/models/multilingual_speech_recognition/predictor.py +11 -15
  157. paddlex/inference/models/multilingual_speech_recognition/processors.py +45 -53
  158. paddlex/inference/models/multilingual_speech_recognition/result.py +1 -1
  159. paddlex/inference/models/object_detection/__init__.py +1 -1
  160. paddlex/inference/models/object_detection/predictor.py +8 -12
  161. paddlex/inference/models/object_detection/processors.py +63 -33
  162. paddlex/inference/models/object_detection/result.py +5 -4
  163. paddlex/inference/models/object_detection/utils.py +3 -1
  164. paddlex/inference/models/open_vocabulary_detection/__init__.py +1 -1
  165. paddlex/inference/models/open_vocabulary_detection/predictor.py +31 -14
  166. paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +3 -2
  167. paddlex/inference/models/open_vocabulary_detection/processors/common.py +114 -0
  168. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +19 -8
  169. paddlex/inference/models/open_vocabulary_detection/processors/yoloworld_processors.py +209 -0
  170. paddlex/inference/models/open_vocabulary_segmentation/__init__.py +1 -1
  171. paddlex/inference/models/open_vocabulary_segmentation/predictor.py +6 -13
  172. paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +1 -1
  173. paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +12 -12
  174. paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +1 -1
  175. paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +11 -9
  176. paddlex/inference/models/semantic_segmentation/__init__.py +1 -1
  177. paddlex/inference/models/semantic_segmentation/predictor.py +9 -18
  178. paddlex/inference/models/semantic_segmentation/processors.py +11 -8
  179. paddlex/inference/models/semantic_segmentation/result.py +4 -3
  180. paddlex/inference/models/table_structure_recognition/__init__.py +1 -1
  181. paddlex/inference/models/table_structure_recognition/predictor.py +8 -18
  182. paddlex/inference/models/table_structure_recognition/processors.py +23 -29
  183. paddlex/inference/models/table_structure_recognition/result.py +8 -15
  184. paddlex/inference/models/text_detection/__init__.py +1 -1
  185. paddlex/inference/models/text_detection/predictor.py +24 -24
  186. paddlex/inference/models/text_detection/processors.py +116 -44
  187. paddlex/inference/models/text_detection/result.py +8 -13
  188. paddlex/inference/models/text_recognition/__init__.py +1 -1
  189. paddlex/inference/models/text_recognition/predictor.py +11 -19
  190. paddlex/inference/models/text_recognition/processors.py +27 -13
  191. paddlex/inference/models/text_recognition/result.py +3 -2
  192. paddlex/inference/models/ts_anomaly_detection/__init__.py +1 -1
  193. paddlex/inference/models/ts_anomaly_detection/predictor.py +12 -17
  194. paddlex/inference/models/ts_anomaly_detection/processors.py +6 -2
  195. paddlex/inference/models/ts_anomaly_detection/result.py +21 -10
  196. paddlex/inference/models/ts_classification/__init__.py +1 -1
  197. paddlex/inference/models/ts_classification/predictor.py +14 -27
  198. paddlex/inference/models/ts_classification/processors.py +7 -2
  199. paddlex/inference/models/ts_classification/result.py +21 -12
  200. paddlex/inference/models/ts_forecasting/__init__.py +1 -1
  201. paddlex/inference/models/ts_forecasting/predictor.py +13 -18
  202. paddlex/inference/models/ts_forecasting/processors.py +12 -3
  203. paddlex/inference/models/ts_forecasting/result.py +24 -11
  204. paddlex/inference/models/video_classification/__init__.py +1 -1
  205. paddlex/inference/models/video_classification/predictor.py +9 -15
  206. paddlex/inference/models/video_classification/processors.py +24 -24
  207. paddlex/inference/models/video_classification/result.py +7 -3
  208. paddlex/inference/models/video_detection/__init__.py +1 -1
  209. paddlex/inference/models/video_detection/predictor.py +8 -15
  210. paddlex/inference/models/video_detection/processors.py +24 -11
  211. paddlex/inference/models/video_detection/result.py +10 -5
  212. paddlex/inference/pipelines/__init__.py +48 -37
  213. paddlex/inference/pipelines/_parallel.py +172 -0
  214. paddlex/inference/pipelines/anomaly_detection/__init__.py +1 -1
  215. paddlex/inference/pipelines/anomaly_detection/pipeline.py +29 -9
  216. paddlex/inference/pipelines/attribute_recognition/__init__.py +1 -1
  217. paddlex/inference/pipelines/attribute_recognition/pipeline.py +24 -9
  218. paddlex/inference/pipelines/attribute_recognition/result.py +10 -8
  219. paddlex/inference/pipelines/base.py +43 -13
  220. paddlex/inference/pipelines/components/__init__.py +14 -8
  221. paddlex/inference/pipelines/components/chat_server/__init__.py +1 -1
  222. paddlex/inference/pipelines/components/chat_server/base.py +2 -2
  223. paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +8 -8
  224. paddlex/inference/pipelines/components/common/__init__.py +5 -4
  225. paddlex/inference/pipelines/components/common/base_operator.py +2 -1
  226. paddlex/inference/pipelines/components/common/base_result.py +3 -2
  227. paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +1 -2
  228. paddlex/inference/pipelines/components/common/crop_image_regions.py +11 -5
  229. paddlex/inference/pipelines/components/common/seal_det_warp.py +44 -13
  230. paddlex/inference/pipelines/components/common/sort_boxes.py +4 -2
  231. paddlex/inference/pipelines/components/common/warp_image.py +50 -0
  232. paddlex/inference/pipelines/components/faisser.py +10 -5
  233. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +2 -2
  234. paddlex/inference/pipelines/components/prompt_engineering/base.py +2 -2
  235. paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +2 -1
  236. paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +2 -2
  237. paddlex/inference/pipelines/components/retriever/__init__.py +2 -2
  238. paddlex/inference/pipelines/components/retriever/base.py +18 -16
  239. paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +2 -2
  240. paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +87 -84
  241. paddlex/inference/pipelines/components/utils/__init__.py +1 -1
  242. paddlex/inference/pipelines/components/utils/mixin.py +7 -7
  243. paddlex/inference/pipelines/doc_preprocessor/__init__.py +1 -1
  244. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +70 -51
  245. paddlex/inference/pipelines/doc_preprocessor/result.py +5 -10
  246. paddlex/inference/pipelines/doc_understanding/__init__.py +15 -0
  247. paddlex/inference/pipelines/doc_understanding/pipeline.py +71 -0
  248. paddlex/inference/pipelines/face_recognition/__init__.py +1 -1
  249. paddlex/inference/pipelines/face_recognition/pipeline.py +3 -1
  250. paddlex/inference/pipelines/face_recognition/result.py +3 -2
  251. paddlex/inference/pipelines/formula_recognition/__init__.py +1 -1
  252. paddlex/inference/pipelines/formula_recognition/pipeline.py +137 -93
  253. paddlex/inference/pipelines/formula_recognition/result.py +20 -29
  254. paddlex/inference/pipelines/image_classification/__init__.py +1 -1
  255. paddlex/inference/pipelines/image_classification/pipeline.py +30 -11
  256. paddlex/inference/pipelines/image_multilabel_classification/__init__.py +1 -1
  257. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +31 -12
  258. paddlex/inference/pipelines/instance_segmentation/__init__.py +1 -1
  259. paddlex/inference/pipelines/instance_segmentation/pipeline.py +30 -9
  260. paddlex/inference/pipelines/keypoint_detection/__init__.py +1 -1
  261. paddlex/inference/pipelines/keypoint_detection/pipeline.py +30 -9
  262. paddlex/inference/pipelines/layout_parsing/__init__.py +1 -1
  263. paddlex/inference/pipelines/layout_parsing/pipeline.py +54 -56
  264. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +904 -261
  265. paddlex/inference/pipelines/layout_parsing/result.py +9 -21
  266. paddlex/inference/pipelines/layout_parsing/result_v2.py +525 -250
  267. paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
  268. paddlex/inference/pipelines/layout_parsing/utils.py +570 -2004
  269. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
  270. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
  271. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
  272. paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
  273. paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/pipeline.py +17 -10
  274. paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +1 -1
  275. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +17 -6
  276. paddlex/inference/pipelines/object_detection/__init__.py +1 -1
  277. paddlex/inference/pipelines/object_detection/pipeline.py +29 -9
  278. paddlex/inference/pipelines/ocr/__init__.py +1 -1
  279. paddlex/inference/pipelines/ocr/pipeline.py +151 -77
  280. paddlex/inference/pipelines/ocr/result.py +31 -24
  281. paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +1 -1
  282. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +17 -6
  283. paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +1 -1
  284. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +17 -6
  285. paddlex/inference/pipelines/pp_chatocr/__init__.py +1 -1
  286. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +14 -5
  287. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +22 -14
  288. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +34 -16
  289. paddlex/inference/pipelines/pp_shitu_v2/__init__.py +1 -1
  290. paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +12 -8
  291. paddlex/inference/pipelines/pp_shitu_v2/result.py +4 -4
  292. paddlex/inference/pipelines/rotated_object_detection/__init__.py +1 -1
  293. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +30 -9
  294. paddlex/inference/pipelines/seal_recognition/__init__.py +1 -1
  295. paddlex/inference/pipelines/seal_recognition/pipeline.py +127 -63
  296. paddlex/inference/pipelines/seal_recognition/result.py +4 -2
  297. paddlex/inference/pipelines/semantic_segmentation/__init__.py +1 -1
  298. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +30 -9
  299. paddlex/inference/pipelines/small_object_detection/__init__.py +1 -1
  300. paddlex/inference/pipelines/small_object_detection/pipeline.py +30 -9
  301. paddlex/inference/pipelines/table_recognition/__init__.py +1 -1
  302. paddlex/inference/pipelines/table_recognition/pipeline.py +61 -37
  303. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +668 -65
  304. paddlex/inference/pipelines/table_recognition/result.py +12 -10
  305. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +12 -8
  306. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +55 -37
  307. paddlex/inference/pipelines/table_recognition/utils.py +1 -1
  308. paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +1 -1
  309. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +16 -6
  310. paddlex/inference/pipelines/ts_classification/__init__.py +1 -1
  311. paddlex/inference/pipelines/ts_classification/pipeline.py +16 -6
  312. paddlex/inference/pipelines/ts_forecasting/__init__.py +1 -1
  313. paddlex/inference/pipelines/ts_forecasting/pipeline.py +16 -6
  314. paddlex/inference/pipelines/video_classification/__init__.py +1 -1
  315. paddlex/inference/pipelines/video_classification/pipeline.py +17 -6
  316. paddlex/inference/pipelines/video_detection/__init__.py +1 -1
  317. paddlex/inference/pipelines/video_detection/pipeline.py +20 -7
  318. paddlex/inference/serving/__init__.py +5 -1
  319. paddlex/inference/serving/basic_serving/__init__.py +1 -1
  320. paddlex/inference/serving/basic_serving/_app.py +31 -19
  321. paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +7 -4
  322. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +1 -1
  323. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +12 -4
  324. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +1 -1
  325. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +7 -2
  326. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +10 -7
  327. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +10 -7
  328. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_understanding.py +153 -0
  329. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +16 -13
  330. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +10 -7
  331. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +10 -7
  332. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +10 -7
  333. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +10 -7
  334. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +13 -7
  335. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +10 -8
  336. paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +10 -7
  337. paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +10 -7
  338. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +10 -7
  339. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +10 -7
  340. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +10 -7
  341. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +13 -7
  342. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +10 -7
  343. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -12
  344. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +17 -14
  345. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +16 -13
  346. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +16 -9
  347. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +10 -7
  348. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +10 -7
  349. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +10 -7
  350. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +10 -7
  351. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +11 -12
  352. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +14 -12
  353. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +10 -7
  354. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +10 -7
  355. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +10 -7
  356. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +10 -7
  357. paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +10 -7
  358. paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +10 -7
  359. paddlex/inference/serving/basic_serving/_server.py +9 -4
  360. paddlex/inference/serving/infra/__init__.py +1 -1
  361. paddlex/inference/serving/infra/config.py +1 -1
  362. paddlex/inference/serving/infra/models.py +13 -6
  363. paddlex/inference/serving/infra/storage.py +9 -4
  364. paddlex/inference/serving/infra/utils.py +54 -28
  365. paddlex/inference/serving/schemas/__init__.py +1 -1
  366. paddlex/inference/serving/schemas/anomaly_detection.py +1 -1
  367. paddlex/inference/serving/schemas/doc_preprocessor.py +1 -1
  368. paddlex/inference/serving/schemas/doc_understanding.py +78 -0
  369. paddlex/inference/serving/schemas/face_recognition.py +1 -1
  370. paddlex/inference/serving/schemas/formula_recognition.py +2 -2
  371. paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -1
  372. paddlex/inference/serving/schemas/image_classification.py +1 -1
  373. paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -1
  374. paddlex/inference/serving/schemas/instance_segmentation.py +1 -1
  375. paddlex/inference/serving/schemas/layout_parsing.py +2 -3
  376. paddlex/inference/serving/schemas/m_3d_bev_detection.py +1 -1
  377. paddlex/inference/serving/schemas/multilingual_speech_recognition.py +1 -1
  378. paddlex/inference/serving/schemas/object_detection.py +1 -1
  379. paddlex/inference/serving/schemas/ocr.py +1 -1
  380. paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -1
  381. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -1
  382. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -1
  383. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +2 -3
  384. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +3 -3
  385. paddlex/inference/serving/schemas/pp_shituv2.py +1 -1
  386. paddlex/inference/serving/schemas/pp_structurev3.py +11 -7
  387. paddlex/inference/serving/schemas/rotated_object_detection.py +1 -1
  388. paddlex/inference/serving/schemas/seal_recognition.py +2 -2
  389. paddlex/inference/serving/schemas/semantic_segmentation.py +1 -1
  390. paddlex/inference/serving/schemas/shared/__init__.py +1 -1
  391. paddlex/inference/serving/schemas/shared/classification.py +1 -1
  392. paddlex/inference/serving/schemas/shared/image_segmentation.py +1 -1
  393. paddlex/inference/serving/schemas/shared/object_detection.py +1 -1
  394. paddlex/inference/serving/schemas/shared/ocr.py +1 -1
  395. paddlex/inference/serving/schemas/small_object_detection.py +1 -1
  396. paddlex/inference/serving/schemas/table_recognition.py +3 -7
  397. paddlex/inference/serving/schemas/table_recognition_v2.py +6 -7
  398. paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -1
  399. paddlex/inference/serving/schemas/ts_classification.py +1 -1
  400. paddlex/inference/serving/schemas/ts_forecast.py +1 -1
  401. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -1
  402. paddlex/inference/serving/schemas/video_classification.py +1 -1
  403. paddlex/inference/serving/schemas/video_detection.py +1 -1
  404. paddlex/inference/utils/__init__.py +1 -1
  405. paddlex/inference/utils/benchmark.py +332 -179
  406. paddlex/inference/utils/color_map.py +1 -1
  407. paddlex/inference/utils/get_pipeline_path.py +1 -1
  408. paddlex/inference/utils/hpi.py +258 -0
  409. paddlex/inference/utils/hpi_model_info_collection.json +2331 -0
  410. paddlex/inference/utils/io/__init__.py +11 -11
  411. paddlex/inference/utils/io/readers.py +31 -27
  412. paddlex/inference/utils/io/style.py +21 -14
  413. paddlex/inference/utils/io/tablepyxl.py +13 -5
  414. paddlex/inference/utils/io/writers.py +9 -10
  415. paddlex/inference/utils/mkldnn_blocklist.py +25 -0
  416. paddlex/inference/utils/model_paths.py +48 -0
  417. paddlex/inference/utils/{new_ir_blacklist.py → new_ir_blocklist.py} +1 -2
  418. paddlex/inference/utils/official_models.py +278 -262
  419. paddlex/inference/utils/pp_option.py +184 -92
  420. paddlex/inference/utils/trt_blocklist.py +43 -0
  421. paddlex/inference/utils/trt_config.py +420 -0
  422. paddlex/model.py +30 -12
  423. paddlex/modules/__init__.py +57 -80
  424. paddlex/modules/anomaly_detection/__init__.py +2 -2
  425. paddlex/modules/anomaly_detection/dataset_checker/__init__.py +2 -3
  426. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
  427. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +6 -3
  428. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/check_dataset.py +8 -4
  429. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +7 -4
  430. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/split_dataset.py +2 -2
  431. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  432. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/visualizer.py +7 -2
  433. paddlex/modules/anomaly_detection/evaluator.py +3 -3
  434. paddlex/modules/anomaly_detection/exportor.py +1 -1
  435. paddlex/modules/anomaly_detection/model_list.py +1 -1
  436. paddlex/modules/anomaly_detection/trainer.py +3 -4
  437. paddlex/modules/base/__init__.py +5 -5
  438. paddlex/modules/base/build_model.py +1 -2
  439. paddlex/modules/base/dataset_checker/__init__.py +2 -2
  440. paddlex/modules/base/dataset_checker/dataset_checker.py +4 -4
  441. paddlex/modules/base/dataset_checker/utils.py +1 -3
  442. paddlex/modules/base/evaluator.py +13 -13
  443. paddlex/modules/base/exportor.py +12 -13
  444. paddlex/modules/base/trainer.py +21 -11
  445. paddlex/modules/base/utils/__init__.py +13 -0
  446. paddlex/modules/base/utils/cinn_setting.py +89 -0
  447. paddlex/modules/base/utils/coco_eval.py +94 -0
  448. paddlex/modules/base/utils/topk_eval.py +118 -0
  449. paddlex/modules/doc_vlm/__init__.py +18 -0
  450. paddlex/modules/doc_vlm/dataset_checker.py +29 -0
  451. paddlex/modules/doc_vlm/evaluator.py +29 -0
  452. paddlex/modules/doc_vlm/exportor.py +29 -0
  453. paddlex/modules/doc_vlm/model_list.py +16 -0
  454. paddlex/modules/doc_vlm/trainer.py +41 -0
  455. paddlex/modules/face_recognition/__init__.py +2 -2
  456. paddlex/modules/face_recognition/dataset_checker/__init__.py +2 -2
  457. paddlex/modules/face_recognition/dataset_checker/dataset_src/__init__.py +1 -1
  458. paddlex/modules/face_recognition/dataset_checker/dataset_src/check_dataset.py +3 -5
  459. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
  460. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  461. paddlex/modules/face_recognition/evaluator.py +3 -3
  462. paddlex/modules/face_recognition/exportor.py +1 -1
  463. paddlex/modules/face_recognition/model_list.py +1 -1
  464. paddlex/modules/face_recognition/trainer.py +1 -1
  465. paddlex/modules/formula_recognition/__init__.py +2 -2
  466. paddlex/modules/formula_recognition/dataset_checker/__init__.py +3 -3
  467. paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  468. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
  469. paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +2 -6
  470. paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
  471. paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
  472. paddlex/modules/formula_recognition/evaluator.py +6 -3
  473. paddlex/modules/formula_recognition/exportor.py +1 -1
  474. paddlex/modules/formula_recognition/model_list.py +4 -1
  475. paddlex/modules/formula_recognition/trainer.py +5 -3
  476. paddlex/modules/general_recognition/__init__.py +2 -2
  477. paddlex/modules/general_recognition/dataset_checker/__init__.py +2 -2
  478. paddlex/modules/general_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  479. paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +7 -9
  480. paddlex/modules/general_recognition/dataset_checker/dataset_src/check_dataset.py +4 -5
  481. paddlex/modules/general_recognition/dataset_checker/dataset_src/convert_dataset.py +6 -5
  482. paddlex/modules/general_recognition/dataset_checker/dataset_src/split_dataset.py +1 -1
  483. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
  484. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  485. paddlex/modules/general_recognition/evaluator.py +2 -2
  486. paddlex/modules/general_recognition/exportor.py +1 -1
  487. paddlex/modules/general_recognition/model_list.py +1 -1
  488. paddlex/modules/general_recognition/trainer.py +1 -1
  489. paddlex/modules/image_classification/__init__.py +2 -2
  490. paddlex/modules/image_classification/dataset_checker/__init__.py +2 -2
  491. paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py +2 -2
  492. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  493. paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
  494. paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py +4 -4
  495. paddlex/modules/image_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  496. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
  497. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  498. paddlex/modules/image_classification/evaluator.py +3 -3
  499. paddlex/modules/image_classification/exportor.py +1 -1
  500. paddlex/modules/image_classification/model_list.py +2 -1
  501. paddlex/modules/image_classification/trainer.py +3 -3
  502. paddlex/modules/image_unwarping/__init__.py +1 -1
  503. paddlex/modules/image_unwarping/model_list.py +1 -1
  504. paddlex/modules/instance_segmentation/__init__.py +2 -2
  505. paddlex/modules/instance_segmentation/dataset_checker/__init__.py +2 -3
  506. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
  507. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +9 -5
  508. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/check_dataset.py +8 -5
  509. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/convert_dataset.py +8 -8
  510. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/split_dataset.py +7 -4
  511. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
  512. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +10 -8
  513. paddlex/modules/instance_segmentation/evaluator.py +2 -2
  514. paddlex/modules/instance_segmentation/exportor.py +1 -1
  515. paddlex/modules/instance_segmentation/model_list.py +1 -1
  516. paddlex/modules/instance_segmentation/trainer.py +1 -1
  517. paddlex/modules/keypoint_detection/__init__.py +2 -2
  518. paddlex/modules/keypoint_detection/dataset_checker/__init__.py +2 -2
  519. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +1 -1
  520. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
  521. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  522. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +8 -3
  523. paddlex/modules/keypoint_detection/evaluator.py +2 -2
  524. paddlex/modules/keypoint_detection/exportor.py +1 -1
  525. paddlex/modules/keypoint_detection/model_list.py +1 -1
  526. paddlex/modules/keypoint_detection/trainer.py +2 -2
  527. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/__init__.py +2 -2
  528. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/__init__.py +3 -3
  529. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/__init__.py +2 -2
  530. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/analyse_dataset.py +8 -8
  531. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/check_dataset.py +1 -2
  532. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/evaluator.py +3 -3
  533. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/exportor.py +1 -1
  534. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/model_list.py +1 -1
  535. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/trainer.py +5 -7
  536. paddlex/modules/multilabel_classification/__init__.py +2 -2
  537. paddlex/modules/multilabel_classification/dataset_checker/__init__.py +2 -2
  538. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/__init__.py +2 -2
  539. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  540. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
  541. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/convert_dataset.py +10 -7
  542. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  543. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
  544. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +1 -5
  545. paddlex/modules/multilabel_classification/evaluator.py +3 -3
  546. paddlex/modules/multilabel_classification/exportor.py +1 -1
  547. paddlex/modules/multilabel_classification/model_list.py +1 -1
  548. paddlex/modules/multilabel_classification/trainer.py +3 -3
  549. paddlex/modules/multilingual_speech_recognition/__init__.py +2 -2
  550. paddlex/modules/multilingual_speech_recognition/dataset_checker.py +3 -3
  551. paddlex/modules/multilingual_speech_recognition/evaluator.py +3 -3
  552. paddlex/modules/multilingual_speech_recognition/exportor.py +3 -3
  553. paddlex/modules/multilingual_speech_recognition/model_list.py +1 -1
  554. paddlex/modules/multilingual_speech_recognition/trainer.py +7 -5
  555. paddlex/modules/object_detection/__init__.py +2 -2
  556. paddlex/modules/object_detection/dataset_checker/__init__.py +2 -11
  557. paddlex/modules/object_detection/dataset_checker/dataset_src/__init__.py +2 -2
  558. paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +10 -8
  559. paddlex/modules/object_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
  560. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +17 -12
  561. paddlex/modules/object_detection/dataset_checker/dataset_src/split_dataset.py +8 -4
  562. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  563. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +9 -8
  564. paddlex/modules/object_detection/evaluator.py +11 -6
  565. paddlex/modules/object_detection/exportor.py +1 -1
  566. paddlex/modules/object_detection/model_list.py +3 -1
  567. paddlex/modules/object_detection/trainer.py +4 -5
  568. paddlex/modules/open_vocabulary_detection/__init__.py +2 -2
  569. paddlex/modules/open_vocabulary_detection/dataset_checker.py +3 -3
  570. paddlex/modules/open_vocabulary_detection/evaluator.py +3 -3
  571. paddlex/modules/open_vocabulary_detection/exportor.py +3 -3
  572. paddlex/modules/open_vocabulary_detection/model_list.py +2 -4
  573. paddlex/modules/open_vocabulary_detection/trainer.py +7 -5
  574. paddlex/modules/open_vocabulary_segmentation/__init__.py +2 -2
  575. paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +3 -3
  576. paddlex/modules/open_vocabulary_segmentation/evaluator.py +3 -3
  577. paddlex/modules/open_vocabulary_segmentation/exportor.py +3 -3
  578. paddlex/modules/open_vocabulary_segmentation/model_list.py +1 -1
  579. paddlex/modules/open_vocabulary_segmentation/trainer.py +7 -5
  580. paddlex/modules/semantic_segmentation/__init__.py +2 -2
  581. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +2 -3
  582. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
  583. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/analyse_dataset.py +6 -3
  584. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/check_dataset.py +2 -2
  585. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/convert_dataset.py +7 -4
  586. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/split_dataset.py +2 -2
  587. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
  588. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/visualizer.py +6 -2
  589. paddlex/modules/semantic_segmentation/evaluator.py +3 -3
  590. paddlex/modules/semantic_segmentation/exportor.py +1 -1
  591. paddlex/modules/semantic_segmentation/model_list.py +1 -1
  592. paddlex/modules/semantic_segmentation/trainer.py +3 -4
  593. paddlex/modules/table_recognition/__init__.py +2 -2
  594. paddlex/modules/table_recognition/dataset_checker/__init__.py +5 -5
  595. paddlex/modules/table_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  596. paddlex/modules/table_recognition/dataset_checker/dataset_src/analyse_dataset.py +3 -2
  597. paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +8 -7
  598. paddlex/modules/table_recognition/dataset_checker/dataset_src/split_dataset.py +2 -1
  599. paddlex/modules/table_recognition/evaluator.py +3 -3
  600. paddlex/modules/table_recognition/exportor.py +1 -1
  601. paddlex/modules/table_recognition/model_list.py +1 -1
  602. paddlex/modules/table_recognition/trainer.py +2 -5
  603. paddlex/modules/text_detection/__init__.py +2 -2
  604. paddlex/modules/text_detection/dataset_checker/__init__.py +4 -6
  605. paddlex/modules/text_detection/dataset_checker/dataset_src/__init__.py +2 -2
  606. paddlex/modules/text_detection/dataset_checker/dataset_src/analyse_dataset.py +12 -9
  607. paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +3 -3
  608. paddlex/modules/text_detection/dataset_checker/dataset_src/split_dataset.py +3 -3
  609. paddlex/modules/text_detection/evaluator.py +3 -3
  610. paddlex/modules/text_detection/exportor.py +1 -1
  611. paddlex/modules/text_detection/model_list.py +3 -1
  612. paddlex/modules/text_detection/trainer.py +2 -5
  613. paddlex/modules/text_recognition/__init__.py +2 -2
  614. paddlex/modules/text_recognition/dataset_checker/__init__.py +4 -5
  615. paddlex/modules/text_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  616. paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
  617. paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +2 -5
  618. paddlex/modules/text_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
  619. paddlex/modules/text_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
  620. paddlex/modules/text_recognition/evaluator.py +3 -3
  621. paddlex/modules/text_recognition/exportor.py +1 -1
  622. paddlex/modules/text_recognition/model_list.py +3 -1
  623. paddlex/modules/text_recognition/trainer.py +2 -3
  624. paddlex/modules/ts_anomaly_detection/__init__.py +2 -2
  625. paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +4 -5
  626. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
  627. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +1 -9
  628. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/check_dataset.py +2 -2
  629. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -6
  630. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/split_dataset.py +4 -4
  631. paddlex/modules/ts_anomaly_detection/evaluator.py +3 -3
  632. paddlex/modules/ts_anomaly_detection/exportor.py +2 -3
  633. paddlex/modules/ts_anomaly_detection/model_list.py +1 -1
  634. paddlex/modules/ts_anomaly_detection/trainer.py +8 -8
  635. paddlex/modules/ts_classification/__init__.py +2 -2
  636. paddlex/modules/ts_classification/dataset_checker/__init__.py +4 -5
  637. paddlex/modules/ts_classification/dataset_checker/dataset_src/__init__.py +2 -2
  638. paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -5
  639. paddlex/modules/ts_classification/dataset_checker/dataset_src/check_dataset.py +2 -2
  640. paddlex/modules/ts_classification/dataset_checker/dataset_src/convert_dataset.py +2 -6
  641. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +5 -5
  642. paddlex/modules/ts_classification/evaluator.py +3 -3
  643. paddlex/modules/ts_classification/exportor.py +2 -3
  644. paddlex/modules/ts_classification/model_list.py +1 -1
  645. paddlex/modules/ts_classification/trainer.py +7 -7
  646. paddlex/modules/ts_forecast/__init__.py +2 -2
  647. paddlex/modules/ts_forecast/dataset_checker/__init__.py +4 -5
  648. paddlex/modules/ts_forecast/dataset_checker/dataset_src/__init__.py +2 -2
  649. paddlex/modules/ts_forecast/dataset_checker/dataset_src/analyse_dataset.py +1 -9
  650. paddlex/modules/ts_forecast/dataset_checker/dataset_src/check_dataset.py +2 -2
  651. paddlex/modules/ts_forecast/dataset_checker/dataset_src/convert_dataset.py +2 -6
  652. paddlex/modules/ts_forecast/dataset_checker/dataset_src/split_dataset.py +4 -4
  653. paddlex/modules/ts_forecast/evaluator.py +3 -3
  654. paddlex/modules/ts_forecast/exportor.py +2 -3
  655. paddlex/modules/ts_forecast/model_list.py +1 -1
  656. paddlex/modules/ts_forecast/trainer.py +7 -7
  657. paddlex/modules/video_classification/__init__.py +2 -2
  658. paddlex/modules/video_classification/dataset_checker/__init__.py +2 -2
  659. paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +2 -2
  660. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +9 -9
  661. paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +2 -3
  662. paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  663. paddlex/modules/video_classification/evaluator.py +3 -3
  664. paddlex/modules/video_classification/exportor.py +1 -1
  665. paddlex/modules/video_classification/model_list.py +1 -1
  666. paddlex/modules/video_classification/trainer.py +3 -3
  667. paddlex/modules/video_detection/__init__.py +2 -2
  668. paddlex/modules/video_detection/dataset_checker/__init__.py +2 -2
  669. paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +2 -2
  670. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  671. paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +3 -5
  672. paddlex/modules/video_detection/evaluator.py +3 -3
  673. paddlex/modules/video_detection/exportor.py +1 -1
  674. paddlex/modules/video_detection/model_list.py +1 -1
  675. paddlex/modules/video_detection/trainer.py +3 -3
  676. paddlex/ops/__init__.py +7 -4
  677. paddlex/ops/iou3d_nms/iou3d_cpu.cpp +8 -6
  678. paddlex/ops/iou3d_nms/iou3d_cpu.h +3 -2
  679. paddlex/ops/iou3d_nms/iou3d_nms.cpp +8 -6
  680. paddlex/ops/iou3d_nms/iou3d_nms.h +6 -4
  681. paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +24 -18
  682. paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +9 -7
  683. paddlex/ops/setup.py +3 -3
  684. paddlex/ops/voxel/voxelize_op.cc +22 -19
  685. paddlex/ops/voxel/voxelize_op.cu +25 -25
  686. paddlex/paddlex_cli.py +104 -87
  687. paddlex/repo_apis/Paddle3D_api/__init__.py +1 -1
  688. paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +1 -1
  689. paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +1 -1
  690. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +6 -6
  691. paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +2 -2
  692. paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +1 -1
  693. paddlex/repo_apis/Paddle3D_api/pp3d_config.py +3 -2
  694. paddlex/repo_apis/PaddleClas_api/__init__.py +1 -1
  695. paddlex/repo_apis/PaddleClas_api/cls/__init__.py +3 -3
  696. paddlex/repo_apis/PaddleClas_api/cls/config.py +5 -4
  697. paddlex/repo_apis/PaddleClas_api/cls/model.py +4 -4
  698. paddlex/repo_apis/PaddleClas_api/cls/register.py +12 -3
  699. paddlex/repo_apis/PaddleClas_api/cls/runner.py +2 -3
  700. paddlex/repo_apis/PaddleClas_api/shitu_rec/__init__.py +2 -2
  701. paddlex/repo_apis/PaddleClas_api/shitu_rec/config.py +2 -2
  702. paddlex/repo_apis/PaddleClas_api/shitu_rec/model.py +1 -4
  703. paddlex/repo_apis/PaddleClas_api/shitu_rec/register.py +2 -2
  704. paddlex/repo_apis/PaddleClas_api/shitu_rec/runner.py +1 -6
  705. paddlex/repo_apis/PaddleDetection_api/__init__.py +2 -2
  706. paddlex/repo_apis/PaddleDetection_api/config_helper.py +3 -3
  707. paddlex/repo_apis/PaddleDetection_api/instance_seg/__init__.py +2 -2
  708. paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +2 -3
  709. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +4 -4
  710. paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py +2 -3
  711. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +2 -3
  712. paddlex/repo_apis/PaddleDetection_api/object_det/__init__.py +3 -3
  713. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +5 -4
  714. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +6 -7
  715. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +26 -1
  716. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +32 -3
  717. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +2 -3
  718. paddlex/repo_apis/PaddleNLP_api/__init__.py +1 -1
  719. paddlex/repo_apis/PaddleOCR_api/__init__.py +4 -3
  720. paddlex/repo_apis/PaddleOCR_api/config_utils.py +1 -1
  721. paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +1 -1
  722. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +7 -6
  723. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +9 -13
  724. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +29 -3
  725. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +2 -3
  726. paddlex/repo_apis/PaddleOCR_api/table_rec/__init__.py +1 -1
  727. paddlex/repo_apis/PaddleOCR_api/table_rec/config.py +1 -1
  728. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +4 -4
  729. paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +2 -3
  730. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +3 -3
  731. paddlex/repo_apis/PaddleOCR_api/text_det/__init__.py +1 -1
  732. paddlex/repo_apis/PaddleOCR_api/text_det/config.py +1 -1
  733. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +4 -4
  734. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +20 -3
  735. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +3 -3
  736. paddlex/repo_apis/PaddleOCR_api/text_rec/__init__.py +1 -1
  737. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +7 -6
  738. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +9 -13
  739. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +20 -3
  740. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +2 -3
  741. paddlex/repo_apis/PaddleSeg_api/__init__.py +1 -1
  742. paddlex/repo_apis/PaddleSeg_api/base_seg_config.py +2 -2
  743. paddlex/repo_apis/PaddleSeg_api/seg/__init__.py +1 -1
  744. paddlex/repo_apis/PaddleSeg_api/seg/config.py +3 -6
  745. paddlex/repo_apis/PaddleSeg_api/seg/model.py +6 -6
  746. paddlex/repo_apis/PaddleSeg_api/seg/register.py +2 -3
  747. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +2 -3
  748. paddlex/repo_apis/PaddleTS_api/__init__.py +4 -3
  749. paddlex/repo_apis/PaddleTS_api/ts_ad/__init__.py +1 -1
  750. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +5 -6
  751. paddlex/repo_apis/PaddleTS_api/ts_ad/register.py +2 -2
  752. paddlex/repo_apis/PaddleTS_api/ts_ad/runner.py +2 -2
  753. paddlex/repo_apis/PaddleTS_api/ts_base/__init__.py +1 -1
  754. paddlex/repo_apis/PaddleTS_api/ts_base/config.py +2 -4
  755. paddlex/repo_apis/PaddleTS_api/ts_base/model.py +4 -4
  756. paddlex/repo_apis/PaddleTS_api/ts_base/runner.py +2 -2
  757. paddlex/repo_apis/PaddleTS_api/ts_cls/__init__.py +1 -1
  758. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +4 -5
  759. paddlex/repo_apis/PaddleTS_api/ts_cls/register.py +2 -2
  760. paddlex/repo_apis/PaddleTS_api/ts_cls/runner.py +2 -2
  761. paddlex/repo_apis/PaddleTS_api/ts_fc/__init__.py +1 -1
  762. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +6 -7
  763. paddlex/repo_apis/PaddleTS_api/ts_fc/register.py +1 -1
  764. paddlex/repo_apis/PaddleVideo_api/__init__.py +1 -1
  765. paddlex/repo_apis/PaddleVideo_api/config_utils.py +1 -1
  766. paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +3 -3
  767. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +5 -4
  768. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +4 -4
  769. paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +2 -3
  770. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +2 -3
  771. paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +3 -3
  772. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +5 -4
  773. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +5 -5
  774. paddlex/repo_apis/PaddleVideo_api/video_det/register.py +2 -3
  775. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +2 -3
  776. paddlex/repo_apis/__init__.py +1 -1
  777. paddlex/repo_apis/base/__init__.py +4 -5
  778. paddlex/repo_apis/base/config.py +3 -4
  779. paddlex/repo_apis/base/model.py +11 -19
  780. paddlex/repo_apis/base/register.py +1 -1
  781. paddlex/repo_apis/base/runner.py +11 -12
  782. paddlex/repo_apis/base/utils/__init__.py +1 -1
  783. paddlex/repo_apis/base/utils/arg.py +1 -1
  784. paddlex/repo_apis/base/utils/subprocess.py +1 -1
  785. paddlex/repo_manager/__init__.py +2 -9
  786. paddlex/repo_manager/core.py +12 -30
  787. paddlex/repo_manager/meta.py +41 -31
  788. paddlex/repo_manager/repo.py +171 -161
  789. paddlex/repo_manager/utils.py +13 -224
  790. paddlex/utils/__init__.py +1 -1
  791. paddlex/utils/cache.py +8 -10
  792. paddlex/utils/config.py +6 -5
  793. paddlex/utils/{custom_device_whitelist.py → custom_device_list.py} +53 -199
  794. paddlex/utils/deps.py +249 -0
  795. paddlex/utils/device.py +87 -36
  796. paddlex/utils/download.py +4 -4
  797. paddlex/utils/env.py +37 -7
  798. paddlex/utils/errors/__init__.py +1 -1
  799. paddlex/utils/errors/dataset_checker.py +1 -1
  800. paddlex/utils/errors/others.py +2 -16
  801. paddlex/utils/file_interface.py +4 -5
  802. paddlex/utils/flags.py +17 -12
  803. paddlex/utils/fonts/__init__.py +36 -5
  804. paddlex/utils/func_register.py +1 -1
  805. paddlex/utils/install.py +87 -0
  806. paddlex/utils/interactive_get_pipeline.py +3 -3
  807. paddlex/utils/lazy_loader.py +3 -3
  808. paddlex/utils/logging.py +10 -1
  809. paddlex/utils/misc.py +6 -6
  810. paddlex/utils/pipeline_arguments.py +15 -7
  811. paddlex/utils/result_saver.py +4 -5
  812. paddlex/utils/subclass_register.py +2 -4
  813. paddlex/version.py +2 -1
  814. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/METADATA +237 -102
  815. paddlex-3.0.1.dist-info/RECORD +1095 -0
  816. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
  817. paddlex/inference/models/base/predictor/basic_predictor.py +0 -139
  818. paddlex/paddle2onnx_requirements.txt +0 -1
  819. paddlex/repo_manager/requirements.txt +0 -21
  820. paddlex/serving_requirements.txt +0 -9
  821. paddlex-3.0.0rc0.dist-info/RECORD +0 -1015
  822. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
  823. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info/licenses}/LICENSE +0 -0
  824. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -13,33 +13,52 @@
13
13
  # limitations under the License.
14
14
  from __future__ import annotations
15
15
 
16
- from typing import Optional, Union, Tuple, Iterator
17
- import numpy as np
18
- import re
19
16
  import copy
17
+ import re
18
+ from typing import Any, Dict, List, Optional, Tuple, Union
19
+
20
+ import numpy as np
21
+ from PIL import Image
20
22
 
21
23
  from ....utils import logging
24
+ from ....utils.deps import pipeline_requires_extra
22
25
  from ...common.batch_sampler import ImageBatchSampler
23
26
  from ...common.reader import ReadImage
24
27
  from ...models.object_detection.result import DetResult
28
+ from ...utils.hpi import HPIConfig
25
29
  from ...utils.pp_option import PaddlePredictorOption
30
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
26
31
  from ..base import BasePipeline
27
32
  from ..ocr.result import OCRResult
28
- from .result_v2 import LayoutParsingResultV2
29
- from .utils import get_single_block_parsing_res, get_sub_regions_ocr_res, gather_imgs
30
-
31
-
32
- class LayoutParsingPipelineV2(BasePipeline):
33
+ from .result_v2 import LayoutParsingBlock, LayoutParsingRegion, LayoutParsingResultV2
34
+ from .setting import BLOCK_LABEL_MAP, BLOCK_SETTINGS, LINE_SETTINGS, REGION_SETTINGS
35
+ from .utils import (
36
+ caculate_bbox_area,
37
+ calculate_minimum_enclosing_bbox,
38
+ calculate_overlap_ratio,
39
+ convert_formula_res_to_ocr_format,
40
+ format_line,
41
+ gather_imgs,
42
+ get_bbox_intersection,
43
+ get_sub_regions_ocr_res,
44
+ group_boxes_into_lines,
45
+ remove_overlap_blocks,
46
+ shrink_supplement_region_bbox,
47
+ split_boxes_by_projection,
48
+ update_region_box,
49
+ )
50
+
51
+
52
+ class _LayoutParsingPipelineV2(BasePipeline):
33
53
  """Layout Parsing Pipeline V2"""
34
54
 
35
- entities = ["PP-StructureV3"]
36
-
37
55
  def __init__(
38
56
  self,
39
57
  config: dict,
40
58
  device: str = None,
41
59
  pp_option: PaddlePredictorOption = None,
42
60
  use_hpip: bool = False,
61
+ hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
43
62
  ) -> None:
44
63
  """Initializes the layout parsing pipeline.
45
64
 
@@ -47,19 +66,23 @@ class LayoutParsingPipelineV2(BasePipeline):
47
66
  config (Dict): Configuration dictionary containing various settings.
48
67
  device (str, optional): Device to run the predictions on. Defaults to None.
49
68
  pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
50
- use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
69
+ use_hpip (bool, optional): Whether to use the high-performance
70
+ inference plugin (HPIP) by default. Defaults to False.
71
+ hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
72
+ The default high-performance inference configuration dictionary.
73
+ Defaults to None.
51
74
  """
52
75
 
53
76
  super().__init__(
54
77
  device=device,
55
78
  pp_option=pp_option,
56
79
  use_hpip=use_hpip,
80
+ hpi_config=hpi_config,
57
81
  )
58
82
 
59
83
  self.inintial_predictor(config)
60
84
 
61
- self.batch_sampler = ImageBatchSampler(batch_size=1)
62
-
85
+ self.batch_sampler = ImageBatchSampler(batch_size=config.get("batch_size", 1))
63
86
  self.img_reader = ReadImage(format="BGR")
64
87
 
65
88
  def inintial_predictor(self, config: dict) -> None:
@@ -73,13 +96,20 @@ class LayoutParsingPipelineV2(BasePipeline):
73
96
  """
74
97
 
75
98
  self.use_doc_preprocessor = config.get("use_doc_preprocessor", True)
76
- self.use_general_ocr = config.get("use_general_ocr", True)
77
99
  self.use_table_recognition = config.get("use_table_recognition", True)
78
100
  self.use_seal_recognition = config.get("use_seal_recognition", True)
101
+ self.use_region_detection = config.get(
102
+ "use_region_detection",
103
+ True,
104
+ )
79
105
  self.use_formula_recognition = config.get(
80
106
  "use_formula_recognition",
81
107
  True,
82
108
  )
109
+ self.use_chart_recognition = config.get(
110
+ "use_chart_recognition",
111
+ False,
112
+ )
83
113
 
84
114
  if self.use_doc_preprocessor:
85
115
  doc_preprocessor_config = config.get("SubPipelines", {}).get(
@@ -91,6 +121,16 @@ class LayoutParsingPipelineV2(BasePipeline):
91
121
  self.doc_preprocessor_pipeline = self.create_pipeline(
92
122
  doc_preprocessor_config,
93
123
  )
124
+ if self.use_region_detection:
125
+ region_detection_config = config.get("SubModules", {}).get(
126
+ "RegionDetection",
127
+ {
128
+ "model_config_error": "config error for block_region_detection_model!"
129
+ },
130
+ )
131
+ self.region_detection_model = self.create_model(
132
+ region_detection_config,
133
+ )
94
134
 
95
135
  layout_det_config = config.get("SubModules", {}).get(
96
136
  "LayoutDetection",
@@ -113,14 +153,13 @@ class LayoutParsingPipelineV2(BasePipeline):
113
153
  layout_kwargs["layout_merge_bboxes_mode"] = layout_merge_bboxes_mode
114
154
  self.layout_det_model = self.create_model(layout_det_config, **layout_kwargs)
115
155
 
116
- if self.use_general_ocr or self.use_table_recognition:
117
- general_ocr_config = config.get("SubPipelines", {}).get(
118
- "GeneralOCR",
119
- {"pipeline_config_error": "config error for general_ocr_pipeline!"},
120
- )
121
- self.general_ocr_pipeline = self.create_pipeline(
122
- general_ocr_config,
123
- )
156
+ general_ocr_config = config.get("SubPipelines", {}).get(
157
+ "GeneralOCR",
158
+ {"pipeline_config_error": "config error for general_ocr_pipeline!"},
159
+ )
160
+ self.general_ocr_pipeline = self.create_pipeline(
161
+ general_ocr_config,
162
+ )
124
163
 
125
164
  if self.use_seal_recognition:
126
165
  seal_recognition_config = config.get("SubPipelines", {}).get(
@@ -155,6 +194,17 @@ class LayoutParsingPipelineV2(BasePipeline):
155
194
  formula_recognition_config,
156
195
  )
157
196
 
197
+ if self.use_chart_recognition:
198
+ chart_recognition_config = config.get("SubModules", {}).get(
199
+ "ChartRecognition",
200
+ {
201
+ "model_config_error": "config error for block_region_detection_model!"
202
+ },
203
+ )
204
+ self.chart_recognition_model = self.create_model(
205
+ chart_recognition_config,
206
+ )
207
+
158
208
  return
159
209
 
160
210
  def get_text_paragraphs_ocr_res(
@@ -199,12 +249,6 @@ class LayoutParsingPipelineV2(BasePipeline):
199
249
  )
200
250
  return False
201
251
 
202
- if input_params["use_general_ocr"] and not self.use_general_ocr:
203
- logging.error(
204
- "Set use_general_ocr, but the models for general OCR are not initialized.",
205
- )
206
- return False
207
-
208
252
  if input_params["use_seal_recognition"] and not self.use_seal_recognition:
209
253
  logging.error(
210
254
  "Set use_seal_recognition, but the models for seal recognition are not initialized.",
@@ -219,159 +263,643 @@ class LayoutParsingPipelineV2(BasePipeline):
219
263
 
220
264
  return True
221
265
 
222
- def get_layout_parsing_res(
266
+ def standardized_data(
223
267
  self,
224
268
  image: list,
269
+ region_det_res: DetResult,
225
270
  layout_det_res: DetResult,
226
271
  overall_ocr_res: OCRResult,
227
- table_res_list: list,
228
- seal_res_list: list,
229
272
  formula_res_list: list,
230
- imgs_in_doc: list,
231
- text_det_limit_side_len: Optional[int] = None,
232
- text_det_limit_type: Optional[str] = None,
233
- text_det_thresh: Optional[float] = None,
234
- text_det_box_thresh: Optional[float] = None,
235
- text_det_unclip_ratio: Optional[float] = None,
236
- text_rec_score_thresh: Optional[float] = None,
273
+ text_rec_model: Any,
274
+ text_rec_score_thresh: Union[float, None] = None,
237
275
  ) -> list:
238
276
  """
239
277
  Retrieves the layout parsing result based on the layout detection result, OCR result, and other recognition results.
240
278
  Args:
241
279
  image (list): The input image.
242
- layout_det_res (DetResult): The detection result containing the layout information of the document.
243
- overall_ocr_res (OCRResult): The overall OCR result containing text information.
244
- table_res_list (list): A list of table recognition results.
245
- seal_res_list (list): A list of seal recognition results.
280
+ overall_ocr_res (OCRResult): An object containing the overall OCR results, including detected text boxes and recognized text. The structure is expected to have:
281
+ - "input_img": The image on which OCR was performed.
282
+ - "dt_boxes": A list of detected text box coordinates.
283
+ - "rec_texts": A list of recognized text corresponding to the detected boxes.
284
+
285
+ layout_det_res (DetResult): An object containing the layout detection results, including detected layout boxes and their labels. The structure is expected to have:
286
+ - "boxes": A list of dictionaries with keys "coordinate" for box coordinates and "block_label" for the type of content.
287
+
288
+ table_res_list (list): A list of table detection results, where each item is a dictionary containing:
289
+ - "block_bbox": The bounding box of the table layout.
290
+ - "pred_html": The predicted HTML representation of the table.
291
+
246
292
  formula_res_list (list): A list of formula recognition results.
247
- text_det_limit_side_len (Optional[int], optional): The maximum side length of the text detection region. Defaults to None.
248
- text_det_limit_type (Optional[str], optional): The type of limit for the text detection region. Defaults to None.
249
- text_det_thresh (Optional[float], optional): The confidence threshold for text detection. Defaults to None.
250
- text_det_box_thresh (Optional[float], optional): The confidence threshold for text detection bounding boxes. Defaults to None
251
- text_det_unclip_ratio (Optional[float], optional): The unclip ratio for text detection. Defaults to None.
293
+ text_rec_model (Any): The text recognition model.
252
294
  text_rec_score_thresh (Optional[float], optional): The score threshold for text recognition. Defaults to None.
253
295
  Returns:
254
296
  list: A list of dictionaries representing the layout parsing result.
255
297
  """
298
+
256
299
  matched_ocr_dict = {}
257
- image = np.array(image)
300
+ region_to_block_map = {}
301
+ block_to_ocr_map = {}
258
302
  object_boxes = []
259
303
  footnote_list = []
260
- max_bottom_text_coordinate = 0
304
+ paragraph_title_list = []
305
+ bottom_text_y_max = 0
306
+ max_block_area = 0.0
307
+ doc_title_num = 0
308
+
309
+ base_region_bbox = [65535, 65535, 0, 0]
310
+ layout_det_res = remove_overlap_blocks(
311
+ layout_det_res,
312
+ threshold=0.5,
313
+ smaller=True,
314
+ )
315
+
316
+ # convert formula_res_list to OCRResult format
317
+ convert_formula_res_to_ocr_format(formula_res_list, overall_ocr_res)
261
318
 
262
- for object_box_idx, box_info in enumerate(layout_det_res["boxes"]):
319
+ # match layout boxes and ocr boxes and get some information for layout_order_config
320
+ for box_idx, box_info in enumerate(layout_det_res["boxes"]):
263
321
  box = box_info["coordinate"]
264
322
  label = box_info["label"].lower()
265
323
  object_boxes.append(box)
324
+ _, _, _, y2 = box
325
+
326
+ # update the region box and max_block_area according to the layout boxes
327
+ base_region_bbox = update_region_box(box, base_region_bbox)
328
+ max_block_area = max(max_block_area, caculate_bbox_area(box))
329
+
330
+ # update_layout_order_config_block_index(layout_order_config, label, box_idx)
266
331
 
267
332
  # set the label of footnote to text, when it is above the text boxes
268
333
  if label == "footnote":
269
- footnote_list.append(object_box_idx)
270
- if label == "text" and box[3] > max_bottom_text_coordinate:
271
- max_bottom_text_coordinate = box[3]
334
+ footnote_list.append(box_idx)
335
+ elif label == "paragraph_title":
336
+ paragraph_title_list.append(box_idx)
337
+ if label == "text":
338
+ bottom_text_y_max = max(y2, bottom_text_y_max)
339
+ if label == "doc_title":
340
+ doc_title_num += 1
272
341
 
273
342
  if label not in ["formula", "table", "seal"]:
274
- _, matched_idxs = get_sub_regions_ocr_res(
343
+ _, matched_idxes = get_sub_regions_ocr_res(
275
344
  overall_ocr_res, [box], return_match_idx=True
276
345
  )
277
- for matched_idx in matched_idxs:
346
+ block_to_ocr_map[box_idx] = matched_idxes
347
+ for matched_idx in matched_idxes:
278
348
  if matched_ocr_dict.get(matched_idx, None) is None:
279
- matched_ocr_dict[matched_idx] = [object_box_idx]
349
+ matched_ocr_dict[matched_idx] = [box_idx]
280
350
  else:
281
- matched_ocr_dict[matched_idx].append(object_box_idx)
351
+ matched_ocr_dict[matched_idx].append(box_idx)
282
352
 
353
+ # fix the footnote label
283
354
  for footnote_idx in footnote_list:
284
355
  if (
285
356
  layout_det_res["boxes"][footnote_idx]["coordinate"][3]
286
- < max_bottom_text_coordinate
357
+ < bottom_text_y_max
287
358
  ):
288
359
  layout_det_res["boxes"][footnote_idx]["label"] = "text"
289
360
 
290
- already_processed = set()
291
- for matched_idx, layout_box_ids in matched_ocr_dict.items():
292
- if len(layout_box_ids) <= 1:
293
- continue
294
-
295
- # one ocr is matched to multiple layout boxes, split the text into multiple lines
296
- for idx in layout_box_ids:
297
- if idx in already_processed:
298
- continue
299
-
300
- already_processed.add(idx)
301
- wht_im = np.ones(image.shape, dtype=image.dtype) * 255
302
- box = object_boxes[idx]
303
- x1, y1, x2, y2 = [int(i) for i in box]
304
- wht_im[y1:y2, x1:x2, :] = image[y1:y2, x1:x2, :]
305
- sub_ocr_res = next(
306
- self.general_ocr_pipeline(
307
- wht_im,
308
- text_det_limit_side_len=text_det_limit_side_len,
309
- text_det_limit_type=text_det_limit_type,
310
- text_det_thresh=text_det_thresh,
311
- text_det_box_thresh=text_det_box_thresh,
312
- text_det_unclip_ratio=text_det_unclip_ratio,
313
- text_rec_score_thresh=text_rec_score_thresh,
361
+ # check if there is only one paragraph title and without doc_title
362
+ only_one_paragraph_title = len(paragraph_title_list) == 1 and doc_title_num == 0
363
+ if only_one_paragraph_title:
364
+ paragraph_title_block_area = caculate_bbox_area(
365
+ layout_det_res["boxes"][paragraph_title_list[0]]["coordinate"]
366
+ )
367
+ title_area_max_block_threshold = BLOCK_SETTINGS.get(
368
+ "title_conversion_area_ratio_threshold", 0.3
369
+ )
370
+ if (
371
+ paragraph_title_block_area
372
+ > max_block_area * title_area_max_block_threshold
373
+ ):
374
+ layout_det_res["boxes"][paragraph_title_list[0]]["label"] = "doc_title"
375
+
376
+ # Replace the OCR information of the hurdles.
377
+ for overall_ocr_idx, layout_box_ids in matched_ocr_dict.items():
378
+ if len(layout_box_ids) > 1:
379
+ matched_no = 0
380
+ overall_ocr_box = copy.deepcopy(
381
+ overall_ocr_res["rec_boxes"][overall_ocr_idx]
382
+ )
383
+ overall_ocr_dt_poly = copy.deepcopy(
384
+ overall_ocr_res["dt_polys"][overall_ocr_idx]
385
+ )
386
+ for box_idx in layout_box_ids:
387
+ layout_box = layout_det_res["boxes"][box_idx]["coordinate"]
388
+ crop_box = get_bbox_intersection(overall_ocr_box, layout_box)
389
+ for ocr_idx in block_to_ocr_map[box_idx]:
390
+ ocr_box = overall_ocr_res["rec_boxes"][ocr_idx]
391
+ iou = calculate_overlap_ratio(ocr_box, crop_box, "small")
392
+ if iou > 0.8:
393
+ overall_ocr_res["rec_texts"][ocr_idx] = ""
394
+ x1, y1, x2, y2 = [int(i) for i in crop_box]
395
+ crop_img = np.array(image)[y1:y2, x1:x2]
396
+ crop_img_rec_res = list(text_rec_model([crop_img]))[0]
397
+ crop_img_dt_poly = get_bbox_intersection(
398
+ overall_ocr_dt_poly, layout_box, return_format="poly"
399
+ )
400
+ crop_img_rec_score = crop_img_rec_res["rec_score"]
401
+ crop_img_rec_text = crop_img_rec_res["rec_text"]
402
+ text_rec_score_thresh = (
403
+ text_rec_score_thresh
404
+ if text_rec_score_thresh is not None
405
+ else (self.general_ocr_pipeline.text_rec_score_thresh)
314
406
  )
407
+ if crop_img_rec_score >= text_rec_score_thresh:
408
+ matched_no += 1
409
+ if matched_no == 1:
410
+ # the first matched ocr be replaced by the first matched layout box
411
+ overall_ocr_res["dt_polys"][
412
+ overall_ocr_idx
413
+ ] = crop_img_dt_poly
414
+ overall_ocr_res["rec_boxes"][overall_ocr_idx] = crop_box
415
+ overall_ocr_res["rec_polys"][
416
+ overall_ocr_idx
417
+ ] = crop_img_dt_poly
418
+ overall_ocr_res["rec_scores"][
419
+ overall_ocr_idx
420
+ ] = crop_img_rec_score
421
+ overall_ocr_res["rec_texts"][
422
+ overall_ocr_idx
423
+ ] = crop_img_rec_text
424
+ else:
425
+ # the other matched ocr be appended to the overall ocr result
426
+ overall_ocr_res["dt_polys"].append(crop_img_dt_poly)
427
+ overall_ocr_res["rec_boxes"] = np.vstack(
428
+ (overall_ocr_res["rec_boxes"], crop_box)
429
+ )
430
+ overall_ocr_res["rec_polys"].append(crop_img_dt_poly)
431
+ overall_ocr_res["rec_scores"].append(crop_img_rec_score)
432
+ overall_ocr_res["rec_texts"].append(crop_img_rec_text)
433
+ overall_ocr_res["rec_labels"].append("text")
434
+ block_to_ocr_map[box_idx].remove(overall_ocr_idx)
435
+ block_to_ocr_map[box_idx].append(
436
+ len(overall_ocr_res["rec_texts"]) - 1
437
+ )
438
+
439
+ # use layout bbox to do ocr recognition when there is no matched ocr
440
+ for layout_box_idx, overall_ocr_idxes in block_to_ocr_map.items():
441
+ has_text = False
442
+ for idx in overall_ocr_idxes:
443
+ if overall_ocr_res["rec_texts"][idx] != "":
444
+ has_text = True
445
+ break
446
+ if not has_text and layout_det_res["boxes"][layout_box_idx][
447
+ "label"
448
+ ] not in BLOCK_LABEL_MAP.get("vision_labels", []):
449
+ crop_box = layout_det_res["boxes"][layout_box_idx]["coordinate"]
450
+ x1, y1, x2, y2 = [int(i) for i in crop_box]
451
+ crop_img = np.array(image)[y1:y2, x1:x2]
452
+ crop_img_rec_res = next(text_rec_model([crop_img]))
453
+ crop_img_dt_poly = get_bbox_intersection(
454
+ crop_box, crop_box, return_format="poly"
315
455
  )
316
- _, matched_idxs = get_sub_regions_ocr_res(
317
- overall_ocr_res, [box], return_match_idx=True
456
+ crop_img_rec_score = crop_img_rec_res["rec_score"]
457
+ crop_img_rec_text = crop_img_rec_res["rec_text"]
458
+ text_rec_score_thresh = (
459
+ text_rec_score_thresh
460
+ if text_rec_score_thresh is not None
461
+ else (self.general_ocr_pipeline.text_rec_score_thresh)
318
462
  )
319
- for matched_idx in sorted(matched_idxs, reverse=True):
320
- del overall_ocr_res["dt_polys"][matched_idx]
321
- del overall_ocr_res["rec_texts"][matched_idx]
322
- overall_ocr_res["rec_boxes"] = np.delete(
323
- overall_ocr_res["rec_boxes"], matched_idx, axis=0
463
+ if crop_img_rec_score >= text_rec_score_thresh:
464
+ overall_ocr_res["rec_boxes"] = np.vstack(
465
+ (overall_ocr_res["rec_boxes"], crop_box)
466
+ )
467
+ overall_ocr_res["rec_polys"].append(crop_img_dt_poly)
468
+ overall_ocr_res["rec_scores"].append(crop_img_rec_score)
469
+ overall_ocr_res["rec_texts"].append(crop_img_rec_text)
470
+ overall_ocr_res["rec_labels"].append("text")
471
+ block_to_ocr_map[layout_box_idx].append(
472
+ len(overall_ocr_res["rec_texts"]) - 1
324
473
  )
325
- del overall_ocr_res["rec_polys"][matched_idx]
326
- del overall_ocr_res["rec_scores"][matched_idx]
327
474
 
328
- if sub_ocr_res["rec_boxes"].size > 0:
329
- sub_ocr_res["rec_labels"] = ["text"] * len(sub_ocr_res["rec_texts"])
475
+ # when there is no layout detection result but there is ocr result, convert ocr detection result to layout detection result
476
+ if len(layout_det_res["boxes"]) == 0 and len(overall_ocr_res["rec_boxes"]) > 0:
477
+ for idx, ocr_rec_box in enumerate(overall_ocr_res["rec_boxes"]):
478
+ base_region_bbox = update_region_box(ocr_rec_box, base_region_bbox)
479
+ layout_det_res["boxes"].append(
480
+ {
481
+ "label": "text",
482
+ "coordinate": ocr_rec_box,
483
+ "score": overall_ocr_res["rec_scores"][idx],
484
+ }
485
+ )
486
+ block_to_ocr_map[idx] = [idx]
330
487
 
331
- overall_ocr_res["dt_polys"].extend(sub_ocr_res["dt_polys"])
332
- overall_ocr_res["rec_texts"].extend(sub_ocr_res["rec_texts"])
333
- overall_ocr_res["rec_boxes"] = np.concatenate(
334
- [overall_ocr_res["rec_boxes"], sub_ocr_res["rec_boxes"]], axis=0
488
+ block_bboxes = [box["coordinate"] for box in layout_det_res["boxes"]]
489
+ region_det_res["boxes"] = sorted(
490
+ region_det_res["boxes"],
491
+ key=lambda item: caculate_bbox_area(item["coordinate"]),
492
+ )
493
+ if len(region_det_res["boxes"]) == 0:
494
+ region_det_res["boxes"] = [
495
+ {
496
+ "coordinate": base_region_bbox,
497
+ "label": "SupplementaryRegion",
498
+ "score": 1,
499
+ }
500
+ ]
501
+ region_to_block_map[0] = range(len(block_bboxes))
502
+ else:
503
+ block_idxes_set = set(range(len(block_bboxes)))
504
+ # match block to region
505
+ for region_idx, region_info in enumerate(region_det_res["boxes"]):
506
+ matched_idxes = []
507
+ region_to_block_map[region_idx] = []
508
+ region_bbox = region_info["coordinate"]
509
+ for block_idx in block_idxes_set:
510
+ overlap_ratio = calculate_overlap_ratio(
511
+ region_bbox, block_bboxes[block_idx], mode="small"
512
+ )
513
+ if overlap_ratio > REGION_SETTINGS.get(
514
+ "match_block_overlap_ratio_threshold", 0.8
515
+ ):
516
+ region_to_block_map[region_idx].append(block_idx)
517
+ matched_idxes.append(block_idx)
518
+ if len(matched_idxes) > 0:
519
+ for block_idx in matched_idxes:
520
+ block_idxes_set.remove(block_idx)
521
+ matched_bboxes = [block_bboxes[idx] for idx in matched_idxes]
522
+ new_region_bbox = calculate_minimum_enclosing_bbox(matched_bboxes)
523
+ region_det_res["boxes"][region_idx]["coordinate"] = new_region_bbox
524
+ # Supplement region when there is no matched block
525
+ if len(block_idxes_set) > 0:
526
+ while len(block_idxes_set) > 0:
527
+ matched_idxes = []
528
+ unmatched_bboxes = [block_bboxes[idx] for idx in block_idxes_set]
529
+ supplement_region_bbox = calculate_minimum_enclosing_bbox(
530
+ unmatched_bboxes
531
+ )
532
+ # check if the new region bbox is overlapped with other region bbox, if have, then shrink the new region bbox
533
+ for region_info in region_det_res["boxes"]:
534
+ region_bbox = region_info["coordinate"]
535
+ overlap_ratio = calculate_overlap_ratio(
536
+ supplement_region_bbox, region_bbox
537
+ )
538
+ if overlap_ratio > 0:
539
+ supplement_region_bbox, matched_idxes = (
540
+ shrink_supplement_region_bbox(
541
+ supplement_region_bbox,
542
+ region_bbox,
543
+ image.shape[1],
544
+ image.shape[0],
545
+ block_idxes_set,
546
+ block_bboxes,
547
+ )
548
+ )
549
+ if len(matched_idxes) == 0:
550
+ matched_idxes = list(block_idxes_set)
551
+ region_idx = len(region_det_res["boxes"])
552
+ region_to_block_map[region_idx] = list(matched_idxes)
553
+ for block_idx in matched_idxes:
554
+ block_idxes_set.remove(block_idx)
555
+ region_det_res["boxes"].append(
556
+ {
557
+ "coordinate": supplement_region_bbox,
558
+ "label": "SupplementaryRegion",
559
+ "score": 1,
560
+ }
561
+ )
562
+
563
+ region_block_ocr_idx_map = dict(
564
+ region_to_block_map=region_to_block_map,
565
+ block_to_ocr_map=block_to_ocr_map,
566
+ )
567
+
568
+ return region_block_ocr_idx_map, region_det_res, layout_det_res
569
+
570
+ def sort_line_by_projection(
571
+ self,
572
+ line: List[List[Union[List[int], str]]],
573
+ input_img: np.ndarray,
574
+ text_rec_model: Any,
575
+ text_rec_score_thresh: Union[float, None] = None,
576
+ direction: str = "vertical",
577
+ ) -> None:
578
+ """
579
+ Sort a line of text spans based on their vertical position within the layout bounding box.
580
+
581
+ Args:
582
+ line (list): A list of spans, where each span is a list containing a bounding box and text.
583
+ input_img (ndarray): The input image used for OCR.
584
+ general_ocr_pipeline (Any): The general OCR pipeline used for text recognition.
585
+
586
+ Returns:
587
+ list: The sorted line of text spans.
588
+ """
589
+ sort_index = 0 if direction == "horizontal" else 1
590
+ splited_boxes = split_boxes_by_projection(line, direction)
591
+ splited_lines = []
592
+ if len(line) != len(splited_boxes):
593
+ splited_boxes.sort(key=lambda span: span[0][sort_index])
594
+ for span in splited_boxes:
595
+ bbox, text, label = span
596
+ if label == "text":
597
+ crop_img = input_img[
598
+ int(bbox[1]) : int(bbox[3]),
599
+ int(bbox[0]) : int(bbox[2]),
600
+ ]
601
+ crop_img_rec_res = list(text_rec_model([crop_img]))[0]
602
+ crop_img_rec_score = crop_img_rec_res["rec_score"]
603
+ crop_img_rec_text = crop_img_rec_res["rec_text"]
604
+ text = (
605
+ crop_img_rec_text
606
+ if crop_img_rec_score >= text_rec_score_thresh
607
+ else ""
335
608
  )
336
- overall_ocr_res["rec_polys"].extend(sub_ocr_res["rec_polys"])
337
- overall_ocr_res["rec_scores"].extend(sub_ocr_res["rec_scores"])
338
- overall_ocr_res["rec_labels"].extend(sub_ocr_res["rec_labels"])
339
-
340
- for formula_res in formula_res_list:
341
- x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
342
- poly_points = [
343
- (x_min, y_min),
344
- (x_max, y_min),
345
- (x_max, y_max),
346
- (x_min, y_max),
609
+ span[1] = text
610
+
611
+ splited_lines.append(span)
612
+ else:
613
+ splited_lines = line
614
+
615
+ return splited_lines
616
+
617
+ def get_block_rec_content(
618
+ self,
619
+ image: list,
620
+ ocr_rec_res: dict,
621
+ block: LayoutParsingBlock,
622
+ text_rec_model: Any,
623
+ text_rec_score_thresh: Union[float, None] = None,
624
+ ) -> str:
625
+
626
+ if len(ocr_rec_res["rec_texts"]) == 0:
627
+ block.content = ""
628
+ return block
629
+
630
+ lines, text_direction, text_line_height = group_boxes_into_lines(
631
+ ocr_rec_res,
632
+ LINE_SETTINGS.get("line_height_iou_threshold", 0.8),
633
+ )
634
+
635
+ # format line
636
+ text_lines = []
637
+ need_new_line_num = 0
638
+ # words start coordinate and stop coordinate in the line
639
+ words_start_index = 0 if text_direction == "horizontal" else 1
640
+ words_stop_index = words_start_index + 2
641
+ lines_start_index = 1 if text_direction == "horizontal" else 3
642
+ line_width_list = []
643
+
644
+ if block.label == "reference":
645
+ rec_boxes = ocr_rec_res["boxes"]
646
+ block_start_coordinate = min([box[words_start_index] for box in rec_boxes])
647
+ block_stop_coordinate = max([box[words_stop_index] for box in rec_boxes])
648
+ else:
649
+ block_start_coordinate = block.bbox[words_start_index]
650
+ block_stop_coordinate = block.bbox[words_stop_index]
651
+
652
+ for idx, line in enumerate(lines):
653
+ line.sort(
654
+ key=lambda span: (
655
+ span[0][words_start_index] // 2,
656
+ (
657
+ span[0][lines_start_index]
658
+ if text_direction == "horizontal"
659
+ else -span[0][lines_start_index]
660
+ ),
661
+ )
662
+ )
663
+
664
+ line_width = line[-1][0][words_stop_index] - line[0][0][words_start_index]
665
+ line_width_list.append(line_width)
666
+ # merge formula and text
667
+ ocr_labels = [span[2] for span in line]
668
+ if "formula" in ocr_labels:
669
+ line = self.sort_line_by_projection(
670
+ line, image, text_rec_model, text_rec_score_thresh, text_direction
671
+ )
672
+
673
+ line_text, need_new_line = format_line(
674
+ line,
675
+ text_direction,
676
+ np.max(line_width_list),
677
+ block_start_coordinate,
678
+ block_stop_coordinate,
679
+ line_gap_limit=text_line_height * 1.5,
680
+ block_label=block.label,
681
+ )
682
+ if need_new_line:
683
+ need_new_line_num += 1
684
+ if idx == 0:
685
+ line_start_coordinate = line[0][0][0]
686
+ block.seg_start_coordinate = line_start_coordinate
687
+ elif idx == len(lines) - 1:
688
+ line_end_coordinate = line[-1][0][2]
689
+ block.seg_end_coordinate = line_end_coordinate
690
+ text_lines.append(line_text)
691
+
692
+ delim = LINE_SETTINGS["delimiter_map"].get(block.label, "")
693
+ if need_new_line_num > len(text_lines) * 0.5 and delim == "":
694
+ text_lines = [text.replace("\n", "") for text in text_lines]
695
+ delim = "\n"
696
+ content = delim.join(text_lines)
697
+ block.content = content
698
+ block.num_of_lines = len(text_lines)
699
+ block.direction = text_direction
700
+ block.text_line_height = text_line_height
701
+ block.text_line_width = np.mean(line_width_list)
702
+
703
+ return block
704
+
705
+ def get_layout_parsing_blocks(
706
+ self,
707
+ image: list,
708
+ region_block_ocr_idx_map: dict,
709
+ region_det_res: DetResult,
710
+ overall_ocr_res: OCRResult,
711
+ layout_det_res: DetResult,
712
+ table_res_list: list,
713
+ seal_res_list: list,
714
+ chart_res_list: list,
715
+ text_rec_model: Any,
716
+ text_rec_score_thresh: Union[float, None] = None,
717
+ ) -> list:
718
+ """
719
+ Extract structured information from OCR and layout detection results.
720
+
721
+ Args:
722
+ image (list): The input image.
723
+ overall_ocr_res (OCRResult): An object containing the overall OCR results, including detected text boxes and recognized text. The structure is expected to have:
724
+ - "input_img": The image on which OCR was performed.
725
+ - "dt_boxes": A list of detected text box coordinates.
726
+ - "rec_texts": A list of recognized text corresponding to the detected boxes.
727
+
728
+ layout_det_res (DetResult): An object containing the layout detection results, including detected layout boxes and their labels. The structure is expected to have:
729
+ - "boxes": A list of dictionaries with keys "coordinate" for box coordinates and "block_label" for the type of content.
730
+
731
+ table_res_list (list): A list of table detection results, where each item is a dictionary containing:
732
+ - "block_bbox": The bounding box of the table layout.
733
+ - "pred_html": The predicted HTML representation of the table.
734
+
735
+ seal_res_list (List): A list of seal detection results. The details of each item depend on the specific application context.
736
+ text_rec_model (Any): A model for text recognition.
737
+ text_rec_score_thresh (Union[float, None]): The minimum score required for a recognized character to be considered valid. If None, use the default value specified during initialization. Default is None.
738
+
739
+ Returns:
740
+ list: A list of structured boxes where each item is a dictionary containing:
741
+ - "block_label": The label of the content (e.g., 'table', 'chart', 'image').
742
+ - The label as a key with either table HTML or image data and text.
743
+ - "block_bbox": The coordinates of the layout box.
744
+ """
745
+
746
+ table_index = 0
747
+ seal_index = 0
748
+ chart_index = 0
749
+ layout_parsing_blocks: List[LayoutParsingBlock] = []
750
+
751
+ for box_idx, box_info in enumerate(layout_det_res["boxes"]):
752
+
753
+ label = box_info["label"]
754
+ block_bbox = box_info["coordinate"]
755
+ rec_res = {"boxes": [], "rec_texts": [], "rec_labels": []}
756
+
757
+ block = LayoutParsingBlock(label=label, bbox=block_bbox)
758
+
759
+ if label == "table" and len(table_res_list) > 0:
760
+ block.content = table_res_list[table_index]["pred_html"]
761
+ table_index += 1
762
+ elif label == "seal" and len(seal_res_list) > 0:
763
+ block.content = "\n".join(seal_res_list[seal_index]["rec_texts"])
764
+ seal_index += 1
765
+ elif label == "chart" and len(chart_res_list) > 0:
766
+ block.content = chart_res_list[chart_index]
767
+ chart_index += 1
768
+ else:
769
+ if label == "formula":
770
+ _, ocr_idx_list = get_sub_regions_ocr_res(
771
+ overall_ocr_res, [block_bbox], return_match_idx=True
772
+ )
773
+ region_block_ocr_idx_map["block_to_ocr_map"][box_idx] = ocr_idx_list
774
+ else:
775
+ ocr_idx_list = region_block_ocr_idx_map["block_to_ocr_map"].get(
776
+ box_idx, []
777
+ )
778
+ for box_no in ocr_idx_list:
779
+ rec_res["boxes"].append(overall_ocr_res["rec_boxes"][box_no])
780
+ rec_res["rec_texts"].append(
781
+ overall_ocr_res["rec_texts"][box_no],
782
+ )
783
+ rec_res["rec_labels"].append(
784
+ overall_ocr_res["rec_labels"][box_no],
785
+ )
786
+ block = self.get_block_rec_content(
787
+ image=image,
788
+ block=block,
789
+ ocr_rec_res=rec_res,
790
+ text_rec_model=text_rec_model,
791
+ text_rec_score_thresh=text_rec_score_thresh,
792
+ )
793
+
794
+ if (
795
+ label
796
+ in ["seal", "table", "formula", "chart"]
797
+ + BLOCK_LABEL_MAP["image_labels"]
798
+ ):
799
+ x_min, y_min, x_max, y_max = list(map(int, block_bbox))
800
+ img_path = (
801
+ f"imgs/img_in_{block.label}_box_{x_min}_{y_min}_{x_max}_{y_max}.jpg"
802
+ )
803
+ img = Image.fromarray(image[y_min:y_max, x_min:x_max, ::-1])
804
+ block.image = {"path": img_path, "img": img}
805
+
806
+ layout_parsing_blocks.append(block)
807
+
808
+ region_list: List[LayoutParsingRegion] = []
809
+ for region_idx, region_info in enumerate(region_det_res["boxes"]):
810
+ region_bbox = region_info["coordinate"]
811
+ region_blocks = [
812
+ layout_parsing_blocks[idx]
813
+ for idx in region_block_ocr_idx_map["region_to_block_map"][region_idx]
347
814
  ]
348
- overall_ocr_res["dt_polys"].append(poly_points)
349
- overall_ocr_res["rec_texts"].append(f"${formula_res['rec_formula']}$")
350
- overall_ocr_res["rec_boxes"] = np.vstack(
351
- (overall_ocr_res["rec_boxes"], [formula_res["dt_polys"]])
815
+ region = LayoutParsingRegion(
816
+ bbox=region_bbox,
817
+ blocks=region_blocks,
818
+ image_shape=image.shape[:2],
819
+ )
820
+ region_list.append(region)
821
+
822
+ region_list = sorted(
823
+ region_list,
824
+ key=lambda r: (r.weighted_distance),
825
+ )
826
+
827
+ return region_list
828
+
829
+ def get_layout_parsing_res(
830
+ self,
831
+ image: list,
832
+ region_det_res: DetResult,
833
+ layout_det_res: DetResult,
834
+ overall_ocr_res: OCRResult,
835
+ table_res_list: list,
836
+ seal_res_list: list,
837
+ chart_res_list: list,
838
+ formula_res_list: list,
839
+ text_rec_score_thresh: Union[float, None] = None,
840
+ ) -> list:
841
+ """
842
+ Retrieves the layout parsing result based on the layout detection result, OCR result, and other recognition results.
843
+ Args:
844
+ image (list): The input image.
845
+ layout_det_res (DetResult): The detection result containing the layout information of the document.
846
+ overall_ocr_res (OCRResult): The overall OCR result containing text information.
847
+ table_res_list (list): A list of table recognition results.
848
+ seal_res_list (list): A list of seal recognition results.
849
+ formula_res_list (list): A list of formula recognition results.
850
+ text_rec_score_thresh (Optional[float], optional): The score threshold for text recognition. Defaults to None.
851
+ Returns:
852
+ list: A list of dictionaries representing the layout parsing result.
853
+ """
854
+
855
+ # Standardize data
856
+ region_block_ocr_idx_map, region_det_res, layout_det_res = (
857
+ self.standardized_data(
858
+ image=image,
859
+ region_det_res=region_det_res,
860
+ layout_det_res=layout_det_res,
861
+ overall_ocr_res=overall_ocr_res,
862
+ formula_res_list=formula_res_list,
863
+ text_rec_model=self.general_ocr_pipeline.text_rec_model,
864
+ text_rec_score_thresh=text_rec_score_thresh,
352
865
  )
353
- overall_ocr_res["rec_labels"].append("formula")
354
- overall_ocr_res["rec_polys"].append(poly_points)
355
- overall_ocr_res["rec_scores"].append(1)
866
+ )
356
867
 
357
- parsing_res_list = get_single_block_parsing_res(
358
- self.general_ocr_pipeline,
868
+ # Format layout parsing block
869
+ region_list = self.get_layout_parsing_blocks(
870
+ image=image,
871
+ region_block_ocr_idx_map=region_block_ocr_idx_map,
872
+ region_det_res=region_det_res,
359
873
  overall_ocr_res=overall_ocr_res,
360
874
  layout_det_res=layout_det_res,
361
875
  table_res_list=table_res_list,
362
876
  seal_res_list=seal_res_list,
877
+ chart_res_list=chart_res_list,
878
+ text_rec_model=self.general_ocr_pipeline.text_rec_model,
879
+ text_rec_score_thresh=self.general_ocr_pipeline.text_rec_score_thresh,
363
880
  )
364
881
 
882
+ parsing_res_list = []
883
+ for region in region_list:
884
+ parsing_res_list.extend(region.sort())
885
+
886
+ index = 1
887
+ for block in parsing_res_list:
888
+ if block.label in BLOCK_LABEL_MAP["visualize_index_labels"]:
889
+ block.order_index = index
890
+ index += 1
891
+
365
892
  return parsing_res_list
366
893
 
367
894
  def get_model_settings(
368
895
  self,
369
896
  use_doc_orientation_classify: Union[bool, None],
370
897
  use_doc_unwarping: Union[bool, None],
371
- use_general_ocr: Union[bool, None],
372
898
  use_seal_recognition: Union[bool, None],
373
899
  use_table_recognition: Union[bool, None],
374
900
  use_formula_recognition: Union[bool, None],
901
+ use_chart_recognition: Union[bool, None],
902
+ use_region_detection: Union[bool, None],
375
903
  ) -> dict:
376
904
  """
377
905
  Get the model settings based on the provided parameters or default values.
@@ -379,7 +907,6 @@ class LayoutParsingPipelineV2(BasePipeline):
379
907
  Args:
380
908
  use_doc_orientation_classify (Union[bool, None]): Enables document orientation classification if True. Defaults to system setting if None.
381
909
  use_doc_unwarping (Union[bool, None]): Enables document unwarping if True. Defaults to system setting if None.
382
- use_general_ocr (Union[bool, None]): Enables general OCR if True. Defaults to system setting if None.
383
910
  use_seal_recognition (Union[bool, None]): Enables seal recognition if True. Defaults to system setting if None.
384
911
  use_table_recognition (Union[bool, None]): Enables table recognition if True. Defaults to system setting if None.
385
912
  use_formula_recognition (Union[bool, None]): Enables formula recognition if True. Defaults to system setting if None.
@@ -396,9 +923,6 @@ class LayoutParsingPipelineV2(BasePipeline):
396
923
  else:
397
924
  use_doc_preprocessor = False
398
925
 
399
- if use_general_ocr is None:
400
- use_general_ocr = self.use_general_ocr
401
-
402
926
  if use_seal_recognition is None:
403
927
  use_seal_recognition = self.use_seal_recognition
404
928
 
@@ -408,24 +932,32 @@ class LayoutParsingPipelineV2(BasePipeline):
408
932
  if use_formula_recognition is None:
409
933
  use_formula_recognition = self.use_formula_recognition
410
934
 
935
+ if use_region_detection is None:
936
+ use_region_detection = self.use_region_detection
937
+
938
+ if use_chart_recognition is None:
939
+ use_chart_recognition = self.use_chart_recognition
940
+
411
941
  return dict(
412
942
  use_doc_preprocessor=use_doc_preprocessor,
413
- use_general_ocr=use_general_ocr,
414
943
  use_seal_recognition=use_seal_recognition,
415
944
  use_table_recognition=use_table_recognition,
416
945
  use_formula_recognition=use_formula_recognition,
946
+ use_chart_recognition=use_chart_recognition,
947
+ use_region_detection=use_region_detection,
417
948
  )
418
949
 
419
950
  def predict(
420
951
  self,
421
952
  input: Union[str, list[str], np.ndarray, list[np.ndarray]],
422
- use_doc_orientation_classify: Union[bool, None] = None,
423
- use_doc_unwarping: Union[bool, None] = None,
953
+ use_doc_orientation_classify: Union[bool, None] = False,
954
+ use_doc_unwarping: Union[bool, None] = False,
424
955
  use_textline_orientation: Optional[bool] = None,
425
- use_general_ocr: Union[bool, None] = None,
426
956
  use_seal_recognition: Union[bool, None] = None,
427
957
  use_table_recognition: Union[bool, None] = None,
428
958
  use_formula_recognition: Union[bool, None] = None,
959
+ use_chart_recognition: Union[bool, None] = False,
960
+ use_region_detection: Union[bool, None] = None,
429
961
  layout_threshold: Optional[Union[float, dict]] = None,
430
962
  layout_nms: Optional[bool] = None,
431
963
  layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None,
@@ -442,7 +974,10 @@ class LayoutParsingPipelineV2(BasePipeline):
442
974
  seal_det_box_thresh: Union[float, None] = None,
443
975
  seal_det_unclip_ratio: Union[float, None] = None,
444
976
  seal_rec_score_thresh: Union[float, None] = None,
445
- use_table_cells_ocr_results: bool = False,
977
+ use_wired_table_cells_trans_to_html: bool = False,
978
+ use_wireless_table_cells_trans_to_html: bool = False,
979
+ use_table_orientation_classify: bool = True,
980
+ use_ocr_results_with_table_cells: bool = True,
446
981
  use_e2e_wired_table_rec_model: bool = False,
447
982
  use_e2e_wireless_table_rec_model: bool = True,
448
983
  **kwargs,
@@ -454,10 +989,10 @@ class LayoutParsingPipelineV2(BasePipeline):
454
989
  use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
455
990
  use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
456
991
  use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
457
- use_general_ocr (Optional[bool]): Whether to use general OCR.
458
992
  use_seal_recognition (Optional[bool]): Whether to use seal recognition.
459
993
  use_table_recognition (Optional[bool]): Whether to use table recognition.
460
994
  use_formula_recognition (Optional[bool]): Whether to use formula recognition.
995
+ use_region_detection (Optional[bool]): Whether to use region detection.
461
996
  layout_threshold (Optional[float]): The threshold value to filter out low-confidence predictions. Default is None.
462
997
  layout_nms (bool, optional): Whether to use layout-aware NMS. Defaults to False.
463
998
  layout_unclip_ratio (Optional[Union[float, Tuple[float, float]]], optional): The ratio of unclipping the bounding box.
@@ -478,7 +1013,10 @@ class LayoutParsingPipelineV2(BasePipeline):
478
1013
  seal_det_box_thresh (Optional[float]): Threshold for seal detection boxes.
479
1014
  seal_det_unclip_ratio (Optional[float]): Ratio for unclipping seal detection boxes.
480
1015
  seal_rec_score_thresh (Optional[float]): Score threshold for seal recognition.
481
- use_table_cells_ocr_results (bool): whether to use OCR results with cells.
1016
+ use_wired_table_cells_trans_to_html (bool): Whether to use wired table cells trans to HTML.
1017
+ use_wireless_table_cells_trans_to_html (bool): Whether to use wireless table cells trans to HTML.
1018
+ use_table_orientation_classify (bool): Whether to use table orientation classification.
1019
+ use_ocr_results_with_table_cells (bool): Whether to use OCR results processed by table cells.
482
1020
  use_e2e_wired_table_rec_model (bool): Whether to use end-to-end wired table recognition model.
483
1021
  use_e2e_wireless_table_rec_model (bool): Whether to use end-to-end wireless table recognition model.
484
1022
  **kwargs (Any): Additional settings to extend functionality.
@@ -490,150 +1028,204 @@ class LayoutParsingPipelineV2(BasePipeline):
490
1028
  model_settings = self.get_model_settings(
491
1029
  use_doc_orientation_classify,
492
1030
  use_doc_unwarping,
493
- use_general_ocr,
494
1031
  use_seal_recognition,
495
1032
  use_table_recognition,
496
1033
  use_formula_recognition,
1034
+ use_chart_recognition,
1035
+ use_region_detection,
497
1036
  )
498
1037
 
499
1038
  if not self.check_model_settings_valid(model_settings):
500
1039
  yield {"error": "the input params for model settings are invalid!"}
501
1040
 
502
1041
  for batch_data in self.batch_sampler(input):
503
- image_array = self.img_reader(batch_data.instances)[0]
1042
+ image_arrays = self.img_reader(batch_data.instances)
504
1043
 
505
1044
  if model_settings["use_doc_preprocessor"]:
506
- doc_preprocessor_res = next(
1045
+ doc_preprocessor_results = list(
507
1046
  self.doc_preprocessor_pipeline(
508
- image_array,
1047
+ image_arrays,
509
1048
  use_doc_orientation_classify=use_doc_orientation_classify,
510
1049
  use_doc_unwarping=use_doc_unwarping,
511
- ),
1050
+ )
512
1051
  )
513
1052
  else:
514
- doc_preprocessor_res = {"output_img": image_array}
1053
+ doc_preprocessor_results = [{"output_img": arr} for arr in image_arrays]
515
1054
 
516
- doc_preprocessor_image = doc_preprocessor_res["output_img"]
1055
+ doc_preprocessor_images = [
1056
+ item["output_img"] for item in doc_preprocessor_results
1057
+ ]
517
1058
 
518
- layout_det_res = next(
1059
+ layout_det_results = list(
519
1060
  self.layout_det_model(
520
- doc_preprocessor_image,
1061
+ doc_preprocessor_images,
521
1062
  threshold=layout_threshold,
522
1063
  layout_nms=layout_nms,
523
1064
  layout_unclip_ratio=layout_unclip_ratio,
524
1065
  layout_merge_bboxes_mode=layout_merge_bboxes_mode,
525
1066
  )
526
1067
  )
527
- imgs_in_doc = gather_imgs(doc_preprocessor_image, layout_det_res["boxes"])
1068
+ imgs_in_doc = [
1069
+ gather_imgs(img, res["boxes"])
1070
+ for img, res in zip(doc_preprocessor_images, layout_det_results)
1071
+ ]
1072
+
1073
+ if model_settings["use_region_detection"]:
1074
+ region_det_results = list(
1075
+ self.region_detection_model(
1076
+ doc_preprocessor_images,
1077
+ layout_nms=True,
1078
+ layout_merge_bboxes_mode="small",
1079
+ ),
1080
+ )
1081
+ else:
1082
+ region_det_results = [{"boxes": []} for _ in doc_preprocessor_images]
528
1083
 
529
1084
  if model_settings["use_formula_recognition"]:
530
- formula_res_all = next(
1085
+ formula_res_all = list(
531
1086
  self.formula_recognition_pipeline(
532
- doc_preprocessor_image,
1087
+ doc_preprocessor_images,
533
1088
  use_layout_detection=False,
534
1089
  use_doc_orientation_classify=False,
535
1090
  use_doc_unwarping=False,
536
- layout_det_res=layout_det_res,
1091
+ layout_det_res=layout_det_results,
537
1092
  ),
538
1093
  )
539
- formula_res_list = formula_res_all["formula_res_list"]
1094
+ formula_res_lists = [
1095
+ item["formula_res_list"] for item in formula_res_all
1096
+ ]
540
1097
  else:
541
- formula_res_list = []
1098
+ formula_res_lists = [[] for _ in doc_preprocessor_images]
542
1099
 
543
- for formula_res in formula_res_list:
544
- x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
545
- doc_preprocessor_image[y_min:y_max, x_min:x_max, :] = 255.0
546
-
547
- if (
548
- model_settings["use_general_ocr"]
549
- or model_settings["use_table_recognition"]
1100
+ for doc_preprocessor_image, formula_res_list in zip(
1101
+ doc_preprocessor_images, formula_res_lists
550
1102
  ):
551
- overall_ocr_res = next(
552
- self.general_ocr_pipeline(
553
- doc_preprocessor_image,
554
- use_textline_orientation=use_textline_orientation,
555
- text_det_limit_side_len=text_det_limit_side_len,
556
- text_det_limit_type=text_det_limit_type,
557
- text_det_thresh=text_det_thresh,
558
- text_det_box_thresh=text_det_box_thresh,
559
- text_det_unclip_ratio=text_det_unclip_ratio,
560
- text_rec_score_thresh=text_rec_score_thresh,
561
- ),
562
- )
563
- else:
564
- overall_ocr_res = {}
1103
+ for formula_res in formula_res_list:
1104
+ x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
1105
+ doc_preprocessor_image[y_min:y_max, x_min:x_max, :] = 255.0
1106
+
1107
+ overall_ocr_results = list(
1108
+ self.general_ocr_pipeline(
1109
+ doc_preprocessor_images,
1110
+ use_textline_orientation=use_textline_orientation,
1111
+ text_det_limit_side_len=text_det_limit_side_len,
1112
+ text_det_limit_type=text_det_limit_type,
1113
+ text_det_thresh=text_det_thresh,
1114
+ text_det_box_thresh=text_det_box_thresh,
1115
+ text_det_unclip_ratio=text_det_unclip_ratio,
1116
+ text_rec_score_thresh=text_rec_score_thresh,
1117
+ ),
1118
+ )
565
1119
 
566
- overall_ocr_res["rec_labels"] = ["text"] * len(overall_ocr_res["rec_texts"])
1120
+ for overall_ocr_res in overall_ocr_results:
1121
+ overall_ocr_res["rec_labels"] = ["text"] * len(
1122
+ overall_ocr_res["rec_texts"]
1123
+ )
567
1124
 
568
1125
  if model_settings["use_table_recognition"]:
569
- table_contents = copy.deepcopy(overall_ocr_res)
570
- for formula_res in formula_res_list:
571
- x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
572
- poly_points = [
573
- (x_min, y_min),
574
- (x_max, y_min),
575
- (x_max, y_max),
576
- (x_min, y_max),
577
- ]
578
- table_contents["dt_polys"].append(poly_points)
579
- table_contents["rec_texts"].append(
580
- f"${formula_res['rec_formula']}$"
581
- )
582
- table_contents["rec_boxes"] = np.vstack(
583
- (table_contents["rec_boxes"], [formula_res["dt_polys"]])
1126
+ table_res_lists = []
1127
+ for (
1128
+ layout_det_res,
1129
+ doc_preprocessor_image,
1130
+ overall_ocr_res,
1131
+ formula_res_list,
1132
+ imgs_in_doc_for_img,
1133
+ ) in zip(
1134
+ layout_det_results,
1135
+ doc_preprocessor_images,
1136
+ overall_ocr_results,
1137
+ formula_res_lists,
1138
+ imgs_in_doc,
1139
+ ):
1140
+ table_contents_for_img = copy.deepcopy(overall_ocr_res)
1141
+ for formula_res in formula_res_list:
1142
+ x_min, y_min, x_max, y_max = list(
1143
+ map(int, formula_res["dt_polys"])
1144
+ )
1145
+ poly_points = [
1146
+ (x_min, y_min),
1147
+ (x_max, y_min),
1148
+ (x_max, y_max),
1149
+ (x_min, y_max),
1150
+ ]
1151
+ table_contents_for_img["dt_polys"].append(poly_points)
1152
+ rec_formula = formula_res["rec_formula"]
1153
+ if not rec_formula.startswith("$") or not rec_formula.endswith(
1154
+ "$"
1155
+ ):
1156
+ rec_formula = f"${rec_formula}$"
1157
+ table_contents_for_img["rec_texts"].append(f"{rec_formula}")
1158
+ if table_contents_for_img["rec_boxes"].size == 0:
1159
+ table_contents_for_img["rec_boxes"] = np.array(
1160
+ [formula_res["dt_polys"]]
1161
+ )
1162
+ else:
1163
+ table_contents_for_img["rec_boxes"] = np.vstack(
1164
+ (
1165
+ table_contents_for_img["rec_boxes"],
1166
+ [formula_res["dt_polys"]],
1167
+ )
1168
+ )
1169
+ table_contents_for_img["rec_polys"].append(poly_points)
1170
+ table_contents_for_img["rec_scores"].append(1)
1171
+
1172
+ for img in imgs_in_doc_for_img:
1173
+ img_path = img["path"]
1174
+ x_min, y_min, x_max, y_max = img["coordinate"]
1175
+ poly_points = [
1176
+ (x_min, y_min),
1177
+ (x_max, y_min),
1178
+ (x_max, y_max),
1179
+ (x_min, y_max),
1180
+ ]
1181
+ table_contents_for_img["dt_polys"].append(poly_points)
1182
+ table_contents_for_img["rec_texts"].append(
1183
+ f'<div style="text-align: center;"><img src="{img_path}" alt="Image" /></div>'
1184
+ )
1185
+ if table_contents_for_img["rec_boxes"].size == 0:
1186
+ table_contents_for_img["rec_boxes"] = np.array(
1187
+ [img["coordinate"]]
1188
+ )
1189
+ else:
1190
+ table_contents_for_img["rec_boxes"] = np.vstack(
1191
+ (table_contents_for_img["rec_boxes"], img["coordinate"])
1192
+ )
1193
+ table_contents_for_img["rec_polys"].append(poly_points)
1194
+ table_contents_for_img["rec_scores"].append(img["score"])
1195
+
1196
+ table_res_all = list(
1197
+ self.table_recognition_pipeline(
1198
+ doc_preprocessor_image,
1199
+ use_doc_orientation_classify=False,
1200
+ use_doc_unwarping=False,
1201
+ use_layout_detection=False,
1202
+ use_ocr_model=False,
1203
+ overall_ocr_res=table_contents_for_img,
1204
+ layout_det_res=layout_det_res,
1205
+ cell_sort_by_y_projection=True,
1206
+ use_wired_table_cells_trans_to_html=use_wired_table_cells_trans_to_html,
1207
+ use_wireless_table_cells_trans_to_html=use_wireless_table_cells_trans_to_html,
1208
+ use_table_orientation_classify=use_table_orientation_classify,
1209
+ use_ocr_results_with_table_cells=use_ocr_results_with_table_cells,
1210
+ use_e2e_wired_table_rec_model=use_e2e_wired_table_rec_model,
1211
+ use_e2e_wireless_table_rec_model=use_e2e_wireless_table_rec_model,
1212
+ ),
584
1213
  )
585
- table_contents["rec_polys"].append(poly_points)
586
- table_contents["rec_scores"].append(1)
587
-
588
- for img in imgs_in_doc:
589
- img_path = img["path"]
590
- x_min, y_min, x_max, y_max = img["coordinate"]
591
- poly_points = [
592
- (x_min, y_min),
593
- (x_max, y_min),
594
- (x_max, y_max),
595
- (x_min, y_max),
1214
+ single_table_res_lists = [
1215
+ item["table_res_list"] for item in table_res_all
596
1216
  ]
597
- table_contents["dt_polys"].append(poly_points)
598
- table_contents["rec_texts"].append(
599
- f'<div style="text-align: center;"><img src="{img_path}" alt="Image" /></div>'
600
- )
601
- if table_contents["rec_boxes"].size == 0:
602
- table_contents["rec_boxes"] = np.array([img["coordinate"]])
603
- else:
604
- table_contents["rec_boxes"] = np.vstack(
605
- (table_contents["rec_boxes"], img["coordinate"])
606
- )
607
- table_contents["rec_polys"].append(poly_points)
608
- table_contents["rec_scores"].append(img["score"])
609
-
610
- table_res_all = next(
611
- self.table_recognition_pipeline(
612
- doc_preprocessor_image,
613
- use_doc_orientation_classify=False,
614
- use_doc_unwarping=False,
615
- use_layout_detection=False,
616
- use_ocr_model=False,
617
- overall_ocr_res=table_contents,
618
- layout_det_res=layout_det_res,
619
- cell_sort_by_y_projection=True,
620
- use_table_cells_ocr_results=use_table_cells_ocr_results,
621
- use_e2e_wired_table_rec_model=use_e2e_wired_table_rec_model,
622
- use_e2e_wireless_table_rec_model=use_e2e_wireless_table_rec_model,
623
- ),
624
- )
625
- table_res_list = table_res_all["table_res_list"]
1217
+ table_res_lists.extend(single_table_res_lists)
626
1218
  else:
627
- table_res_list = []
1219
+ table_res_lists = [[] for _ in doc_preprocessor_images]
628
1220
 
629
1221
  if model_settings["use_seal_recognition"]:
630
- seal_res_all = next(
1222
+ seal_res_all = list(
631
1223
  self.seal_recognition_pipeline(
632
- doc_preprocessor_image,
1224
+ doc_preprocessor_images,
633
1225
  use_doc_orientation_classify=False,
634
1226
  use_doc_unwarping=False,
635
1227
  use_layout_detection=False,
636
- layout_det_res=layout_det_res,
1228
+ layout_det_res=layout_det_results,
637
1229
  seal_det_limit_side_len=seal_det_limit_side_len,
638
1230
  seal_det_limit_type=seal_det_limit_type,
639
1231
  seal_det_thresh=seal_det_thresh,
@@ -642,46 +1234,85 @@ class LayoutParsingPipelineV2(BasePipeline):
642
1234
  seal_rec_score_thresh=seal_rec_score_thresh,
643
1235
  ),
644
1236
  )
645
- seal_res_list = seal_res_all["seal_res_list"]
1237
+ seal_res_lists = [item["seal_res_list"] for item in seal_res_all]
646
1238
  else:
647
- seal_res_list = []
1239
+ seal_res_lists = [[] for _ in doc_preprocessor_images]
648
1240
 
649
- parsing_res_list = self.get_layout_parsing_res(
1241
+ for (
1242
+ input_path,
1243
+ page_index,
650
1244
  doc_preprocessor_image,
651
- layout_det_res=layout_det_res,
652
- overall_ocr_res=overall_ocr_res,
653
- table_res_list=table_res_list,
654
- seal_res_list=seal_res_list,
655
- formula_res_list=formula_res_list,
656
- imgs_in_doc=imgs_in_doc,
657
- text_det_limit_side_len=text_det_limit_side_len,
658
- text_det_limit_type=text_det_limit_type,
659
- text_det_thresh=text_det_thresh,
660
- text_det_box_thresh=text_det_box_thresh,
661
- text_det_unclip_ratio=text_det_unclip_ratio,
662
- text_rec_score_thresh=text_rec_score_thresh,
663
- )
1245
+ doc_preprocessor_res,
1246
+ layout_det_res,
1247
+ region_det_res,
1248
+ overall_ocr_res,
1249
+ table_res_list,
1250
+ seal_res_list,
1251
+ formula_res_list,
1252
+ imgs_in_doc_for_img,
1253
+ ) in zip(
1254
+ batch_data.input_paths,
1255
+ batch_data.page_indexes,
1256
+ doc_preprocessor_images,
1257
+ doc_preprocessor_results,
1258
+ layout_det_results,
1259
+ region_det_results,
1260
+ overall_ocr_results,
1261
+ table_res_lists,
1262
+ seal_res_lists,
1263
+ formula_res_lists,
1264
+ imgs_in_doc,
1265
+ ):
1266
+ chart_res_list = []
1267
+ if model_settings["use_chart_recognition"]:
1268
+ chart_imgs_list = []
1269
+ for bbox in layout_det_res["boxes"]:
1270
+ if bbox["label"] == "chart":
1271
+ x_min, y_min, x_max, y_max = bbox["coordinate"]
1272
+ chart_img = doc_preprocessor_image[
1273
+ int(y_min) : int(y_max), int(x_min) : int(x_max), :
1274
+ ]
1275
+ chart_imgs_list.append({"image": chart_img})
1276
+
1277
+ for chart_res_batch in self.chart_recognition_model(
1278
+ input=chart_imgs_list
1279
+ ):
1280
+ chart_res_list.append(chart_res_batch["result"])
1281
+
1282
+ parsing_res_list = self.get_layout_parsing_res(
1283
+ doc_preprocessor_image,
1284
+ region_det_res=region_det_res,
1285
+ layout_det_res=layout_det_res,
1286
+ overall_ocr_res=overall_ocr_res,
1287
+ table_res_list=table_res_list,
1288
+ seal_res_list=seal_res_list,
1289
+ chart_res_list=chart_res_list,
1290
+ formula_res_list=formula_res_list,
1291
+ text_rec_score_thresh=text_rec_score_thresh,
1292
+ )
664
1293
 
665
- for formula_res in formula_res_list:
666
- x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
667
- doc_preprocessor_image[y_min:y_max, x_min:x_max, :] = formula_res[
668
- "input_img"
669
- ]
1294
+ for formula_res in formula_res_list:
1295
+ x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
1296
+ doc_preprocessor_image[y_min:y_max, x_min:x_max, :] = formula_res[
1297
+ "input_img"
1298
+ ]
670
1299
 
671
- single_img_res = {
672
- "input_path": batch_data.input_paths[0],
673
- "page_index": batch_data.page_indexes[0],
674
- "doc_preprocessor_res": doc_preprocessor_res,
675
- "layout_det_res": layout_det_res,
676
- "overall_ocr_res": overall_ocr_res,
677
- "table_res_list": table_res_list,
678
- "seal_res_list": seal_res_list,
679
- "formula_res_list": formula_res_list,
680
- "parsing_res_list": parsing_res_list,
681
- "imgs_in_doc": imgs_in_doc,
682
- "model_settings": model_settings,
683
- }
684
- yield LayoutParsingResultV2(single_img_res)
1300
+ single_img_res = {
1301
+ "input_path": input_path,
1302
+ "page_index": page_index,
1303
+ "doc_preprocessor_res": doc_preprocessor_res,
1304
+ "layout_det_res": layout_det_res,
1305
+ "region_det_res": region_det_res,
1306
+ "overall_ocr_res": overall_ocr_res,
1307
+ "table_res_list": table_res_list,
1308
+ "seal_res_list": seal_res_list,
1309
+ "chart_res_list": chart_res_list,
1310
+ "formula_res_list": formula_res_list,
1311
+ "parsing_res_list": parsing_res_list,
1312
+ "imgs_in_doc": imgs_in_doc_for_img,
1313
+ "model_settings": model_settings,
1314
+ }
1315
+ yield LayoutParsingResultV2(single_img_res)
685
1316
 
686
1317
  def concatenate_markdown_pages(self, markdown_list: list) -> tuple:
687
1318
  """
@@ -737,3 +1368,15 @@ class LayoutParsingPipelineV2(BasePipeline):
737
1368
  )
738
1369
 
739
1370
  return markdown_texts
1371
+
1372
+
1373
+ @pipeline_requires_extra("ocr")
1374
+ class LayoutParsingPipelineV2(AutoParallelImageSimpleInferencePipeline):
1375
+ entities = ["PP-StructureV3"]
1376
+
1377
+ @property
1378
+ def _pipeline_cls(self):
1379
+ return _LayoutParsingPipelineV2
1380
+
1381
+ def _get_batch_size(self, config):
1382
+ return config.get("batch_size", 1)