paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (824)
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +17 -34
  3. paddlex/__main__.py +1 -1
  4. paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
  5. paddlex/configs/modules/doc_vlm/PP-DocBee-2B.yaml +14 -0
  6. paddlex/configs/modules/doc_vlm/PP-DocBee-7B.yaml +14 -0
  7. paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
  8. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
  9. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
  10. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
  11. paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
  12. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
  13. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
  14. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
  15. paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
  16. paddlex/configs/modules/open_vocabulary_detection/YOLO-Worldv2-L.yaml +13 -0
  17. paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
  18. paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
  19. paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
  20. paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
  21. paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
  22. paddlex/configs/pipelines/OCR.yaml +7 -6
  23. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
  24. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
  25. paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
  26. paddlex/configs/pipelines/anomaly_detection.yaml +1 -1
  27. paddlex/configs/pipelines/doc_understanding.yaml +9 -0
  28. paddlex/configs/pipelines/formula_recognition.yaml +2 -2
  29. paddlex/configs/pipelines/layout_parsing.yaml +3 -2
  30. paddlex/configs/pipelines/seal_recognition.yaml +1 -0
  31. paddlex/configs/pipelines/table_recognition.yaml +2 -1
  32. paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
  33. paddlex/configs/pipelines/ts_anomaly_detection.yaml +1 -1
  34. paddlex/configs/pipelines/ts_classification.yaml +1 -1
  35. paddlex/configs/pipelines/ts_forecast.yaml +1 -1
  36. paddlex/constants.py +17 -0
  37. paddlex/engine.py +7 -5
  38. paddlex/hpip_links.html +23 -11
  39. paddlex/inference/__init__.py +3 -3
  40. paddlex/inference/common/__init__.py +1 -1
  41. paddlex/inference/common/batch_sampler/__init__.py +5 -4
  42. paddlex/inference/common/batch_sampler/audio_batch_sampler.py +5 -6
  43. paddlex/inference/common/batch_sampler/base_batch_sampler.py +20 -16
  44. paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +4 -7
  45. paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +87 -0
  46. paddlex/inference/common/batch_sampler/image_batch_sampler.py +45 -60
  47. paddlex/inference/common/batch_sampler/ts_batch_sampler.py +9 -10
  48. paddlex/inference/common/batch_sampler/video_batch_sampler.py +2 -22
  49. paddlex/inference/common/reader/__init__.py +4 -4
  50. paddlex/inference/common/reader/audio_reader.py +3 -3
  51. paddlex/inference/common/reader/det_3d_reader.py +7 -5
  52. paddlex/inference/common/reader/image_reader.py +16 -12
  53. paddlex/inference/common/reader/ts_reader.py +3 -2
  54. paddlex/inference/common/reader/video_reader.py +3 -3
  55. paddlex/inference/common/result/__init__.py +7 -7
  56. paddlex/inference/common/result/base_cv_result.py +12 -2
  57. paddlex/inference/common/result/base_result.py +7 -5
  58. paddlex/inference/common/result/base_ts_result.py +1 -2
  59. paddlex/inference/common/result/base_video_result.py +2 -2
  60. paddlex/inference/common/result/mixin.py +31 -25
  61. paddlex/inference/models/__init__.py +41 -85
  62. paddlex/inference/models/anomaly_detection/__init__.py +1 -1
  63. paddlex/inference/models/anomaly_detection/predictor.py +9 -19
  64. paddlex/inference/models/anomaly_detection/processors.py +9 -2
  65. paddlex/inference/models/anomaly_detection/result.py +3 -2
  66. paddlex/inference/models/base/__init__.py +2 -2
  67. paddlex/inference/models/base/predictor/__init__.py +1 -2
  68. paddlex/inference/models/base/predictor/base_predictor.py +278 -39
  69. paddlex/inference/models/common/__init__.py +6 -15
  70. paddlex/inference/models/common/static_infer.py +724 -251
  71. paddlex/inference/models/common/tokenizer/__init__.py +7 -3
  72. paddlex/inference/models/common/tokenizer/bert_tokenizer.py +1 -1
  73. paddlex/inference/models/common/tokenizer/clip_tokenizer.py +609 -0
  74. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +9 -7
  75. paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
  76. paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +438 -0
  77. paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
  78. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +85 -77
  79. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +339 -123
  80. paddlex/inference/models/common/tokenizer/utils.py +1 -1
  81. paddlex/inference/models/common/tokenizer/vocab.py +8 -8
  82. paddlex/inference/models/common/ts/__init__.py +1 -1
  83. paddlex/inference/models/common/ts/funcs.py +13 -6
  84. paddlex/inference/models/common/ts/processors.py +14 -5
  85. paddlex/inference/models/common/vision/__init__.py +3 -3
  86. paddlex/inference/models/common/vision/funcs.py +17 -12
  87. paddlex/inference/models/common/vision/processors.py +61 -46
  88. paddlex/inference/models/common/vlm/__init__.py +13 -0
  89. paddlex/inference/models/common/vlm/activations.py +189 -0
  90. paddlex/inference/models/common/vlm/bert_padding.py +127 -0
  91. paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
  92. paddlex/inference/models/common/vlm/distributed.py +229 -0
  93. paddlex/inference/models/common/vlm/flash_attn_utils.py +119 -0
  94. paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
  95. paddlex/inference/models/common/vlm/generation/__init__.py +34 -0
  96. paddlex/inference/models/common/vlm/generation/configuration_utils.py +533 -0
  97. paddlex/inference/models/common/vlm/generation/logits_process.py +730 -0
  98. paddlex/inference/models/common/vlm/generation/stopping_criteria.py +106 -0
  99. paddlex/inference/models/common/vlm/generation/utils.py +2162 -0
  100. paddlex/inference/models/common/vlm/transformers/__init__.py +16 -0
  101. paddlex/inference/models/common/vlm/transformers/configuration_utils.py +1037 -0
  102. paddlex/inference/models/common/vlm/transformers/conversion_utils.py +408 -0
  103. paddlex/inference/models/common/vlm/transformers/model_outputs.py +1612 -0
  104. paddlex/inference/models/common/vlm/transformers/model_utils.py +2014 -0
  105. paddlex/inference/models/common/vlm/transformers/utils.py +178 -0
  106. paddlex/inference/models/common/vlm/utils.py +109 -0
  107. paddlex/inference/models/doc_vlm/__init__.py +15 -0
  108. paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
  109. paddlex/inference/models/doc_vlm/modeling/__init__.py +17 -0
  110. paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
  111. paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
  112. paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +2495 -0
  113. paddlex/inference/models/doc_vlm/predictor.py +253 -0
  114. paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
  115. paddlex/inference/models/doc_vlm/processors/__init__.py +17 -0
  116. paddlex/inference/models/doc_vlm/processors/common.py +561 -0
  117. paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
  118. paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +543 -0
  119. paddlex/inference/models/doc_vlm/result.py +21 -0
  120. paddlex/inference/models/face_feature/__init__.py +1 -1
  121. paddlex/inference/models/face_feature/predictor.py +2 -1
  122. paddlex/inference/models/formula_recognition/__init__.py +1 -1
  123. paddlex/inference/models/formula_recognition/predictor.py +18 -28
  124. paddlex/inference/models/formula_recognition/processors.py +126 -97
  125. paddlex/inference/models/formula_recognition/result.py +43 -35
  126. paddlex/inference/models/image_classification/__init__.py +1 -1
  127. paddlex/inference/models/image_classification/predictor.py +9 -19
  128. paddlex/inference/models/image_classification/processors.py +4 -2
  129. paddlex/inference/models/image_classification/result.py +4 -3
  130. paddlex/inference/models/image_feature/__init__.py +1 -1
  131. paddlex/inference/models/image_feature/predictor.py +9 -19
  132. paddlex/inference/models/image_feature/processors.py +7 -5
  133. paddlex/inference/models/image_feature/result.py +2 -3
  134. paddlex/inference/models/image_multilabel_classification/__init__.py +1 -1
  135. paddlex/inference/models/image_multilabel_classification/predictor.py +7 -6
  136. paddlex/inference/models/image_multilabel_classification/processors.py +6 -2
  137. paddlex/inference/models/image_multilabel_classification/result.py +4 -3
  138. paddlex/inference/models/image_unwarping/__init__.py +1 -1
  139. paddlex/inference/models/image_unwarping/predictor.py +8 -16
  140. paddlex/inference/models/image_unwarping/processors.py +6 -2
  141. paddlex/inference/models/image_unwarping/result.py +4 -2
  142. paddlex/inference/models/instance_segmentation/__init__.py +1 -1
  143. paddlex/inference/models/instance_segmentation/predictor.py +7 -15
  144. paddlex/inference/models/instance_segmentation/processors.py +4 -7
  145. paddlex/inference/models/instance_segmentation/result.py +11 -10
  146. paddlex/inference/models/keypoint_detection/__init__.py +1 -1
  147. paddlex/inference/models/keypoint_detection/predictor.py +5 -3
  148. paddlex/inference/models/keypoint_detection/processors.py +11 -3
  149. paddlex/inference/models/keypoint_detection/result.py +9 -4
  150. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
  151. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/predictor.py +15 -26
  152. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/processors.py +26 -14
  153. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/result.py +15 -12
  154. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/visualizer_3d.py +77 -39
  155. paddlex/inference/models/multilingual_speech_recognition/__init__.py +1 -1
  156. paddlex/inference/models/multilingual_speech_recognition/predictor.py +11 -15
  157. paddlex/inference/models/multilingual_speech_recognition/processors.py +45 -53
  158. paddlex/inference/models/multilingual_speech_recognition/result.py +1 -1
  159. paddlex/inference/models/object_detection/__init__.py +1 -1
  160. paddlex/inference/models/object_detection/predictor.py +8 -12
  161. paddlex/inference/models/object_detection/processors.py +63 -33
  162. paddlex/inference/models/object_detection/result.py +5 -4
  163. paddlex/inference/models/object_detection/utils.py +3 -1
  164. paddlex/inference/models/open_vocabulary_detection/__init__.py +1 -1
  165. paddlex/inference/models/open_vocabulary_detection/predictor.py +31 -14
  166. paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +3 -2
  167. paddlex/inference/models/open_vocabulary_detection/processors/common.py +114 -0
  168. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +19 -8
  169. paddlex/inference/models/open_vocabulary_detection/processors/yoloworld_processors.py +209 -0
  170. paddlex/inference/models/open_vocabulary_segmentation/__init__.py +1 -1
  171. paddlex/inference/models/open_vocabulary_segmentation/predictor.py +6 -13
  172. paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +1 -1
  173. paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +12 -12
  174. paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +1 -1
  175. paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +11 -9
  176. paddlex/inference/models/semantic_segmentation/__init__.py +1 -1
  177. paddlex/inference/models/semantic_segmentation/predictor.py +9 -18
  178. paddlex/inference/models/semantic_segmentation/processors.py +11 -8
  179. paddlex/inference/models/semantic_segmentation/result.py +4 -3
  180. paddlex/inference/models/table_structure_recognition/__init__.py +1 -1
  181. paddlex/inference/models/table_structure_recognition/predictor.py +8 -18
  182. paddlex/inference/models/table_structure_recognition/processors.py +23 -29
  183. paddlex/inference/models/table_structure_recognition/result.py +8 -15
  184. paddlex/inference/models/text_detection/__init__.py +1 -1
  185. paddlex/inference/models/text_detection/predictor.py +24 -24
  186. paddlex/inference/models/text_detection/processors.py +116 -44
  187. paddlex/inference/models/text_detection/result.py +8 -13
  188. paddlex/inference/models/text_recognition/__init__.py +1 -1
  189. paddlex/inference/models/text_recognition/predictor.py +11 -19
  190. paddlex/inference/models/text_recognition/processors.py +27 -13
  191. paddlex/inference/models/text_recognition/result.py +3 -2
  192. paddlex/inference/models/ts_anomaly_detection/__init__.py +1 -1
  193. paddlex/inference/models/ts_anomaly_detection/predictor.py +12 -17
  194. paddlex/inference/models/ts_anomaly_detection/processors.py +6 -2
  195. paddlex/inference/models/ts_anomaly_detection/result.py +21 -10
  196. paddlex/inference/models/ts_classification/__init__.py +1 -1
  197. paddlex/inference/models/ts_classification/predictor.py +14 -27
  198. paddlex/inference/models/ts_classification/processors.py +7 -2
  199. paddlex/inference/models/ts_classification/result.py +21 -12
  200. paddlex/inference/models/ts_forecasting/__init__.py +1 -1
  201. paddlex/inference/models/ts_forecasting/predictor.py +13 -18
  202. paddlex/inference/models/ts_forecasting/processors.py +12 -3
  203. paddlex/inference/models/ts_forecasting/result.py +24 -11
  204. paddlex/inference/models/video_classification/__init__.py +1 -1
  205. paddlex/inference/models/video_classification/predictor.py +9 -15
  206. paddlex/inference/models/video_classification/processors.py +24 -24
  207. paddlex/inference/models/video_classification/result.py +7 -3
  208. paddlex/inference/models/video_detection/__init__.py +1 -1
  209. paddlex/inference/models/video_detection/predictor.py +8 -15
  210. paddlex/inference/models/video_detection/processors.py +24 -11
  211. paddlex/inference/models/video_detection/result.py +10 -5
  212. paddlex/inference/pipelines/__init__.py +48 -37
  213. paddlex/inference/pipelines/_parallel.py +172 -0
  214. paddlex/inference/pipelines/anomaly_detection/__init__.py +1 -1
  215. paddlex/inference/pipelines/anomaly_detection/pipeline.py +29 -9
  216. paddlex/inference/pipelines/attribute_recognition/__init__.py +1 -1
  217. paddlex/inference/pipelines/attribute_recognition/pipeline.py +24 -9
  218. paddlex/inference/pipelines/attribute_recognition/result.py +10 -8
  219. paddlex/inference/pipelines/base.py +43 -13
  220. paddlex/inference/pipelines/components/__init__.py +14 -8
  221. paddlex/inference/pipelines/components/chat_server/__init__.py +1 -1
  222. paddlex/inference/pipelines/components/chat_server/base.py +2 -2
  223. paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +8 -8
  224. paddlex/inference/pipelines/components/common/__init__.py +5 -4
  225. paddlex/inference/pipelines/components/common/base_operator.py +2 -1
  226. paddlex/inference/pipelines/components/common/base_result.py +3 -2
  227. paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +1 -2
  228. paddlex/inference/pipelines/components/common/crop_image_regions.py +11 -5
  229. paddlex/inference/pipelines/components/common/seal_det_warp.py +44 -13
  230. paddlex/inference/pipelines/components/common/sort_boxes.py +4 -2
  231. paddlex/inference/pipelines/components/common/warp_image.py +50 -0
  232. paddlex/inference/pipelines/components/faisser.py +10 -5
  233. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +2 -2
  234. paddlex/inference/pipelines/components/prompt_engineering/base.py +2 -2
  235. paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +2 -1
  236. paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +2 -2
  237. paddlex/inference/pipelines/components/retriever/__init__.py +2 -2
  238. paddlex/inference/pipelines/components/retriever/base.py +18 -16
  239. paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +2 -2
  240. paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +87 -84
  241. paddlex/inference/pipelines/components/utils/__init__.py +1 -1
  242. paddlex/inference/pipelines/components/utils/mixin.py +7 -7
  243. paddlex/inference/pipelines/doc_preprocessor/__init__.py +1 -1
  244. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +70 -51
  245. paddlex/inference/pipelines/doc_preprocessor/result.py +5 -10
  246. paddlex/inference/pipelines/doc_understanding/__init__.py +15 -0
  247. paddlex/inference/pipelines/doc_understanding/pipeline.py +71 -0
  248. paddlex/inference/pipelines/face_recognition/__init__.py +1 -1
  249. paddlex/inference/pipelines/face_recognition/pipeline.py +3 -1
  250. paddlex/inference/pipelines/face_recognition/result.py +3 -2
  251. paddlex/inference/pipelines/formula_recognition/__init__.py +1 -1
  252. paddlex/inference/pipelines/formula_recognition/pipeline.py +137 -93
  253. paddlex/inference/pipelines/formula_recognition/result.py +20 -29
  254. paddlex/inference/pipelines/image_classification/__init__.py +1 -1
  255. paddlex/inference/pipelines/image_classification/pipeline.py +30 -11
  256. paddlex/inference/pipelines/image_multilabel_classification/__init__.py +1 -1
  257. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +31 -12
  258. paddlex/inference/pipelines/instance_segmentation/__init__.py +1 -1
  259. paddlex/inference/pipelines/instance_segmentation/pipeline.py +30 -9
  260. paddlex/inference/pipelines/keypoint_detection/__init__.py +1 -1
  261. paddlex/inference/pipelines/keypoint_detection/pipeline.py +30 -9
  262. paddlex/inference/pipelines/layout_parsing/__init__.py +1 -1
  263. paddlex/inference/pipelines/layout_parsing/pipeline.py +54 -56
  264. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +904 -261
  265. paddlex/inference/pipelines/layout_parsing/result.py +9 -21
  266. paddlex/inference/pipelines/layout_parsing/result_v2.py +525 -250
  267. paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
  268. paddlex/inference/pipelines/layout_parsing/utils.py +570 -2004
  269. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
  270. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
  271. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
  272. paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
  273. paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/pipeline.py +17 -10
  274. paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +1 -1
  275. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +17 -6
  276. paddlex/inference/pipelines/object_detection/__init__.py +1 -1
  277. paddlex/inference/pipelines/object_detection/pipeline.py +29 -9
  278. paddlex/inference/pipelines/ocr/__init__.py +1 -1
  279. paddlex/inference/pipelines/ocr/pipeline.py +151 -77
  280. paddlex/inference/pipelines/ocr/result.py +31 -24
  281. paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +1 -1
  282. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +17 -6
  283. paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +1 -1
  284. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +17 -6
  285. paddlex/inference/pipelines/pp_chatocr/__init__.py +1 -1
  286. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +14 -5
  287. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +22 -14
  288. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +34 -16
  289. paddlex/inference/pipelines/pp_shitu_v2/__init__.py +1 -1
  290. paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +12 -8
  291. paddlex/inference/pipelines/pp_shitu_v2/result.py +4 -4
  292. paddlex/inference/pipelines/rotated_object_detection/__init__.py +1 -1
  293. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +30 -9
  294. paddlex/inference/pipelines/seal_recognition/__init__.py +1 -1
  295. paddlex/inference/pipelines/seal_recognition/pipeline.py +127 -63
  296. paddlex/inference/pipelines/seal_recognition/result.py +4 -2
  297. paddlex/inference/pipelines/semantic_segmentation/__init__.py +1 -1
  298. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +30 -9
  299. paddlex/inference/pipelines/small_object_detection/__init__.py +1 -1
  300. paddlex/inference/pipelines/small_object_detection/pipeline.py +30 -9
  301. paddlex/inference/pipelines/table_recognition/__init__.py +1 -1
  302. paddlex/inference/pipelines/table_recognition/pipeline.py +61 -37
  303. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +668 -65
  304. paddlex/inference/pipelines/table_recognition/result.py +12 -10
  305. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +12 -8
  306. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +55 -37
  307. paddlex/inference/pipelines/table_recognition/utils.py +1 -1
  308. paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +1 -1
  309. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +16 -6
  310. paddlex/inference/pipelines/ts_classification/__init__.py +1 -1
  311. paddlex/inference/pipelines/ts_classification/pipeline.py +16 -6
  312. paddlex/inference/pipelines/ts_forecasting/__init__.py +1 -1
  313. paddlex/inference/pipelines/ts_forecasting/pipeline.py +16 -6
  314. paddlex/inference/pipelines/video_classification/__init__.py +1 -1
  315. paddlex/inference/pipelines/video_classification/pipeline.py +17 -6
  316. paddlex/inference/pipelines/video_detection/__init__.py +1 -1
  317. paddlex/inference/pipelines/video_detection/pipeline.py +20 -7
  318. paddlex/inference/serving/__init__.py +5 -1
  319. paddlex/inference/serving/basic_serving/__init__.py +1 -1
  320. paddlex/inference/serving/basic_serving/_app.py +31 -19
  321. paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +7 -4
  322. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +1 -1
  323. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +12 -4
  324. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +1 -1
  325. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +7 -2
  326. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +10 -7
  327. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +10 -7
  328. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_understanding.py +153 -0
  329. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +16 -13
  330. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +10 -7
  331. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +10 -7
  332. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +10 -7
  333. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +10 -7
  334. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +13 -7
  335. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +10 -8
  336. paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +10 -7
  337. paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +10 -7
  338. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +10 -7
  339. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +10 -7
  340. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +10 -7
  341. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +13 -7
  342. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +10 -7
  343. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -12
  344. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +17 -14
  345. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +16 -13
  346. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +16 -9
  347. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +10 -7
  348. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +10 -7
  349. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +10 -7
  350. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +10 -7
  351. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +11 -12
  352. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +14 -12
  353. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +10 -7
  354. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +10 -7
  355. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +10 -7
  356. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +10 -7
  357. paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +10 -7
  358. paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +10 -7
  359. paddlex/inference/serving/basic_serving/_server.py +9 -4
  360. paddlex/inference/serving/infra/__init__.py +1 -1
  361. paddlex/inference/serving/infra/config.py +1 -1
  362. paddlex/inference/serving/infra/models.py +13 -6
  363. paddlex/inference/serving/infra/storage.py +9 -4
  364. paddlex/inference/serving/infra/utils.py +54 -28
  365. paddlex/inference/serving/schemas/__init__.py +1 -1
  366. paddlex/inference/serving/schemas/anomaly_detection.py +1 -1
  367. paddlex/inference/serving/schemas/doc_preprocessor.py +1 -1
  368. paddlex/inference/serving/schemas/doc_understanding.py +78 -0
  369. paddlex/inference/serving/schemas/face_recognition.py +1 -1
  370. paddlex/inference/serving/schemas/formula_recognition.py +2 -2
  371. paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -1
  372. paddlex/inference/serving/schemas/image_classification.py +1 -1
  373. paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -1
  374. paddlex/inference/serving/schemas/instance_segmentation.py +1 -1
  375. paddlex/inference/serving/schemas/layout_parsing.py +2 -3
  376. paddlex/inference/serving/schemas/m_3d_bev_detection.py +1 -1
  377. paddlex/inference/serving/schemas/multilingual_speech_recognition.py +1 -1
  378. paddlex/inference/serving/schemas/object_detection.py +1 -1
  379. paddlex/inference/serving/schemas/ocr.py +1 -1
  380. paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -1
  381. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -1
  382. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -1
  383. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +2 -3
  384. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +3 -3
  385. paddlex/inference/serving/schemas/pp_shituv2.py +1 -1
  386. paddlex/inference/serving/schemas/pp_structurev3.py +11 -7
  387. paddlex/inference/serving/schemas/rotated_object_detection.py +1 -1
  388. paddlex/inference/serving/schemas/seal_recognition.py +2 -2
  389. paddlex/inference/serving/schemas/semantic_segmentation.py +1 -1
  390. paddlex/inference/serving/schemas/shared/__init__.py +1 -1
  391. paddlex/inference/serving/schemas/shared/classification.py +1 -1
  392. paddlex/inference/serving/schemas/shared/image_segmentation.py +1 -1
  393. paddlex/inference/serving/schemas/shared/object_detection.py +1 -1
  394. paddlex/inference/serving/schemas/shared/ocr.py +1 -1
  395. paddlex/inference/serving/schemas/small_object_detection.py +1 -1
  396. paddlex/inference/serving/schemas/table_recognition.py +3 -7
  397. paddlex/inference/serving/schemas/table_recognition_v2.py +6 -7
  398. paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -1
  399. paddlex/inference/serving/schemas/ts_classification.py +1 -1
  400. paddlex/inference/serving/schemas/ts_forecast.py +1 -1
  401. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -1
  402. paddlex/inference/serving/schemas/video_classification.py +1 -1
  403. paddlex/inference/serving/schemas/video_detection.py +1 -1
  404. paddlex/inference/utils/__init__.py +1 -1
  405. paddlex/inference/utils/benchmark.py +332 -179
  406. paddlex/inference/utils/color_map.py +1 -1
  407. paddlex/inference/utils/get_pipeline_path.py +1 -1
  408. paddlex/inference/utils/hpi.py +258 -0
  409. paddlex/inference/utils/hpi_model_info_collection.json +2331 -0
  410. paddlex/inference/utils/io/__init__.py +11 -11
  411. paddlex/inference/utils/io/readers.py +31 -27
  412. paddlex/inference/utils/io/style.py +21 -14
  413. paddlex/inference/utils/io/tablepyxl.py +13 -5
  414. paddlex/inference/utils/io/writers.py +9 -10
  415. paddlex/inference/utils/mkldnn_blocklist.py +25 -0
  416. paddlex/inference/utils/model_paths.py +48 -0
  417. paddlex/inference/utils/{new_ir_blacklist.py → new_ir_blocklist.py} +1 -2
  418. paddlex/inference/utils/official_models.py +278 -262
  419. paddlex/inference/utils/pp_option.py +184 -92
  420. paddlex/inference/utils/trt_blocklist.py +43 -0
  421. paddlex/inference/utils/trt_config.py +420 -0
  422. paddlex/model.py +30 -12
  423. paddlex/modules/__init__.py +57 -80
  424. paddlex/modules/anomaly_detection/__init__.py +2 -2
  425. paddlex/modules/anomaly_detection/dataset_checker/__init__.py +2 -3
  426. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
  427. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +6 -3
  428. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/check_dataset.py +8 -4
  429. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +7 -4
  430. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/split_dataset.py +2 -2
  431. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  432. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/visualizer.py +7 -2
  433. paddlex/modules/anomaly_detection/evaluator.py +3 -3
  434. paddlex/modules/anomaly_detection/exportor.py +1 -1
  435. paddlex/modules/anomaly_detection/model_list.py +1 -1
  436. paddlex/modules/anomaly_detection/trainer.py +3 -4
  437. paddlex/modules/base/__init__.py +5 -5
  438. paddlex/modules/base/build_model.py +1 -2
  439. paddlex/modules/base/dataset_checker/__init__.py +2 -2
  440. paddlex/modules/base/dataset_checker/dataset_checker.py +4 -4
  441. paddlex/modules/base/dataset_checker/utils.py +1 -3
  442. paddlex/modules/base/evaluator.py +13 -13
  443. paddlex/modules/base/exportor.py +12 -13
  444. paddlex/modules/base/trainer.py +21 -11
  445. paddlex/modules/base/utils/__init__.py +13 -0
  446. paddlex/modules/base/utils/cinn_setting.py +89 -0
  447. paddlex/modules/base/utils/coco_eval.py +94 -0
  448. paddlex/modules/base/utils/topk_eval.py +118 -0
  449. paddlex/modules/doc_vlm/__init__.py +18 -0
  450. paddlex/modules/doc_vlm/dataset_checker.py +29 -0
  451. paddlex/modules/doc_vlm/evaluator.py +29 -0
  452. paddlex/modules/doc_vlm/exportor.py +29 -0
  453. paddlex/modules/doc_vlm/model_list.py +16 -0
  454. paddlex/modules/doc_vlm/trainer.py +41 -0
  455. paddlex/modules/face_recognition/__init__.py +2 -2
  456. paddlex/modules/face_recognition/dataset_checker/__init__.py +2 -2
  457. paddlex/modules/face_recognition/dataset_checker/dataset_src/__init__.py +1 -1
  458. paddlex/modules/face_recognition/dataset_checker/dataset_src/check_dataset.py +3 -5
  459. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
  460. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  461. paddlex/modules/face_recognition/evaluator.py +3 -3
  462. paddlex/modules/face_recognition/exportor.py +1 -1
  463. paddlex/modules/face_recognition/model_list.py +1 -1
  464. paddlex/modules/face_recognition/trainer.py +1 -1
  465. paddlex/modules/formula_recognition/__init__.py +2 -2
  466. paddlex/modules/formula_recognition/dataset_checker/__init__.py +3 -3
  467. paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  468. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
  469. paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +2 -6
  470. paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
  471. paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
  472. paddlex/modules/formula_recognition/evaluator.py +6 -3
  473. paddlex/modules/formula_recognition/exportor.py +1 -1
  474. paddlex/modules/formula_recognition/model_list.py +4 -1
  475. paddlex/modules/formula_recognition/trainer.py +5 -3
  476. paddlex/modules/general_recognition/__init__.py +2 -2
  477. paddlex/modules/general_recognition/dataset_checker/__init__.py +2 -2
  478. paddlex/modules/general_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  479. paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +7 -9
  480. paddlex/modules/general_recognition/dataset_checker/dataset_src/check_dataset.py +4 -5
  481. paddlex/modules/general_recognition/dataset_checker/dataset_src/convert_dataset.py +6 -5
  482. paddlex/modules/general_recognition/dataset_checker/dataset_src/split_dataset.py +1 -1
  483. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
  484. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  485. paddlex/modules/general_recognition/evaluator.py +2 -2
  486. paddlex/modules/general_recognition/exportor.py +1 -1
  487. paddlex/modules/general_recognition/model_list.py +1 -1
  488. paddlex/modules/general_recognition/trainer.py +1 -1
  489. paddlex/modules/image_classification/__init__.py +2 -2
  490. paddlex/modules/image_classification/dataset_checker/__init__.py +2 -2
  491. paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py +2 -2
  492. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  493. paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
  494. paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py +4 -4
  495. paddlex/modules/image_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  496. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
  497. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  498. paddlex/modules/image_classification/evaluator.py +3 -3
  499. paddlex/modules/image_classification/exportor.py +1 -1
  500. paddlex/modules/image_classification/model_list.py +2 -1
  501. paddlex/modules/image_classification/trainer.py +3 -3
  502. paddlex/modules/image_unwarping/__init__.py +1 -1
  503. paddlex/modules/image_unwarping/model_list.py +1 -1
  504. paddlex/modules/instance_segmentation/__init__.py +2 -2
  505. paddlex/modules/instance_segmentation/dataset_checker/__init__.py +2 -3
  506. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
  507. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +9 -5
  508. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/check_dataset.py +8 -5
  509. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/convert_dataset.py +8 -8
  510. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/split_dataset.py +7 -4
  511. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
  512. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +10 -8
  513. paddlex/modules/instance_segmentation/evaluator.py +2 -2
  514. paddlex/modules/instance_segmentation/exportor.py +1 -1
  515. paddlex/modules/instance_segmentation/model_list.py +1 -1
  516. paddlex/modules/instance_segmentation/trainer.py +1 -1
  517. paddlex/modules/keypoint_detection/__init__.py +2 -2
  518. paddlex/modules/keypoint_detection/dataset_checker/__init__.py +2 -2
  519. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +1 -1
  520. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
  521. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  522. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +8 -3
  523. paddlex/modules/keypoint_detection/evaluator.py +2 -2
  524. paddlex/modules/keypoint_detection/exportor.py +1 -1
  525. paddlex/modules/keypoint_detection/model_list.py +1 -1
  526. paddlex/modules/keypoint_detection/trainer.py +2 -2
  527. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/__init__.py +2 -2
  528. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/__init__.py +3 -3
  529. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/__init__.py +2 -2
  530. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/analyse_dataset.py +8 -8
  531. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/check_dataset.py +1 -2
  532. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/evaluator.py +3 -3
  533. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/exportor.py +1 -1
  534. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/model_list.py +1 -1
  535. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/trainer.py +5 -7
  536. paddlex/modules/multilabel_classification/__init__.py +2 -2
  537. paddlex/modules/multilabel_classification/dataset_checker/__init__.py +2 -2
  538. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/__init__.py +2 -2
  539. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  540. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
  541. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/convert_dataset.py +10 -7
  542. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  543. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
  544. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +1 -5
  545. paddlex/modules/multilabel_classification/evaluator.py +3 -3
  546. paddlex/modules/multilabel_classification/exportor.py +1 -1
  547. paddlex/modules/multilabel_classification/model_list.py +1 -1
  548. paddlex/modules/multilabel_classification/trainer.py +3 -3
  549. paddlex/modules/multilingual_speech_recognition/__init__.py +2 -2
  550. paddlex/modules/multilingual_speech_recognition/dataset_checker.py +3 -3
  551. paddlex/modules/multilingual_speech_recognition/evaluator.py +3 -3
  552. paddlex/modules/multilingual_speech_recognition/exportor.py +3 -3
  553. paddlex/modules/multilingual_speech_recognition/model_list.py +1 -1
  554. paddlex/modules/multilingual_speech_recognition/trainer.py +7 -5
  555. paddlex/modules/object_detection/__init__.py +2 -2
  556. paddlex/modules/object_detection/dataset_checker/__init__.py +2 -11
  557. paddlex/modules/object_detection/dataset_checker/dataset_src/__init__.py +2 -2
  558. paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +10 -8
  559. paddlex/modules/object_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
  560. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +17 -12
  561. paddlex/modules/object_detection/dataset_checker/dataset_src/split_dataset.py +8 -4
  562. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  563. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +9 -8
  564. paddlex/modules/object_detection/evaluator.py +11 -6
  565. paddlex/modules/object_detection/exportor.py +1 -1
  566. paddlex/modules/object_detection/model_list.py +3 -1
  567. paddlex/modules/object_detection/trainer.py +4 -5
  568. paddlex/modules/open_vocabulary_detection/__init__.py +2 -2
  569. paddlex/modules/open_vocabulary_detection/dataset_checker.py +3 -3
  570. paddlex/modules/open_vocabulary_detection/evaluator.py +3 -3
  571. paddlex/modules/open_vocabulary_detection/exportor.py +3 -3
  572. paddlex/modules/open_vocabulary_detection/model_list.py +2 -4
  573. paddlex/modules/open_vocabulary_detection/trainer.py +7 -5
  574. paddlex/modules/open_vocabulary_segmentation/__init__.py +2 -2
  575. paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +3 -3
  576. paddlex/modules/open_vocabulary_segmentation/evaluator.py +3 -3
  577. paddlex/modules/open_vocabulary_segmentation/exportor.py +3 -3
  578. paddlex/modules/open_vocabulary_segmentation/model_list.py +1 -1
  579. paddlex/modules/open_vocabulary_segmentation/trainer.py +7 -5
  580. paddlex/modules/semantic_segmentation/__init__.py +2 -2
  581. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +2 -3
  582. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
  583. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/analyse_dataset.py +6 -3
  584. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/check_dataset.py +2 -2
  585. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/convert_dataset.py +7 -4
  586. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/split_dataset.py +2 -2
  587. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
  588. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/visualizer.py +6 -2
  589. paddlex/modules/semantic_segmentation/evaluator.py +3 -3
  590. paddlex/modules/semantic_segmentation/exportor.py +1 -1
  591. paddlex/modules/semantic_segmentation/model_list.py +1 -1
  592. paddlex/modules/semantic_segmentation/trainer.py +3 -4
  593. paddlex/modules/table_recognition/__init__.py +2 -2
  594. paddlex/modules/table_recognition/dataset_checker/__init__.py +5 -5
  595. paddlex/modules/table_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  596. paddlex/modules/table_recognition/dataset_checker/dataset_src/analyse_dataset.py +3 -2
  597. paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +8 -7
  598. paddlex/modules/table_recognition/dataset_checker/dataset_src/split_dataset.py +2 -1
  599. paddlex/modules/table_recognition/evaluator.py +3 -3
  600. paddlex/modules/table_recognition/exportor.py +1 -1
  601. paddlex/modules/table_recognition/model_list.py +1 -1
  602. paddlex/modules/table_recognition/trainer.py +2 -5
  603. paddlex/modules/text_detection/__init__.py +2 -2
  604. paddlex/modules/text_detection/dataset_checker/__init__.py +4 -6
  605. paddlex/modules/text_detection/dataset_checker/dataset_src/__init__.py +2 -2
  606. paddlex/modules/text_detection/dataset_checker/dataset_src/analyse_dataset.py +12 -9
  607. paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +3 -3
  608. paddlex/modules/text_detection/dataset_checker/dataset_src/split_dataset.py +3 -3
  609. paddlex/modules/text_detection/evaluator.py +3 -3
  610. paddlex/modules/text_detection/exportor.py +1 -1
  611. paddlex/modules/text_detection/model_list.py +3 -1
  612. paddlex/modules/text_detection/trainer.py +2 -5
  613. paddlex/modules/text_recognition/__init__.py +2 -2
  614. paddlex/modules/text_recognition/dataset_checker/__init__.py +4 -5
  615. paddlex/modules/text_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  616. paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
  617. paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +2 -5
  618. paddlex/modules/text_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
  619. paddlex/modules/text_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
  620. paddlex/modules/text_recognition/evaluator.py +3 -3
  621. paddlex/modules/text_recognition/exportor.py +1 -1
  622. paddlex/modules/text_recognition/model_list.py +3 -1
  623. paddlex/modules/text_recognition/trainer.py +2 -3
  624. paddlex/modules/ts_anomaly_detection/__init__.py +2 -2
  625. paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +4 -5
  626. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
  627. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +1 -9
  628. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/check_dataset.py +2 -2
  629. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -6
  630. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/split_dataset.py +4 -4
  631. paddlex/modules/ts_anomaly_detection/evaluator.py +3 -3
  632. paddlex/modules/ts_anomaly_detection/exportor.py +2 -3
  633. paddlex/modules/ts_anomaly_detection/model_list.py +1 -1
  634. paddlex/modules/ts_anomaly_detection/trainer.py +8 -8
  635. paddlex/modules/ts_classification/__init__.py +2 -2
  636. paddlex/modules/ts_classification/dataset_checker/__init__.py +4 -5
  637. paddlex/modules/ts_classification/dataset_checker/dataset_src/__init__.py +2 -2
  638. paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -5
  639. paddlex/modules/ts_classification/dataset_checker/dataset_src/check_dataset.py +2 -2
  640. paddlex/modules/ts_classification/dataset_checker/dataset_src/convert_dataset.py +2 -6
  641. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +5 -5
  642. paddlex/modules/ts_classification/evaluator.py +3 -3
  643. paddlex/modules/ts_classification/exportor.py +2 -3
  644. paddlex/modules/ts_classification/model_list.py +1 -1
  645. paddlex/modules/ts_classification/trainer.py +7 -7
  646. paddlex/modules/ts_forecast/__init__.py +2 -2
  647. paddlex/modules/ts_forecast/dataset_checker/__init__.py +4 -5
  648. paddlex/modules/ts_forecast/dataset_checker/dataset_src/__init__.py +2 -2
  649. paddlex/modules/ts_forecast/dataset_checker/dataset_src/analyse_dataset.py +1 -9
  650. paddlex/modules/ts_forecast/dataset_checker/dataset_src/check_dataset.py +2 -2
  651. paddlex/modules/ts_forecast/dataset_checker/dataset_src/convert_dataset.py +2 -6
  652. paddlex/modules/ts_forecast/dataset_checker/dataset_src/split_dataset.py +4 -4
  653. paddlex/modules/ts_forecast/evaluator.py +3 -3
  654. paddlex/modules/ts_forecast/exportor.py +2 -3
  655. paddlex/modules/ts_forecast/model_list.py +1 -1
  656. paddlex/modules/ts_forecast/trainer.py +7 -7
  657. paddlex/modules/video_classification/__init__.py +2 -2
  658. paddlex/modules/video_classification/dataset_checker/__init__.py +2 -2
  659. paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +2 -2
  660. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +9 -9
  661. paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +2 -3
  662. paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  663. paddlex/modules/video_classification/evaluator.py +3 -3
  664. paddlex/modules/video_classification/exportor.py +1 -1
  665. paddlex/modules/video_classification/model_list.py +1 -1
  666. paddlex/modules/video_classification/trainer.py +3 -3
  667. paddlex/modules/video_detection/__init__.py +2 -2
  668. paddlex/modules/video_detection/dataset_checker/__init__.py +2 -2
  669. paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +2 -2
  670. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  671. paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +3 -5
  672. paddlex/modules/video_detection/evaluator.py +3 -3
  673. paddlex/modules/video_detection/exportor.py +1 -1
  674. paddlex/modules/video_detection/model_list.py +1 -1
  675. paddlex/modules/video_detection/trainer.py +3 -3
  676. paddlex/ops/__init__.py +7 -4
  677. paddlex/ops/iou3d_nms/iou3d_cpu.cpp +8 -6
  678. paddlex/ops/iou3d_nms/iou3d_cpu.h +3 -2
  679. paddlex/ops/iou3d_nms/iou3d_nms.cpp +8 -6
  680. paddlex/ops/iou3d_nms/iou3d_nms.h +6 -4
  681. paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +24 -18
  682. paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +9 -7
  683. paddlex/ops/setup.py +3 -3
  684. paddlex/ops/voxel/voxelize_op.cc +22 -19
  685. paddlex/ops/voxel/voxelize_op.cu +25 -25
  686. paddlex/paddlex_cli.py +104 -87
  687. paddlex/repo_apis/Paddle3D_api/__init__.py +1 -1
  688. paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +1 -1
  689. paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +1 -1
  690. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +6 -6
  691. paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +2 -2
  692. paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +1 -1
  693. paddlex/repo_apis/Paddle3D_api/pp3d_config.py +3 -2
  694. paddlex/repo_apis/PaddleClas_api/__init__.py +1 -1
  695. paddlex/repo_apis/PaddleClas_api/cls/__init__.py +3 -3
  696. paddlex/repo_apis/PaddleClas_api/cls/config.py +5 -4
  697. paddlex/repo_apis/PaddleClas_api/cls/model.py +4 -4
  698. paddlex/repo_apis/PaddleClas_api/cls/register.py +12 -3
  699. paddlex/repo_apis/PaddleClas_api/cls/runner.py +2 -3
  700. paddlex/repo_apis/PaddleClas_api/shitu_rec/__init__.py +2 -2
  701. paddlex/repo_apis/PaddleClas_api/shitu_rec/config.py +2 -2
  702. paddlex/repo_apis/PaddleClas_api/shitu_rec/model.py +1 -4
  703. paddlex/repo_apis/PaddleClas_api/shitu_rec/register.py +2 -2
  704. paddlex/repo_apis/PaddleClas_api/shitu_rec/runner.py +1 -6
  705. paddlex/repo_apis/PaddleDetection_api/__init__.py +2 -2
  706. paddlex/repo_apis/PaddleDetection_api/config_helper.py +3 -3
  707. paddlex/repo_apis/PaddleDetection_api/instance_seg/__init__.py +2 -2
  708. paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +2 -3
  709. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +4 -4
  710. paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py +2 -3
  711. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +2 -3
  712. paddlex/repo_apis/PaddleDetection_api/object_det/__init__.py +3 -3
  713. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +5 -4
  714. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +6 -7
  715. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +26 -1
  716. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +32 -3
  717. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +2 -3
  718. paddlex/repo_apis/PaddleNLP_api/__init__.py +1 -1
  719. paddlex/repo_apis/PaddleOCR_api/__init__.py +4 -3
  720. paddlex/repo_apis/PaddleOCR_api/config_utils.py +1 -1
  721. paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +1 -1
  722. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +7 -6
  723. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +9 -13
  724. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +29 -3
  725. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +2 -3
  726. paddlex/repo_apis/PaddleOCR_api/table_rec/__init__.py +1 -1
  727. paddlex/repo_apis/PaddleOCR_api/table_rec/config.py +1 -1
  728. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +4 -4
  729. paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +2 -3
  730. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +3 -3
  731. paddlex/repo_apis/PaddleOCR_api/text_det/__init__.py +1 -1
  732. paddlex/repo_apis/PaddleOCR_api/text_det/config.py +1 -1
  733. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +4 -4
  734. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +20 -3
  735. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +3 -3
  736. paddlex/repo_apis/PaddleOCR_api/text_rec/__init__.py +1 -1
  737. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +7 -6
  738. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +9 -13
  739. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +20 -3
  740. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +2 -3
  741. paddlex/repo_apis/PaddleSeg_api/__init__.py +1 -1
  742. paddlex/repo_apis/PaddleSeg_api/base_seg_config.py +2 -2
  743. paddlex/repo_apis/PaddleSeg_api/seg/__init__.py +1 -1
  744. paddlex/repo_apis/PaddleSeg_api/seg/config.py +3 -6
  745. paddlex/repo_apis/PaddleSeg_api/seg/model.py +6 -6
  746. paddlex/repo_apis/PaddleSeg_api/seg/register.py +2 -3
  747. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +2 -3
  748. paddlex/repo_apis/PaddleTS_api/__init__.py +4 -3
  749. paddlex/repo_apis/PaddleTS_api/ts_ad/__init__.py +1 -1
  750. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +5 -6
  751. paddlex/repo_apis/PaddleTS_api/ts_ad/register.py +2 -2
  752. paddlex/repo_apis/PaddleTS_api/ts_ad/runner.py +2 -2
  753. paddlex/repo_apis/PaddleTS_api/ts_base/__init__.py +1 -1
  754. paddlex/repo_apis/PaddleTS_api/ts_base/config.py +2 -4
  755. paddlex/repo_apis/PaddleTS_api/ts_base/model.py +4 -4
  756. paddlex/repo_apis/PaddleTS_api/ts_base/runner.py +2 -2
  757. paddlex/repo_apis/PaddleTS_api/ts_cls/__init__.py +1 -1
  758. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +4 -5
  759. paddlex/repo_apis/PaddleTS_api/ts_cls/register.py +2 -2
  760. paddlex/repo_apis/PaddleTS_api/ts_cls/runner.py +2 -2
  761. paddlex/repo_apis/PaddleTS_api/ts_fc/__init__.py +1 -1
  762. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +6 -7
  763. paddlex/repo_apis/PaddleTS_api/ts_fc/register.py +1 -1
  764. paddlex/repo_apis/PaddleVideo_api/__init__.py +1 -1
  765. paddlex/repo_apis/PaddleVideo_api/config_utils.py +1 -1
  766. paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +3 -3
  767. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +5 -4
  768. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +4 -4
  769. paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +2 -3
  770. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +2 -3
  771. paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +3 -3
  772. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +5 -4
  773. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +5 -5
  774. paddlex/repo_apis/PaddleVideo_api/video_det/register.py +2 -3
  775. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +2 -3
  776. paddlex/repo_apis/__init__.py +1 -1
  777. paddlex/repo_apis/base/__init__.py +4 -5
  778. paddlex/repo_apis/base/config.py +3 -4
  779. paddlex/repo_apis/base/model.py +11 -19
  780. paddlex/repo_apis/base/register.py +1 -1
  781. paddlex/repo_apis/base/runner.py +11 -12
  782. paddlex/repo_apis/base/utils/__init__.py +1 -1
  783. paddlex/repo_apis/base/utils/arg.py +1 -1
  784. paddlex/repo_apis/base/utils/subprocess.py +1 -1
  785. paddlex/repo_manager/__init__.py +2 -9
  786. paddlex/repo_manager/core.py +12 -30
  787. paddlex/repo_manager/meta.py +41 -31
  788. paddlex/repo_manager/repo.py +171 -161
  789. paddlex/repo_manager/utils.py +13 -224
  790. paddlex/utils/__init__.py +1 -1
  791. paddlex/utils/cache.py +8 -10
  792. paddlex/utils/config.py +6 -5
  793. paddlex/utils/{custom_device_whitelist.py → custom_device_list.py} +53 -199
  794. paddlex/utils/deps.py +249 -0
  795. paddlex/utils/device.py +87 -36
  796. paddlex/utils/download.py +4 -4
  797. paddlex/utils/env.py +37 -7
  798. paddlex/utils/errors/__init__.py +1 -1
  799. paddlex/utils/errors/dataset_checker.py +1 -1
  800. paddlex/utils/errors/others.py +2 -16
  801. paddlex/utils/file_interface.py +4 -5
  802. paddlex/utils/flags.py +17 -12
  803. paddlex/utils/fonts/__init__.py +36 -5
  804. paddlex/utils/func_register.py +1 -1
  805. paddlex/utils/install.py +87 -0
  806. paddlex/utils/interactive_get_pipeline.py +3 -3
  807. paddlex/utils/lazy_loader.py +3 -3
  808. paddlex/utils/logging.py +10 -1
  809. paddlex/utils/misc.py +6 -6
  810. paddlex/utils/pipeline_arguments.py +15 -7
  811. paddlex/utils/result_saver.py +4 -5
  812. paddlex/utils/subclass_register.py +2 -4
  813. paddlex/version.py +2 -1
  814. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/METADATA +237 -102
  815. paddlex-3.0.1.dist-info/RECORD +1095 -0
  816. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
  817. paddlex/inference/models/base/predictor/basic_predictor.py +0 -139
  818. paddlex/paddle2onnx_requirements.txt +0 -1
  819. paddlex/repo_manager/requirements.txt +0 -21
  820. paddlex/serving_requirements.txt +0 -9
  821. paddlex-3.0.0rc0.dist-info/RECORD +0 -1015
  822. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
  823. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info/licenses}/LICENSE +0 -0
  824. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -12,42 +12,50 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import os, sys
16
- from typing import Any, Dict, Optional, Union, List, Tuple
17
- import numpy as np
18
15
  import math
19
- import cv2
20
- from sklearn.cluster import KMeans
16
+ import re
17
+ from typing import Any, Dict, List, Optional, Tuple, Union
18
+
19
+ import numpy as np
20
+
21
+ from ....utils import logging
22
+ from ....utils.deps import (
23
+ function_requires_deps,
24
+ is_dep_available,
25
+ pipeline_requires_extra,
26
+ )
27
+ from ...common.batch_sampler import ImageBatchSampler
28
+ from ...common.reader import ReadImage
29
+ from ...models.object_detection.result import DetResult
30
+ from ...utils.hpi import HPIConfig
31
+ from ...utils.pp_option import PaddlePredictorOption
32
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
21
33
  from ..base import BasePipeline
22
34
  from ..components import CropByBoxes
23
- from .utils import get_neighbor_boxes_idx
24
- from .table_recognition_post_processing_v2 import get_table_recognition_res
35
+ from ..doc_preprocessor.result import DocPreprocessorResult
36
+ from ..layout_parsing.utils import get_sub_regions_ocr_res
37
+ from ..ocr.result import OCRResult
38
+ from .result import SingleTableRecognitionResult, TableRecognitionResult
25
39
  from .table_recognition_post_processing import (
26
40
  get_table_recognition_res as get_table_recognition_res_e2e,
27
41
  )
28
- from .result import SingleTableRecognitionResult, TableRecognitionResult
29
- from ....utils import logging
30
- from ...utils.pp_option import PaddlePredictorOption
31
- from ...common.reader import ReadImage
32
- from ...common.batch_sampler import ImageBatchSampler
33
- from ..ocr.result import OCRResult
34
- from ..doc_preprocessor.result import DocPreprocessorResult
42
+ from .table_recognition_post_processing_v2 import get_table_recognition_res
43
+ from .utils import get_neighbor_boxes_idx
35
44
 
36
- from ...models.object_detection.result import DetResult
45
+ if is_dep_available("scikit-learn"):
46
+ from sklearn.cluster import KMeans
37
47
 
38
48
 
39
- class TableRecognitionPipelineV2(BasePipeline):
49
+ class _TableRecognitionPipelineV2(BasePipeline):
40
50
  """Table Recognition Pipeline"""
41
51
 
42
- entities = ["table_recognition_v2"]
43
-
44
52
  def __init__(
45
53
  self,
46
54
  config: Dict,
47
55
  device: str = None,
48
56
  pp_option: PaddlePredictorOption = None,
49
57
  use_hpip: bool = False,
50
- hpi_params: Optional[Dict[str, Any]] = None,
58
+ hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
51
59
  ) -> None:
52
60
  """Initializes the layout parsing pipeline.
53
61
 
@@ -55,12 +63,15 @@ class TableRecognitionPipelineV2(BasePipeline):
55
63
  config (Dict): Configuration dictionary containing various settings.
56
64
  device (str, optional): Device to run the predictions on. Defaults to None.
57
65
  pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
58
- use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
59
- hpi_params (Optional[Dict[str, Any]], optional): HPIP parameters. Defaults to None.
66
+ use_hpip (bool, optional): Whether to use the high-performance
67
+ inference plugin (HPIP) by default. Defaults to False.
68
+ hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
69
+ The default high-performance inference configuration dictionary.
70
+ Defaults to None.
60
71
  """
61
72
 
62
73
  super().__init__(
63
- device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_params=hpi_params
74
+ device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
64
75
  )
65
76
 
66
77
  self.use_doc_preprocessor = config.get("use_doc_preprocessor", True)
@@ -122,6 +133,7 @@ class TableRecognitionPipelineV2(BasePipeline):
122
133
  )
123
134
 
124
135
  self.use_ocr_model = config.get("use_ocr_model", True)
136
+ self.general_ocr_pipeline = None
125
137
  if self.use_ocr_model:
126
138
  general_ocr_config = config.get("SubPipelines", {}).get(
127
139
  "GeneralOCR",
@@ -130,12 +142,15 @@ class TableRecognitionPipelineV2(BasePipeline):
130
142
  self.general_ocr_pipeline = self.create_pipeline(general_ocr_config)
131
143
  else:
132
144
  self.general_ocr_config_bak = config.get("SubPipelines", {}).get(
133
- "GeneralOCR",
134
- None
145
+ "GeneralOCR", None
135
146
  )
136
147
 
137
- self._crop_by_boxes = CropByBoxes()
148
+ self.table_orientation_classify_model = None
149
+ self.table_orientation_classify_config = config.get("SubModules", {}).get(
150
+ "TableOrientationClassify", None
151
+ )
138
152
 
153
+ self._crop_by_boxes = CropByBoxes()
139
154
  self.batch_sampler = ImageBatchSampler(batch_size=1)
140
155
  self.img_reader = ReadImage(format="BGR")
141
156
 
@@ -414,12 +429,13 @@ class TableRecognitionPipelineV2(BasePipeline):
414
429
  intersection_area = (x_right - x_left) * (y_bottom - y_top)
415
430
  # Calculate the area of both rectangles
416
431
  box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
417
- box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
432
+ (box2[2] - box2[0]) * (box2[3] - box2[1])
418
433
  # Calculate the IoU
419
434
  iou = intersection_area / float(box1_area)
420
435
  return iou
421
436
 
422
437
  # Function to combine rectangles into N rectangles
438
+ @function_requires_deps("scikit-learn")
423
439
  def combine_rectangles(rectangles, N):
424
440
  """
425
441
  Combine rectangles into N rectangles based on geometric proximity.
@@ -528,7 +544,177 @@ class TableRecognitionPipelineV2(BasePipeline):
528
544
  final_results = combine_rectangles(ocr_det_results, html_pred_boxes_nums)
529
545
  return final_results
530
546
 
531
- def split_ocr_bboxes_by_table_cells(self, ori_img, cells_bboxes):
547
+ def split_ocr_bboxes_by_table_cells(
548
+ self, cells_det_results, overall_ocr_res, ori_img, k=2
549
+ ):
550
+ """
551
+ Split OCR bounding boxes based on table cell boundaries when they span multiple cells horizontally.
552
+
553
+ Args:
554
+ cells_det_results (list): List of cell bounding boxes in format [x1, y1, x2, y2]
555
+ overall_ocr_res (dict): Dictionary containing OCR results with keys:
556
+ - 'rec_boxes': OCR bounding boxes (will be converted to list)
557
+ - 'rec_texts': OCR recognized texts
558
+ ori_img (np.array): Original input image array
559
+ k (int): Threshold for determining when to split (minimum number of cells spanned)
560
+
561
+ Returns:
562
+ dict: Modified overall_ocr_res with split boxes and texts
563
+ """
564
+
565
+ def calculate_iou(box1, box2):
566
+ """
567
+ Calculate Intersection over Union (IoU) between two bounding boxes.
568
+
569
+ Args:
570
+ box1 (list): [x1, y1, x2, y2]
571
+ box2 (list): [x1, y1, x2, y2]
572
+
573
+ Returns:
574
+ float: IoU value
575
+ """
576
+ # Determine intersection coordinates
577
+ x_left = max(box1[0], box2[0])
578
+ y_top = max(box1[1], box2[1])
579
+ x_right = min(box1[2], box2[2])
580
+ y_bottom = min(box1[3], box2[3])
581
+ if x_right < x_left or y_bottom < y_top:
582
+ return 0.0
583
+ # Calculate areas
584
+ intersection_area = (x_right - x_left) * (y_bottom - y_top)
585
+ box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
586
+ box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
587
+ # return intersection_area / float(box1_area + box2_area - intersection_area)
588
+ return intersection_area / box2_area
589
+
590
+ def get_overlapping_cells(ocr_box, cells):
591
+ """
592
+ Find cells that overlap significantly with the OCR box (IoU > 0.5).
593
+
594
+ Args:
595
+ ocr_box (list): OCR bounding box [x1, y1, x2, y2]
596
+ cells (list): List of cell bounding boxes
597
+
598
+ Returns:
599
+ list: Indices of overlapping cells, sorted by x-coordinate
600
+ """
601
+ overlapping = []
602
+ for idx, cell in enumerate(cells):
603
+ if calculate_iou(ocr_box, cell) > 0.5:
604
+ overlapping.append(idx)
605
+ # Sort overlapping cells by their x-coordinate (left to right)
606
+ overlapping.sort(key=lambda i: cells[i][0])
607
+ return overlapping
608
+
609
+ def split_box_by_cells(ocr_box, cell_indices, cells):
610
+ """
611
+ Split OCR box vertically at cell boundaries.
612
+
613
+ Args:
614
+ ocr_box (list): Original OCR box [x1, y1, x2, y2]
615
+ cell_indices (list): Indices of cells to split by
616
+ cells (list): All cell bounding boxes
617
+
618
+ Returns:
619
+ list: List of split boxes
620
+ """
621
+ if not cell_indices:
622
+ return [ocr_box]
623
+ split_boxes = []
624
+ cells_to_split = [cells[i] for i in cell_indices]
625
+ if ocr_box[0] < cells_to_split[0][0]:
626
+ split_boxes.append(
627
+ [ocr_box[0], ocr_box[1], cells_to_split[0][0], ocr_box[3]]
628
+ )
629
+ for i in range(len(cells_to_split)):
630
+ current_cell = cells_to_split[i]
631
+ split_boxes.append(
632
+ [
633
+ max(ocr_box[0], current_cell[0]),
634
+ ocr_box[1],
635
+ min(ocr_box[2], current_cell[2]),
636
+ ocr_box[3],
637
+ ]
638
+ )
639
+ if i < len(cells_to_split) - 1:
640
+ next_cell = cells_to_split[i + 1]
641
+ if current_cell[2] < next_cell[0]:
642
+ split_boxes.append(
643
+ [current_cell[2], ocr_box[1], next_cell[0], ocr_box[3]]
644
+ )
645
+ last_cell = cells_to_split[-1]
646
+ if last_cell[2] < ocr_box[2]:
647
+ split_boxes.append([last_cell[2], ocr_box[1], ocr_box[2], ocr_box[3]])
648
+ unique_boxes = []
649
+ seen = set()
650
+ for box in split_boxes:
651
+ box_tuple = tuple(box)
652
+ if box_tuple not in seen:
653
+ seen.add(box_tuple)
654
+ unique_boxes.append(box)
655
+
656
+ return unique_boxes
657
+
658
+ # Convert OCR boxes to list if needed
659
+ if hasattr(overall_ocr_res["rec_boxes"], "tolist"):
660
+ ocr_det_results = overall_ocr_res["rec_boxes"].tolist()
661
+ else:
662
+ ocr_det_results = overall_ocr_res["rec_boxes"]
663
+ ocr_texts = overall_ocr_res["rec_texts"]
664
+
665
+ # Make copies to modify
666
+ new_boxes = []
667
+ new_texts = []
668
+
669
+ # Process each OCR box
670
+ i = 0
671
+ while i < len(ocr_det_results):
672
+ ocr_box = ocr_det_results[i]
673
+ text = ocr_texts[i]
674
+ # Find cells that significantly overlap with this OCR box
675
+ overlapping_cells = get_overlapping_cells(ocr_box, cells_det_results)
676
+ # Check if we need to split (spans >= k cells)
677
+ if len(overlapping_cells) >= k:
678
+ # Split the box at cell boundaries
679
+ split_boxes = split_box_by_cells(
680
+ ocr_box, overlapping_cells, cells_det_results
681
+ )
682
+ # Process each split box
683
+ split_texts = []
684
+ for box in split_boxes:
685
+ x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
686
+ if y2 - y1 > 1 and x2 - x1 > 1:
687
+ ocr_result = next(
688
+ self.general_ocr_pipeline.text_rec_model(
689
+ ori_img[y1:y2, x1:x2, :]
690
+ )
691
+ )
692
+ # Extract the recognized text from the OCR result
693
+ if "rec_text" in ocr_result:
694
+ result = ocr_result[
695
+ "rec_text"
696
+ ] # Assumes "rec_texts" contains a single string
697
+ else:
698
+ result = ""
699
+ else:
700
+ result = ""
701
+ split_texts.append(result)
702
+ # Add split boxes and texts to results
703
+ new_boxes.extend(split_boxes)
704
+ new_texts.extend(split_texts)
705
+ else:
706
+ # Keep original box and text
707
+ new_boxes.append(ocr_box)
708
+ new_texts.append(text)
709
+ i += 1
710
+
711
+ # Update the results dictionary
712
+ overall_ocr_res["rec_boxes"] = new_boxes
713
+ overall_ocr_res["rec_texts"] = new_texts
714
+
715
+ return overall_ocr_res
716
+
717
+ def gen_ocr_with_table_cells(self, ori_img, cells_bboxes):
532
718
  """
533
719
  Splits OCR bounding boxes by table cells and retrieves text.
534
720
 
@@ -549,20 +735,228 @@ class TableRecognitionPipelineV2(BasePipeline):
549
735
  # Extract and round up the coordinates of the bounding box.
550
736
  x1, y1, x2, y2 = [math.ceil(k) for k in cells_bboxes[i]]
551
737
  # Perform OCR on the defined region of the image and get the recognized text.
552
- rec_te = next(self.general_ocr_pipeline(ori_img[y1:y2, x1:x2, :]))
553
- # Concatenate the texts and append them to the texts_list.
554
- texts_list.append("".join(rec_te["rec_texts"]))
738
+ if y2 - y1 > 1 and x2 - x1 > 1:
739
+ rec_te = next(self.general_ocr_pipeline(ori_img[y1:y2, x1:x2, :]))
740
+ # Concatenate the texts and append them to the texts_list.
741
+ texts_list.append("".join(rec_te["rec_texts"]))
555
742
  # Return the list of recognized texts from each cell.
556
743
  return texts_list
557
744
 
745
+ def map_cells_to_original_image(
746
+ self, detections, table_angle, img_width, img_height
747
+ ):
748
+ """
749
+ Map bounding boxes from the rotated image back to the original image.
750
+
751
+ Parameters:
752
+ - detections: list of numpy arrays, each containing bounding box coordinates [x1, y1, x2, y2]
753
+ - table_angle: rotation angle in degrees (90, 180, or 270)
754
+ - width_orig: width of the original image (img1)
755
+ - height_orig: height of the original image (img1)
756
+
757
+ Returns:
758
+ - mapped_detections: list of numpy arrays with mapped bounding box coordinates
759
+ """
760
+
761
+ mapped_detections = []
762
+ for i in range(len(detections)):
763
+ tbx1, tby1, tbx2, tby2 = (
764
+ detections[i][0],
765
+ detections[i][1],
766
+ detections[i][2],
767
+ detections[i][3],
768
+ )
769
+ if table_angle == "270":
770
+ new_x1, new_y1 = tby1, img_width - tbx2
771
+ new_x2, new_y2 = tby2, img_width - tbx1
772
+ elif table_angle == "180":
773
+ new_x1, new_y1 = img_width - tbx2, img_height - tby2
774
+ new_x2, new_y2 = img_width - tbx1, img_height - tby1
775
+ elif table_angle == "90":
776
+ new_x1, new_y1 = img_height - tby2, tbx1
777
+ new_x2, new_y2 = img_height - tby1, tbx2
778
+ new_box = np.array([new_x1, new_y1, new_x2, new_y2])
779
+ mapped_detections.append(new_box)
780
+ return mapped_detections
781
+
782
+ def split_string_by_keywords(self, html_string):
783
+ """
784
+ Split HTML string by keywords.
785
+
786
+ Args:
787
+ html_string (str): The HTML string.
788
+ Returns:
789
+ split_html (list): The list of html keywords.
790
+ """
791
+
792
+ keywords = [
793
+ "<thead>",
794
+ "</thead>",
795
+ "<tbody>",
796
+ "</tbody>",
797
+ "<tr>",
798
+ "</tr>",
799
+ "<td>",
800
+ "<td",
801
+ ">",
802
+ "</td>",
803
+ 'colspan="2"',
804
+ 'colspan="3"',
805
+ 'colspan="4"',
806
+ 'colspan="5"',
807
+ 'colspan="6"',
808
+ 'colspan="7"',
809
+ 'colspan="8"',
810
+ 'colspan="9"',
811
+ 'colspan="10"',
812
+ 'colspan="11"',
813
+ 'colspan="12"',
814
+ 'colspan="13"',
815
+ 'colspan="14"',
816
+ 'colspan="15"',
817
+ 'colspan="16"',
818
+ 'colspan="17"',
819
+ 'colspan="18"',
820
+ 'colspan="19"',
821
+ 'colspan="20"',
822
+ 'rowspan="2"',
823
+ 'rowspan="3"',
824
+ 'rowspan="4"',
825
+ 'rowspan="5"',
826
+ 'rowspan="6"',
827
+ 'rowspan="7"',
828
+ 'rowspan="8"',
829
+ 'rowspan="9"',
830
+ 'rowspan="10"',
831
+ 'rowspan="11"',
832
+ 'rowspan="12"',
833
+ 'rowspan="13"',
834
+ 'rowspan="14"',
835
+ 'rowspan="15"',
836
+ 'rowspan="16"',
837
+ 'rowspan="17"',
838
+ 'rowspan="18"',
839
+ 'rowspan="19"',
840
+ 'rowspan="20"',
841
+ ]
842
+ regex_pattern = "|".join(re.escape(keyword) for keyword in keywords)
843
+ split_result = re.split(f"({regex_pattern})", html_string)
844
+ split_html = [part for part in split_result if part]
845
+ return split_html
846
+
847
+ def cluster_positions(self, positions, tolerance):
848
+ if not positions:
849
+ return []
850
+ positions = sorted(set(positions))
851
+ clustered = []
852
+ current_cluster = [positions[0]]
853
+ for pos in positions[1:]:
854
+ if abs(pos - current_cluster[-1]) <= tolerance:
855
+ current_cluster.append(pos)
856
+ else:
857
+ clustered.append(sum(current_cluster) / len(current_cluster))
858
+ current_cluster = [pos]
859
+ clustered.append(sum(current_cluster) / len(current_cluster))
860
+ return clustered
861
+
862
+ def trans_cells_det_results_to_html(self, cells_det_results):
863
+ """
864
+ Trans table cells bboxes to HTML.
865
+
866
+ Args:
867
+ cells_det_results (list): The table cells detection results.
868
+ Returns:
869
+ html (list): The list of html keywords.
870
+ """
871
+
872
+ tolerance = 5
873
+ x_coords = [x for cell in cells_det_results for x in (cell[0], cell[2])]
874
+ y_coords = [y for cell in cells_det_results for y in (cell[1], cell[3])]
875
+ x_positions = self.cluster_positions(x_coords, tolerance)
876
+ y_positions = self.cluster_positions(y_coords, tolerance)
877
+ x_position_to_index = {x: i for i, x in enumerate(x_positions)}
878
+ y_position_to_index = {y: i for i, y in enumerate(y_positions)}
879
+ num_rows = len(y_positions) - 1
880
+ num_cols = len(x_positions) - 1
881
+ grid = [[None for _ in range(num_cols)] for _ in range(num_rows)]
882
+ cells_info = []
883
+ cell_index = 0
884
+ cell_map = {}
885
+ for index, cell in enumerate(cells_det_results):
886
+ x1, y1, x2, y2 = cell
887
+ x1_idx = min(
888
+ range(len(x_positions)), key=lambda i: abs(x_positions[i] - x1)
889
+ )
890
+ x2_idx = min(
891
+ range(len(x_positions)), key=lambda i: abs(x_positions[i] - x2)
892
+ )
893
+ y1_idx = min(
894
+ range(len(y_positions)), key=lambda i: abs(y_positions[i] - y1)
895
+ )
896
+ y2_idx = min(
897
+ range(len(y_positions)), key=lambda i: abs(y_positions[i] - y2)
898
+ )
899
+ col_start = min(x1_idx, x2_idx)
900
+ col_end = max(x1_idx, x2_idx)
901
+ row_start = min(y1_idx, y2_idx)
902
+ row_end = max(y1_idx, y2_idx)
903
+ rowspan = row_end - row_start
904
+ colspan = col_end - col_start
905
+ if rowspan == 0:
906
+ rowspan = 1
907
+ if colspan == 0:
908
+ colspan = 1
909
+ cells_info.append(
910
+ {
911
+ "row_start": row_start,
912
+ "col_start": col_start,
913
+ "rowspan": rowspan,
914
+ "colspan": colspan,
915
+ "content": "",
916
+ }
917
+ )
918
+ for r in range(row_start, row_start + rowspan):
919
+ for c in range(col_start, col_start + colspan):
920
+ key = (r, c)
921
+ if key in cell_map:
922
+ continue
923
+ else:
924
+ cell_map[key] = index
925
+ html = "<table><tbody>"
926
+ for r in range(num_rows):
927
+ html += "<tr>"
928
+ c = 0
929
+ while c < num_cols:
930
+ key = (r, c)
931
+ if key in cell_map:
932
+ cell_index = cell_map[key]
933
+ cell_info = cells_info[cell_index]
934
+ if cell_info["row_start"] == r and cell_info["col_start"] == c:
935
+ rowspan = cell_info["rowspan"]
936
+ colspan = cell_info["colspan"]
937
+ rowspan_attr = f' rowspan="{rowspan}"' if rowspan > 1 else ""
938
+ colspan_attr = f' colspan="{colspan}"' if colspan > 1 else ""
939
+ content = cell_info["content"]
940
+ html += f"<td{rowspan_attr}{colspan_attr}>{content}</td>"
941
+ c += cell_info["colspan"]
942
+ else:
943
+ html += "<td></td>"
944
+ c += 1
945
+ html += "</tr>"
946
+ html += "</tbody></table>"
947
+ html = self.split_string_by_keywords(html)
948
+ return html
949
+
558
950
  def predict_single_table_recognition_res(
559
951
  self,
560
952
  image_array: np.ndarray,
561
953
  overall_ocr_res: OCRResult,
562
954
  table_box: list,
563
- use_table_cells_ocr_results: bool = False,
564
955
  use_e2e_wired_table_rec_model: bool = False,
565
956
  use_e2e_wireless_table_rec_model: bool = False,
957
+ use_wired_table_cells_trans_to_html: bool = False,
958
+ use_wireless_table_cells_trans_to_html: bool = False,
959
+ use_ocr_results_with_table_cells: bool = True,
566
960
  flag_find_nei_text: bool = True,
567
961
  ) -> SingleTableRecognitionResult:
568
962
  """
@@ -573,9 +967,11 @@ class TableRecognitionPipelineV2(BasePipeline):
573
967
  overall_ocr_res (OCRResult): Overall OCR result obtained after running the OCR pipeline.
574
968
  The overall OCR results containing text recognition information.
575
969
  table_box (list): The table box coordinates.
576
- use_table_cells_ocr_results (bool): whether to use OCR results with cells.
577
970
  use_e2e_wired_table_rec_model (bool): Whether to use end-to-end wired table recognition model.
578
971
  use_e2e_wireless_table_rec_model (bool): Whether to use end-to-end wireless table recognition model.
972
+ use_wired_table_cells_trans_to_html (bool): Whether to use wired table cells trans to HTML.
973
+ use_wireless_table_cells_trans_to_html (bool): Whether to use wireless table cells trans to HTML.
974
+ use_ocr_results_with_table_cells (bool): Whether to use OCR results processed by table cells.
579
975
  flag_find_nei_text (bool): Whether to find neighboring text.
580
976
  Returns:
581
977
  SingleTableRecognitionResult: single table recognition result.
@@ -584,62 +980,110 @@ class TableRecognitionPipelineV2(BasePipeline):
584
980
  table_cls_pred = next(self.table_cls_model(image_array))
585
981
  table_cls_result = self.extract_results(table_cls_pred, "cls")
586
982
  use_e2e_model = False
983
+ cells_trans_to_html = False
587
984
 
588
985
  if table_cls_result == "wired_table":
589
- table_structure_pred = next(self.wired_table_rec_model(image_array))
986
+ if use_wired_table_cells_trans_to_html == True:
987
+ cells_trans_to_html = True
988
+ else:
989
+ table_structure_pred = next(self.wired_table_rec_model(image_array))
590
990
  if use_e2e_wired_table_rec_model == True:
591
991
  use_e2e_model = True
992
+ if cells_trans_to_html == True:
993
+ table_structure_pred = next(self.wired_table_rec_model(image_array))
592
994
  else:
593
995
  table_cells_pred = next(
594
996
  self.wired_table_cells_detection_model(image_array, threshold=0.3)
595
997
  ) # Setting the threshold to 0.3 can improve the accuracy of table cells detection.
596
998
  # If you really want more or fewer table cells detection boxes, the threshold can be adjusted.
597
999
  elif table_cls_result == "wireless_table":
598
- table_structure_pred = next(self.wireless_table_rec_model(image_array))
1000
+ if use_wireless_table_cells_trans_to_html == True:
1001
+ cells_trans_to_html = True
1002
+ else:
1003
+ table_structure_pred = next(self.wireless_table_rec_model(image_array))
599
1004
  if use_e2e_wireless_table_rec_model == True:
600
1005
  use_e2e_model = True
1006
+ if cells_trans_to_html == True:
1007
+ table_structure_pred = next(
1008
+ self.wireless_table_rec_model(image_array)
1009
+ )
601
1010
  else:
602
1011
  table_cells_pred = next(
603
- self.wireless_table_cells_detection_model(image_array, threshold=0.3)
1012
+ self.wireless_table_cells_detection_model(
1013
+ image_array, threshold=0.3
1014
+ )
604
1015
  ) # Setting the threshold to 0.3 can improve the accuracy of table cells detection.
605
1016
  # If you really want more or fewer table cells detection boxes, the threshold can be adjusted.
606
1017
 
607
1018
  if use_e2e_model == False:
608
- table_structure_result = self.extract_results(table_structure_pred, "table_stru")
609
- table_cells_result, table_cells_score = self.extract_results(table_cells_pred, "det")
610
- table_cells_result, table_cells_score = self.cells_det_results_nms(table_cells_result, table_cells_score)
611
- ocr_det_boxes = self.get_region_ocr_det_boxes(overall_ocr_res["rec_boxes"].tolist(), table_box)
612
- table_cells_result = self.cells_det_results_reprocessing(
613
- table_cells_result,
614
- table_cells_score,
615
- ocr_det_boxes,
616
- len(table_structure_pred["bbox"]),
1019
+ table_cells_result, table_cells_score = self.extract_results(
1020
+ table_cells_pred, "det"
1021
+ )
1022
+ table_cells_result, table_cells_score = self.cells_det_results_nms(
1023
+ table_cells_result, table_cells_score
617
1024
  )
618
- if use_table_cells_ocr_results == True:
619
- cells_texts_list = self.split_ocr_bboxes_by_table_cells(image_array, table_cells_result)
1025
+ if cells_trans_to_html == True:
1026
+ table_structure_result = self.trans_cells_det_results_to_html(
1027
+ table_cells_result
1028
+ )
620
1029
  else:
1030
+ table_structure_result = self.extract_results(
1031
+ table_structure_pred, "table_stru"
1032
+ )
1033
+ ocr_det_boxes = self.get_region_ocr_det_boxes(
1034
+ overall_ocr_res["rec_boxes"].tolist(), table_box
1035
+ )
1036
+ table_cells_result = self.cells_det_results_reprocessing(
1037
+ table_cells_result,
1038
+ table_cells_score,
1039
+ ocr_det_boxes,
1040
+ len(table_structure_pred["bbox"]),
1041
+ )
1042
+ if use_ocr_results_with_table_cells == True:
1043
+ if self.cells_split_ocr == True:
1044
+ table_box_copy = np.array([table_box])
1045
+ table_ocr_pred = get_sub_regions_ocr_res(
1046
+ overall_ocr_res, table_box_copy
1047
+ )
1048
+ table_ocr_pred = self.split_ocr_bboxes_by_table_cells(
1049
+ table_cells_result, table_ocr_pred, image_array
1050
+ )
1051
+ cells_texts_list = []
1052
+ else:
1053
+ cells_texts_list = self.gen_ocr_with_table_cells(
1054
+ image_array, table_cells_result
1055
+ )
1056
+ table_ocr_pred = {}
1057
+ else:
1058
+ table_ocr_pred = {}
621
1059
  cells_texts_list = []
622
1060
  single_table_recognition_res = get_table_recognition_res(
623
1061
  table_box,
624
1062
  table_structure_result,
625
1063
  table_cells_result,
626
1064
  overall_ocr_res,
1065
+ table_ocr_pred,
627
1066
  cells_texts_list,
628
- use_table_cells_ocr_results,
1067
+ use_ocr_results_with_table_cells,
1068
+ self.cells_split_ocr,
629
1069
  )
630
1070
  else:
631
- if use_table_cells_ocr_results == True:
632
- table_cells_result_e2e = list(map(lambda arr: arr.tolist(), table_structure_pred["bbox"]))
633
- table_cells_result_e2e = [[rect[0], rect[1], rect[4], rect[5]]for rect in table_cells_result_e2e]
634
- cells_texts_list = self.split_ocr_bboxes_by_table_cells(image_array, table_cells_result_e2e)
635
- else:
636
- cells_texts_list = []
1071
+ cells_texts_list = []
1072
+ use_ocr_results_with_table_cells = False
1073
+ table_cells_result_e2e = table_structure_pred["bbox"]
1074
+ table_cells_result_e2e = [
1075
+ [rect[0], rect[1], rect[4], rect[5]] for rect in table_cells_result_e2e
1076
+ ]
1077
+ if cells_trans_to_html == True:
1078
+ table_structure_pred["structure"] = (
1079
+ self.trans_cells_det_results_to_html(table_cells_result_e2e)
1080
+ )
637
1081
  single_table_recognition_res = get_table_recognition_res_e2e(
638
1082
  table_box,
639
1083
  table_structure_pred,
640
1084
  overall_ocr_res,
641
1085
  cells_texts_list,
642
- use_table_cells_ocr_results,
1086
+ use_ocr_results_with_table_cells,
643
1087
  )
644
1088
 
645
1089
  neighbor_text = ""
@@ -668,9 +1112,12 @@ class TableRecognitionPipelineV2(BasePipeline):
668
1112
  text_det_box_thresh: Optional[float] = None,
669
1113
  text_det_unclip_ratio: Optional[float] = None,
670
1114
  text_rec_score_thresh: Optional[float] = None,
671
- use_table_cells_ocr_results: bool = False,
672
1115
  use_e2e_wired_table_rec_model: bool = False,
673
1116
  use_e2e_wireless_table_rec_model: bool = False,
1117
+ use_wired_table_cells_trans_to_html: bool = False,
1118
+ use_wireless_table_cells_trans_to_html: bool = False,
1119
+ use_table_orientation_classify: bool = True,
1120
+ use_ocr_results_with_table_cells: bool = True,
674
1121
  **kwargs,
675
1122
  ) -> TableRecognitionResult:
676
1123
  """
@@ -685,16 +1132,28 @@ class TableRecognitionPipelineV2(BasePipeline):
685
1132
  It will be used if it is not None and use_ocr_model is False.
686
1133
  layout_det_res (DetResult): The layout detection result.
687
1134
  It will be used if it is not None and use_layout_detection is False.
688
- use_table_cells_ocr_results (bool): whether to use OCR results with cells.
689
1135
  use_e2e_wired_table_rec_model (bool): Whether to use end-to-end wired table recognition model.
690
1136
  use_e2e_wireless_table_rec_model (bool): Whether to use end-to-end wireless table recognition model.
691
- flag_find_nei_text (bool): Whether to find neighboring text.
1137
+ use_wired_table_cells_trans_to_html (bool): Whether to use wired table cells trans to HTML.
1138
+ use_wireless_table_cells_trans_to_html (bool): Whether to use wireless table cells trans to HTML.
1139
+ use_table_orientation_classify (bool): Whether to use table orientation classification.
1140
+ use_ocr_results_with_table_cells (bool): Whether to use OCR results processed by table cells.
692
1141
  **kwargs: Additional keyword arguments.
693
1142
 
694
1143
  Returns:
695
1144
  TableRecognitionResult: The predicted table recognition result.
696
1145
  """
697
1146
 
1147
+ self.cells_split_ocr = True
1148
+
1149
+ if use_table_orientation_classify == True and (
1150
+ self.table_orientation_classify_model is None
1151
+ ):
1152
+ assert self.table_orientation_classify_config != None
1153
+ self.table_orientation_classify_model = self.create_model(
1154
+ self.table_orientation_classify_config
1155
+ )
1156
+
698
1157
  model_settings = self.get_model_settings(
699
1158
  use_doc_orientation_classify,
700
1159
  use_doc_unwarping,
@@ -735,48 +1194,179 @@ class TableRecognitionPipelineV2(BasePipeline):
735
1194
  text_rec_score_thresh=text_rec_score_thresh,
736
1195
  )
737
1196
  )
738
- elif use_table_cells_ocr_results == True:
1197
+ elif self.general_ocr_pipeline is None and (
1198
+ (
1199
+ use_ocr_results_with_table_cells == True
1200
+ and self.cells_split_ocr == False
1201
+ )
1202
+ or use_table_orientation_classify == True
1203
+ ):
739
1204
  assert self.general_ocr_config_bak != None
740
- self.general_ocr_pipeline = self.create_pipeline(self.general_ocr_config_bak)
1205
+ self.general_ocr_pipeline = self.create_pipeline(
1206
+ self.general_ocr_config_bak
1207
+ )
1208
+
1209
+ if use_table_orientation_classify == False:
1210
+ table_angle = "0"
741
1211
 
742
1212
  table_res_list = []
743
1213
  table_region_id = 1
1214
+
744
1215
  if not model_settings["use_layout_detection"] and layout_det_res is None:
745
- layout_det_res = {}
746
1216
  img_height, img_width = doc_preprocessor_image.shape[:2]
747
1217
  table_box = [0, 0, img_width - 1, img_height - 1]
1218
+ if use_table_orientation_classify == True:
1219
+ table_angle = next(
1220
+ self.table_orientation_classify_model(doc_preprocessor_image)
1221
+ )["label_names"][0]
1222
+ if table_angle == "90":
1223
+ doc_preprocessor_image = np.rot90(doc_preprocessor_image, k=1)
1224
+ elif table_angle == "180":
1225
+ doc_preprocessor_image = np.rot90(doc_preprocessor_image, k=2)
1226
+ elif table_angle == "270":
1227
+ doc_preprocessor_image = np.rot90(doc_preprocessor_image, k=3)
1228
+ if table_angle in ["90", "180", "270"]:
1229
+ overall_ocr_res = next(
1230
+ self.general_ocr_pipeline(
1231
+ doc_preprocessor_image,
1232
+ text_det_limit_side_len=text_det_limit_side_len,
1233
+ text_det_limit_type=text_det_limit_type,
1234
+ text_det_thresh=text_det_thresh,
1235
+ text_det_box_thresh=text_det_box_thresh,
1236
+ text_det_unclip_ratio=text_det_unclip_ratio,
1237
+ text_rec_score_thresh=text_rec_score_thresh,
1238
+ )
1239
+ )
1240
+ tbx1, tby1, tbx2, tby2 = (
1241
+ table_box[0],
1242
+ table_box[1],
1243
+ table_box[2],
1244
+ table_box[3],
1245
+ )
1246
+ if table_angle == "90":
1247
+ new_x1, new_y1 = tby1, img_width - tbx2
1248
+ new_x2, new_y2 = tby2, img_width - tbx1
1249
+ elif table_angle == "180":
1250
+ new_x1, new_y1 = img_width - tbx2, img_height - tby2
1251
+ new_x2, new_y2 = img_width - tbx1, img_height - tby1
1252
+ elif table_angle == "270":
1253
+ new_x1, new_y1 = img_height - tby2, tbx1
1254
+ new_x2, new_y2 = img_height - tby1, tbx2
1255
+ table_box = [new_x1, new_y1, new_x2, new_y2]
1256
+ layout_det_res = {}
748
1257
  single_table_rec_res = self.predict_single_table_recognition_res(
749
1258
  doc_preprocessor_image,
750
1259
  overall_ocr_res,
751
1260
  table_box,
752
- use_table_cells_ocr_results,
753
1261
  use_e2e_wired_table_rec_model,
754
1262
  use_e2e_wireless_table_rec_model,
1263
+ use_wired_table_cells_trans_to_html,
1264
+ use_wireless_table_cells_trans_to_html,
1265
+ use_ocr_results_with_table_cells,
755
1266
  flag_find_nei_text=False,
756
1267
  )
757
1268
  single_table_rec_res["table_region_id"] = table_region_id
1269
+ if use_table_orientation_classify == True and table_angle != "0":
1270
+ img_height, img_width = doc_preprocessor_image.shape[:2]
1271
+ single_table_rec_res["cell_box_list"] = (
1272
+ self.map_cells_to_original_image(
1273
+ single_table_rec_res["cell_box_list"],
1274
+ table_angle,
1275
+ img_width,
1276
+ img_height,
1277
+ )
1278
+ )
758
1279
  table_res_list.append(single_table_rec_res)
759
1280
  table_region_id += 1
760
1281
  else:
761
1282
  if model_settings["use_layout_detection"]:
762
1283
  layout_det_res = next(self.layout_det_model(doc_preprocessor_image))
763
-
1284
+ img_height, img_width = doc_preprocessor_image.shape[:2]
764
1285
  for box_info in layout_det_res["boxes"]:
765
1286
  if box_info["label"].lower() in ["table"]:
766
- crop_img_info = self._crop_by_boxes(image_array, [box_info])
1287
+ crop_img_info = self._crop_by_boxes(
1288
+ doc_preprocessor_image, [box_info]
1289
+ )
767
1290
  crop_img_info = crop_img_info[0]
768
1291
  table_box = crop_img_info["box"]
1292
+ if use_table_orientation_classify == True:
1293
+ doc_preprocessor_image_copy = doc_preprocessor_image.copy()
1294
+ table_angle = next(
1295
+ self.table_orientation_classify_model(
1296
+ crop_img_info["img"]
1297
+ )
1298
+ )["label_names"][0]
1299
+ if table_angle == "90":
1300
+ crop_img_info["img"] = np.rot90(crop_img_info["img"], k=1)
1301
+ doc_preprocessor_image_copy = np.rot90(
1302
+ doc_preprocessor_image_copy, k=1
1303
+ )
1304
+ elif table_angle == "180":
1305
+ crop_img_info["img"] = np.rot90(crop_img_info["img"], k=2)
1306
+ doc_preprocessor_image_copy = np.rot90(
1307
+ doc_preprocessor_image_copy, k=2
1308
+ )
1309
+ elif table_angle == "270":
1310
+ crop_img_info["img"] = np.rot90(crop_img_info["img"], k=3)
1311
+ doc_preprocessor_image_copy = np.rot90(
1312
+ doc_preprocessor_image_copy, k=3
1313
+ )
1314
+ if table_angle in ["90", "180", "270"]:
1315
+ overall_ocr_res = next(
1316
+ self.general_ocr_pipeline(
1317
+ doc_preprocessor_image_copy,
1318
+ text_det_limit_side_len=text_det_limit_side_len,
1319
+ text_det_limit_type=text_det_limit_type,
1320
+ text_det_thresh=text_det_thresh,
1321
+ text_det_box_thresh=text_det_box_thresh,
1322
+ text_det_unclip_ratio=text_det_unclip_ratio,
1323
+ text_rec_score_thresh=text_rec_score_thresh,
1324
+ )
1325
+ )
1326
+ tbx1, tby1, tbx2, tby2 = (
1327
+ table_box[0],
1328
+ table_box[1],
1329
+ table_box[2],
1330
+ table_box[3],
1331
+ )
1332
+ if table_angle == "90":
1333
+ new_x1, new_y1 = tby1, img_width - tbx2
1334
+ new_x2, new_y2 = tby2, img_width - tbx1
1335
+ elif table_angle == "180":
1336
+ new_x1, new_y1 = img_width - tbx2, img_height - tby2
1337
+ new_x2, new_y2 = img_width - tbx1, img_height - tby1
1338
+ elif table_angle == "270":
1339
+ new_x1, new_y1 = img_height - tby2, tbx1
1340
+ new_x2, new_y2 = img_height - tby1, tbx2
1341
+ table_box = [new_x1, new_y1, new_x2, new_y2]
769
1342
  single_table_rec_res = (
770
1343
  self.predict_single_table_recognition_res(
771
1344
  crop_img_info["img"],
772
1345
  overall_ocr_res,
773
1346
  table_box,
774
- use_table_cells_ocr_results,
775
1347
  use_e2e_wired_table_rec_model,
776
1348
  use_e2e_wireless_table_rec_model,
1349
+ use_wired_table_cells_trans_to_html,
1350
+ use_wireless_table_cells_trans_to_html,
1351
+ use_ocr_results_with_table_cells,
777
1352
  )
778
1353
  )
779
1354
  single_table_rec_res["table_region_id"] = table_region_id
1355
+ if (
1356
+ use_table_orientation_classify == True
1357
+ and table_angle != "0"
1358
+ ):
1359
+ img_height_copy, img_width_copy = (
1360
+ doc_preprocessor_image_copy.shape[:2]
1361
+ )
1362
+ single_table_rec_res["cell_box_list"] = (
1363
+ self.map_cells_to_original_image(
1364
+ single_table_rec_res["cell_box_list"],
1365
+ table_angle,
1366
+ img_width_copy,
1367
+ img_height_copy,
1368
+ )
1369
+ )
780
1370
  table_res_list.append(single_table_rec_res)
781
1371
  table_region_id += 1
782
1372
 
@@ -789,4 +1379,17 @@ class TableRecognitionPipelineV2(BasePipeline):
789
1379
  "table_res_list": table_res_list,
790
1380
  "model_settings": model_settings,
791
1381
  }
1382
+
792
1383
  yield TableRecognitionResult(single_img_res)
1384
+
1385
+
1386
+ @pipeline_requires_extra("ocr")
1387
+ class TableRecognitionPipelineV2(AutoParallelImageSimpleInferencePipeline):
1388
+ entities = ["table_recognition_v2"]
1389
+
1390
+ @property
1391
+ def _pipeline_cls(self):
1392
+ return _TableRecognitionPipelineV2
1393
+
1394
+ def _get_batch_size(self, config):
1395
+ return 1