paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (824) hide show
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +17 -34
  3. paddlex/__main__.py +1 -1
  4. paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
  5. paddlex/configs/modules/doc_vlm/PP-DocBee-2B.yaml +14 -0
  6. paddlex/configs/modules/doc_vlm/PP-DocBee-7B.yaml +14 -0
  7. paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
  8. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
  9. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
  10. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
  11. paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
  12. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
  13. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
  14. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
  15. paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
  16. paddlex/configs/modules/open_vocabulary_detection/YOLO-Worldv2-L.yaml +13 -0
  17. paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
  18. paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
  19. paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
  20. paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
  21. paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
  22. paddlex/configs/pipelines/OCR.yaml +7 -6
  23. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
  24. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
  25. paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
  26. paddlex/configs/pipelines/anomaly_detection.yaml +1 -1
  27. paddlex/configs/pipelines/doc_understanding.yaml +9 -0
  28. paddlex/configs/pipelines/formula_recognition.yaml +2 -2
  29. paddlex/configs/pipelines/layout_parsing.yaml +3 -2
  30. paddlex/configs/pipelines/seal_recognition.yaml +1 -0
  31. paddlex/configs/pipelines/table_recognition.yaml +2 -1
  32. paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
  33. paddlex/configs/pipelines/ts_anomaly_detection.yaml +1 -1
  34. paddlex/configs/pipelines/ts_classification.yaml +1 -1
  35. paddlex/configs/pipelines/ts_forecast.yaml +1 -1
  36. paddlex/constants.py +17 -0
  37. paddlex/engine.py +7 -5
  38. paddlex/hpip_links.html +23 -11
  39. paddlex/inference/__init__.py +3 -3
  40. paddlex/inference/common/__init__.py +1 -1
  41. paddlex/inference/common/batch_sampler/__init__.py +5 -4
  42. paddlex/inference/common/batch_sampler/audio_batch_sampler.py +5 -6
  43. paddlex/inference/common/batch_sampler/base_batch_sampler.py +20 -16
  44. paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +4 -7
  45. paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +87 -0
  46. paddlex/inference/common/batch_sampler/image_batch_sampler.py +45 -60
  47. paddlex/inference/common/batch_sampler/ts_batch_sampler.py +9 -10
  48. paddlex/inference/common/batch_sampler/video_batch_sampler.py +2 -22
  49. paddlex/inference/common/reader/__init__.py +4 -4
  50. paddlex/inference/common/reader/audio_reader.py +3 -3
  51. paddlex/inference/common/reader/det_3d_reader.py +7 -5
  52. paddlex/inference/common/reader/image_reader.py +16 -12
  53. paddlex/inference/common/reader/ts_reader.py +3 -2
  54. paddlex/inference/common/reader/video_reader.py +3 -3
  55. paddlex/inference/common/result/__init__.py +7 -7
  56. paddlex/inference/common/result/base_cv_result.py +12 -2
  57. paddlex/inference/common/result/base_result.py +7 -5
  58. paddlex/inference/common/result/base_ts_result.py +1 -2
  59. paddlex/inference/common/result/base_video_result.py +2 -2
  60. paddlex/inference/common/result/mixin.py +31 -25
  61. paddlex/inference/models/__init__.py +41 -85
  62. paddlex/inference/models/anomaly_detection/__init__.py +1 -1
  63. paddlex/inference/models/anomaly_detection/predictor.py +9 -19
  64. paddlex/inference/models/anomaly_detection/processors.py +9 -2
  65. paddlex/inference/models/anomaly_detection/result.py +3 -2
  66. paddlex/inference/models/base/__init__.py +2 -2
  67. paddlex/inference/models/base/predictor/__init__.py +1 -2
  68. paddlex/inference/models/base/predictor/base_predictor.py +278 -39
  69. paddlex/inference/models/common/__init__.py +6 -15
  70. paddlex/inference/models/common/static_infer.py +724 -251
  71. paddlex/inference/models/common/tokenizer/__init__.py +7 -3
  72. paddlex/inference/models/common/tokenizer/bert_tokenizer.py +1 -1
  73. paddlex/inference/models/common/tokenizer/clip_tokenizer.py +609 -0
  74. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +9 -7
  75. paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
  76. paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +438 -0
  77. paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
  78. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +85 -77
  79. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +339 -123
  80. paddlex/inference/models/common/tokenizer/utils.py +1 -1
  81. paddlex/inference/models/common/tokenizer/vocab.py +8 -8
  82. paddlex/inference/models/common/ts/__init__.py +1 -1
  83. paddlex/inference/models/common/ts/funcs.py +13 -6
  84. paddlex/inference/models/common/ts/processors.py +14 -5
  85. paddlex/inference/models/common/vision/__init__.py +3 -3
  86. paddlex/inference/models/common/vision/funcs.py +17 -12
  87. paddlex/inference/models/common/vision/processors.py +61 -46
  88. paddlex/inference/models/common/vlm/__init__.py +13 -0
  89. paddlex/inference/models/common/vlm/activations.py +189 -0
  90. paddlex/inference/models/common/vlm/bert_padding.py +127 -0
  91. paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
  92. paddlex/inference/models/common/vlm/distributed.py +229 -0
  93. paddlex/inference/models/common/vlm/flash_attn_utils.py +119 -0
  94. paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
  95. paddlex/inference/models/common/vlm/generation/__init__.py +34 -0
  96. paddlex/inference/models/common/vlm/generation/configuration_utils.py +533 -0
  97. paddlex/inference/models/common/vlm/generation/logits_process.py +730 -0
  98. paddlex/inference/models/common/vlm/generation/stopping_criteria.py +106 -0
  99. paddlex/inference/models/common/vlm/generation/utils.py +2162 -0
  100. paddlex/inference/models/common/vlm/transformers/__init__.py +16 -0
  101. paddlex/inference/models/common/vlm/transformers/configuration_utils.py +1037 -0
  102. paddlex/inference/models/common/vlm/transformers/conversion_utils.py +408 -0
  103. paddlex/inference/models/common/vlm/transformers/model_outputs.py +1612 -0
  104. paddlex/inference/models/common/vlm/transformers/model_utils.py +2014 -0
  105. paddlex/inference/models/common/vlm/transformers/utils.py +178 -0
  106. paddlex/inference/models/common/vlm/utils.py +109 -0
  107. paddlex/inference/models/doc_vlm/__init__.py +15 -0
  108. paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
  109. paddlex/inference/models/doc_vlm/modeling/__init__.py +17 -0
  110. paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
  111. paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
  112. paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +2495 -0
  113. paddlex/inference/models/doc_vlm/predictor.py +253 -0
  114. paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
  115. paddlex/inference/models/doc_vlm/processors/__init__.py +17 -0
  116. paddlex/inference/models/doc_vlm/processors/common.py +561 -0
  117. paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
  118. paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +543 -0
  119. paddlex/inference/models/doc_vlm/result.py +21 -0
  120. paddlex/inference/models/face_feature/__init__.py +1 -1
  121. paddlex/inference/models/face_feature/predictor.py +2 -1
  122. paddlex/inference/models/formula_recognition/__init__.py +1 -1
  123. paddlex/inference/models/formula_recognition/predictor.py +18 -28
  124. paddlex/inference/models/formula_recognition/processors.py +126 -97
  125. paddlex/inference/models/formula_recognition/result.py +43 -35
  126. paddlex/inference/models/image_classification/__init__.py +1 -1
  127. paddlex/inference/models/image_classification/predictor.py +9 -19
  128. paddlex/inference/models/image_classification/processors.py +4 -2
  129. paddlex/inference/models/image_classification/result.py +4 -3
  130. paddlex/inference/models/image_feature/__init__.py +1 -1
  131. paddlex/inference/models/image_feature/predictor.py +9 -19
  132. paddlex/inference/models/image_feature/processors.py +7 -5
  133. paddlex/inference/models/image_feature/result.py +2 -3
  134. paddlex/inference/models/image_multilabel_classification/__init__.py +1 -1
  135. paddlex/inference/models/image_multilabel_classification/predictor.py +7 -6
  136. paddlex/inference/models/image_multilabel_classification/processors.py +6 -2
  137. paddlex/inference/models/image_multilabel_classification/result.py +4 -3
  138. paddlex/inference/models/image_unwarping/__init__.py +1 -1
  139. paddlex/inference/models/image_unwarping/predictor.py +8 -16
  140. paddlex/inference/models/image_unwarping/processors.py +6 -2
  141. paddlex/inference/models/image_unwarping/result.py +4 -2
  142. paddlex/inference/models/instance_segmentation/__init__.py +1 -1
  143. paddlex/inference/models/instance_segmentation/predictor.py +7 -15
  144. paddlex/inference/models/instance_segmentation/processors.py +4 -7
  145. paddlex/inference/models/instance_segmentation/result.py +11 -10
  146. paddlex/inference/models/keypoint_detection/__init__.py +1 -1
  147. paddlex/inference/models/keypoint_detection/predictor.py +5 -3
  148. paddlex/inference/models/keypoint_detection/processors.py +11 -3
  149. paddlex/inference/models/keypoint_detection/result.py +9 -4
  150. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
  151. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/predictor.py +15 -26
  152. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/processors.py +26 -14
  153. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/result.py +15 -12
  154. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/visualizer_3d.py +77 -39
  155. paddlex/inference/models/multilingual_speech_recognition/__init__.py +1 -1
  156. paddlex/inference/models/multilingual_speech_recognition/predictor.py +11 -15
  157. paddlex/inference/models/multilingual_speech_recognition/processors.py +45 -53
  158. paddlex/inference/models/multilingual_speech_recognition/result.py +1 -1
  159. paddlex/inference/models/object_detection/__init__.py +1 -1
  160. paddlex/inference/models/object_detection/predictor.py +8 -12
  161. paddlex/inference/models/object_detection/processors.py +63 -33
  162. paddlex/inference/models/object_detection/result.py +5 -4
  163. paddlex/inference/models/object_detection/utils.py +3 -1
  164. paddlex/inference/models/open_vocabulary_detection/__init__.py +1 -1
  165. paddlex/inference/models/open_vocabulary_detection/predictor.py +31 -14
  166. paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +3 -2
  167. paddlex/inference/models/open_vocabulary_detection/processors/common.py +114 -0
  168. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +19 -8
  169. paddlex/inference/models/open_vocabulary_detection/processors/yoloworld_processors.py +209 -0
  170. paddlex/inference/models/open_vocabulary_segmentation/__init__.py +1 -1
  171. paddlex/inference/models/open_vocabulary_segmentation/predictor.py +6 -13
  172. paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +1 -1
  173. paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +12 -12
  174. paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +1 -1
  175. paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +11 -9
  176. paddlex/inference/models/semantic_segmentation/__init__.py +1 -1
  177. paddlex/inference/models/semantic_segmentation/predictor.py +9 -18
  178. paddlex/inference/models/semantic_segmentation/processors.py +11 -8
  179. paddlex/inference/models/semantic_segmentation/result.py +4 -3
  180. paddlex/inference/models/table_structure_recognition/__init__.py +1 -1
  181. paddlex/inference/models/table_structure_recognition/predictor.py +8 -18
  182. paddlex/inference/models/table_structure_recognition/processors.py +23 -29
  183. paddlex/inference/models/table_structure_recognition/result.py +8 -15
  184. paddlex/inference/models/text_detection/__init__.py +1 -1
  185. paddlex/inference/models/text_detection/predictor.py +24 -24
  186. paddlex/inference/models/text_detection/processors.py +116 -44
  187. paddlex/inference/models/text_detection/result.py +8 -13
  188. paddlex/inference/models/text_recognition/__init__.py +1 -1
  189. paddlex/inference/models/text_recognition/predictor.py +11 -19
  190. paddlex/inference/models/text_recognition/processors.py +27 -13
  191. paddlex/inference/models/text_recognition/result.py +3 -2
  192. paddlex/inference/models/ts_anomaly_detection/__init__.py +1 -1
  193. paddlex/inference/models/ts_anomaly_detection/predictor.py +12 -17
  194. paddlex/inference/models/ts_anomaly_detection/processors.py +6 -2
  195. paddlex/inference/models/ts_anomaly_detection/result.py +21 -10
  196. paddlex/inference/models/ts_classification/__init__.py +1 -1
  197. paddlex/inference/models/ts_classification/predictor.py +14 -27
  198. paddlex/inference/models/ts_classification/processors.py +7 -2
  199. paddlex/inference/models/ts_classification/result.py +21 -12
  200. paddlex/inference/models/ts_forecasting/__init__.py +1 -1
  201. paddlex/inference/models/ts_forecasting/predictor.py +13 -18
  202. paddlex/inference/models/ts_forecasting/processors.py +12 -3
  203. paddlex/inference/models/ts_forecasting/result.py +24 -11
  204. paddlex/inference/models/video_classification/__init__.py +1 -1
  205. paddlex/inference/models/video_classification/predictor.py +9 -15
  206. paddlex/inference/models/video_classification/processors.py +24 -24
  207. paddlex/inference/models/video_classification/result.py +7 -3
  208. paddlex/inference/models/video_detection/__init__.py +1 -1
  209. paddlex/inference/models/video_detection/predictor.py +8 -15
  210. paddlex/inference/models/video_detection/processors.py +24 -11
  211. paddlex/inference/models/video_detection/result.py +10 -5
  212. paddlex/inference/pipelines/__init__.py +48 -37
  213. paddlex/inference/pipelines/_parallel.py +172 -0
  214. paddlex/inference/pipelines/anomaly_detection/__init__.py +1 -1
  215. paddlex/inference/pipelines/anomaly_detection/pipeline.py +29 -9
  216. paddlex/inference/pipelines/attribute_recognition/__init__.py +1 -1
  217. paddlex/inference/pipelines/attribute_recognition/pipeline.py +24 -9
  218. paddlex/inference/pipelines/attribute_recognition/result.py +10 -8
  219. paddlex/inference/pipelines/base.py +43 -13
  220. paddlex/inference/pipelines/components/__init__.py +14 -8
  221. paddlex/inference/pipelines/components/chat_server/__init__.py +1 -1
  222. paddlex/inference/pipelines/components/chat_server/base.py +2 -2
  223. paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +8 -8
  224. paddlex/inference/pipelines/components/common/__init__.py +5 -4
  225. paddlex/inference/pipelines/components/common/base_operator.py +2 -1
  226. paddlex/inference/pipelines/components/common/base_result.py +3 -2
  227. paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +1 -2
  228. paddlex/inference/pipelines/components/common/crop_image_regions.py +11 -5
  229. paddlex/inference/pipelines/components/common/seal_det_warp.py +44 -13
  230. paddlex/inference/pipelines/components/common/sort_boxes.py +4 -2
  231. paddlex/inference/pipelines/components/common/warp_image.py +50 -0
  232. paddlex/inference/pipelines/components/faisser.py +10 -5
  233. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +2 -2
  234. paddlex/inference/pipelines/components/prompt_engineering/base.py +2 -2
  235. paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +2 -1
  236. paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +2 -2
  237. paddlex/inference/pipelines/components/retriever/__init__.py +2 -2
  238. paddlex/inference/pipelines/components/retriever/base.py +18 -16
  239. paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +2 -2
  240. paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +87 -84
  241. paddlex/inference/pipelines/components/utils/__init__.py +1 -1
  242. paddlex/inference/pipelines/components/utils/mixin.py +7 -7
  243. paddlex/inference/pipelines/doc_preprocessor/__init__.py +1 -1
  244. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +70 -51
  245. paddlex/inference/pipelines/doc_preprocessor/result.py +5 -10
  246. paddlex/inference/pipelines/doc_understanding/__init__.py +15 -0
  247. paddlex/inference/pipelines/doc_understanding/pipeline.py +71 -0
  248. paddlex/inference/pipelines/face_recognition/__init__.py +1 -1
  249. paddlex/inference/pipelines/face_recognition/pipeline.py +3 -1
  250. paddlex/inference/pipelines/face_recognition/result.py +3 -2
  251. paddlex/inference/pipelines/formula_recognition/__init__.py +1 -1
  252. paddlex/inference/pipelines/formula_recognition/pipeline.py +137 -93
  253. paddlex/inference/pipelines/formula_recognition/result.py +20 -29
  254. paddlex/inference/pipelines/image_classification/__init__.py +1 -1
  255. paddlex/inference/pipelines/image_classification/pipeline.py +30 -11
  256. paddlex/inference/pipelines/image_multilabel_classification/__init__.py +1 -1
  257. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +31 -12
  258. paddlex/inference/pipelines/instance_segmentation/__init__.py +1 -1
  259. paddlex/inference/pipelines/instance_segmentation/pipeline.py +30 -9
  260. paddlex/inference/pipelines/keypoint_detection/__init__.py +1 -1
  261. paddlex/inference/pipelines/keypoint_detection/pipeline.py +30 -9
  262. paddlex/inference/pipelines/layout_parsing/__init__.py +1 -1
  263. paddlex/inference/pipelines/layout_parsing/pipeline.py +54 -56
  264. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +904 -261
  265. paddlex/inference/pipelines/layout_parsing/result.py +9 -21
  266. paddlex/inference/pipelines/layout_parsing/result_v2.py +525 -250
  267. paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
  268. paddlex/inference/pipelines/layout_parsing/utils.py +570 -2004
  269. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
  270. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
  271. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
  272. paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
  273. paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/pipeline.py +17 -10
  274. paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +1 -1
  275. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +17 -6
  276. paddlex/inference/pipelines/object_detection/__init__.py +1 -1
  277. paddlex/inference/pipelines/object_detection/pipeline.py +29 -9
  278. paddlex/inference/pipelines/ocr/__init__.py +1 -1
  279. paddlex/inference/pipelines/ocr/pipeline.py +151 -77
  280. paddlex/inference/pipelines/ocr/result.py +31 -24
  281. paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +1 -1
  282. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +17 -6
  283. paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +1 -1
  284. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +17 -6
  285. paddlex/inference/pipelines/pp_chatocr/__init__.py +1 -1
  286. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +14 -5
  287. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +22 -14
  288. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +34 -16
  289. paddlex/inference/pipelines/pp_shitu_v2/__init__.py +1 -1
  290. paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +12 -8
  291. paddlex/inference/pipelines/pp_shitu_v2/result.py +4 -4
  292. paddlex/inference/pipelines/rotated_object_detection/__init__.py +1 -1
  293. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +30 -9
  294. paddlex/inference/pipelines/seal_recognition/__init__.py +1 -1
  295. paddlex/inference/pipelines/seal_recognition/pipeline.py +127 -63
  296. paddlex/inference/pipelines/seal_recognition/result.py +4 -2
  297. paddlex/inference/pipelines/semantic_segmentation/__init__.py +1 -1
  298. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +30 -9
  299. paddlex/inference/pipelines/small_object_detection/__init__.py +1 -1
  300. paddlex/inference/pipelines/small_object_detection/pipeline.py +30 -9
  301. paddlex/inference/pipelines/table_recognition/__init__.py +1 -1
  302. paddlex/inference/pipelines/table_recognition/pipeline.py +61 -37
  303. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +668 -65
  304. paddlex/inference/pipelines/table_recognition/result.py +12 -10
  305. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +12 -8
  306. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +55 -37
  307. paddlex/inference/pipelines/table_recognition/utils.py +1 -1
  308. paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +1 -1
  309. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +16 -6
  310. paddlex/inference/pipelines/ts_classification/__init__.py +1 -1
  311. paddlex/inference/pipelines/ts_classification/pipeline.py +16 -6
  312. paddlex/inference/pipelines/ts_forecasting/__init__.py +1 -1
  313. paddlex/inference/pipelines/ts_forecasting/pipeline.py +16 -6
  314. paddlex/inference/pipelines/video_classification/__init__.py +1 -1
  315. paddlex/inference/pipelines/video_classification/pipeline.py +17 -6
  316. paddlex/inference/pipelines/video_detection/__init__.py +1 -1
  317. paddlex/inference/pipelines/video_detection/pipeline.py +20 -7
  318. paddlex/inference/serving/__init__.py +5 -1
  319. paddlex/inference/serving/basic_serving/__init__.py +1 -1
  320. paddlex/inference/serving/basic_serving/_app.py +31 -19
  321. paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +7 -4
  322. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +1 -1
  323. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +12 -4
  324. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +1 -1
  325. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +7 -2
  326. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +10 -7
  327. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +10 -7
  328. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_understanding.py +153 -0
  329. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +16 -13
  330. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +10 -7
  331. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +10 -7
  332. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +10 -7
  333. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +10 -7
  334. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +13 -7
  335. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +10 -8
  336. paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +10 -7
  337. paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +10 -7
  338. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +10 -7
  339. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +10 -7
  340. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +10 -7
  341. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +13 -7
  342. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +10 -7
  343. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -12
  344. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +17 -14
  345. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +16 -13
  346. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +16 -9
  347. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +10 -7
  348. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +10 -7
  349. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +10 -7
  350. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +10 -7
  351. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +11 -12
  352. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +14 -12
  353. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +10 -7
  354. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +10 -7
  355. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +10 -7
  356. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +10 -7
  357. paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +10 -7
  358. paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +10 -7
  359. paddlex/inference/serving/basic_serving/_server.py +9 -4
  360. paddlex/inference/serving/infra/__init__.py +1 -1
  361. paddlex/inference/serving/infra/config.py +1 -1
  362. paddlex/inference/serving/infra/models.py +13 -6
  363. paddlex/inference/serving/infra/storage.py +9 -4
  364. paddlex/inference/serving/infra/utils.py +54 -28
  365. paddlex/inference/serving/schemas/__init__.py +1 -1
  366. paddlex/inference/serving/schemas/anomaly_detection.py +1 -1
  367. paddlex/inference/serving/schemas/doc_preprocessor.py +1 -1
  368. paddlex/inference/serving/schemas/doc_understanding.py +78 -0
  369. paddlex/inference/serving/schemas/face_recognition.py +1 -1
  370. paddlex/inference/serving/schemas/formula_recognition.py +2 -2
  371. paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -1
  372. paddlex/inference/serving/schemas/image_classification.py +1 -1
  373. paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -1
  374. paddlex/inference/serving/schemas/instance_segmentation.py +1 -1
  375. paddlex/inference/serving/schemas/layout_parsing.py +2 -3
  376. paddlex/inference/serving/schemas/m_3d_bev_detection.py +1 -1
  377. paddlex/inference/serving/schemas/multilingual_speech_recognition.py +1 -1
  378. paddlex/inference/serving/schemas/object_detection.py +1 -1
  379. paddlex/inference/serving/schemas/ocr.py +1 -1
  380. paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -1
  381. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -1
  382. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -1
  383. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +2 -3
  384. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +3 -3
  385. paddlex/inference/serving/schemas/pp_shituv2.py +1 -1
  386. paddlex/inference/serving/schemas/pp_structurev3.py +11 -7
  387. paddlex/inference/serving/schemas/rotated_object_detection.py +1 -1
  388. paddlex/inference/serving/schemas/seal_recognition.py +2 -2
  389. paddlex/inference/serving/schemas/semantic_segmentation.py +1 -1
  390. paddlex/inference/serving/schemas/shared/__init__.py +1 -1
  391. paddlex/inference/serving/schemas/shared/classification.py +1 -1
  392. paddlex/inference/serving/schemas/shared/image_segmentation.py +1 -1
  393. paddlex/inference/serving/schemas/shared/object_detection.py +1 -1
  394. paddlex/inference/serving/schemas/shared/ocr.py +1 -1
  395. paddlex/inference/serving/schemas/small_object_detection.py +1 -1
  396. paddlex/inference/serving/schemas/table_recognition.py +3 -7
  397. paddlex/inference/serving/schemas/table_recognition_v2.py +6 -7
  398. paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -1
  399. paddlex/inference/serving/schemas/ts_classification.py +1 -1
  400. paddlex/inference/serving/schemas/ts_forecast.py +1 -1
  401. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -1
  402. paddlex/inference/serving/schemas/video_classification.py +1 -1
  403. paddlex/inference/serving/schemas/video_detection.py +1 -1
  404. paddlex/inference/utils/__init__.py +1 -1
  405. paddlex/inference/utils/benchmark.py +332 -179
  406. paddlex/inference/utils/color_map.py +1 -1
  407. paddlex/inference/utils/get_pipeline_path.py +1 -1
  408. paddlex/inference/utils/hpi.py +258 -0
  409. paddlex/inference/utils/hpi_model_info_collection.json +2331 -0
  410. paddlex/inference/utils/io/__init__.py +11 -11
  411. paddlex/inference/utils/io/readers.py +31 -27
  412. paddlex/inference/utils/io/style.py +21 -14
  413. paddlex/inference/utils/io/tablepyxl.py +13 -5
  414. paddlex/inference/utils/io/writers.py +9 -10
  415. paddlex/inference/utils/mkldnn_blocklist.py +25 -0
  416. paddlex/inference/utils/model_paths.py +48 -0
  417. paddlex/inference/utils/{new_ir_blacklist.py → new_ir_blocklist.py} +1 -2
  418. paddlex/inference/utils/official_models.py +278 -262
  419. paddlex/inference/utils/pp_option.py +184 -92
  420. paddlex/inference/utils/trt_blocklist.py +43 -0
  421. paddlex/inference/utils/trt_config.py +420 -0
  422. paddlex/model.py +30 -12
  423. paddlex/modules/__init__.py +57 -80
  424. paddlex/modules/anomaly_detection/__init__.py +2 -2
  425. paddlex/modules/anomaly_detection/dataset_checker/__init__.py +2 -3
  426. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
  427. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +6 -3
  428. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/check_dataset.py +8 -4
  429. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +7 -4
  430. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/split_dataset.py +2 -2
  431. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  432. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/visualizer.py +7 -2
  433. paddlex/modules/anomaly_detection/evaluator.py +3 -3
  434. paddlex/modules/anomaly_detection/exportor.py +1 -1
  435. paddlex/modules/anomaly_detection/model_list.py +1 -1
  436. paddlex/modules/anomaly_detection/trainer.py +3 -4
  437. paddlex/modules/base/__init__.py +5 -5
  438. paddlex/modules/base/build_model.py +1 -2
  439. paddlex/modules/base/dataset_checker/__init__.py +2 -2
  440. paddlex/modules/base/dataset_checker/dataset_checker.py +4 -4
  441. paddlex/modules/base/dataset_checker/utils.py +1 -3
  442. paddlex/modules/base/evaluator.py +13 -13
  443. paddlex/modules/base/exportor.py +12 -13
  444. paddlex/modules/base/trainer.py +21 -11
  445. paddlex/modules/base/utils/__init__.py +13 -0
  446. paddlex/modules/base/utils/cinn_setting.py +89 -0
  447. paddlex/modules/base/utils/coco_eval.py +94 -0
  448. paddlex/modules/base/utils/topk_eval.py +118 -0
  449. paddlex/modules/doc_vlm/__init__.py +18 -0
  450. paddlex/modules/doc_vlm/dataset_checker.py +29 -0
  451. paddlex/modules/doc_vlm/evaluator.py +29 -0
  452. paddlex/modules/doc_vlm/exportor.py +29 -0
  453. paddlex/modules/doc_vlm/model_list.py +16 -0
  454. paddlex/modules/doc_vlm/trainer.py +41 -0
  455. paddlex/modules/face_recognition/__init__.py +2 -2
  456. paddlex/modules/face_recognition/dataset_checker/__init__.py +2 -2
  457. paddlex/modules/face_recognition/dataset_checker/dataset_src/__init__.py +1 -1
  458. paddlex/modules/face_recognition/dataset_checker/dataset_src/check_dataset.py +3 -5
  459. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
  460. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  461. paddlex/modules/face_recognition/evaluator.py +3 -3
  462. paddlex/modules/face_recognition/exportor.py +1 -1
  463. paddlex/modules/face_recognition/model_list.py +1 -1
  464. paddlex/modules/face_recognition/trainer.py +1 -1
  465. paddlex/modules/formula_recognition/__init__.py +2 -2
  466. paddlex/modules/formula_recognition/dataset_checker/__init__.py +3 -3
  467. paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  468. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
  469. paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +2 -6
  470. paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
  471. paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
  472. paddlex/modules/formula_recognition/evaluator.py +6 -3
  473. paddlex/modules/formula_recognition/exportor.py +1 -1
  474. paddlex/modules/formula_recognition/model_list.py +4 -1
  475. paddlex/modules/formula_recognition/trainer.py +5 -3
  476. paddlex/modules/general_recognition/__init__.py +2 -2
  477. paddlex/modules/general_recognition/dataset_checker/__init__.py +2 -2
  478. paddlex/modules/general_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  479. paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +7 -9
  480. paddlex/modules/general_recognition/dataset_checker/dataset_src/check_dataset.py +4 -5
  481. paddlex/modules/general_recognition/dataset_checker/dataset_src/convert_dataset.py +6 -5
  482. paddlex/modules/general_recognition/dataset_checker/dataset_src/split_dataset.py +1 -1
  483. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
  484. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  485. paddlex/modules/general_recognition/evaluator.py +2 -2
  486. paddlex/modules/general_recognition/exportor.py +1 -1
  487. paddlex/modules/general_recognition/model_list.py +1 -1
  488. paddlex/modules/general_recognition/trainer.py +1 -1
  489. paddlex/modules/image_classification/__init__.py +2 -2
  490. paddlex/modules/image_classification/dataset_checker/__init__.py +2 -2
  491. paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py +2 -2
  492. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  493. paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
  494. paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py +4 -4
  495. paddlex/modules/image_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  496. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
  497. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  498. paddlex/modules/image_classification/evaluator.py +3 -3
  499. paddlex/modules/image_classification/exportor.py +1 -1
  500. paddlex/modules/image_classification/model_list.py +2 -1
  501. paddlex/modules/image_classification/trainer.py +3 -3
  502. paddlex/modules/image_unwarping/__init__.py +1 -1
  503. paddlex/modules/image_unwarping/model_list.py +1 -1
  504. paddlex/modules/instance_segmentation/__init__.py +2 -2
  505. paddlex/modules/instance_segmentation/dataset_checker/__init__.py +2 -3
  506. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
  507. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +9 -5
  508. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/check_dataset.py +8 -5
  509. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/convert_dataset.py +8 -8
  510. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/split_dataset.py +7 -4
  511. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
  512. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +10 -8
  513. paddlex/modules/instance_segmentation/evaluator.py +2 -2
  514. paddlex/modules/instance_segmentation/exportor.py +1 -1
  515. paddlex/modules/instance_segmentation/model_list.py +1 -1
  516. paddlex/modules/instance_segmentation/trainer.py +1 -1
  517. paddlex/modules/keypoint_detection/__init__.py +2 -2
  518. paddlex/modules/keypoint_detection/dataset_checker/__init__.py +2 -2
  519. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +1 -1
  520. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
  521. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  522. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +8 -3
  523. paddlex/modules/keypoint_detection/evaluator.py +2 -2
  524. paddlex/modules/keypoint_detection/exportor.py +1 -1
  525. paddlex/modules/keypoint_detection/model_list.py +1 -1
  526. paddlex/modules/keypoint_detection/trainer.py +2 -2
  527. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/__init__.py +2 -2
  528. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/__init__.py +3 -3
  529. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/__init__.py +2 -2
  530. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/analyse_dataset.py +8 -8
  531. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/check_dataset.py +1 -2
  532. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/evaluator.py +3 -3
  533. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/exportor.py +1 -1
  534. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/model_list.py +1 -1
  535. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/trainer.py +5 -7
  536. paddlex/modules/multilabel_classification/__init__.py +2 -2
  537. paddlex/modules/multilabel_classification/dataset_checker/__init__.py +2 -2
  538. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/__init__.py +2 -2
  539. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  540. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
  541. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/convert_dataset.py +10 -7
  542. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  543. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
  544. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +1 -5
  545. paddlex/modules/multilabel_classification/evaluator.py +3 -3
  546. paddlex/modules/multilabel_classification/exportor.py +1 -1
  547. paddlex/modules/multilabel_classification/model_list.py +1 -1
  548. paddlex/modules/multilabel_classification/trainer.py +3 -3
  549. paddlex/modules/multilingual_speech_recognition/__init__.py +2 -2
  550. paddlex/modules/multilingual_speech_recognition/dataset_checker.py +3 -3
  551. paddlex/modules/multilingual_speech_recognition/evaluator.py +3 -3
  552. paddlex/modules/multilingual_speech_recognition/exportor.py +3 -3
  553. paddlex/modules/multilingual_speech_recognition/model_list.py +1 -1
  554. paddlex/modules/multilingual_speech_recognition/trainer.py +7 -5
  555. paddlex/modules/object_detection/__init__.py +2 -2
  556. paddlex/modules/object_detection/dataset_checker/__init__.py +2 -11
  557. paddlex/modules/object_detection/dataset_checker/dataset_src/__init__.py +2 -2
  558. paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +10 -8
  559. paddlex/modules/object_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
  560. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +17 -12
  561. paddlex/modules/object_detection/dataset_checker/dataset_src/split_dataset.py +8 -4
  562. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  563. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +9 -8
  564. paddlex/modules/object_detection/evaluator.py +11 -6
  565. paddlex/modules/object_detection/exportor.py +1 -1
  566. paddlex/modules/object_detection/model_list.py +3 -1
  567. paddlex/modules/object_detection/trainer.py +4 -5
  568. paddlex/modules/open_vocabulary_detection/__init__.py +2 -2
  569. paddlex/modules/open_vocabulary_detection/dataset_checker.py +3 -3
  570. paddlex/modules/open_vocabulary_detection/evaluator.py +3 -3
  571. paddlex/modules/open_vocabulary_detection/exportor.py +3 -3
  572. paddlex/modules/open_vocabulary_detection/model_list.py +2 -4
  573. paddlex/modules/open_vocabulary_detection/trainer.py +7 -5
  574. paddlex/modules/open_vocabulary_segmentation/__init__.py +2 -2
  575. paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +3 -3
  576. paddlex/modules/open_vocabulary_segmentation/evaluator.py +3 -3
  577. paddlex/modules/open_vocabulary_segmentation/exportor.py +3 -3
  578. paddlex/modules/open_vocabulary_segmentation/model_list.py +1 -1
  579. paddlex/modules/open_vocabulary_segmentation/trainer.py +7 -5
  580. paddlex/modules/semantic_segmentation/__init__.py +2 -2
  581. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +2 -3
  582. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
  583. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/analyse_dataset.py +6 -3
  584. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/check_dataset.py +2 -2
  585. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/convert_dataset.py +7 -4
  586. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/split_dataset.py +2 -2
  587. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
  588. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/visualizer.py +6 -2
  589. paddlex/modules/semantic_segmentation/evaluator.py +3 -3
  590. paddlex/modules/semantic_segmentation/exportor.py +1 -1
  591. paddlex/modules/semantic_segmentation/model_list.py +1 -1
  592. paddlex/modules/semantic_segmentation/trainer.py +3 -4
  593. paddlex/modules/table_recognition/__init__.py +2 -2
  594. paddlex/modules/table_recognition/dataset_checker/__init__.py +5 -5
  595. paddlex/modules/table_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  596. paddlex/modules/table_recognition/dataset_checker/dataset_src/analyse_dataset.py +3 -2
  597. paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +8 -7
  598. paddlex/modules/table_recognition/dataset_checker/dataset_src/split_dataset.py +2 -1
  599. paddlex/modules/table_recognition/evaluator.py +3 -3
  600. paddlex/modules/table_recognition/exportor.py +1 -1
  601. paddlex/modules/table_recognition/model_list.py +1 -1
  602. paddlex/modules/table_recognition/trainer.py +2 -5
  603. paddlex/modules/text_detection/__init__.py +2 -2
  604. paddlex/modules/text_detection/dataset_checker/__init__.py +4 -6
  605. paddlex/modules/text_detection/dataset_checker/dataset_src/__init__.py +2 -2
  606. paddlex/modules/text_detection/dataset_checker/dataset_src/analyse_dataset.py +12 -9
  607. paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +3 -3
  608. paddlex/modules/text_detection/dataset_checker/dataset_src/split_dataset.py +3 -3
  609. paddlex/modules/text_detection/evaluator.py +3 -3
  610. paddlex/modules/text_detection/exportor.py +1 -1
  611. paddlex/modules/text_detection/model_list.py +3 -1
  612. paddlex/modules/text_detection/trainer.py +2 -5
  613. paddlex/modules/text_recognition/__init__.py +2 -2
  614. paddlex/modules/text_recognition/dataset_checker/__init__.py +4 -5
  615. paddlex/modules/text_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  616. paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
  617. paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +2 -5
  618. paddlex/modules/text_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
  619. paddlex/modules/text_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
  620. paddlex/modules/text_recognition/evaluator.py +3 -3
  621. paddlex/modules/text_recognition/exportor.py +1 -1
  622. paddlex/modules/text_recognition/model_list.py +3 -1
  623. paddlex/modules/text_recognition/trainer.py +2 -3
  624. paddlex/modules/ts_anomaly_detection/__init__.py +2 -2
  625. paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +4 -5
  626. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
  627. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +1 -9
  628. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/check_dataset.py +2 -2
  629. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -6
  630. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/split_dataset.py +4 -4
  631. paddlex/modules/ts_anomaly_detection/evaluator.py +3 -3
  632. paddlex/modules/ts_anomaly_detection/exportor.py +2 -3
  633. paddlex/modules/ts_anomaly_detection/model_list.py +1 -1
  634. paddlex/modules/ts_anomaly_detection/trainer.py +8 -8
  635. paddlex/modules/ts_classification/__init__.py +2 -2
  636. paddlex/modules/ts_classification/dataset_checker/__init__.py +4 -5
  637. paddlex/modules/ts_classification/dataset_checker/dataset_src/__init__.py +2 -2
  638. paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -5
  639. paddlex/modules/ts_classification/dataset_checker/dataset_src/check_dataset.py +2 -2
  640. paddlex/modules/ts_classification/dataset_checker/dataset_src/convert_dataset.py +2 -6
  641. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +5 -5
  642. paddlex/modules/ts_classification/evaluator.py +3 -3
  643. paddlex/modules/ts_classification/exportor.py +2 -3
  644. paddlex/modules/ts_classification/model_list.py +1 -1
  645. paddlex/modules/ts_classification/trainer.py +7 -7
  646. paddlex/modules/ts_forecast/__init__.py +2 -2
  647. paddlex/modules/ts_forecast/dataset_checker/__init__.py +4 -5
  648. paddlex/modules/ts_forecast/dataset_checker/dataset_src/__init__.py +2 -2
  649. paddlex/modules/ts_forecast/dataset_checker/dataset_src/analyse_dataset.py +1 -9
  650. paddlex/modules/ts_forecast/dataset_checker/dataset_src/check_dataset.py +2 -2
  651. paddlex/modules/ts_forecast/dataset_checker/dataset_src/convert_dataset.py +2 -6
  652. paddlex/modules/ts_forecast/dataset_checker/dataset_src/split_dataset.py +4 -4
  653. paddlex/modules/ts_forecast/evaluator.py +3 -3
  654. paddlex/modules/ts_forecast/exportor.py +2 -3
  655. paddlex/modules/ts_forecast/model_list.py +1 -1
  656. paddlex/modules/ts_forecast/trainer.py +7 -7
  657. paddlex/modules/video_classification/__init__.py +2 -2
  658. paddlex/modules/video_classification/dataset_checker/__init__.py +2 -2
  659. paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +2 -2
  660. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +9 -9
  661. paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +2 -3
  662. paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  663. paddlex/modules/video_classification/evaluator.py +3 -3
  664. paddlex/modules/video_classification/exportor.py +1 -1
  665. paddlex/modules/video_classification/model_list.py +1 -1
  666. paddlex/modules/video_classification/trainer.py +3 -3
  667. paddlex/modules/video_detection/__init__.py +2 -2
  668. paddlex/modules/video_detection/dataset_checker/__init__.py +2 -2
  669. paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +2 -2
  670. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  671. paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +3 -5
  672. paddlex/modules/video_detection/evaluator.py +3 -3
  673. paddlex/modules/video_detection/exportor.py +1 -1
  674. paddlex/modules/video_detection/model_list.py +1 -1
  675. paddlex/modules/video_detection/trainer.py +3 -3
  676. paddlex/ops/__init__.py +7 -4
  677. paddlex/ops/iou3d_nms/iou3d_cpu.cpp +8 -6
  678. paddlex/ops/iou3d_nms/iou3d_cpu.h +3 -2
  679. paddlex/ops/iou3d_nms/iou3d_nms.cpp +8 -6
  680. paddlex/ops/iou3d_nms/iou3d_nms.h +6 -4
  681. paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +24 -18
  682. paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +9 -7
  683. paddlex/ops/setup.py +3 -3
  684. paddlex/ops/voxel/voxelize_op.cc +22 -19
  685. paddlex/ops/voxel/voxelize_op.cu +25 -25
  686. paddlex/paddlex_cli.py +104 -87
  687. paddlex/repo_apis/Paddle3D_api/__init__.py +1 -1
  688. paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +1 -1
  689. paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +1 -1
  690. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +6 -6
  691. paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +2 -2
  692. paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +1 -1
  693. paddlex/repo_apis/Paddle3D_api/pp3d_config.py +3 -2
  694. paddlex/repo_apis/PaddleClas_api/__init__.py +1 -1
  695. paddlex/repo_apis/PaddleClas_api/cls/__init__.py +3 -3
  696. paddlex/repo_apis/PaddleClas_api/cls/config.py +5 -4
  697. paddlex/repo_apis/PaddleClas_api/cls/model.py +4 -4
  698. paddlex/repo_apis/PaddleClas_api/cls/register.py +12 -3
  699. paddlex/repo_apis/PaddleClas_api/cls/runner.py +2 -3
  700. paddlex/repo_apis/PaddleClas_api/shitu_rec/__init__.py +2 -2
  701. paddlex/repo_apis/PaddleClas_api/shitu_rec/config.py +2 -2
  702. paddlex/repo_apis/PaddleClas_api/shitu_rec/model.py +1 -4
  703. paddlex/repo_apis/PaddleClas_api/shitu_rec/register.py +2 -2
  704. paddlex/repo_apis/PaddleClas_api/shitu_rec/runner.py +1 -6
  705. paddlex/repo_apis/PaddleDetection_api/__init__.py +2 -2
  706. paddlex/repo_apis/PaddleDetection_api/config_helper.py +3 -3
  707. paddlex/repo_apis/PaddleDetection_api/instance_seg/__init__.py +2 -2
  708. paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +2 -3
  709. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +4 -4
  710. paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py +2 -3
  711. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +2 -3
  712. paddlex/repo_apis/PaddleDetection_api/object_det/__init__.py +3 -3
  713. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +5 -4
  714. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +6 -7
  715. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +26 -1
  716. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +32 -3
  717. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +2 -3
  718. paddlex/repo_apis/PaddleNLP_api/__init__.py +1 -1
  719. paddlex/repo_apis/PaddleOCR_api/__init__.py +4 -3
  720. paddlex/repo_apis/PaddleOCR_api/config_utils.py +1 -1
  721. paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +1 -1
  722. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +7 -6
  723. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +9 -13
  724. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +29 -3
  725. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +2 -3
  726. paddlex/repo_apis/PaddleOCR_api/table_rec/__init__.py +1 -1
  727. paddlex/repo_apis/PaddleOCR_api/table_rec/config.py +1 -1
  728. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +4 -4
  729. paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +2 -3
  730. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +3 -3
  731. paddlex/repo_apis/PaddleOCR_api/text_det/__init__.py +1 -1
  732. paddlex/repo_apis/PaddleOCR_api/text_det/config.py +1 -1
  733. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +4 -4
  734. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +20 -3
  735. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +3 -3
  736. paddlex/repo_apis/PaddleOCR_api/text_rec/__init__.py +1 -1
  737. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +7 -6
  738. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +9 -13
  739. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +20 -3
  740. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +2 -3
  741. paddlex/repo_apis/PaddleSeg_api/__init__.py +1 -1
  742. paddlex/repo_apis/PaddleSeg_api/base_seg_config.py +2 -2
  743. paddlex/repo_apis/PaddleSeg_api/seg/__init__.py +1 -1
  744. paddlex/repo_apis/PaddleSeg_api/seg/config.py +3 -6
  745. paddlex/repo_apis/PaddleSeg_api/seg/model.py +6 -6
  746. paddlex/repo_apis/PaddleSeg_api/seg/register.py +2 -3
  747. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +2 -3
  748. paddlex/repo_apis/PaddleTS_api/__init__.py +4 -3
  749. paddlex/repo_apis/PaddleTS_api/ts_ad/__init__.py +1 -1
  750. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +5 -6
  751. paddlex/repo_apis/PaddleTS_api/ts_ad/register.py +2 -2
  752. paddlex/repo_apis/PaddleTS_api/ts_ad/runner.py +2 -2
  753. paddlex/repo_apis/PaddleTS_api/ts_base/__init__.py +1 -1
  754. paddlex/repo_apis/PaddleTS_api/ts_base/config.py +2 -4
  755. paddlex/repo_apis/PaddleTS_api/ts_base/model.py +4 -4
  756. paddlex/repo_apis/PaddleTS_api/ts_base/runner.py +2 -2
  757. paddlex/repo_apis/PaddleTS_api/ts_cls/__init__.py +1 -1
  758. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +4 -5
  759. paddlex/repo_apis/PaddleTS_api/ts_cls/register.py +2 -2
  760. paddlex/repo_apis/PaddleTS_api/ts_cls/runner.py +2 -2
  761. paddlex/repo_apis/PaddleTS_api/ts_fc/__init__.py +1 -1
  762. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +6 -7
  763. paddlex/repo_apis/PaddleTS_api/ts_fc/register.py +1 -1
  764. paddlex/repo_apis/PaddleVideo_api/__init__.py +1 -1
  765. paddlex/repo_apis/PaddleVideo_api/config_utils.py +1 -1
  766. paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +3 -3
  767. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +5 -4
  768. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +4 -4
  769. paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +2 -3
  770. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +2 -3
  771. paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +3 -3
  772. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +5 -4
  773. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +5 -5
  774. paddlex/repo_apis/PaddleVideo_api/video_det/register.py +2 -3
  775. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +2 -3
  776. paddlex/repo_apis/__init__.py +1 -1
  777. paddlex/repo_apis/base/__init__.py +4 -5
  778. paddlex/repo_apis/base/config.py +3 -4
  779. paddlex/repo_apis/base/model.py +11 -19
  780. paddlex/repo_apis/base/register.py +1 -1
  781. paddlex/repo_apis/base/runner.py +11 -12
  782. paddlex/repo_apis/base/utils/__init__.py +1 -1
  783. paddlex/repo_apis/base/utils/arg.py +1 -1
  784. paddlex/repo_apis/base/utils/subprocess.py +1 -1
  785. paddlex/repo_manager/__init__.py +2 -9
  786. paddlex/repo_manager/core.py +12 -30
  787. paddlex/repo_manager/meta.py +41 -31
  788. paddlex/repo_manager/repo.py +171 -161
  789. paddlex/repo_manager/utils.py +13 -224
  790. paddlex/utils/__init__.py +1 -1
  791. paddlex/utils/cache.py +8 -10
  792. paddlex/utils/config.py +6 -5
  793. paddlex/utils/{custom_device_whitelist.py → custom_device_list.py} +53 -199
  794. paddlex/utils/deps.py +249 -0
  795. paddlex/utils/device.py +87 -36
  796. paddlex/utils/download.py +4 -4
  797. paddlex/utils/env.py +37 -7
  798. paddlex/utils/errors/__init__.py +1 -1
  799. paddlex/utils/errors/dataset_checker.py +1 -1
  800. paddlex/utils/errors/others.py +2 -16
  801. paddlex/utils/file_interface.py +4 -5
  802. paddlex/utils/flags.py +17 -12
  803. paddlex/utils/fonts/__init__.py +36 -5
  804. paddlex/utils/func_register.py +1 -1
  805. paddlex/utils/install.py +87 -0
  806. paddlex/utils/interactive_get_pipeline.py +3 -3
  807. paddlex/utils/lazy_loader.py +3 -3
  808. paddlex/utils/logging.py +10 -1
  809. paddlex/utils/misc.py +6 -6
  810. paddlex/utils/pipeline_arguments.py +15 -7
  811. paddlex/utils/result_saver.py +4 -5
  812. paddlex/utils/subclass_register.py +2 -4
  813. paddlex/version.py +2 -1
  814. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/METADATA +237 -102
  815. paddlex-3.0.1.dist-info/RECORD +1095 -0
  816. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
  817. paddlex/inference/models/base/predictor/basic_predictor.py +0 -139
  818. paddlex/paddle2onnx_requirements.txt +0 -1
  819. paddlex/repo_manager/requirements.txt +0 -21
  820. paddlex/serving_requirements.txt +0 -9
  821. paddlex-3.0.0rc0.dist-info/RECORD +0 -1015
  822. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
  823. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info/licenses}/LICENSE +0 -0
  824. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -14,14 +14,15 @@
14
14
  from __future__ import annotations
15
15
 
16
16
  import copy
17
- from pathlib import Path
18
- from PIL import Image, ImageDraw
19
-
17
+ import math
20
18
  import re
19
+ from functools import partial
20
+ from typing import List
21
+
21
22
  import numpy as np
22
- from PIL import Image
23
- from PIL import ImageDraw
23
+ from PIL import Image, ImageDraw, ImageFont
24
24
 
25
+ from ....utils.fonts import PINGFANG_FONT_FILE_PATH
25
26
  from ...common.result import (
26
27
  BaseCVResult,
27
28
  HtmlMixin,
@@ -29,8 +30,166 @@ from ...common.result import (
29
30
  MarkdownMixin,
30
31
  XlsxMixin,
31
32
  )
32
- from .utils import get_layout_ordering
33
- from .utils import get_show_color
33
+ from .setting import BLOCK_LABEL_MAP
34
+
35
+
36
+ def compile_title_pattern():
37
+ # Precompiled regex pattern for matching numbering at the beginning of the title
38
+ numbering_pattern = (
39
+ r"(?:" + r"[1-9][0-9]*(?:\.[1-9][0-9]*)*[\.、]?|" + r"[\(\(](?:[1-9][0-9]*|["
40
+ r"一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾]+)[\)\)]|" + r"["
41
+ r"一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾]+"
42
+ r"[、\.]?|" + r"(?:I|II|III|IV|V|VI|VII|VIII|IX|X)\.?" + r")"
43
+ )
44
+ return re.compile(r"^\s*(" + numbering_pattern + r")(\s*)(.*)$")
45
+
46
+
47
+ TITLE_RE_PATTERN = compile_title_pattern()
48
+
49
+
50
+ def format_title_func(block):
51
+ """
52
+ Normalize chapter title.
53
+ Add the '#' to indicate the level of the title.
54
+ If numbering exists, ensure there's exactly one space between it and the title content.
55
+ If numbering does not exist, return the original title unchanged.
56
+
57
+ :param title: Original chapter title string.
58
+ :return: Normalized chapter title string.
59
+ """
60
+ title = block.content
61
+ match = TITLE_RE_PATTERN.match(title)
62
+ if match:
63
+ numbering = match.group(1).strip()
64
+ title_content = match.group(3).lstrip()
65
+ # Return numbering and title content separated by one space
66
+ title = numbering + " " + title_content
67
+
68
+ title = title.rstrip(".")
69
+ level = (
70
+ title.count(
71
+ ".",
72
+ )
73
+ + 1
74
+ if "." in title
75
+ else 1
76
+ )
77
+ return f"#{'#' * level} {title}".replace("-\n", "").replace(
78
+ "\n",
79
+ " ",
80
+ )
81
+
82
+
83
+ def format_centered_by_html(string):
84
+ return (
85
+ f'<div style="text-align: center;">{string}</div>'.replace(
86
+ "-\n",
87
+ "",
88
+ ).replace("\n", " ")
89
+ + "\n"
90
+ )
91
+
92
+
93
+ def format_text_plain_func(block):
94
+ return block.content
95
+
96
+
97
+ def format_image_scaled_by_html_func(block, original_image_width):
98
+ img_tags = []
99
+ image_path = block.image["path"]
100
+ image_width = block.image["img"].width
101
+ scale = int(image_width / original_image_width * 100)
102
+ img_tags.append(
103
+ '<img src="{}" alt="Image" width="{}%" />'.format(
104
+ image_path.replace("-\n", "").replace("\n", " "), scale
105
+ ),
106
+ )
107
+ return "\n".join(img_tags)
108
+
109
+
110
+ def format_image_plain_func(block):
111
+ img_tags = []
112
+ image_path = block.image["path"]
113
+ img_tags.append("![]({})".format(image_path.replace("-\n", "").replace("\n", " ")))
114
+ return "\n".join(img_tags)
115
+
116
+
117
+ def format_chart2table_func(block):
118
+ lines_list = block.content.split("\n")
119
+ column_num = len(lines_list[0].split("|"))
120
+ lines_list.insert(1, "|".join(["---"] * column_num))
121
+ lines_list = [f"|{line}|" for line in lines_list]
122
+ return "\n".join(lines_list)
123
+
124
+
125
+ def simplify_table_func(table_code):
126
+ return "\n" + table_code.replace("<html>", "").replace("</html>", "").replace(
127
+ "<body>", ""
128
+ ).replace("</body>", "")
129
+
130
+
131
+ def format_first_line_func(block, templates, format_func, spliter):
132
+ lines = block.content.split(spliter)
133
+ for idx in range(len(lines)):
134
+ line = lines[idx]
135
+ if line.strip() == "":
136
+ continue
137
+ if line.lower() in templates:
138
+ lines[idx] = format_func(line)
139
+ break
140
+ return spliter.join(lines)
141
+
142
+
143
+ def get_seg_flag(block: LayoutParsingBlock, prev_block: LayoutParsingBlock):
144
+
145
+ seg_start_flag = True
146
+ seg_end_flag = True
147
+
148
+ block_box = block.bbox
149
+ context_left_coordinate = block_box[0]
150
+ context_right_coordinate = block_box[2]
151
+ seg_start_coordinate = block.seg_start_coordinate
152
+ seg_end_coordinate = block.seg_end_coordinate
153
+
154
+ if prev_block is not None:
155
+ prev_block_bbox = prev_block.bbox
156
+ num_of_prev_lines = prev_block.num_of_lines
157
+ pre_block_seg_end_coordinate = prev_block.seg_end_coordinate
158
+ prev_end_space_small = (
159
+ abs(prev_block_bbox[2] - pre_block_seg_end_coordinate) < 10
160
+ )
161
+ prev_lines_more_than_one = num_of_prev_lines > 1
162
+
163
+ overlap_blocks = context_left_coordinate < prev_block_bbox[2]
164
+
165
+ # update context_left_coordinate and context_right_coordinate
166
+ if overlap_blocks:
167
+ context_left_coordinate = min(prev_block_bbox[0], context_left_coordinate)
168
+ context_right_coordinate = max(prev_block_bbox[2], context_right_coordinate)
169
+ prev_end_space_small = (
170
+ abs(context_right_coordinate - pre_block_seg_end_coordinate) < 10
171
+ )
172
+ edge_distance = 0
173
+ else:
174
+ edge_distance = abs(block_box[0] - prev_block_bbox[2])
175
+
176
+ current_start_space_small = seg_start_coordinate - context_left_coordinate < 10
177
+
178
+ if (
179
+ prev_end_space_small
180
+ and current_start_space_small
181
+ and prev_lines_more_than_one
182
+ and edge_distance < max(prev_block.width, block.width)
183
+ ):
184
+ seg_start_flag = False
185
+ else:
186
+ if seg_start_coordinate - context_left_coordinate < 10:
187
+ seg_start_flag = False
188
+
189
+ if context_right_coordinate - seg_end_coordinate < 10:
190
+ seg_end_flag = False
191
+
192
+ return seg_start_flag, seg_end_flag
34
193
 
35
194
 
36
195
  class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
@@ -43,30 +202,10 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
43
202
  XlsxMixin.__init__(self)
44
203
  MarkdownMixin.__init__(self)
45
204
  JsonMixin.__init__(self)
46
- self.title_pattern = self._build_title_pattern()
47
-
48
- def _build_title_pattern(self):
49
- # Precompiled regex pattern for matching numbering at the beginning of the title
50
- numbering_pattern = (
51
- r"(?:"
52
- + r"[1-9][0-9]*(?:\.[1-9][0-9]*)*[\.、]?|"
53
- + r"[\(\(](?:[1-9][0-9]*|["
54
- r"一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾]+)[\)\)]|" + r"["
55
- r"一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾]+"
56
- r"[、\.]?|" + r"(?:I|II|III|IV|V|VI|VII|VIII|IX|X)\.?" + r")"
57
- )
58
- return re.compile(r"^\s*(" + numbering_pattern + r")(\s*)(.*)$")
59
-
60
- def _get_input_fn(self):
61
- fn = super()._get_input_fn()
62
- if (page_idx := self["page_index"]) is not None:
63
- fp = Path(fn)
64
- stem, suffix = fp.stem, fp.suffix
65
- return f"{stem}_{page_idx}{suffix}"
66
- else:
67
- return fn
68
205
 
69
206
  def _to_img(self) -> dict[str, np.ndarray]:
207
+ from .utils import get_show_color
208
+
70
209
  res_img_dict = {}
71
210
  model_settings = self["model_settings"]
72
211
  if model_settings["use_doc_preprocessor"]:
@@ -74,12 +213,14 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
74
213
  res_img_dict[key] = value
75
214
  res_img_dict["layout_det_res"] = self["layout_det_res"].img["res"]
76
215
 
77
- if model_settings["use_general_ocr"] or model_settings["use_table_recognition"]:
78
- res_img_dict["overall_ocr_res"] = self["overall_ocr_res"].img["ocr_res_img"]
216
+ if model_settings["use_region_detection"]:
217
+ res_img_dict["region_det_res"] = self["region_det_res"].img["res"]
218
+
219
+ res_img_dict["overall_ocr_res"] = self["overall_ocr_res"].img["ocr_res_img"]
79
220
 
80
221
  if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
81
222
  table_cell_img = Image.fromarray(
82
- copy.deepcopy(self["doc_preprocessor_res"]["output_img"])
223
+ copy.deepcopy(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
83
224
  )
84
225
  table_draw = ImageDraw.Draw(table_cell_img)
85
226
  rectangle_color = (255, 0, 0)
@@ -104,16 +245,23 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
104
245
  # for layout ordering image
105
246
  image = Image.fromarray(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
106
247
  draw = ImageDraw.Draw(image, "RGBA")
107
- parsing_result = self["parsing_res_list"]
248
+ font_size = int(0.018 * int(image.width)) + 2
249
+ font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
250
+ parsing_result: List[LayoutParsingBlock] = self["parsing_res_list"]
108
251
  for block in parsing_result:
109
- bbox = block["block_bbox"]
110
- index = block.get("index", None)
111
- label = block["sub_label"]
112
- fill_color = get_show_color(label)
252
+ bbox = block.bbox
253
+ index = block.order_index
254
+ label = block.label
255
+ fill_color = get_show_color(label, False)
113
256
  draw.rectangle(bbox, fill=fill_color)
114
257
  if index is not None:
115
- text_position = (bbox[2] + 2, bbox[1] - 10)
116
- draw.text(text_position, str(index), fill="red")
258
+ text_position = (bbox[2] + 2, bbox[1] - font_size // 2)
259
+ if int(image.width) - bbox[2] < font_size:
260
+ text_position = (
261
+ int(bbox[2] - font_size * 1.1),
262
+ bbox[1] - font_size // 2,
263
+ )
264
+ draw.text(text_position, str(index), font=font, fill="red")
117
265
 
118
266
  res_img_dict["layout_order_res"] = image
119
267
 
@@ -137,8 +285,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
137
285
  if self["model_settings"]["use_doc_preprocessor"]:
138
286
  data["doc_preprocessor_res"] = self["doc_preprocessor_res"].str["res"]
139
287
  data["layout_det_res"] = self["layout_det_res"].str["res"]
140
- if model_settings["use_general_ocr"] or model_settings["use_table_recognition"]:
141
- data["overall_ocr_res"] = self["overall_ocr_res"].str["res"]
288
+ data["overall_ocr_res"] = self["overall_ocr_res"].str["res"]
142
289
  if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
143
290
  data["table_res_list"] = []
144
291
  for sno in range(len(self["table_res_list"])):
@@ -179,9 +326,9 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
179
326
  parsing_res_list = self["parsing_res_list"]
180
327
  parsing_res_list = [
181
328
  {
182
- "block_label": parsing_res["block_label"],
183
- "block_content": parsing_res["block_content"],
184
- "block_bbox": parsing_res["block_bbox"],
329
+ "block_label": parsing_res.label,
330
+ "block_content": parsing_res.content,
331
+ "block_bbox": parsing_res.bbox,
185
332
  }
186
333
  for parsing_res in parsing_res_list
187
334
  ]
@@ -189,8 +336,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
189
336
  if self["model_settings"]["use_doc_preprocessor"]:
190
337
  data["doc_preprocessor_res"] = self["doc_preprocessor_res"].json["res"]
191
338
  data["layout_det_res"] = self["layout_det_res"].json["res"]
192
- if model_settings["use_general_ocr"] or model_settings["use_table_recognition"]:
193
- data["overall_ocr_res"] = self["overall_ocr_res"].json["res"]
339
+ data["overall_ocr_res"] = self["overall_ocr_res"].json["res"]
194
340
  if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
195
341
  data["table_res_list"] = []
196
342
  for sno in range(len(self["table_res_list"])):
@@ -243,228 +389,357 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
243
389
  res_xlsx_dict[key] = table_res.xlsx["pred"]
244
390
  return res_xlsx_dict
245
391
 
246
- def _to_markdown(self) -> dict:
392
+ def _to_markdown(self, pretty=True) -> dict:
247
393
  """
248
394
  Save the parsing result to a Markdown file.
249
395
 
396
+ Args:
397
+ pretty (Optional[bool]): whether to pretty markdown by HTML, default by True.
398
+
250
399
  Returns:
251
400
  Dict
252
401
  """
402
+ original_image_width = self["doc_preprocessor_res"]["output_img"].shape[1]
253
403
 
254
- def _format_data(obj):
255
-
256
- def format_title(title):
257
- """
258
- Normalize chapter title.
259
- Add the '#' to indicate the level of the title.
260
- If numbering exists, ensure there's exactly one space between it and the title content.
261
- If numbering does not exist, return the original title unchanged.
262
-
263
- :param title: Original chapter title string.
264
- :return: Normalized chapter title string.
265
- """
266
- match = self.title_pattern.match(title)
267
- if match:
268
- numbering = match.group(1).strip()
269
- title_content = match.group(3).lstrip()
270
- # Return numbering and title content separated by one space
271
- title = numbering + " " + title_content
272
-
273
- title = title.rstrip(".")
274
- level = (
275
- title.count(
276
- ".",
277
- )
278
- + 1
279
- if "." in title
280
- else 1
281
- )
282
- return f"#{'#' * level} {title}".replace("-\n", "").replace(
283
- "\n",
284
- " ",
285
- )
286
-
287
- def format_centered_text(key):
288
- return (
289
- f'<div style="text-align: center;">{block[key]}</div>'.replace(
290
- "-\n",
291
- "",
292
- ).replace("\n", " ")
293
- + "\n"
404
+ if pretty:
405
+ format_text_func = lambda block: format_centered_by_html(
406
+ format_text_plain_func(block)
407
+ )
408
+ format_image_func = lambda block: format_centered_by_html(
409
+ format_image_scaled_by_html_func(
410
+ block,
411
+ original_image_width=original_image_width,
294
412
  )
413
+ )
414
+ else:
415
+ format_text_func = lambda block: block.content
416
+ format_image_func = format_image_plain_func
295
417
 
296
- def format_image(label):
297
- img_tags = []
298
- image_path = "".join(block[label].keys())
299
- img_tags.append(
300
- '<div style="text-align: center;"><img src="{}" alt="Image" /></div>'.format(
301
- image_path.replace("-\n", "").replace("\n", " "),
302
- ),
303
- )
304
- return "\n".join(img_tags)
305
-
306
- def format_first_line(templates, format_func, spliter):
307
- lines = block["block_content"].split(spliter)
308
- for idx in range(len(lines)):
309
- line = lines[idx]
310
- if line.strip() == "":
311
- continue
312
- if line.lower() in templates:
313
- lines[idx] = format_func(line)
314
- break
315
- return spliter.join(lines)
316
-
317
- def format_table():
318
- return "\n" + block["block_content"]
319
-
320
- def get_seg_flag(block, prev_block):
321
-
322
- seg_start_flag = True
323
- seg_end_flag = True
324
-
325
- block_box = block["block_bbox"]
326
- context_left_coordinate = block_box[0]
327
- context_right_coordinate = block_box[2]
328
- seg_start_coordinate = block.get("seg_start_coordinate")
329
- seg_end_coordinate = block.get("seg_end_coordinate")
330
-
331
- if prev_block is not None:
332
- prev_block_bbox = prev_block["block_bbox"]
333
- num_of_prev_lines = prev_block.get("num_of_lines")
334
- pre_block_seg_end_coordinate = prev_block.get("seg_end_coordinate")
335
- prev_end_space_small = (
336
- prev_block_bbox[2] - pre_block_seg_end_coordinate < 10
337
- )
338
- prev_lines_more_than_one = num_of_prev_lines > 1
339
-
340
- overlap_blocks = context_left_coordinate < prev_block_bbox[2]
341
-
342
- # update context_left_coordinate and context_right_coordinate
343
- if overlap_blocks:
344
- context_left_coordinate = min(
345
- prev_block_bbox[0], context_left_coordinate
346
- )
347
- context_right_coordinate = max(
348
- prev_block_bbox[2], context_right_coordinate
349
- )
350
- prev_end_space_small = (
351
- context_right_coordinate - pre_block_seg_end_coordinate < 10
352
- )
353
-
354
- current_start_space_small = (
355
- seg_start_coordinate - context_left_coordinate < 10
356
- )
418
+ if self["model_settings"].get("use_chart_recognition", False):
419
+ format_chart_func = format_chart2table_func
420
+ else:
421
+ format_chart_func = format_image_func
357
422
 
358
- if (
359
- prev_end_space_small
360
- and current_start_space_small
361
- and prev_lines_more_than_one
362
- ):
363
- seg_start_flag = False
364
- else:
365
- if seg_start_coordinate - context_left_coordinate < 10:
366
- seg_start_flag = False
367
-
368
- if context_right_coordinate - seg_end_coordinate < 10:
369
- seg_end_flag = False
370
-
371
- return seg_start_flag, seg_end_flag
372
-
373
- handlers = {
374
- "paragraph_title": lambda: format_title(block["block_content"]),
375
- "doc_title": lambda: f"# {block['block_content']}".replace(
376
- "-\n",
377
- "",
378
- ).replace("\n", " "),
379
- "table_title": lambda: format_centered_text("block_content"),
380
- "figure_title": lambda: format_centered_text("block_content"),
381
- "chart_title": lambda: format_centered_text("block_content"),
382
- "text": lambda: block["block_content"]
383
- .replace("-\n", " ")
384
- .replace("\n", " "),
385
- "abstract": lambda: format_first_line(
386
- ["摘要", "abstract"], lambda l: f"## {l}\n", " "
387
- ),
388
- "content": lambda: block["block_content"]
389
- .replace("-\n", " \n")
390
- .replace("\n", " \n"),
391
- "image": lambda: format_image("block_image"),
392
- "chart": lambda: format_image("block_image"),
393
- "formula": lambda: f"$${block['block_content']}$$",
394
- "table": format_table,
395
- "reference": lambda: format_first_line(
396
- ["参考文献", "references"], lambda l: f"## {l}", "\n"
397
- ),
398
- "algorithm": lambda: block["block_content"].strip("\n"),
399
- "seal": lambda: f"Words of Seals:\n{block['block_content']}",
400
- }
401
- parsing_res_list = obj["parsing_res_list"]
402
- markdown_content = ""
403
- last_label = None
404
- seg_start_flag = None
405
- seg_end_flag = None
406
- prev_block = None
407
- page_first_element_seg_start_flag = None
408
- page_last_element_seg_end_flag = None
409
- parsing_res_list = sorted(
410
- parsing_res_list,
411
- key=lambda x: x.get("sub_index", 999),
423
+ if self["model_settings"].get("use_seal_recognition", False):
424
+ format_seal_func = lambda block: "\n".join(
425
+ [format_image_func(block), format_text_func(block)]
412
426
  )
413
- for block in parsing_res_list:
414
- seg_start_flag, seg_end_flag = get_seg_flag(block, prev_block)
415
-
416
- label = block.get("block_label")
417
- page_first_element_seg_start_flag = (
418
- seg_start_flag
419
- if (page_first_element_seg_start_flag is None)
420
- else page_first_element_seg_start_flag
427
+ else:
428
+ format_seal_func = format_image_func
429
+
430
+ if self["model_settings"].get("use_table_recognition", False):
431
+ if pretty:
432
+ format_table_func = lambda block: "\n" + format_text_func(
433
+ block
434
+ ).replace("<table>", '<table border="1">')
435
+ else:
436
+ format_table_func = lambda block: simplify_table_func(
437
+ "\n" + block.content
421
438
  )
422
- handler = handlers.get(label)
423
- if handler:
424
- prev_block = block
425
- if label == last_label == "text" and seg_start_flag == False:
426
- last_char_of_markdown = (
427
- markdown_content[-1] if markdown_content else ""
428
- )
429
- first_char_of_handler = handler()[0] if handler() else ""
430
- last_is_chinese_char = (
431
- re.match(r"[\u4e00-\u9fff]", last_char_of_markdown)
432
- if last_char_of_markdown
433
- else False
434
- )
435
- first_is_chinese_char = (
436
- re.match(r"[\u4e00-\u9fff]", first_char_of_handler)
437
- if first_char_of_handler
438
- else False
439
- )
440
- if not (last_is_chinese_char or first_is_chinese_char):
441
- markdown_content += " " + handler()
442
- else:
443
- markdown_content += handler()
444
- else:
445
- markdown_content += (
446
- "\n\n" + handler() if markdown_content else handler()
447
- )
448
- last_label = label
449
- page_last_element_seg_end_flag = seg_end_flag
450
-
451
- return markdown_content, (
452
- page_first_element_seg_start_flag,
453
- page_last_element_seg_end_flag,
439
+ else:
440
+ format_table_func = format_image_func
441
+
442
+ if self["model_settings"].get("use_formula_recognition", False):
443
+ format_formula_func = lambda block: f"$${block.content}$$"
444
+ else:
445
+ format_formula_func = format_image_func
446
+
447
+ handle_funcs_dict = {
448
+ "paragraph_title": format_title_func,
449
+ "abstract_title": format_title_func,
450
+ "reference_title": format_title_func,
451
+ "content_title": format_title_func,
452
+ "doc_title": lambda block: f"# {block.content}".replace(
453
+ "-\n",
454
+ "",
455
+ ).replace("\n", " "),
456
+ "table_title": format_text_func,
457
+ "figure_title": format_text_func,
458
+ "chart_title": format_text_func,
459
+ "text": lambda block: block.content.replace("\n\n", "\n").replace(
460
+ "\n", "\n\n"
461
+ ),
462
+ "abstract": partial(
463
+ format_first_line_func,
464
+ templates=["摘要", "abstract"],
465
+ format_func=lambda l: f"## {l}\n",
466
+ spliter=" ",
467
+ ),
468
+ "content": lambda block: block.content.replace("-\n", " \n").replace(
469
+ "\n", " \n"
470
+ ),
471
+ "image": format_image_func,
472
+ "chart": format_chart_func,
473
+ "formula": format_formula_func,
474
+ "table": format_table_func,
475
+ "reference": partial(
476
+ format_first_line_func,
477
+ templates=["参考文献", "references"],
478
+ format_func=lambda l: f"## {l}",
479
+ spliter="\n",
480
+ ),
481
+ "algorithm": lambda block: block.content.strip("\n"),
482
+ "seal": format_seal_func,
483
+ }
484
+
485
+ markdown_content = ""
486
+ last_label = None
487
+ seg_start_flag = None
488
+ seg_end_flag = None
489
+ prev_block = None
490
+ page_first_element_seg_start_flag = None
491
+ page_last_element_seg_end_flag = None
492
+ markdown_info = {}
493
+ markdown_info["markdown_images"] = {}
494
+ for block in self["parsing_res_list"]:
495
+ seg_start_flag, seg_end_flag = get_seg_flag(block, prev_block)
496
+
497
+ label = block.label
498
+ if block.image is not None:
499
+ markdown_info["markdown_images"][block.image["path"]] = block.image[
500
+ "img"
501
+ ]
502
+ page_first_element_seg_start_flag = (
503
+ seg_start_flag
504
+ if (page_first_element_seg_start_flag is None)
505
+ else page_first_element_seg_start_flag
454
506
  )
455
507
 
456
- markdown_info = dict()
457
- markdown_info["markdown_texts"], (
458
- page_first_element_seg_start_flag,
459
- page_last_element_seg_end_flag,
460
- ) = _format_data(self)
508
+ handle_func = handle_funcs_dict.get(label, None)
509
+ if handle_func:
510
+ prev_block = block
511
+ if label == last_label == "text" and seg_start_flag == False:
512
+ markdown_content += handle_func(block)
513
+ else:
514
+ markdown_content += (
515
+ "\n\n" + handle_func(block)
516
+ if markdown_content
517
+ else handle_func(block)
518
+ )
519
+ last_label = label
520
+ page_last_element_seg_end_flag = seg_end_flag
521
+
522
+ markdown_info["markdown_texts"] = markdown_content
461
523
  markdown_info["page_continuation_flags"] = (
462
524
  page_first_element_seg_start_flag,
463
525
  page_last_element_seg_end_flag,
464
526
  )
465
-
466
- markdown_info["markdown_images"] = {}
467
527
  for img in self["imgs_in_doc"]:
468
528
  markdown_info["markdown_images"][img["path"]] = img["img"]
469
529
 
470
530
  return markdown_info
531
+
532
+
533
class LayoutParsingBlock:
    """A single layout block: label, bounding box, content and ordering state."""

    def __init__(self, label, bbox, content="") -> None:
        """Create a block.

        Args:
            label: Label of the block.
            bbox: Bounding box ``[x_min, y_min, x_max, y_max]``. The stored
                ``bbox`` is truncated to integers; ``width`` and ``height``
                are computed from the values as passed in.
            content: Content of the block. Defaults to an empty string.
        """
        self.label = label
        self.order_label = None
        self.bbox = list(map(int, bbox))
        self.content = content
        # Sentinels: no segment start/end has been measured yet.
        self.seg_start_coordinate = float("inf")
        self.seg_end_coordinate = float("-inf")
        self.width = bbox[2] - bbox[0]
        self.height = bbox[3] - bbox[1]
        self.area = self.width * self.height
        self.num_of_lines = 1
        self.image = None
        self.index = None
        self.order_index = None
        self.text_line_width = 1
        self.text_line_height = 1
        self.direction = self.get_bbox_direction()
        self.child_blocks = []
        self.update_direction_info()

    def __str__(self) -> str:
        return f"{self.__dict__}"

    def __repr__(self) -> str:
        _str = f"\n\n#################\nindex:\t{self.index}\nlabel:\t{self.label}\nregion_label:\t{self.order_label}\nbbox:\t{self.bbox}\ncontent:\t{self.content}\n#################"
        return _str

    def to_dict(self) -> dict:
        """Return the instance attribute dictionary (the live ``__dict__``, not a copy)."""
        return self.__dict__

    def update_direction_info(self) -> None:
        """Refresh the direction-dependent attributes from ``direction`` and ``bbox``.

        For a horizontal block the primary axis is x (``bbox[0]``..``bbox[2]``)
        and the secondary axis is y; for any other direction the axes are
        swapped. ``width`` and ``height`` are read as stored and are not
        recomputed here.
        """
        if self.direction == "horizontal":
            self.secondary_direction = "vertical"
            self.short_side_length = self.height
            self.long_side_length = self.width
            primary_axis, secondary_axis = (0, 2), (1, 3)
        else:
            self.secondary_direction = "horizontal"
            self.short_side_length = self.width
            self.long_side_length = self.height
            primary_axis, secondary_axis = (1, 3), (0, 2)

        self.start_coordinate = self.bbox[primary_axis[0]]
        self.end_coordinate = self.bbox[primary_axis[1]]
        self.secondary_direction_start_coordinate = self.bbox[secondary_axis[0]]
        self.secondary_direction_end_coordinate = self.bbox[secondary_axis[1]]

    def append_child_block(self, child_block: "LayoutParsingBlock") -> None:
        """Attach ``child_block`` and grow ``bbox`` to the union of both boxes.

        The box held before the first child is attached is saved in
        ``ori_bbox``. If the child has children of its own, they are detached
        from it (see ``get_child_blocks``) and attached here as well, so the
        child list stays flat.

        Args:
            child_block: Block to attach.
        """
        if not self.child_blocks:
            # list(...) instead of .copy() so a tuple bbox is accepted too.
            self.ori_bbox = list(self.bbox)
        x1, y1, x2, y2 = self.bbox
        x1_child, y1_child, x2_child, y2_child = child_block.bbox
        # Keep bbox a list, as __init__ does; a tuple here would break the
        # copy above on a later call and change the attribute's type.
        self.bbox = [
            min(x1, x1_child),
            min(y1, y1_child),
            max(x2, x2_child),
            max(y2, y2_child),
        ]
        self.update_direction_info()
        child_blocks = [child_block]
        if child_block.child_blocks:
            child_blocks.extend(child_block.get_child_blocks())
        self.child_blocks.extend(child_blocks)

    def get_child_blocks(self) -> list:
        """Detach and return all child blocks, restoring the saved ``ori_bbox``.

        A block that never had a child keeps its current ``bbox`` and yields
        an empty list instead of raising ``AttributeError``.

        Returns:
            list: The child blocks that were attached to this block.
        """
        # NOTE(review): the direction-dependent coordinates are not refreshed
        # after the bbox is restored -- confirm whether callers rely on that.
        self.bbox = getattr(self, "ori_bbox", self.bbox)
        child_blocks = self.child_blocks.copy()
        self.child_blocks = []
        return child_blocks

    def get_centroid(self) -> tuple:
        """Return the centre ``(x, y)`` of ``bbox``."""
        x1, y1, x2, y2 = self.bbox
        centroid = ((x1 + x2) / 2, (y1 + y2) / 2)
        return centroid

    def get_bbox_direction(self, direction_ratio: float = 1.0) -> str:
        """
        Determine if the block is horizontal or vertical.

        Args:
            direction_ratio (float): Factor applied to ``width`` before it is
                compared with ``height``. Default is 1.0.

        Returns:
            str: "horizontal" if ``width * direction_ratio >= height``,
                otherwise "vertical".
        """
        return (
            "horizontal" if self.width * direction_ratio >= self.height else "vertical"
        )
626
+
627
+
628
+ class LayoutParsingRegion:
629
+
630
def __init__(
    self,
    bbox,
    blocks: "Optional[List[LayoutParsingBlock]]" = None,
    image_shape=None,
) -> None:
    """Create a region and index the blocks it contains.

    Args:
        bbox: Bounding box of the region, ``[x1, y1, x2, y2]``.
        blocks: Blocks belonging to the region. ``None`` (the default) is
            treated as an empty list; a fresh list is used on every call
            instead of a shared mutable default.
        image_shape: Shape of the source image, forwarded unchanged to
            ``calculate_bbox_metrics``. The default of ``None`` is not
            accepted there, so callers have to supply a shape.
    """
    self.bbox = bbox
    self.block_map = {}
    self.direction = "horizontal"
    self.calculate_bbox_metrics(image_shape)
    # Block indexes grouped by label category; filled in by
    # init_region_info_from_layout.
    self.doc_title_block_idxes = []
    self.paragraph_title_block_idxes = []
    self.vision_block_idxes = []
    self.unordered_block_idxes = []
    self.vision_title_block_idxes = []
    self.normal_text_block_idxes = []
    self.header_block_idxes = []
    self.footer_block_idxes = []
    # Fallback text-line size, replaced below when blocks provide one.
    self.text_line_width = 20
    self.text_line_height = 10
    self.init_region_info_from_layout([] if blocks is None else blocks)
    self.init_direction_info()
649
+
650
def init_region_info_from_layout(self, blocks: "List[LayoutParsingBlock]"):
    """Index ``blocks`` by label category and derive region-level text metrics.

    Each block is stored in ``block_map`` under its position in ``blocks`` and
    gets that position written to ``block.index``. The position is then added
    to the first category list whose ``BLOCK_LABEL_MAP`` entry contains the
    block's label; blocks matching no category are recorded as normal text and
    contribute to the direction vote and the text-line size averages.

    Args:
        blocks: Blocks to index.
    """
    # (BLOCK_LABEL_MAP key, destination list), tested in this order.
    category_buckets = (
        ("header_labels", self.header_block_idxes),
        ("doc_title_labels", self.doc_title_block_idxes),
        ("paragraph_title_labels", self.paragraph_title_block_idxes),
        ("vision_labels", self.vision_block_idxes),
        ("vision_title_labels", self.vision_title_block_idxes),
        ("footer_labels", self.footer_block_idxes),
        ("unordered_labels", self.unordered_block_idxes),
    )

    line_widths = []
    line_heights = []
    horizontal_votes = 0

    for position, current in enumerate(blocks):
        self.block_map[position] = current
        current.index = position

        for map_key, bucket in category_buckets:
            if current.label in BLOCK_LABEL_MAP[map_key]:
                bucket.append(position)
                break
        else:
            # No category matched: treat the block as normal text.
            self.normal_text_block_idxes.append(position)
            line_heights.append(current.text_line_height)
            line_widths.append(current.text_line_width)
            if current.direction == "horizontal":
                horizontal_votes += 1

    # Horizontal wins when at least half of the normal text blocks are
    # horizontal (which also covers the case of no normal text blocks).
    if horizontal_votes >= len(self.normal_text_block_idxes) * 0.5:
        self.direction = "horizontal"
    else:
        self.direction = "vertical"

    self.text_line_width = np.mean(line_widths) if line_widths else 20
    self.text_line_height = np.mean(line_heights) if line_heights else 10
689
+
690
def init_direction_info(self):
    """Derive the bbox axis indexes and centre coordinates from ``direction``.

    For a horizontal region the primary axis uses bbox indexes 0 and 2 and the
    secondary axis uses 1 and 3; for any other direction they are swapped. The
    centre along each axis is the mean of the two bbox values on that axis.
    """
    if self.direction == "horizontal":
        primary_axis, secondary_axis = (0, 2), (1, 3)
        secondary_name = "vertical"
    else:
        primary_axis, secondary_axis = (1, 3), (0, 2)
        secondary_name = "horizontal"

    self.direction_start_index, self.direction_end_index = primary_axis
    (
        self.secondary_direction_start_index,
        self.secondary_direction_end_index,
    ) = secondary_axis
    self.secondary_direction = secondary_name

    box = self.bbox
    self.direction_center_coordinate = (
        box[self.direction_start_index] + box[self.direction_end_index]
    ) / 2
    self.secondary_direction_center_coordinate = (
        box[self.secondary_direction_start_index]
        + box[self.secondary_direction_end_index]
    ) / 2
711
+
712
def calculate_bbox_metrics(self, image_shape):
    """Compute distance and angle metrics of ``self.bbox``.

    Sets ``euclidean_distance`` (origin to the top-left corner),
    ``center_euclidean_distance`` (origin to the box centre), ``angle_rad``
    (``atan2`` of the centre) and ``weighted_distance``, which adds the
    bottom edge, the box width and 1.5 times the left edge snapped down to a
    multiple of one tenth of the image width.

    Args:
        image_shape: Shape of the source image; the first two entries are
            read as ``(height, width)`` and any further entries are ignored.

    Raises:
        TypeError: If ``image_shape`` is ``None``.
    """
    x1, y1, x2, y2 = self.bbox
    _, image_width = image_shape[:2]
    width = x2 - x1
    x_center, y_center = (x1 + x2) / 2, (y1 + y2) / 2

    self.euclidean_distance = math.sqrt(((x1) ** 2 + (y1) ** 2))
    self.center_euclidean_distance = math.sqrt(((x_center) ** 2 + (y_center) ** 2))
    self.angle_rad = math.atan2(y_center, x_center)

    # Clamp to 1 so images narrower than 10 do not divide by zero.
    column_width = max(image_width // 10, 1)
    self.weighted_distance = y2 + width + (x1 // column_width) * column_width * 1.5
723
+
724
def sort_normal_blocks(self, blocks):
    """Sort ``blocks`` in place according to the region's direction.

    Horizontal regions order by the row band (``bbox[1]`` floor-divided by
    the text line height), then the column band (``bbox[0]`` floor-divided by
    the text line width), then the squared distance of the top-left corner
    from the origin. Other regions order by the negated ``bbox[0]``
    floor-divided by the line width, then the row band, then the negated
    ``bbox[2] ** 2 + bbox[1] ** 2``.

    Args:
        blocks: List of blocks to sort; modified in place.
    """
    line_height = self.text_line_height
    line_width = self.text_line_width

    if self.direction == "horizontal":

        def reading_key(item):
            left, top = item.bbox[0], item.bbox[1]
            return (top // line_height, left // line_width, top**2 + left**2)

    else:

        def reading_key(item):
            left, top, right = item.bbox[0], item.bbox[1], item.bbox[2]
            return (
                (-left) // line_width,
                top // line_height,
                -(right**2 + top**2),
            )

    blocks.sort(key=reading_key)
741
+
742
def sort(self):
    """Sort this region by delegating to ``xycut_enhanced``.

    Returns:
        Whatever ``xycut_enhanced`` returns for this region.
    """
    # Imported at call time rather than at module import.
    from .xycut_enhanced import xycut_enhanced as _sort_region

    return _sort_region(self)