paddlex 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (940)
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +1 -0
  3. paddlex/__main__.py +3 -4
  4. paddlex/configs/modules/3d_bev_detection/BEVFusion.yaml +38 -0
  5. paddlex/configs/modules/face_feature/MobileFaceNet.yaml +41 -0
  6. paddlex/configs/modules/face_feature/ResNet50_face.yaml +41 -0
  7. paddlex/configs/modules/formula_recognition/LaTeX_OCR_rec.yaml +40 -0
  8. paddlex/configs/modules/formula_recognition/PP-FormulaNet-L.yaml +40 -0
  9. paddlex/configs/modules/formula_recognition/PP-FormulaNet-S.yaml +40 -0
  10. paddlex/configs/modules/formula_recognition/UniMERNet.yaml +40 -0
  11. paddlex/configs/modules/image_classification/CLIP_vit_base_patch16_224.yaml +41 -0
  12. paddlex/configs/modules/image_classification/CLIP_vit_large_patch14_224.yaml +41 -0
  13. paddlex/configs/modules/image_classification/ConvNeXt_large_384.yaml +41 -0
  14. paddlex/configs/modules/keypoint_detection/PP-TinyPose_128x96.yaml +40 -0
  15. paddlex/configs/modules/keypoint_detection/PP-TinyPose_256x192.yaml +40 -0
  16. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +40 -0
  17. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +40 -0
  18. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +40 -0
  19. paddlex/configs/modules/multilingual_speech_recognition/whisper_base.yaml +12 -0
  20. paddlex/configs/modules/multilingual_speech_recognition/whisper_large.yaml +12 -0
  21. paddlex/configs/modules/multilingual_speech_recognition/whisper_medium.yaml +12 -0
  22. paddlex/configs/modules/multilingual_speech_recognition/whisper_small.yaml +12 -0
  23. paddlex/configs/modules/multilingual_speech_recognition/whisper_tiny.yaml +12 -0
  24. paddlex/configs/modules/object_detection/Co-DINO-R50.yaml +40 -0
  25. paddlex/configs/modules/object_detection/Co-DINO-Swin-L.yaml +40 -0
  26. paddlex/configs/modules/object_detection/Co-Deformable-DETR-R50.yaml +40 -0
  27. paddlex/configs/modules/object_detection/Co-Deformable-DETR-Swin-T.yaml +40 -0
  28. paddlex/configs/modules/object_detection/YOLOX-X.yaml +40 -0
  29. paddlex/configs/modules/open_vocabulary_detection/GroundingDINO-T.yaml +13 -0
  30. paddlex/configs/modules/open_vocabulary_segmentation/SAM-H_box.yaml +17 -0
  31. paddlex/configs/modules/open_vocabulary_segmentation/SAM-H_point.yaml +15 -0
  32. paddlex/configs/modules/rotated_object_detection/PP-YOLOE-R-L.yaml +40 -0
  33. paddlex/configs/modules/semantic_segmentation/MaskFormer_small.yaml +42 -0
  34. paddlex/configs/modules/semantic_segmentation/MaskFormer_tiny.yaml +42 -0
  35. paddlex/configs/modules/semantic_segmentation/SeaFormer_base.yaml +40 -0
  36. paddlex/configs/modules/semantic_segmentation/SeaFormer_large.yaml +40 -0
  37. paddlex/configs/modules/semantic_segmentation/SeaFormer_small.yaml +40 -0
  38. paddlex/configs/modules/semantic_segmentation/SeaFormer_tiny.yaml +40 -0
  39. paddlex/configs/modules/table_cells_detection/RT-DETR-L_wired_table_cell_det.yaml +40 -0
  40. paddlex/configs/modules/table_cells_detection/RT-DETR-L_wireless_table_cell_det.yaml +40 -0
  41. paddlex/configs/modules/table_classification/PP-LCNet_x1_0_table_cls.yaml +41 -0
  42. paddlex/configs/modules/table_structure_recognition/SLANeXt_wired.yaml +39 -0
  43. paddlex/configs/modules/table_structure_recognition/SLANeXt_wireless.yaml +39 -0
  44. paddlex/configs/modules/text_detection/PP-OCRv3_mobile_det.yaml +40 -0
  45. paddlex/configs/modules/text_detection/PP-OCRv3_server_det.yaml +40 -0
  46. paddlex/configs/modules/text_recognition/PP-OCRv3_mobile_rec.yaml +39 -0
  47. paddlex/configs/modules/text_recognition/PP-OCRv4_server_rec_doc.yaml +39 -0
  48. paddlex/configs/modules/text_recognition/arabic_PP-OCRv3_mobile_rec.yaml +39 -0
  49. paddlex/configs/modules/text_recognition/chinese_cht_PP-OCRv3_mobile_rec.yaml +39 -0
  50. paddlex/configs/modules/text_recognition/cyrillic_PP-OCRv3_mobile_rec.yaml +39 -0
  51. paddlex/configs/modules/text_recognition/devanagari_PP-OCRv3_mobile_rec.yaml +39 -0
  52. paddlex/configs/modules/text_recognition/en_PP-OCRv3_mobile_rec.yaml +39 -0
  53. paddlex/configs/modules/text_recognition/en_PP-OCRv4_mobile_rec.yaml +39 -0
  54. paddlex/configs/modules/text_recognition/japan_PP-OCRv3_mobile_rec.yaml +39 -0
  55. paddlex/configs/modules/text_recognition/ka_PP-OCRv3_mobile_rec.yaml +39 -0
  56. paddlex/configs/modules/text_recognition/korean_PP-OCRv3_mobile_rec.yaml +39 -0
  57. paddlex/configs/modules/text_recognition/latin_PP-OCRv3_mobile_rec.yaml +39 -0
  58. paddlex/configs/modules/text_recognition/ta_PP-OCRv3_mobile_rec.yaml +39 -0
  59. paddlex/configs/modules/text_recognition/te_PP-OCRv3_mobile_rec.yaml +39 -0
  60. paddlex/configs/modules/textline_orientation/PP-LCNet_x0_25_textline_ori.yaml +41 -0
  61. paddlex/configs/modules/video_classification/PP-TSM-R50_8frames_uniform.yaml +42 -0
  62. paddlex/configs/modules/video_classification/PP-TSMv2-LCNetV2_16frames_uniform.yaml +42 -0
  63. paddlex/configs/modules/video_classification/PP-TSMv2-LCNetV2_8frames_uniform.yaml +42 -0
  64. paddlex/configs/modules/video_detection/YOWO.yaml +40 -0
  65. paddlex/configs/pipelines/3d_bev_detection.yaml +9 -0
  66. paddlex/configs/pipelines/OCR.yaml +44 -0
  67. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +149 -0
  68. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +184 -0
  69. paddlex/configs/pipelines/PP-ShiTuV2.yaml +18 -0
  70. paddlex/configs/pipelines/PP-StructureV3.yaml +226 -0
  71. paddlex/configs/pipelines/anomaly_detection.yaml +8 -0
  72. paddlex/configs/pipelines/doc_preprocessor.yaml +15 -0
  73. paddlex/configs/pipelines/face_recognition.yaml +18 -0
  74. paddlex/configs/pipelines/formula_recognition.yaml +39 -0
  75. paddlex/configs/pipelines/human_keypoint_detection.yaml +17 -0
  76. paddlex/configs/pipelines/image_classification.yaml +10 -0
  77. paddlex/configs/pipelines/image_multilabel_classification.yaml +9 -0
  78. paddlex/configs/pipelines/instance_segmentation.yaml +10 -0
  79. paddlex/configs/pipelines/layout_parsing.yaml +101 -0
  80. paddlex/configs/pipelines/multilingual_speech_recognition.yaml +9 -0
  81. paddlex/configs/pipelines/object_detection.yaml +10 -0
  82. paddlex/configs/pipelines/open_vocabulary_detection.yaml +12 -0
  83. paddlex/configs/pipelines/open_vocabulary_segmentation.yaml +13 -0
  84. paddlex/configs/pipelines/pedestrian_attribute_recognition.yaml +15 -0
  85. paddlex/configs/pipelines/rotated_object_detection.yaml +10 -0
  86. paddlex/configs/pipelines/seal_recognition.yaml +51 -0
  87. paddlex/configs/pipelines/semantic_segmentation.yaml +10 -0
  88. paddlex/configs/pipelines/small_object_detection.yaml +10 -0
  89. paddlex/configs/pipelines/table_recognition.yaml +56 -0
  90. paddlex/configs/pipelines/table_recognition_v2.yaml +76 -0
  91. paddlex/configs/pipelines/ts_anomaly_detection.yaml +8 -0
  92. paddlex/configs/pipelines/ts_classification.yaml +8 -0
  93. paddlex/configs/pipelines/ts_forecast.yaml +8 -0
  94. paddlex/configs/pipelines/vehicle_attribute_recognition.yaml +15 -0
  95. paddlex/configs/pipelines/video_classification.yaml +9 -0
  96. paddlex/configs/pipelines/video_detection.yaml +10 -0
  97. paddlex/engine.py +1 -1
  98. paddlex/hpip_links.html +19 -0
  99. paddlex/inference/__init__.py +3 -1
  100. paddlex/inference/common/batch_sampler/__init__.py +20 -0
  101. paddlex/inference/common/batch_sampler/audio_batch_sampler.py +84 -0
  102. paddlex/inference/common/batch_sampler/base_batch_sampler.py +90 -0
  103. paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +147 -0
  104. paddlex/inference/common/batch_sampler/image_batch_sampler.py +136 -0
  105. paddlex/inference/common/batch_sampler/ts_batch_sampler.py +110 -0
  106. paddlex/inference/common/batch_sampler/video_batch_sampler.py +94 -0
  107. paddlex/inference/common/reader/__init__.py +19 -0
  108. paddlex/inference/common/reader/audio_reader.py +46 -0
  109. paddlex/inference/common/reader/det_3d_reader.py +239 -0
  110. paddlex/inference/common/reader/image_reader.py +69 -0
  111. paddlex/inference/common/reader/ts_reader.py +45 -0
  112. paddlex/inference/common/reader/video_reader.py +42 -0
  113. paddlex/inference/common/result/__init__.py +29 -0
  114. paddlex/inference/common/result/base_cv_result.py +31 -0
  115. paddlex/inference/common/result/base_result.py +70 -0
  116. paddlex/inference/common/result/base_ts_result.py +42 -0
  117. paddlex/inference/common/result/base_video_result.py +36 -0
  118. paddlex/inference/common/result/mixin.py +703 -0
  119. paddlex/inference/models/3d_bev_detection/__init__.py +15 -0
  120. paddlex/inference/models/3d_bev_detection/predictor.py +314 -0
  121. paddlex/inference/models/3d_bev_detection/processors.py +978 -0
  122. paddlex/inference/models/3d_bev_detection/result.py +65 -0
  123. paddlex/inference/models/3d_bev_detection/visualizer_3d.py +131 -0
  124. paddlex/inference/models/__init__.py +37 -13
  125. paddlex/inference/models/anomaly_detection/__init__.py +15 -0
  126. paddlex/inference/models/anomaly_detection/predictor.py +145 -0
  127. paddlex/inference/models/anomaly_detection/processors.py +46 -0
  128. paddlex/inference/models/anomaly_detection/result.py +70 -0
  129. paddlex/inference/models/base/__init__.py +1 -2
  130. paddlex/inference/models/base/predictor/__init__.py +16 -0
  131. paddlex/inference/models/base/predictor/base_predictor.py +175 -0
  132. paddlex/inference/models/base/predictor/basic_predictor.py +139 -0
  133. paddlex/inference/models/common/__init__.py +35 -0
  134. paddlex/inference/models/common/static_infer.py +329 -0
  135. paddlex/inference/models/common/tokenizer/__init__.py +17 -0
  136. paddlex/inference/models/common/tokenizer/bert_tokenizer.py +655 -0
  137. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +451 -0
  138. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +2141 -0
  139. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3504 -0
  140. paddlex/inference/models/common/tokenizer/utils.py +66 -0
  141. paddlex/inference/models/common/tokenizer/vocab.py +647 -0
  142. paddlex/inference/models/common/ts/__init__.py +15 -0
  143. paddlex/inference/models/common/ts/funcs.py +533 -0
  144. paddlex/inference/models/common/ts/processors.py +313 -0
  145. paddlex/inference/models/common/vision/__init__.py +23 -0
  146. paddlex/inference/models/common/vision/funcs.py +93 -0
  147. paddlex/inference/models/common/vision/processors.py +270 -0
  148. paddlex/inference/models/face_feature/__init__.py +15 -0
  149. paddlex/inference/models/face_feature/predictor.py +65 -0
  150. paddlex/inference/models/formula_recognition/__init__.py +15 -0
  151. paddlex/inference/models/formula_recognition/predictor.py +203 -0
  152. paddlex/inference/models/formula_recognition/processors.py +986 -0
  153. paddlex/inference/models/formula_recognition/result.py +403 -0
  154. paddlex/inference/models/image_classification/__init__.py +15 -0
  155. paddlex/inference/models/image_classification/predictor.py +182 -0
  156. paddlex/inference/models/image_classification/processors.py +87 -0
  157. paddlex/inference/models/image_classification/result.py +92 -0
  158. paddlex/inference/models/image_feature/__init__.py +15 -0
  159. paddlex/inference/models/image_feature/predictor.py +156 -0
  160. paddlex/inference/models/image_feature/processors.py +29 -0
  161. paddlex/inference/models/image_feature/result.py +33 -0
  162. paddlex/inference/models/image_multilabel_classification/__init__.py +15 -0
  163. paddlex/inference/models/image_multilabel_classification/predictor.py +94 -0
  164. paddlex/inference/models/image_multilabel_classification/processors.py +85 -0
  165. paddlex/inference/models/image_multilabel_classification/result.py +95 -0
  166. paddlex/inference/models/image_unwarping/__init__.py +15 -0
  167. paddlex/inference/models/image_unwarping/predictor.py +105 -0
  168. paddlex/inference/models/image_unwarping/processors.py +88 -0
  169. paddlex/inference/models/image_unwarping/result.py +45 -0
  170. paddlex/inference/models/instance_segmentation/__init__.py +15 -0
  171. paddlex/inference/models/instance_segmentation/predictor.py +210 -0
  172. paddlex/inference/models/instance_segmentation/processors.py +105 -0
  173. paddlex/inference/models/instance_segmentation/result.py +161 -0
  174. paddlex/inference/models/keypoint_detection/__init__.py +15 -0
  175. paddlex/inference/models/keypoint_detection/predictor.py +188 -0
  176. paddlex/inference/models/keypoint_detection/processors.py +359 -0
  177. paddlex/inference/models/keypoint_detection/result.py +192 -0
  178. paddlex/inference/models/multilingual_speech_recognition/__init__.py +15 -0
  179. paddlex/inference/models/multilingual_speech_recognition/predictor.py +141 -0
  180. paddlex/inference/models/multilingual_speech_recognition/processors.py +1941 -0
  181. paddlex/inference/models/multilingual_speech_recognition/result.py +21 -0
  182. paddlex/inference/models/object_detection/__init__.py +15 -0
  183. paddlex/inference/models/object_detection/predictor.py +348 -0
  184. paddlex/inference/models/object_detection/processors.py +855 -0
  185. paddlex/inference/models/object_detection/result.py +113 -0
  186. paddlex/inference/models/object_detection/utils.py +68 -0
  187. paddlex/inference/models/open_vocabulary_detection/__init__.py +15 -0
  188. paddlex/inference/models/open_vocabulary_detection/predictor.py +155 -0
  189. paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +15 -0
  190. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +485 -0
  191. paddlex/inference/models/open_vocabulary_segmentation/__init__.py +15 -0
  192. paddlex/inference/models/open_vocabulary_segmentation/predictor.py +120 -0
  193. paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +15 -0
  194. paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +249 -0
  195. paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +15 -0
  196. paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +147 -0
  197. paddlex/inference/models/semantic_segmentation/__init__.py +15 -0
  198. paddlex/inference/models/semantic_segmentation/predictor.py +167 -0
  199. paddlex/inference/models/semantic_segmentation/processors.py +114 -0
  200. paddlex/inference/models/semantic_segmentation/result.py +72 -0
  201. paddlex/inference/models/table_structure_recognition/__init__.py +15 -0
  202. paddlex/inference/models/table_structure_recognition/predictor.py +171 -0
  203. paddlex/inference/models/table_structure_recognition/processors.py +235 -0
  204. paddlex/inference/models/table_structure_recognition/result.py +70 -0
  205. paddlex/inference/models/text_detection/__init__.py +15 -0
  206. paddlex/inference/models/text_detection/predictor.py +191 -0
  207. paddlex/inference/models/text_detection/processors.py +466 -0
  208. paddlex/inference/models/text_detection/result.py +51 -0
  209. paddlex/inference/models/text_recognition/__init__.py +15 -0
  210. paddlex/inference/models/text_recognition/predictor.py +106 -0
  211. paddlex/inference/models/text_recognition/processors.py +231 -0
  212. paddlex/inference/models/text_recognition/result.py +75 -0
  213. paddlex/inference/models/ts_anomaly_detection/__init__.py +15 -0
  214. paddlex/inference/models/ts_anomaly_detection/predictor.py +146 -0
  215. paddlex/inference/models/ts_anomaly_detection/processors.py +94 -0
  216. paddlex/inference/models/ts_anomaly_detection/result.py +72 -0
  217. paddlex/inference/models/ts_classification/__init__.py +15 -0
  218. paddlex/inference/models/ts_classification/predictor.py +135 -0
  219. paddlex/inference/models/ts_classification/processors.py +117 -0
  220. paddlex/inference/models/ts_classification/result.py +78 -0
  221. paddlex/inference/models/ts_forecasting/__init__.py +15 -0
  222. paddlex/inference/models/ts_forecasting/predictor.py +159 -0
  223. paddlex/inference/models/ts_forecasting/processors.py +149 -0
  224. paddlex/inference/models/ts_forecasting/result.py +83 -0
  225. paddlex/inference/models/video_classification/__init__.py +15 -0
  226. paddlex/inference/models/video_classification/predictor.py +147 -0
  227. paddlex/inference/models/video_classification/processors.py +409 -0
  228. paddlex/inference/models/video_classification/result.py +92 -0
  229. paddlex/inference/models/video_detection/__init__.py +15 -0
  230. paddlex/inference/models/video_detection/predictor.py +136 -0
  231. paddlex/inference/models/video_detection/processors.py +450 -0
  232. paddlex/inference/models/video_detection/result.py +104 -0
  233. paddlex/inference/pipelines/3d_bev_detection/__init__.py +15 -0
  234. paddlex/inference/pipelines/3d_bev_detection/pipeline.py +67 -0
  235. paddlex/inference/pipelines/__init__.py +174 -73
  236. paddlex/inference/pipelines/anomaly_detection/__init__.py +15 -0
  237. paddlex/inference/pipelines/anomaly_detection/pipeline.py +62 -0
  238. paddlex/inference/pipelines/attribute_recognition/__init__.py +15 -0
  239. paddlex/inference/pipelines/attribute_recognition/pipeline.py +105 -0
  240. paddlex/inference/pipelines/attribute_recognition/result.py +100 -0
  241. paddlex/inference/pipelines/base.py +103 -57
  242. paddlex/inference/pipelines/components/__init__.py +23 -0
  243. paddlex/inference/pipelines/components/chat_server/__init__.py +16 -0
  244. paddlex/inference/pipelines/components/chat_server/base.py +39 -0
  245. paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +236 -0
  246. paddlex/inference/pipelines/components/common/__init__.py +18 -0
  247. paddlex/inference/pipelines/components/common/base_operator.py +36 -0
  248. paddlex/inference/pipelines/components/common/base_result.py +65 -0
  249. paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +46 -0
  250. paddlex/inference/pipelines/components/common/crop_image_regions.py +550 -0
  251. paddlex/inference/pipelines/components/common/seal_det_warp.py +941 -0
  252. paddlex/inference/pipelines/components/common/sort_boxes.py +83 -0
  253. paddlex/inference/pipelines/components/faisser.py +352 -0
  254. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +16 -0
  255. paddlex/inference/pipelines/components/prompt_engineering/base.py +35 -0
  256. paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +127 -0
  257. paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +148 -0
  258. paddlex/inference/pipelines/components/retriever/__init__.py +16 -0
  259. paddlex/inference/pipelines/components/retriever/base.py +226 -0
  260. paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +70 -0
  261. paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +163 -0
  262. paddlex/inference/pipelines/components/utils/__init__.py +13 -0
  263. paddlex/inference/pipelines/components/utils/mixin.py +206 -0
  264. paddlex/inference/pipelines/doc_preprocessor/__init__.py +15 -0
  265. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +190 -0
  266. paddlex/inference/pipelines/doc_preprocessor/result.py +103 -0
  267. paddlex/inference/pipelines/face_recognition/__init__.py +15 -0
  268. paddlex/inference/pipelines/face_recognition/pipeline.py +61 -0
  269. paddlex/inference/pipelines/face_recognition/result.py +43 -0
  270. paddlex/inference/pipelines/formula_recognition/__init__.py +15 -0
  271. paddlex/inference/pipelines/formula_recognition/pipeline.py +303 -0
  272. paddlex/inference/pipelines/formula_recognition/result.py +291 -0
  273. paddlex/inference/pipelines/image_classification/__init__.py +15 -0
  274. paddlex/inference/pipelines/image_classification/pipeline.py +71 -0
  275. paddlex/inference/pipelines/image_multilabel_classification/__init__.py +15 -0
  276. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +78 -0
  277. paddlex/inference/pipelines/instance_segmentation/__init__.py +15 -0
  278. paddlex/inference/pipelines/instance_segmentation/pipeline.py +70 -0
  279. paddlex/inference/pipelines/keypoint_detection/__init__.py +15 -0
  280. paddlex/inference/pipelines/keypoint_detection/pipeline.py +137 -0
  281. paddlex/inference/pipelines/layout_parsing/__init__.py +2 -1
  282. paddlex/inference/pipelines/layout_parsing/pipeline.py +570 -0
  283. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +739 -0
  284. paddlex/inference/pipelines/layout_parsing/result.py +203 -0
  285. paddlex/inference/pipelines/layout_parsing/result_v2.py +470 -0
  286. paddlex/inference/pipelines/layout_parsing/utils.py +2385 -0
  287. paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +15 -0
  288. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +67 -0
  289. paddlex/inference/pipelines/object_detection/__init__.py +15 -0
  290. paddlex/inference/pipelines/object_detection/pipeline.py +95 -0
  291. paddlex/inference/pipelines/ocr/__init__.py +15 -0
  292. paddlex/inference/pipelines/ocr/pipeline.py +389 -0
  293. paddlex/inference/pipelines/ocr/result.py +248 -0
  294. paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +15 -0
  295. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +75 -0
  296. paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +15 -0
  297. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +89 -0
  298. paddlex/inference/pipelines/pp_chatocr/__init__.py +16 -0
  299. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +102 -0
  300. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +773 -0
  301. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +977 -0
  302. paddlex/inference/pipelines/pp_shitu_v2/__init__.py +15 -0
  303. paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +152 -0
  304. paddlex/inference/pipelines/pp_shitu_v2/result.py +126 -0
  305. paddlex/inference/pipelines/rotated_object_detection/__init__.py +15 -0
  306. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +74 -0
  307. paddlex/inference/pipelines/seal_recognition/__init__.py +15 -0
  308. paddlex/inference/pipelines/seal_recognition/pipeline.py +271 -0
  309. paddlex/inference/pipelines/seal_recognition/result.py +87 -0
  310. paddlex/inference/pipelines/semantic_segmentation/__init__.py +15 -0
  311. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +74 -0
  312. paddlex/inference/pipelines/small_object_detection/__init__.py +15 -0
  313. paddlex/inference/pipelines/small_object_detection/pipeline.py +74 -0
  314. paddlex/inference/pipelines/table_recognition/__init__.py +2 -1
  315. paddlex/inference/pipelines/table_recognition/pipeline.py +462 -0
  316. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +792 -0
  317. paddlex/inference/pipelines/table_recognition/result.py +216 -0
  318. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +362 -0
  319. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +470 -0
  320. paddlex/inference/pipelines/table_recognition/utils.py +23 -436
  321. paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +15 -0
  322. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +62 -0
  323. paddlex/inference/pipelines/ts_classification/__init__.py +15 -0
  324. paddlex/inference/pipelines/ts_classification/pipeline.py +62 -0
  325. paddlex/inference/pipelines/ts_forecasting/__init__.py +15 -0
  326. paddlex/inference/pipelines/ts_forecasting/pipeline.py +62 -0
  327. paddlex/inference/pipelines/video_classification/__init__.py +15 -0
  328. paddlex/inference/pipelines/video_classification/pipeline.py +68 -0
  329. paddlex/inference/pipelines/video_detection/__init__.py +15 -0
  330. paddlex/inference/pipelines/video_detection/pipeline.py +73 -0
  331. paddlex/inference/serving/__init__.py +13 -0
  332. paddlex/inference/serving/basic_serving/__init__.py +18 -0
  333. paddlex/inference/serving/basic_serving/_app.py +209 -0
  334. paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +41 -0
  335. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +13 -0
  336. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +96 -0
  337. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +36 -0
  338. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +90 -0
  339. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +64 -0
  340. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +97 -0
  341. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +223 -0
  342. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +97 -0
  343. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +78 -0
  344. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +66 -0
  345. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +70 -0
  346. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +81 -0
  347. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +115 -0
  348. paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +76 -0
  349. paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +89 -0
  350. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +74 -0
  351. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +99 -0
  352. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +78 -0
  353. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +85 -0
  354. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +81 -0
  355. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +191 -0
  356. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +221 -0
  357. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +218 -0
  358. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +136 -0
  359. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +78 -0
  360. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +103 -0
  361. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +64 -0
  362. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +69 -0
  363. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +105 -0
  364. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +107 -0
  365. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +62 -0
  366. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +61 -0
  367. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +62 -0
  368. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +81 -0
  369. paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +73 -0
  370. paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +89 -0
  371. paddlex/inference/serving/basic_serving/_server.py +35 -0
  372. paddlex/inference/serving/infra/__init__.py +13 -0
  373. paddlex/inference/serving/infra/config.py +36 -0
  374. paddlex/inference/serving/infra/models.py +72 -0
  375. paddlex/inference/serving/infra/storage.py +175 -0
  376. paddlex/inference/serving/infra/utils.py +259 -0
  377. paddlex/inference/serving/schemas/__init__.py +13 -0
  378. paddlex/inference/serving/schemas/anomaly_detection.py +39 -0
  379. paddlex/inference/serving/schemas/doc_preprocessor.py +54 -0
  380. paddlex/inference/serving/schemas/face_recognition.py +124 -0
  381. paddlex/inference/serving/schemas/formula_recognition.py +56 -0
  382. paddlex/inference/serving/schemas/human_keypoint_detection.py +55 -0
  383. paddlex/inference/serving/schemas/image_classification.py +45 -0
  384. paddlex/inference/serving/schemas/image_multilabel_classification.py +47 -0
  385. paddlex/inference/serving/schemas/instance_segmentation.py +53 -0
  386. paddlex/inference/serving/schemas/layout_parsing.py +72 -0
  387. paddlex/inference/serving/schemas/m_3d_bev_detection.py +48 -0
  388. paddlex/inference/serving/schemas/multilingual_speech_recognition.py +57 -0
  389. paddlex/inference/serving/schemas/object_detection.py +52 -0
  390. paddlex/inference/serving/schemas/ocr.py +60 -0
  391. paddlex/inference/serving/schemas/open_vocabulary_detection.py +52 -0
  392. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +52 -0
  393. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +61 -0
  394. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +134 -0
  395. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +151 -0
  396. paddlex/inference/serving/schemas/pp_shituv2.py +124 -0
  397. paddlex/inference/serving/schemas/pp_structurev3.py +84 -0
  398. paddlex/inference/serving/schemas/rotated_object_detection.py +52 -0
  399. paddlex/inference/serving/schemas/seal_recognition.py +62 -0
  400. paddlex/inference/serving/schemas/semantic_segmentation.py +45 -0
  401. paddlex/inference/serving/schemas/shared/__init__.py +13 -0
  402. paddlex/inference/serving/schemas/shared/classification.py +23 -0
  403. paddlex/inference/serving/schemas/shared/image_segmentation.py +28 -0
  404. paddlex/inference/serving/schemas/shared/object_detection.py +24 -0
  405. paddlex/inference/serving/schemas/shared/ocr.py +25 -0
  406. paddlex/inference/serving/schemas/small_object_detection.py +52 -0
  407. paddlex/inference/serving/schemas/table_recognition.py +64 -0
  408. paddlex/inference/serving/schemas/table_recognition_v2.py +66 -0
  409. paddlex/inference/serving/schemas/ts_anomaly_detection.py +37 -0
  410. paddlex/inference/serving/schemas/ts_classification.py +38 -0
  411. paddlex/inference/serving/schemas/ts_forecast.py +37 -0
  412. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +61 -0
  413. paddlex/inference/serving/schemas/video_classification.py +44 -0
  414. paddlex/inference/serving/schemas/video_detection.py +56 -0
  415. paddlex/inference/utils/benchmark.py +23 -11
  416. paddlex/inference/utils/get_pipeline_path.py +2 -1
  417. paddlex/inference/utils/io/__init__.py +3 -0
  418. paddlex/inference/utils/io/readers.py +164 -17
  419. paddlex/inference/utils/io/writers.py +85 -2
  420. paddlex/inference/utils/new_ir_blacklist.py +6 -0
  421. paddlex/inference/utils/official_models.py +277 -211
  422. paddlex/inference/utils/pp_option.py +24 -4
  423. paddlex/model.py +12 -5
  424. paddlex/modules/3d_bev_detection/__init__.py +18 -0
  425. paddlex/modules/3d_bev_detection/dataset_checker/__init__.py +95 -0
  426. paddlex/modules/3d_bev_detection/dataset_checker/dataset_src/__init__.py +17 -0
  427. paddlex/modules/3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +106 -0
  428. paddlex/modules/3d_bev_detection/dataset_checker/dataset_src/check_dataset.py +102 -0
  429. paddlex/modules/3d_bev_detection/evaluator.py +46 -0
  430. paddlex/modules/3d_bev_detection/exportor.py +22 -0
  431. paddlex/modules/3d_bev_detection/model_list.py +18 -0
  432. paddlex/modules/3d_bev_detection/trainer.py +70 -0
  433. paddlex/modules/__init__.py +34 -1
  434. paddlex/modules/base/build_model.py +1 -1
  435. paddlex/modules/base/dataset_checker/dataset_checker.py +6 -1
  436. paddlex/modules/base/evaluator.py +20 -4
  437. paddlex/modules/base/exportor.py +30 -5
  438. paddlex/modules/base/trainer.py +29 -6
  439. paddlex/modules/face_recognition/trainer.py +1 -23
  440. paddlex/modules/formula_recognition/__init__.py +5 -0
  441. paddlex/modules/formula_recognition/dataset_checker/__init__.py +113 -0
  442. paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +19 -0
  443. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +157 -0
  444. paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +80 -0
  445. paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +94 -0
  446. paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +81 -0
  447. paddlex/modules/formula_recognition/evaluator.py +77 -0
  448. paddlex/modules/formula_recognition/exportor.py +22 -0
  449. paddlex/modules/formula_recognition/model_list.py +3 -0
  450. paddlex/modules/formula_recognition/trainer.py +121 -0
  451. paddlex/modules/image_classification/model_list.py +2 -0
  452. paddlex/modules/instance_segmentation/dataset_checker/__init__.py +15 -0
  453. paddlex/modules/keypoint_detection/__init__.py +18 -0
  454. paddlex/modules/keypoint_detection/dataset_checker/__init__.py +56 -0
  455. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +15 -0
  456. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +86 -0
  457. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +13 -0
  458. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +119 -0
  459. paddlex/modules/keypoint_detection/evaluator.py +41 -0
  460. paddlex/modules/keypoint_detection/exportor.py +22 -0
  461. paddlex/modules/keypoint_detection/model_list.py +16 -0
  462. paddlex/modules/keypoint_detection/trainer.py +39 -0
  463. paddlex/modules/multilingual_speech_recognition/__init__.py +18 -0
  464. paddlex/modules/multilingual_speech_recognition/dataset_checker.py +27 -0
  465. paddlex/modules/multilingual_speech_recognition/evaluator.py +27 -0
  466. paddlex/modules/multilingual_speech_recognition/exportor.py +27 -0
  467. paddlex/modules/multilingual_speech_recognition/model_list.py +22 -0
  468. paddlex/modules/multilingual_speech_recognition/trainer.py +40 -0
  469. paddlex/modules/object_detection/evaluator.py +12 -1
  470. paddlex/modules/object_detection/model_list.py +10 -0
  471. paddlex/modules/object_detection/trainer.py +15 -1
  472. paddlex/modules/open_vocabulary_detection/__init__.py +18 -0
  473. paddlex/modules/open_vocabulary_detection/dataset_checker.py +29 -0
  474. paddlex/modules/open_vocabulary_detection/evaluator.py +29 -0
  475. paddlex/modules/open_vocabulary_detection/exportor.py +29 -0
  476. paddlex/modules/open_vocabulary_detection/model_list.py +18 -0
  477. paddlex/modules/open_vocabulary_detection/trainer.py +42 -0
  478. paddlex/modules/open_vocabulary_segmentation/__init__.py +18 -0
  479. paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +29 -0
  480. paddlex/modules/open_vocabulary_segmentation/evaluator.py +29 -0
  481. paddlex/modules/open_vocabulary_segmentation/exportor.py +29 -0
  482. paddlex/modules/open_vocabulary_segmentation/model_list.py +19 -0
  483. paddlex/modules/open_vocabulary_segmentation/trainer.py +42 -0
  484. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +15 -0
  485. paddlex/modules/semantic_segmentation/exportor.py +9 -0
  486. paddlex/modules/semantic_segmentation/model_list.py +2 -0
  487. paddlex/modules/semantic_segmentation/trainer.py +2 -0
  488. paddlex/modules/table_recognition/dataset_checker/__init__.py +16 -1
  489. paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +13 -14
  490. paddlex/modules/table_recognition/model_list.py +2 -0
  491. paddlex/modules/text_detection/dataset_checker/__init__.py +16 -1
  492. paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +13 -3
  493. paddlex/modules/text_detection/model_list.py +2 -0
  494. paddlex/modules/text_recognition/dataset_checker/__init__.py +16 -4
  495. paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +13 -3
  496. paddlex/modules/text_recognition/evaluator.py +4 -3
  497. paddlex/modules/text_recognition/exportor.py +0 -3
  498. paddlex/modules/text_recognition/model_list.py +14 -0
  499. paddlex/modules/text_recognition/trainer.py +4 -3
  500. paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +15 -0
  501. paddlex/modules/ts_anomaly_detection/trainer.py +17 -1
  502. paddlex/modules/ts_classification/dataset_checker/__init__.py +15 -0
  503. paddlex/modules/ts_classification/trainer.py +17 -1
  504. paddlex/modules/ts_forecast/dataset_checker/__init__.py +15 -0
  505. paddlex/modules/ts_forecast/trainer.py +17 -1
  506. paddlex/modules/video_classification/__init__.py +18 -0
  507. paddlex/modules/video_classification/dataset_checker/__init__.py +93 -0
  508. paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +18 -0
  509. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +93 -0
  510. paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +121 -0
  511. paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +82 -0
  512. paddlex/modules/video_classification/evaluator.py +44 -0
  513. paddlex/modules/video_classification/exportor.py +22 -0
  514. paddlex/modules/video_classification/model_list.py +19 -0
  515. paddlex/modules/video_classification/trainer.py +88 -0
  516. paddlex/modules/video_detection/__init__.py +18 -0
  517. paddlex/modules/video_detection/dataset_checker/__init__.py +86 -0
  518. paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +17 -0
  519. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +101 -0
  520. paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +134 -0
  521. paddlex/modules/video_detection/evaluator.py +42 -0
  522. paddlex/modules/video_detection/exportor.py +22 -0
  523. paddlex/modules/video_detection/model_list.py +15 -0
  524. paddlex/modules/video_detection/trainer.py +82 -0
  525. paddlex/ops/__init__.py +149 -0
  526. paddlex/ops/iou3d_nms/iou3d_cpu.cpp +264 -0
  527. paddlex/ops/iou3d_nms/iou3d_cpu.h +27 -0
  528. paddlex/ops/iou3d_nms/iou3d_nms.cpp +204 -0
  529. paddlex/ops/iou3d_nms/iou3d_nms.h +33 -0
  530. paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +108 -0
  531. paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +482 -0
  532. paddlex/ops/setup.py +37 -0
  533. paddlex/ops/voxel/voxelize_op.cc +191 -0
  534. paddlex/ops/voxel/voxelize_op.cu +346 -0
  535. paddlex/paddle2onnx_requirements.txt +1 -0
  536. paddlex/paddlex_cli.py +339 -72
  537. paddlex/repo_apis/Paddle3D_api/__init__.py +17 -0
  538. paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +18 -0
  539. paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +118 -0
  540. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +238 -0
  541. paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +55 -0
  542. paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +104 -0
  543. paddlex/repo_apis/Paddle3D_api/pp3d_config.py +144 -0
  544. paddlex/repo_apis/PaddleClas_api/cls/model.py +6 -0
  545. paddlex/repo_apis/PaddleClas_api/cls/register.py +20 -2
  546. paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +8 -4
  547. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +6 -0
  548. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +27 -5
  549. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +6 -0
  550. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +81 -0
  551. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +182 -3
  552. paddlex/repo_apis/PaddleOCR_api/__init__.py +1 -0
  553. paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +16 -0
  554. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +570 -0
  555. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +402 -0
  556. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +73 -0
  557. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +240 -0
  558. paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +18 -0
  559. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
  560. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +21 -0
  561. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +6 -0
  562. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +126 -7
  563. paddlex/repo_apis/PaddleSeg_api/seg/config.py +9 -0
  564. paddlex/repo_apis/PaddleSeg_api/seg/model.py +10 -0
  565. paddlex/repo_apis/PaddleSeg_api/seg/register.py +20 -0
  566. paddlex/repo_apis/PaddleTS_api/ts_base/config.py +24 -0
  567. paddlex/repo_apis/PaddleTS_api/ts_base/model.py +11 -7
  568. paddlex/repo_apis/PaddleVideo_api/__init__.py +17 -0
  569. paddlex/repo_apis/PaddleVideo_api/config_utils.py +51 -0
  570. paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +19 -0
  571. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +547 -0
  572. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +346 -0
  573. paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +71 -0
  574. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +205 -0
  575. paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +19 -0
  576. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +548 -0
  577. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +298 -0
  578. paddlex/repo_apis/PaddleVideo_api/video_det/register.py +45 -0
  579. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +200 -0
  580. paddlex/repo_apis/base/runner.py +2 -1
  581. paddlex/repo_manager/meta.py +29 -2
  582. paddlex/repo_manager/repo.py +24 -5
  583. paddlex/repo_manager/requirements.txt +10 -7
  584. paddlex/repo_manager/utils.py +62 -1
  585. paddlex/serving_requirements.txt +9 -0
  586. paddlex/utils/config.py +4 -3
  587. paddlex/utils/custom_device_whitelist.py +457 -0
  588. paddlex/utils/device.py +74 -26
  589. paddlex/utils/env.py +28 -0
  590. paddlex/utils/flags.py +4 -0
  591. paddlex/utils/fonts/__init__.py +48 -5
  592. paddlex/utils/lazy_loader.py +2 -0
  593. paddlex/utils/logging.py +1 -2
  594. paddlex/utils/pipeline_arguments.py +711 -0
  595. paddlex-3.0.0rc0.dist-info/METADATA +1035 -0
  596. paddlex-3.0.0rc0.dist-info/RECORD +1015 -0
  597. paddlex-3.0.0rc0.dist-info/WHEEL +5 -0
  598. paddlex/configs/face_recognition/MobileFaceNet.yaml +0 -44
  599. paddlex/configs/face_recognition/ResNet50_face.yaml +0 -44
  600. paddlex/configs/formula_recognition/LaTeX_OCR_rec.yaml +0 -40
  601. paddlex/configs/image_classification/CLIP_vit_base_patch16_224.yaml +0 -41
  602. paddlex/configs/image_classification/CLIP_vit_large_patch14_224.yaml +0 -41
  603. paddlex/configs/image_classification/ConvNeXt_large_384.yaml +0 -41
  604. paddlex/configs/object_detection/YOLOX-X.yaml +0 -40
  605. paddlex/configs/semantic_segmentation/SeaFormer_base.yaml +0 -40
  606. paddlex/configs/semantic_segmentation/SeaFormer_large.yaml +0 -40
  607. paddlex/configs/semantic_segmentation/SeaFormer_small.yaml +0 -40
  608. paddlex/configs/semantic_segmentation/SeaFormer_tiny.yaml +0 -40
  609. paddlex/inference/components/__init__.py +0 -18
  610. paddlex/inference/components/base.py +0 -292
  611. paddlex/inference/components/llm/__init__.py +0 -25
  612. paddlex/inference/components/llm/base.py +0 -65
  613. paddlex/inference/components/llm/erniebot.py +0 -212
  614. paddlex/inference/components/paddle_predictor/__init__.py +0 -20
  615. paddlex/inference/components/paddle_predictor/predictor.py +0 -332
  616. paddlex/inference/components/retrieval/__init__.py +0 -15
  617. paddlex/inference/components/retrieval/faiss.py +0 -359
  618. paddlex/inference/components/task_related/__init__.py +0 -33
  619. paddlex/inference/components/task_related/clas.py +0 -124
  620. paddlex/inference/components/task_related/det.py +0 -284
  621. paddlex/inference/components/task_related/instance_seg.py +0 -89
  622. paddlex/inference/components/task_related/seal_det_warp.py +0 -940
  623. paddlex/inference/components/task_related/seg.py +0 -40
  624. paddlex/inference/components/task_related/table_rec.py +0 -191
  625. paddlex/inference/components/task_related/text_det.py +0 -895
  626. paddlex/inference/components/task_related/text_rec.py +0 -353
  627. paddlex/inference/components/task_related/warp.py +0 -43
  628. paddlex/inference/components/transforms/__init__.py +0 -16
  629. paddlex/inference/components/transforms/image/__init__.py +0 -15
  630. paddlex/inference/components/transforms/image/common.py +0 -598
  631. paddlex/inference/components/transforms/image/funcs.py +0 -58
  632. paddlex/inference/components/transforms/read_data.py +0 -67
  633. paddlex/inference/components/transforms/ts/__init__.py +0 -15
  634. paddlex/inference/components/transforms/ts/common.py +0 -393
  635. paddlex/inference/components/transforms/ts/funcs.py +0 -424
  636. paddlex/inference/models/anomaly_detection.py +0 -87
  637. paddlex/inference/models/base/base_predictor.py +0 -76
  638. paddlex/inference/models/base/basic_predictor.py +0 -122
  639. paddlex/inference/models/face_recognition.py +0 -21
  640. paddlex/inference/models/formula_recognition.py +0 -55
  641. paddlex/inference/models/general_recognition.py +0 -99
  642. paddlex/inference/models/image_classification.py +0 -101
  643. paddlex/inference/models/image_unwarping.py +0 -43
  644. paddlex/inference/models/instance_segmentation.py +0 -66
  645. paddlex/inference/models/multilabel_classification.py +0 -33
  646. paddlex/inference/models/object_detection.py +0 -129
  647. paddlex/inference/models/semantic_segmentation.py +0 -86
  648. paddlex/inference/models/table_recognition.py +0 -106
  649. paddlex/inference/models/text_detection.py +0 -105
  650. paddlex/inference/models/text_recognition.py +0 -78
  651. paddlex/inference/models/ts_ad.py +0 -68
  652. paddlex/inference/models/ts_cls.py +0 -57
  653. paddlex/inference/models/ts_fc.py +0 -73
  654. paddlex/inference/pipelines/attribute_recognition.py +0 -92
  655. paddlex/inference/pipelines/face_recognition.py +0 -49
  656. paddlex/inference/pipelines/formula_recognition.py +0 -102
  657. paddlex/inference/pipelines/layout_parsing/layout_parsing.py +0 -362
  658. paddlex/inference/pipelines/ocr.py +0 -80
  659. paddlex/inference/pipelines/pp_shitu_v2.py +0 -152
  660. paddlex/inference/pipelines/ppchatocrv3/__init__.py +0 -15
  661. paddlex/inference/pipelines/ppchatocrv3/ch_prompt.yaml +0 -14
  662. paddlex/inference/pipelines/ppchatocrv3/ppchatocrv3.py +0 -717
  663. paddlex/inference/pipelines/ppchatocrv3/utils.py +0 -168
  664. paddlex/inference/pipelines/seal_recognition.py +0 -152
  665. paddlex/inference/pipelines/serving/__init__.py +0 -17
  666. paddlex/inference/pipelines/serving/_pipeline_apps/__init__.py +0 -205
  667. paddlex/inference/pipelines/serving/_pipeline_apps/anomaly_detection.py +0 -80
  668. paddlex/inference/pipelines/serving/_pipeline_apps/face_recognition.py +0 -317
  669. paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py +0 -119
  670. paddlex/inference/pipelines/serving/_pipeline_apps/image_classification.py +0 -101
  671. paddlex/inference/pipelines/serving/_pipeline_apps/instance_segmentation.py +0 -112
  672. paddlex/inference/pipelines/serving/_pipeline_apps/layout_parsing.py +0 -205
  673. paddlex/inference/pipelines/serving/_pipeline_apps/multi_label_image_classification.py +0 -90
  674. paddlex/inference/pipelines/serving/_pipeline_apps/object_detection.py +0 -90
  675. paddlex/inference/pipelines/serving/_pipeline_apps/ocr.py +0 -98
  676. paddlex/inference/pipelines/serving/_pipeline_apps/pedestrian_attribute_recognition.py +0 -102
  677. paddlex/inference/pipelines/serving/_pipeline_apps/pp_shitu_v2.py +0 -319
  678. paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py +0 -445
  679. paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py +0 -110
  680. paddlex/inference/pipelines/serving/_pipeline_apps/semantic_segmentation.py +0 -82
  681. paddlex/inference/pipelines/serving/_pipeline_apps/small_object_detection.py +0 -92
  682. paddlex/inference/pipelines/serving/_pipeline_apps/table_recognition.py +0 -110
  683. paddlex/inference/pipelines/serving/_pipeline_apps/ts_ad.py +0 -68
  684. paddlex/inference/pipelines/serving/_pipeline_apps/ts_cls.py +0 -68
  685. paddlex/inference/pipelines/serving/_pipeline_apps/ts_fc.py +0 -68
  686. paddlex/inference/pipelines/serving/_pipeline_apps/vehicle_attribute_recognition.py +0 -102
  687. paddlex/inference/pipelines/serving/app.py +0 -164
  688. paddlex/inference/pipelines/serving/models.py +0 -30
  689. paddlex/inference/pipelines/serving/server.py +0 -25
  690. paddlex/inference/pipelines/serving/storage.py +0 -161
  691. paddlex/inference/pipelines/serving/utils.py +0 -190
  692. paddlex/inference/pipelines/single_model_pipeline.py +0 -76
  693. paddlex/inference/pipelines/table_recognition/table_recognition.py +0 -193
  694. paddlex/inference/results/__init__.py +0 -31
  695. paddlex/inference/results/attribute_rec.py +0 -89
  696. paddlex/inference/results/base.py +0 -43
  697. paddlex/inference/results/chat_ocr.py +0 -158
  698. paddlex/inference/results/clas.py +0 -133
  699. paddlex/inference/results/det.py +0 -86
  700. paddlex/inference/results/face_rec.py +0 -34
  701. paddlex/inference/results/formula_rec.py +0 -363
  702. paddlex/inference/results/instance_seg.py +0 -152
  703. paddlex/inference/results/ocr.py +0 -157
  704. paddlex/inference/results/seal_rec.py +0 -50
  705. paddlex/inference/results/seg.py +0 -72
  706. paddlex/inference/results/shitu.py +0 -35
  707. paddlex/inference/results/table_rec.py +0 -109
  708. paddlex/inference/results/text_det.py +0 -33
  709. paddlex/inference/results/text_rec.py +0 -66
  710. paddlex/inference/results/ts.py +0 -37
  711. paddlex/inference/results/utils/mixin.py +0 -204
  712. paddlex/inference/results/warp.py +0 -31
  713. paddlex/inference/utils/process_hook.py +0 -54
  714. paddlex/pipelines/OCR.yaml +0 -8
  715. paddlex/pipelines/PP-ChatOCRv3-doc.yaml +0 -27
  716. paddlex/pipelines/PP-ShiTuV2.yaml +0 -13
  717. paddlex/pipelines/anomaly_detection.yaml +0 -7
  718. paddlex/pipelines/face_recognition.yaml +0 -13
  719. paddlex/pipelines/formula_recognition.yaml +0 -8
  720. paddlex/pipelines/image_classification.yaml +0 -7
  721. paddlex/pipelines/instance_segmentation.yaml +0 -7
  722. paddlex/pipelines/layout_parsing.yaml +0 -14
  723. paddlex/pipelines/multi_label_image_classification.yaml +0 -7
  724. paddlex/pipelines/object_detection.yaml +0 -7
  725. paddlex/pipelines/pedestrian_attribute_recognition.yaml +0 -7
  726. paddlex/pipelines/seal_recognition.yaml +0 -10
  727. paddlex/pipelines/semantic_segmentation.yaml +0 -7
  728. paddlex/pipelines/small_object_detection.yaml +0 -7
  729. paddlex/pipelines/table_recognition.yaml +0 -12
  730. paddlex/pipelines/ts_ad.yaml +0 -7
  731. paddlex/pipelines/ts_cls.yaml +0 -7
  732. paddlex/pipelines/ts_fc.yaml +0 -7
  733. paddlex/pipelines/vehicle_attribute_recognition.yaml +0 -7
  734. paddlex/utils/fonts/PingFang-SC-Regular.ttf +0 -0
  735. paddlex-3.0.0b2.dist-info/METADATA +0 -760
  736. paddlex-3.0.0b2.dist-info/RECORD +0 -646
  737. paddlex-3.0.0b2.dist-info/WHEEL +0 -5
  738. /paddlex/configs/{doc_text_orientation → modules/doc_text_orientation}/PP-LCNet_x1_0_doc_ori.yaml +0 -0
  739. /paddlex/configs/{face_detection → modules/face_detection}/BlazeFace-FPN-SSH.yaml +0 -0
  740. /paddlex/configs/{face_detection → modules/face_detection}/BlazeFace.yaml +0 -0
  741. /paddlex/configs/{face_detection → modules/face_detection}/PP-YOLOE_plus-S_face.yaml +0 -0
  742. /paddlex/configs/{face_detection → modules/face_detection}/PicoDet_LCNet_x2_5_face.yaml +0 -0
  743. /paddlex/configs/{human_detection → modules/human_detection}/PP-YOLOE-L_human.yaml +0 -0
  744. /paddlex/configs/{human_detection → modules/human_detection}/PP-YOLOE-S_human.yaml +0 -0
  745. /paddlex/configs/{anomaly_detection → modules/image_anomaly_detection}/STFPM.yaml +0 -0
  746. /paddlex/configs/{image_classification → modules/image_classification}/ConvNeXt_base_224.yaml +0 -0
  747. /paddlex/configs/{image_classification → modules/image_classification}/ConvNeXt_base_384.yaml +0 -0
  748. /paddlex/configs/{image_classification → modules/image_classification}/ConvNeXt_large_224.yaml +0 -0
  749. /paddlex/configs/{image_classification → modules/image_classification}/ConvNeXt_small.yaml +0 -0
  750. /paddlex/configs/{image_classification → modules/image_classification}/ConvNeXt_tiny.yaml +0 -0
  751. /paddlex/configs/{image_classification → modules/image_classification}/FasterNet-L.yaml +0 -0
  752. /paddlex/configs/{image_classification → modules/image_classification}/FasterNet-M.yaml +0 -0
  753. /paddlex/configs/{image_classification → modules/image_classification}/FasterNet-S.yaml +0 -0
  754. /paddlex/configs/{image_classification → modules/image_classification}/FasterNet-T0.yaml +0 -0
  755. /paddlex/configs/{image_classification → modules/image_classification}/FasterNet-T1.yaml +0 -0
  756. /paddlex/configs/{image_classification → modules/image_classification}/FasterNet-T2.yaml +0 -0
  757. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV1_x0_25.yaml +0 -0
  758. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV1_x0_5.yaml +0 -0
  759. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV1_x0_75.yaml +0 -0
  760. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV1_x1_0.yaml +0 -0
  761. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV2_x0_25.yaml +0 -0
  762. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV2_x0_5.yaml +0 -0
  763. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV2_x1_0.yaml +0 -0
  764. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV2_x1_5.yaml +0 -0
  765. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV2_x2_0.yaml +0 -0
  766. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_large_x0_35.yaml +0 -0
  767. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_large_x0_5.yaml +0 -0
  768. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_large_x0_75.yaml +0 -0
  769. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_large_x1_0.yaml +0 -0
  770. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_large_x1_25.yaml +0 -0
  771. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_small_x0_35.yaml +0 -0
  772. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_small_x0_5.yaml +0 -0
  773. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_small_x0_75.yaml +0 -0
  774. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_small_x1_0.yaml +0 -0
  775. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_small_x1_25.yaml +0 -0
  776. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV4_conv_large.yaml +0 -0
  777. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV4_conv_medium.yaml +0 -0
  778. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV4_conv_small.yaml +0 -0
  779. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV4_hybrid_large.yaml +0 -0
  780. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV4_hybrid_medium.yaml +0 -0
  781. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNetV2-B0.yaml +0 -0
  782. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNetV2-B1.yaml +0 -0
  783. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNetV2-B2.yaml +0 -0
  784. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNetV2-B3.yaml +0 -0
  785. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNetV2-B4.yaml +0 -0
  786. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNetV2-B5.yaml +0 -0
  787. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNetV2-B6.yaml +0 -0
  788. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNet_base.yaml +0 -0
  789. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNet_small.yaml +0 -0
  790. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNet_tiny.yaml +0 -0
  791. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNetV2_base.yaml +0 -0
  792. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNetV2_large.yaml +0 -0
  793. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNetV2_small.yaml +0 -0
  794. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x0_25.yaml +0 -0
  795. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x0_35.yaml +0 -0
  796. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x0_5.yaml +0 -0
  797. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x0_75.yaml +0 -0
  798. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x1_0.yaml +0 -0
  799. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x1_5.yaml +0 -0
  800. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x2_0.yaml +0 -0
  801. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x2_5.yaml +0 -0
  802. /paddlex/configs/{image_classification → modules/image_classification}/ResNet101.yaml +0 -0
  803. /paddlex/configs/{image_classification → modules/image_classification}/ResNet101_vd.yaml +0 -0
  804. /paddlex/configs/{image_classification → modules/image_classification}/ResNet152.yaml +0 -0
  805. /paddlex/configs/{image_classification → modules/image_classification}/ResNet152_vd.yaml +0 -0
  806. /paddlex/configs/{image_classification → modules/image_classification}/ResNet18.yaml +0 -0
  807. /paddlex/configs/{image_classification → modules/image_classification}/ResNet18_vd.yaml +0 -0
  808. /paddlex/configs/{image_classification → modules/image_classification}/ResNet200_vd.yaml +0 -0
  809. /paddlex/configs/{image_classification → modules/image_classification}/ResNet34.yaml +0 -0
  810. /paddlex/configs/{image_classification → modules/image_classification}/ResNet34_vd.yaml +0 -0
  811. /paddlex/configs/{image_classification → modules/image_classification}/ResNet50.yaml +0 -0
  812. /paddlex/configs/{image_classification → modules/image_classification}/ResNet50_vd.yaml +0 -0
  813. /paddlex/configs/{image_classification → modules/image_classification}/StarNet-S1.yaml +0 -0
  814. /paddlex/configs/{image_classification → modules/image_classification}/StarNet-S2.yaml +0 -0
  815. /paddlex/configs/{image_classification → modules/image_classification}/StarNet-S3.yaml +0 -0
  816. /paddlex/configs/{image_classification → modules/image_classification}/StarNet-S4.yaml +0 -0
  817. /paddlex/configs/{image_classification → modules/image_classification}/SwinTransformer_base_patch4_window12_384.yaml +0 -0
  818. /paddlex/configs/{image_classification → modules/image_classification}/SwinTransformer_base_patch4_window7_224.yaml +0 -0
  819. /paddlex/configs/{image_classification → modules/image_classification}/SwinTransformer_large_patch4_window12_384.yaml +0 -0
  820. /paddlex/configs/{image_classification → modules/image_classification}/SwinTransformer_large_patch4_window7_224.yaml +0 -0
  821. /paddlex/configs/{image_classification → modules/image_classification}/SwinTransformer_small_patch4_window7_224.yaml +0 -0
  822. /paddlex/configs/{image_classification → modules/image_classification}/SwinTransformer_tiny_patch4_window7_224.yaml +0 -0
  823. /paddlex/configs/{general_recognition → modules/image_feature}/PP-ShiTuV2_rec.yaml +0 -0
  824. /paddlex/configs/{general_recognition → modules/image_feature}/PP-ShiTuV2_rec_CLIP_vit_base.yaml +0 -0
  825. /paddlex/configs/{general_recognition → modules/image_feature}/PP-ShiTuV2_rec_CLIP_vit_large.yaml +0 -0
  826. /paddlex/configs/{multilabel_classification → modules/image_multilabel_classification}/CLIP_vit_base_patch16_448_ML.yaml +0 -0
  827. /paddlex/configs/{multilabel_classification → modules/image_multilabel_classification}/PP-HGNetV2-B0_ML.yaml +0 -0
  828. /paddlex/configs/{multilabel_classification → modules/image_multilabel_classification}/PP-HGNetV2-B4_ML.yaml +0 -0
  829. /paddlex/configs/{multilabel_classification → modules/image_multilabel_classification}/PP-HGNetV2-B6_ML.yaml +0 -0
  830. /paddlex/configs/{multilabel_classification → modules/image_multilabel_classification}/PP-LCNet_x1_0_ML.yaml +0 -0
  831. /paddlex/configs/{multilabel_classification → modules/image_multilabel_classification}/ResNet50_ML.yaml +0 -0
  832. /paddlex/configs/{image_unwarping → modules/image_unwarping}/UVDoc.yaml +0 -0
  833. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/Cascade-MaskRCNN-ResNet50-FPN.yaml +0 -0
  834. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN.yaml +0 -0
  835. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/Mask-RT-DETR-H.yaml +0 -0
  836. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/Mask-RT-DETR-L.yaml +0 -0
  837. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/Mask-RT-DETR-M.yaml +0 -0
  838. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/Mask-RT-DETR-S.yaml +0 -0
  839. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/Mask-RT-DETR-X.yaml +0 -0
  840. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/MaskRCNN-ResNeXt101-vd-FPN.yaml +0 -0
  841. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/MaskRCNN-ResNet101-FPN.yaml +0 -0
  842. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/MaskRCNN-ResNet101-vd-FPN.yaml +0 -0
  843. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/MaskRCNN-ResNet50-FPN.yaml +0 -0
  844. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/MaskRCNN-ResNet50-vd-FPN.yaml +0 -0
  845. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/MaskRCNN-ResNet50.yaml +0 -0
  846. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/PP-YOLOE_seg-S.yaml +0 -0
  847. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/SOLOv2.yaml +0 -0
  848. /paddlex/configs/{structure_analysis → modules/layout_detection}/PicoDet-L_layout_17cls.yaml +0 -0
  849. /paddlex/configs/{structure_analysis → modules/layout_detection}/PicoDet-L_layout_3cls.yaml +0 -0
  850. /paddlex/configs/{structure_analysis → modules/layout_detection}/PicoDet-S_layout_17cls.yaml +0 -0
  851. /paddlex/configs/{structure_analysis → modules/layout_detection}/PicoDet-S_layout_3cls.yaml +0 -0
  852. /paddlex/configs/{structure_analysis → modules/layout_detection}/PicoDet_layout_1x.yaml +0 -0
  853. /paddlex/configs/{structure_analysis → modules/layout_detection}/PicoDet_layout_1x_table.yaml +0 -0
  854. /paddlex/configs/{structure_analysis → modules/layout_detection}/RT-DETR-H_layout_17cls.yaml +0 -0
  855. /paddlex/configs/{structure_analysis → modules/layout_detection}/RT-DETR-H_layout_3cls.yaml +0 -0
  856. /paddlex/configs/{mainbody_detection → modules/mainbody_detection}/PP-ShiTuV2_det.yaml +0 -0
  857. /paddlex/configs/{object_detection → modules/object_detection}/Cascade-FasterRCNN-ResNet50-FPN.yaml +0 -0
  858. /paddlex/configs/{object_detection → modules/object_detection}/Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN.yaml +0 -0
  859. /paddlex/configs/{object_detection → modules/object_detection}/CenterNet-DLA-34.yaml +0 -0
  860. /paddlex/configs/{object_detection → modules/object_detection}/CenterNet-ResNet50.yaml +0 -0
  861. /paddlex/configs/{object_detection → modules/object_detection}/DETR-R50.yaml +0 -0
  862. /paddlex/configs/{object_detection → modules/object_detection}/FCOS-ResNet50.yaml +0 -0
  863. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNeXt101-vd-FPN.yaml +0 -0
  864. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNet101-FPN.yaml +0 -0
  865. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNet101.yaml +0 -0
  866. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNet34-FPN.yaml +0 -0
  867. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNet50-FPN.yaml +0 -0
  868. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNet50-vd-FPN.yaml +0 -0
  869. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNet50-vd-SSLDv2-FPN.yaml +0 -0
  870. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNet50.yaml +0 -0
  871. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-Swin-Tiny-FPN.yaml +0 -0
  872. /paddlex/configs/{object_detection → modules/object_detection}/PP-YOLOE_plus-L.yaml +0 -0
  873. /paddlex/configs/{object_detection → modules/object_detection}/PP-YOLOE_plus-M.yaml +0 -0
  874. /paddlex/configs/{object_detection → modules/object_detection}/PP-YOLOE_plus-S.yaml +0 -0
  875. /paddlex/configs/{object_detection → modules/object_detection}/PP-YOLOE_plus-X.yaml +0 -0
  876. /paddlex/configs/{object_detection → modules/object_detection}/PicoDet-L.yaml +0 -0
  877. /paddlex/configs/{object_detection → modules/object_detection}/PicoDet-M.yaml +0 -0
  878. /paddlex/configs/{object_detection → modules/object_detection}/PicoDet-S.yaml +0 -0
  879. /paddlex/configs/{object_detection → modules/object_detection}/PicoDet-XS.yaml +0 -0
  880. /paddlex/configs/{object_detection → modules/object_detection}/RT-DETR-H.yaml +0 -0
  881. /paddlex/configs/{object_detection → modules/object_detection}/RT-DETR-L.yaml +0 -0
  882. /paddlex/configs/{object_detection → modules/object_detection}/RT-DETR-R18.yaml +0 -0
  883. /paddlex/configs/{object_detection → modules/object_detection}/RT-DETR-R50.yaml +0 -0
  884. /paddlex/configs/{object_detection → modules/object_detection}/RT-DETR-X.yaml +0 -0
  885. /paddlex/configs/{object_detection → modules/object_detection}/YOLOX-L.yaml +0 -0
  886. /paddlex/configs/{object_detection → modules/object_detection}/YOLOX-M.yaml +0 -0
  887. /paddlex/configs/{object_detection → modules/object_detection}/YOLOX-N.yaml +0 -0
  888. /paddlex/configs/{object_detection → modules/object_detection}/YOLOX-S.yaml +0 -0
  889. /paddlex/configs/{object_detection → modules/object_detection}/YOLOX-T.yaml +0 -0
  890. /paddlex/configs/{object_detection → modules/object_detection}/YOLOv3-DarkNet53.yaml +0 -0
  891. /paddlex/configs/{object_detection → modules/object_detection}/YOLOv3-MobileNetV3.yaml +0 -0
  892. /paddlex/configs/{object_detection → modules/object_detection}/YOLOv3-ResNet50_vd_DCN.yaml +0 -0
  893. /paddlex/configs/{pedestrian_attribute → modules/pedestrian_attribute_recognition}/PP-LCNet_x1_0_pedestrian_attribute.yaml +0 -0
  894. /paddlex/configs/{text_detection_seal → modules/seal_text_detection}/PP-OCRv4_mobile_seal_det.yaml +0 -0
  895. /paddlex/configs/{text_detection_seal → modules/seal_text_detection}/PP-OCRv4_server_seal_det.yaml +0 -0
  896. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/Deeplabv3-R101.yaml +0 -0
  897. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/Deeplabv3-R50.yaml +0 -0
  898. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/Deeplabv3_Plus-R101.yaml +0 -0
  899. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/Deeplabv3_Plus-R50.yaml +0 -0
  900. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/OCRNet_HRNet-W18.yaml +0 -0
  901. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/OCRNet_HRNet-W48.yaml +0 -0
  902. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/PP-LiteSeg-B.yaml +0 -0
  903. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/PP-LiteSeg-T.yaml +0 -0
  904. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/SegFormer-B0.yaml +0 -0
  905. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/SegFormer-B1.yaml +0 -0
  906. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/SegFormer-B2.yaml +0 -0
  907. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/SegFormer-B3.yaml +0 -0
  908. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/SegFormer-B4.yaml +0 -0
  909. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/SegFormer-B5.yaml +0 -0
  910. /paddlex/configs/{small_object_detection → modules/small_object_detection}/PP-YOLOE_plus_SOD-L.yaml +0 -0
  911. /paddlex/configs/{small_object_detection → modules/small_object_detection}/PP-YOLOE_plus_SOD-S.yaml +0 -0
  912. /paddlex/configs/{small_object_detection → modules/small_object_detection}/PP-YOLOE_plus_SOD-largesize-L.yaml +0 -0
  913. /paddlex/configs/{table_recognition → modules/table_structure_recognition}/SLANet.yaml +0 -0
  914. /paddlex/configs/{table_recognition → modules/table_structure_recognition}/SLANet_plus.yaml +0 -0
  915. /paddlex/configs/{text_detection → modules/text_detection}/PP-OCRv4_mobile_det.yaml +0 -0
  916. /paddlex/configs/{text_detection → modules/text_detection}/PP-OCRv4_server_det.yaml +0 -0
  917. /paddlex/configs/{text_recognition → modules/text_recognition}/PP-OCRv4_mobile_rec.yaml +0 -0
  918. /paddlex/configs/{text_recognition → modules/text_recognition}/PP-OCRv4_server_rec.yaml +0 -0
  919. /paddlex/configs/{text_recognition → modules/text_recognition}/ch_RepSVTR_rec.yaml +0 -0
  920. /paddlex/configs/{text_recognition → modules/text_recognition}/ch_SVTRv2_rec.yaml +0 -0
  921. /paddlex/configs/{ts_anomaly_detection → modules/ts_anomaly_detection}/AutoEncoder_ad.yaml +0 -0
  922. /paddlex/configs/{ts_anomaly_detection → modules/ts_anomaly_detection}/DLinear_ad.yaml +0 -0
  923. /paddlex/configs/{ts_anomaly_detection → modules/ts_anomaly_detection}/Nonstationary_ad.yaml +0 -0
  924. /paddlex/configs/{ts_anomaly_detection → modules/ts_anomaly_detection}/PatchTST_ad.yaml +0 -0
  925. /paddlex/configs/{ts_anomaly_detection → modules/ts_anomaly_detection}/TimesNet_ad.yaml +0 -0
  926. /paddlex/configs/{ts_classification → modules/ts_classification}/TimesNet_cls.yaml +0 -0
  927. /paddlex/configs/{ts_forecast → modules/ts_forecast}/DLinear.yaml +0 -0
  928. /paddlex/configs/{ts_forecast → modules/ts_forecast}/NLinear.yaml +0 -0
  929. /paddlex/configs/{ts_forecast → modules/ts_forecast}/Nonstationary.yaml +0 -0
  930. /paddlex/configs/{ts_forecast → modules/ts_forecast}/PatchTST.yaml +0 -0
  931. /paddlex/configs/{ts_forecast → modules/ts_forecast}/RLinear.yaml +0 -0
  932. /paddlex/configs/{ts_forecast → modules/ts_forecast}/TiDE.yaml +0 -0
  933. /paddlex/configs/{ts_forecast → modules/ts_forecast}/TimesNet.yaml +0 -0
  934. /paddlex/configs/{vehicle_attribute → modules/vehicle_attribute_recognition}/PP-LCNet_x1_0_vehicle_attribute.yaml +0 -0
  935. /paddlex/configs/{vehicle_detection → modules/vehicle_detection}/PP-YOLOE-L_vehicle.yaml +0 -0
  936. /paddlex/configs/{vehicle_detection → modules/vehicle_detection}/PP-YOLOE-S_vehicle.yaml +0 -0
  937. /paddlex/inference/{results/utils → common}/__init__.py +0 -0
  938. {paddlex-3.0.0b2.dist-info → paddlex-3.0.0rc0.dist-info}/LICENSE +0 -0
  939. {paddlex-3.0.0b2.dist-info → paddlex-3.0.0rc0.dist-info}/entry_points.txt +0 -0
  940. {paddlex-3.0.0b2.dist-info → paddlex-3.0.0rc0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,977 @@
1
+ # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Any, Dict, Optional, Union, List, Tuple
16
+ import os
17
+ import re
18
+ import cv2
19
+ import copy
20
+ import json
21
+ import base64
22
+ import numpy as np
23
+ from .pipeline_base import PP_ChatOCR_Pipeline
24
+ from ...common.reader import ReadImage
25
+ from ...common.batch_sampler import ImageBatchSampler
26
+ from ....utils import logging
27
+ from ....utils.file_interface import custom_open
28
+ from ...utils.pp_option import PaddlePredictorOption
29
+ from ..layout_parsing.result import LayoutParsingResult
30
+ from ..components.chat_server import BaseChat
31
+
32
+
33
+ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
34
+ """PP-ChatOCRv4 Pipeline"""
35
+
36
+ entities = ["PP-ChatOCRv4-doc"]
37
+
38
def __init__(
    self,
    config: Dict,
    device: str = None,
    pp_option: PaddlePredictorOption = None,
    use_hpip: bool = False,
    initial_predictor: bool = True,
) -> None:
    """Initializes the PP-ChatOCRv4 pipeline.

    Args:
        config (Dict): Configuration dictionary containing various settings.
            "pipeline_name" is required; "use_layout_parser" and
            "use_mllm_predict" are optional and default to True.
        device (str, optional): Device to run the predictions on. Defaults to None.
        pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
        use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
        initial_predictor (bool, optional): Whether to build the visual, chat,
            retriever and MLLM predictors right away. Defaults to True.
    """

    super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)

    self.pipeline_name = config["pipeline_name"]
    # The config is kept so that predictors can be built later from it.
    self.config = config
    self.use_layout_parser = config.get("use_layout_parser", True)
    self.use_mllm_predict = config.get("use_mllm_predict", True)

    # Predictors start out unset; the initializers below fill them in.
    self.layout_parsing_pipeline = None
    self.chat_bot = None
    self.retriever = None
    self.mllm_chat_bot = None

    if initial_predictor:
        self.inintial_visual_predictor(config)
        self.inintial_chat_predictor(config)
        self.inintial_retriever_predictor(config)
        self.inintial_mllm_predictor(config)

    self.batch_sampler = ImageBatchSampler(batch_size=1)
    self.img_reader = ReadImage(format="BGR")

    # Subtracted from `min_characters` in build_vector to get the HTML-length
    # cut-off above which a table's text is added to the retrieval corpus.
    self.table_structure_len_max = 500
79
+
80
def inintial_visual_predictor(self, config: dict) -> None:
    """
    Set up the layout parsing sub-pipeline described by the configuration.

    The "use_layout_parser" switch is re-read from `config` (default True)
    and stored on the instance. The sub-pipeline is only created when the
    switch is truthy.

    Args:
        config (dict): The configuration dictionary; the sub-pipeline
            settings are looked up under SubPipelines -> LayoutParser.
    Returns:
        None
    """
    layout_enabled = config.get("use_layout_parser", True)
    self.use_layout_parser = layout_enabled
    if not layout_enabled:
        return

    # A marker dict is handed to create_pipeline when the section is missing.
    fallback = {"pipeline_config_error": "config error for layout_parsing_pipeline!"}
    sub_pipelines = config.get("SubPipelines", {})
    parser_config = sub_pipelines.get("LayoutParser", fallback)
    self.layout_parsing_pipeline = self.create_pipeline(parser_config)
99
+
100
def inintial_retriever_predictor(self, config: dict) -> None:
    """
    Create the retriever from the configuration and store it on the instance.

    Args:
        config (dict): The configuration dictionary; the retriever settings
            are looked up under SubModules -> LLM_Retriever.
    Returns:
        None
    """
    from .. import create_retriever

    # A marker dict is handed to create_retriever when the section is missing.
    fallback = {"retriever_config_error": "config error for llm retriever!"}
    sub_modules = config.get("SubModules", {})
    self.retriever = create_retriever(sub_modules.get("LLM_Retriever", fallback))
117
+
118
def inintial_chat_predictor(self, config: dict) -> None:
    """
    Create the LLM chat bot and the two key-information-extraction prompt
    builders (common text and table) from the configuration.

    Args:
        config (dict): The configuration dictionary; settings are looked up
            under SubModules -> LLM_Chat and
            SubModules -> PromptEngneering -> KIE_CommonText / KIE_Table.
    Returns:
        None
    """
    from .. import create_chat_bot

    sub_modules = config.get("SubModules", {})
    llm_chat_config = sub_modules.get(
        "LLM_Chat",
        {"chat_bot_config_error": "config error for llm chat bot!"},
    )
    self.chat_bot = create_chat_bot(llm_chat_config)

    from .. import create_prompt_engineering

    # Both prompt builders live under the same (misspelled) config section.
    prompt_section = config.get("SubModules", {}).get("PromptEngneering", {})

    common_text_config = prompt_section.get(
        "KIE_CommonText",
        {"pe_config_error": "config error for text_pe!"},
    )
    self.text_pe = create_prompt_engineering(common_text_config)

    table_config = prompt_section.get(
        "KIE_Table",
        {"pe_config_error": "config error for table_pe!"},
    )
    self.table_pe = create_prompt_engineering(table_config)
158
+
159
def inintial_mllm_predictor(self, config: dict) -> None:
    """
    Create the multimodal LLM chat bot and the ensemble prompt builder.

    The "use_mllm_predict" switch is re-read from `config` (default True)
    and stored on the instance; nothing else is created when it is falsy.

    Args:
        config (dict): The configuration dictionary; settings are looked up
            under SubModules -> MLLM_Chat and
            SubModules -> PromptEngneering -> Ensemble.
    Returns:
        None
    """
    from .. import create_chat_bot, create_prompt_engineering

    mllm_enabled = config.get("use_mllm_predict", True)
    self.use_mllm_predict = mllm_enabled
    if not mllm_enabled:
        return

    sub_modules = config.get("SubModules", {})
    bot_config = sub_modules.get(
        "MLLM_Chat",
        {"mllm_chat_bot_config": "config error for mllm chat bot!"},
    )
    self.mllm_chat_bot = create_chat_bot(bot_config)

    prompt_section = config.get("SubModules", {}).get("PromptEngneering", {})
    ensemble_config = prompt_section.get(
        "Ensemble",
        {"pe_config_error": "config error for ensemble_pe!"},
    )
    self.ensemble_pe = create_prompt_engineering(ensemble_config)
188
+
189
def decode_visual_result(self, layout_parsing_result: LayoutParsingResult) -> dict:
    """
    Extract the text and table information used downstream from one layout
    parsing result.

    Args:
        layout_parsing_result (LayoutParsingResult): The result of layout
            parsing; its "parsing_res_list" and "table_res_list" entries
            are read.

    Returns:
        dict: With keys "normal_text_dict" (block text grouped under
            "words in <label>", table and formula blocks skipped),
            "table_text_list", "table_html_list" and "table_nei_text_list".
    """
    skipped_labels = ("table", "formula")
    text_by_label = {}
    for block in layout_parsing_result["parsing_res_list"]:
        block_label = block["block_label"]
        block_text = block["block_content"]
        if block_label in skipped_labels:
            continue
        slot = f"words in {block_label}"
        if slot in text_by_label:
            # Later blocks with the same label are appended on a new line.
            text_by_label[slot] += f"\n {block_text}"
        else:
            text_by_label[slot] = block_text

    tables = layout_parsing_result["table_res_list"]
    return {
        "normal_text_dict": text_by_label,
        "table_text_list": [
            " ".join(table["table_ocr_pred"]["rec_texts"]) for table in tables
        ],
        "table_html_list": [table["pred_html"] for table in tables],
        "table_nei_text_list": [table["neighbor_texts"] for table in tables],
    }
228
+
229
+ # Function to perform visual prediction on input images
230
def visual_predict(
    self,
    input: Union[str, List[str], np.ndarray, List[np.ndarray]],
    use_doc_orientation_classify: Optional[bool] = None,
    use_doc_unwarping: Optional[bool] = None,
    use_general_ocr: Optional[bool] = None,
    use_seal_recognition: Optional[bool] = None,
    use_table_recognition: Optional[bool] = None,
    layout_threshold: Optional[Union[float, dict]] = None,
    layout_nms: Optional[bool] = None,
    layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None,
    layout_merge_bboxes_mode: Optional[str] = None,
    text_det_limit_side_len: Optional[int] = None,
    text_det_limit_type: Optional[str] = None,
    text_det_thresh: Optional[float] = None,
    text_det_box_thresh: Optional[float] = None,
    text_det_unclip_ratio: Optional[float] = None,
    text_rec_score_thresh: Optional[float] = None,
    seal_det_limit_side_len: Optional[int] = None,
    seal_det_limit_type: Optional[str] = None,
    seal_det_thresh: Optional[float] = None,
    seal_det_box_thresh: Optional[float] = None,
    seal_det_unclip_ratio: Optional[float] = None,
    seal_rec_score_thresh: Optional[float] = None,
    **kwargs,
) -> dict:
    """
    Run the layout parsing sub-pipeline on the input and yield, per result,
    the raw layout parsing result together with the decoded visual info.

    This is a generator. When the layout parser is disabled it yields a
    single {"error": ...} dict and stops.

    Args:
        input (Union[str, list[str], np.ndarray, list[np.ndarray]]): Input image path, list of image paths,
            numpy array of an image, or list of numpy arrays.
        use_doc_orientation_classify (bool): Flag to use document orientation classification.
        use_doc_unwarping (bool): Flag to use document unwarping.
        use_general_ocr (bool): Flag to use general OCR.
        use_seal_recognition (bool): Flag to use seal recognition.
        use_table_recognition (bool): Flag to use table recognition.
        layout_threshold (Optional[Union[float, dict]]): The threshold value to filter out low-confidence predictions. Default is None.
        layout_nms (Optional[bool]): Whether to use layout-aware NMS. Defaults to None.
        layout_unclip_ratio (Optional[Union[float, Tuple[float, float], dict]]): The ratio of unclipping the bounding box.
            Defaults to None.
        layout_merge_bboxes_mode (Optional[str]): The mode for merging bounding boxes. Defaults to None.
        text_det_limit_side_len (Optional[int]): Maximum side length for text detection.
        text_det_limit_type (Optional[str]): Type of limit to apply for text detection.
        text_det_thresh (Optional[float]): Threshold for text detection.
        text_det_box_thresh (Optional[float]): Threshold for text detection boxes.
        text_det_unclip_ratio (Optional[float]): Ratio for unclipping text detection boxes.
        text_rec_score_thresh (Optional[float]): Score threshold for text recognition.
        seal_det_limit_side_len (Optional[int]): Maximum side length for seal detection.
        seal_det_limit_type (Optional[str]): Type of limit to apply for seal detection.
        seal_det_thresh (Optional[float]): Threshold for seal detection.
        seal_det_box_thresh (Optional[float]): Threshold for seal detection boxes.
        seal_det_unclip_ratio (Optional[float]): Ratio for unclipping seal detection boxes.
        seal_rec_score_thresh (Optional[float]): Score threshold for seal recognition.
        **kwargs: Additional keyword arguments. They are accepted but not
            forwarded to the layout parsing pipeline.

    Yields:
        dict: {"layout_parsing_result": ..., "visual_info": ...} for each
            result, or a single {"error": ...} dict when the layout parser
            is disabled.
    """
    if not self.use_layout_parser:
        message = "The models for layout parser are not initialized."
        logging.error(message)
        yield {"error": message}
        # Stop here: with the parser disabled no pipeline can be created,
        # so falling through would call `.predict` on None.
        return

    if self.layout_parsing_pipeline is None:
        logging.warning(
            "The layout parsing pipeline is not initialized, will initialize it now."
        )
        self.inintial_visual_predictor(self.config)

    for layout_parsing_result in self.layout_parsing_pipeline.predict(
        input,
        use_doc_orientation_classify=use_doc_orientation_classify,
        use_doc_unwarping=use_doc_unwarping,
        use_general_ocr=use_general_ocr,
        use_seal_recognition=use_seal_recognition,
        use_table_recognition=use_table_recognition,
        layout_threshold=layout_threshold,
        layout_nms=layout_nms,
        layout_unclip_ratio=layout_unclip_ratio,
        layout_merge_bboxes_mode=layout_merge_bboxes_mode,
        text_det_limit_side_len=text_det_limit_side_len,
        text_det_limit_type=text_det_limit_type,
        text_det_thresh=text_det_thresh,
        text_det_box_thresh=text_det_box_thresh,
        text_det_unclip_ratio=text_det_unclip_ratio,
        text_rec_score_thresh=text_rec_score_thresh,
        seal_det_box_thresh=seal_det_box_thresh,
        seal_det_limit_side_len=seal_det_limit_side_len,
        seal_det_limit_type=seal_det_limit_type,
        seal_det_thresh=seal_det_thresh,
        seal_det_unclip_ratio=seal_det_unclip_ratio,
        seal_rec_score_thresh=seal_rec_score_thresh,
    ):
        yield {
            "layout_parsing_result": layout_parsing_result,
            "visual_info": self.decode_visual_result(layout_parsing_result),
        }
336
+
337
def save_visual_info_list(self, visual_info: dict, save_path: str) -> None:
    """
    Write the visual info to a file as a single-line JSON list.

    Args:
        visual_info (dict): One visual info object or a list of them; a
            single object is wrapped in a list before writing.
        save_path (str): The file path to write to.

    Returns:
        None
    """
    records = visual_info if isinstance(visual_info, list) else [visual_info]

    with open(save_path, "w") as out_file:
        serialized = json.dumps(records, ensure_ascii=False)
        out_file.write(f"{serialized}\n")
356
+
357
def load_visual_info_list(self, data_path: str) -> List[dict]:
    """
    Read a visual info list back from a JSON file.

    Only the first line of the file is parsed.

    Args:
        data_path (str): The path to the JSON file containing visual info.

    Returns:
        list[dict]: The value decoded from the first line of the file.
    """
    with open(data_path, "r") as in_file:
        first_line = in_file.readline()
        return json.loads(first_line)
371
+
372
def merge_visual_info_list(
    self, visual_info_list: List[dict]
) -> Tuple[list, list, list, list]:
    """
    Merge a list of visual info results into four flat lists.

    Newlines are removed from every normal-text value. The cleaned text is
    stored in new dicts, so the caller's `visual_info_list` is left
    untouched.

    Args:
        visual_info_list (list[dict]): A list of visual info results, each
            with the keys "normal_text_dict", "table_text_list",
            "table_html_list" and "table_nei_text_list".

    Returns:
        tuple[list, list, list, list]: Four lists, in this order: the
            normal text dicts (one per input item), all table texts, all
            table HTML strings, and all table neighbor texts.
    """
    all_normal_text_list = []
    all_table_text_list = []
    all_table_html_list = []
    all_table_nei_text_list = []
    for single_visual_info in visual_info_list:
        # Build a fresh dict rather than rewriting the input in place.
        cleaned_text_dict = {
            key: text.replace("\n", "")
            for key, text in single_visual_info["normal_text_dict"].items()
        }
        all_normal_text_list.append(cleaned_text_dict)
        all_table_text_list.extend(single_visual_info["table_text_list"])
        all_table_html_list.extend(single_visual_info["table_html_list"])
        all_table_nei_text_list.extend(single_visual_info["table_nei_text_list"])
    return (
        all_normal_text_list,
        all_table_text_list,
        all_table_html_list,
        all_table_nei_text_list,
    )
407
+
408
def build_vector(
    self,
    visual_info: dict,
    min_characters: int = 3500,
    block_size: int = 300,
    flag_save_bytes_vector: bool = False,
    retriever_config: dict = None,
) -> dict:
    """
    Build retrieval data from visual information.

    The text items are only turned into a vector database when their total
    length exceeds `min_characters`; otherwise the raw items are returned.

    Args:
        visual_info (dict): One visual info object or a list of them.
        min_characters (int): Total text length above which a vector
            database is generated, defaults to 3500.
        block_size (int): The chunk size passed to the retriever.
        flag_save_bytes_vector (bool): Whether to encode the generated vector
            store to bytes, defaults to False.
        retriever_config (dict): Configuration for a one-off retriever; when
            None the pipeline's own retriever is used (and created first if
            it does not exist yet).

    Returns:
        dict: Contains "vector", "flag_too_short_text" and
            "flag_save_bytes_vector", plus "model_name" and "block_size"
            when a vector database was generated.
    """
    visual_info_list = visual_info if isinstance(visual_info, list) else [visual_info]

    if retriever_config is None:
        if self.retriever is None:
            logging.warning(
                "The retriever is not initialized,will initialize it now."
            )
            self.inintial_retriever_predictor(self.config)
        retriever = self.retriever
    else:
        from .. import create_retriever

        retriever = create_retriever(retriever_config)

    (
        normal_text_dicts,
        table_texts,
        table_htmls,
        table_neighbor_texts,
    ) = self.merge_visual_info_list(visual_info_list)

    chunks = []
    for text_dict in normal_text_dicts:
        chunks.extend(f"{label}:{text}\n" for label, text in text_dict.items())

    # Only tables whose HTML is longer than this cut-off are added as text.
    html_length_cutoff = min_characters - self.table_structure_len_max
    chunks.extend(
        f"table:{text}\t{neighbor}"
        for html, text, neighbor in zip(table_htmls, table_texts, table_neighbor_texts)
        if len(html) > html_length_cutoff
    )

    vector_info = {"flag_save_bytes_vector": False}
    if len("".join(chunks)) <= min_characters:
        # Too little text: hand back the raw items instead of a vector store.
        vector_info["flag_too_short_text"] = True
        vector_info["vector"] = chunks
        return vector_info

    vector_info["flag_too_short_text"] = False
    vector_info["model_name"] = retriever.model_name
    vector_info["block_size"] = block_size
    vector_store = retriever.generate_vector_database(chunks, block_size=block_size)
    vector_info["vector"] = vector_store
    if flag_save_bytes_vector:
        vector_info["vector"] = retriever.encode_vector_store_to_bytes(vector_store)
        vector_info["flag_save_bytes_vector"] = True
    return vector_info
486
+
487
def save_vector(
    self, vector_info: dict, save_path: str, retriever_config: dict = None
) -> None:
    """
    Save vector info to a file as a single line of JSON.

    A vector store that has not been encoded yet is encoded to bytes on a
    copy before writing, so the caller's `vector_info` is not modified.

    Args:
        vector_info (dict): Vector info with the keys "vector",
            "flag_too_short_text" and "flag_save_bytes_vector".
        save_path (str): The file path to write to. Missing parent
            directories are created; a bare file name is written to the
            current directory.
        retriever_config (dict): Configuration for a one-off retriever; when
            None the pipeline's own retriever is used (and created first if
            it does not exist yet).

    Returns:
        None
    """
    directory = os.path.dirname(save_path)
    # dirname is "" for a bare file name, and os.makedirs("") raises.
    if directory:
        os.makedirs(directory, exist_ok=True)

    if retriever_config is not None:
        from .. import create_retriever

        retriever = create_retriever(retriever_config)
    else:
        if self.retriever is None:
            logging.warning(
                "The retriever is not initialized,will initialize it now."
            )
            self.inintial_retriever_predictor(self.config)
        retriever = self.retriever

    vector_info_data = copy.deepcopy(vector_info)
    if (
        not vector_info["flag_too_short_text"]
        and not vector_info["flag_save_bytes_vector"]
    ):
        # A raw vector store must be encoded before it is handed to json.dumps.
        vector_info_data["vector"] = retriever.encode_vector_store_to_bytes(
            vector_info_data["vector"]
        )
        vector_info_data["flag_save_bytes_vector"] = True

    with custom_open(save_path, "w") as fout:
        fout.write(json.dumps(vector_info_data, ensure_ascii=False) + "\n")
    return
519
+
520
def load_vector(self, data_path: str, retriever_config: dict = None) -> dict:
    """
    Load vector info from the first line of a JSON file.

    Args:
        data_path (str): The path to the file holding the vector info.
        retriever_config (dict): Configuration for a one-off retriever; when
            None the pipeline's own retriever is used (and created first if
            it does not exist yet).

    Returns:
        dict: The vector info, with a bytes-encoded vector store decoded
            back and "flag_save_bytes_vector" reset to False; or an
            {"error": ...} dict when a required key is missing.
    """
    if retriever_config is None:
        if self.retriever is None:
            logging.warning(
                "The retriever is not initialized,will initialize it now."
            )
            self.inintial_retriever_predictor(self.config)
        retriever = self.retriever
    else:
        from .. import create_retriever

        retriever = create_retriever(retriever_config)

    with open(data_path, "r") as fin:
        vector_info = json.loads(fin.readline())

    required_keys = ("flag_too_short_text", "flag_save_bytes_vector", "vector")
    if not all(key in vector_info for key in required_keys):
        logging.error("Invalid vector info.")
        return {"error": "Invalid vector info when load vector!"}

    if vector_info["flag_save_bytes_vector"]:
        encoded_store = vector_info["vector"]
        vector_info["vector"] = retriever.decode_vector_store_from_bytes(encoded_store)
        vector_info["flag_save_bytes_vector"] = False

    return vector_info
552
+
553
def format_key(self, key_list: Union[str, List[str]]) -> List[str]:
    """
    Normalize the requested keys into a list of strings.

    A list has the non-breaking space (U+00A0) in each key replaced by a
    normal space. A string has tab, newline, carriage-return, form-feed and
    vertical-tab characters removed and is then split on commas; the
    fullwidth comma (U+FF0C) is accepted as a separator as well.

    Args:
        key_list (str|list[str]): A string or a list of strings representing the keys.

    Returns:
        list[str]: The formatted keys; an empty list for an empty string or
            an unsupported input type.
    """
    if key_list == "":
        return []

    if isinstance(key_list, list):
        return [key.replace("\xa0", " ") for key in key_list]

    if isinstance(key_list, str):
        key_list = re.sub(r"[\t\n\r\f\v]", "", key_list)
        # Map the fullwidth comma to "," so that both separators split the
        # string; replacing "," with itself had no effect.
        return key_list.replace("\uff0c", ",").split(",")

    return []
576
+
577
def mllm_pred(
    self,
    input: Union[str, np.ndarray],
    key_list: Union[str, List[str]],
    mllm_chat_bot_config=None,
) -> dict:
    """
    Ask the multimodal LLM about each key, using the input image.

    Args:
        input (Union[str, np.ndarray]): Input image path, or numpy array of an image.
            Lists and PDF paths are rejected.
        key_list (Union[str, list[str]]): A single key or a list of keys to extract information.
        mllm_chat_bot_config (dict): Configuration for a one-off MLLM chat
            bot; when None the pipeline's own MLLM chat bot is used.
    Returns:
        dict: {"mllm_res": result}, where result maps each key to the
            model's answer, or is an error message string when the request
            could not be served.
    """
    if not self.use_mllm_predict:
        logging.error("MLLM prediction is disabled.")
        return {"mllm_res": "Error:MLLM prediction is disabled!"}

    key_list = self.format_key(key_list)
    if len(key_list) == 0:
        return {"mllm_res": "Error:输入的key_list无效!"}

    if isinstance(input, list):
        logging.error("Input is a list, but it's not supported here.")
        return {"mllm_res": "Error:Input is a list, but it's not supported here!"}
    # Compare case-insensitively so that ".PDF" is rejected as well.
    if isinstance(input, str) and input.lower().endswith(".pdf"):
        logging.error("MLMM prediction does not support PDF currently!")
        return {"mllm_res": "Error:MLMM prediction does not support PDF currently!"}

    if self.mllm_chat_bot is None:
        logging.warning(
            "The MLLM chat bot is not initialized,will initialize it now."
        )
        self.inintial_mllm_predictor(self.config)

    if mllm_chat_bot_config is not None:
        from .. import create_chat_bot

        mllm_chat_bot = create_chat_bot(mllm_chat_bot_config)
    else:
        mllm_chat_bot = self.mllm_chat_bot

    for image_array in self.img_reader([input]):
        # tobytes() replaces the deprecated ndarray.tostring() alias.
        image_bytes = cv2.imencode(".jpg", image_array)[1].tobytes()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")
        result = {}
        for key in key_list:
            prompt = (
                str(key)
                + "\n请用图片中完整出现的内容回答,可以是单词、短语或句子,针对问题回答尽可能详细和完整,并保持格式、单位、符号和标点都与图片中的文字内容完全一致。"
            )
            mllm_chat_bot_result = mllm_chat_bot.generate_chat_results(
                prompt=prompt, image=image_base64
            )["content"]
            if mllm_chat_bot_result is None:
                return {"mllm_res": "大模型调用失败"}
            result[key] = mllm_chat_bot_result
        # Only a single image is accepted, so the first one is the answer.
        return {"mllm_res": result}
638
+
639
def generate_and_merge_chat_results(
    self,
    chat_bot: BaseChat,
    prompt: str,
    key_list: list,
    final_results: dict,
    failed_results: list,
) -> None:
    """
    Query the chat bot once and fold its answers into `final_results`.

    Args:
        chat_bot (BaseChat): The chat bot used to answer the prompt.
        prompt (str): The prompt sent to the chat bot.
        key_list (list): Keys still waiting for an answer. Every key that gets
            an accepted answer is removed from this list in place.
        final_results (dict): Accumulator updated in place. Accepted answers are
            stored under their key; reasoning text, when the chat bot returns
            any, is appended to the list stored under "reasoning_content".
        failed_results (list): Values that count as "no answer" and are ignored.

    Returns:
        None
    """
    response = chat_bot.generate_chat_results(prompt)
    answer = response["content"]
    reasoning = response["reasoning_content"]

    # Reasoning is recorded even when the answer itself is unusable.
    if reasoning is not None:
        final_results.setdefault("reasoning_content", []).append(reasoning)

    if answer is None:
        logging.error(
            "chat bot error: \n [prompt:]\n %s\n [result:] %s\n"
            % (prompt, chat_bot.ERROR_MASSAGE)
        )
        return

    parsed = chat_bot.fix_llm_result_format(answer)

    for key, value in parsed.items():
        # Skip placeholder answers and keys that are not (or no longer) pending.
        if value in failed_results or key not in key_list:
            continue
        key_list.remove(key)
        final_results[key] = value
684
+
685
def get_related_normal_text(
    self,
    retriever_config: dict,
    use_vector_retrieval: bool,
    vector_info: dict,
    key_list: List[str],
    all_normal_text_list: list,
    min_characters: int,
) -> str:
    """
    Build the text context that is handed to the LLM for the given keys.

    Args:
        retriever_config (dict): Optional configuration used to create a
            dedicated retriever for this call; when None, `self.retriever` is used.
        use_vector_retrieval (bool): Whether to use vector retrieval.
        vector_info (dict): Vector information. The keys read here are
            "vector", "flag_too_short_text", "model_name" and
            "flag_save_bytes_vector".
        key_list (list[str]): Keys used as retrieval queries.
        all_normal_text_list (list): List of dicts mapping a text type to its text.
        min_characters (int): Forwarded to the retriever as `min_characters`, and
            used as the length above which the concatenated plain text triggers
            a "too long" warning.

    Returns:
        str: Related normal text.
    """
    # Plain concatenation when retrieval is off or no vector data was supplied.
    if not use_vector_retrieval or vector_info is None:
        pieces = [
            f"{label}:{content}\n"
            for text_block in all_normal_text_list
            for label, content in text_block.items()
        ]
        related_text = "".join(pieces)
        if len(related_text) > min_characters:
            logging.warning(
                "The input text content is too long, the large language model may truncate it."
            )
        return related_text

    if retriever_config is None:
        if self.retriever is None:
            logging.warning(
                "The retriever is not initialized,will initialize it now."
            )
            self.inintial_retriever_predictor(self.config)
        retriever = self.retriever
    else:
        from .. import create_retriever

        retriever = create_retriever(retriever_config)

    queries = [f"{key}" for key in key_list]
    vector = vector_info["vector"]

    if vector_info["flag_too_short_text"]:
        # In this case "vector" is joined as a sequence of strings and
        # returned as-is, without calling the retriever.
        return "".join(vector) if len(vector) > 0 else ""

    assert (
        vector_info["model_name"] == retriever.model_name
    ), f"The vector model name ({vector_info['model_name']}) does not match the retriever model name ({retriever.model_name}). Please check your retriever config."
    if vector_info["flag_save_bytes_vector"]:
        vector = retriever.decode_vector_store_from_bytes(vector)
    return retriever.similarity_retrieval(
        queries, vector, topk=50, min_characters=min_characters
    )
750
+
751
def ensemble_ocr_llm_mllm(
    self,
    chat_bot: BaseChat,
    key_list: List[str],
    ocr_llm_predict_dict: dict,
    mllm_predict_dict: dict,
) -> dict:
    """
    Ensemble OCR_LLM and MLLM predictions based on given key list.

    For every key, when both sources provide a non-empty prediction the chat
    bot is asked to arbitrate; the MLLM prediction is chosen only when the
    chat bot's answer for that key contains "B". In every other case
    (including a failed or malformed chat bot answer) the OCR-LLM prediction
    is kept. When only one source has a non-empty prediction, that one is used.

    Args:
        chat_bot (BaseChat): The chat bot used to arbitrate between predictions.
        key_list (list[str]): List of keys to retrieve predictions.
        ocr_llm_predict_dict (dict): Dictionary containing OCR LLM predictions.
        mllm_predict_dict (dict): Dictionary containing mLLM predictions.

    Returns:
        dict: A dictionary with final predictions. Keys with no non-empty
            prediction are omitted. Reasoning text returned by the chat bot is
            collected in a list under "reasoning_content".
    """
    final_predict_dict = {}

    for key in key_list:
        ocr_llm_predict = ""
        mllm_predict = ""
        if key in ocr_llm_predict_dict:
            ocr_llm_predict = ocr_llm_predict_dict[key]
        if key in mllm_predict_dict:
            mllm_predict = mllm_predict_dict[key]

        if ocr_llm_predict != "" and mllm_predict != "":
            # Default to the OCR-LLM answer so the key is never lost when
            # the arbitration call fails or omits the key.
            predict = ocr_llm_predict
            prompt = self.ensemble_pe.generate_prompt(
                key, ocr_llm_predict, mllm_predict
            )
            llm_result = chat_bot.generate_chat_results(prompt)
            llm_result_content = llm_result["content"]
            llm_result_reasoning_content = llm_result["reasoning_content"]
            if llm_result_reasoning_content is not None:
                final_predict_dict.setdefault("reasoning_content", []).append(
                    llm_result_reasoning_content
                )
            if llm_result_content is not None:
                llm_result_content = chat_bot.fix_llm_result_format(
                    llm_result_content
                )
                if key in llm_result_content and "B" in llm_result_content[key]:
                    predict = mllm_predict
        elif ocr_llm_predict != "":
            predict = ocr_llm_predict
        else:
            # Also covers an empty OCR-LLM value: fall through to the MLLM
            # answer (which is "" when the key is missing there too).
            predict = mllm_predict

        if predict != "":
            final_predict_dict[key] = predict
    return final_predict_dict
815
+
816
def chat(
    self,
    key_list: Union[str, List[str]],
    visual_info: dict,
    use_vector_retrieval: bool = True,
    vector_info: dict = None,
    min_characters: int = 3500,
    text_task_description: str = None,
    text_output_format: str = None,
    text_rules_str: str = None,
    text_few_shot_demo_text_content: str = None,
    text_few_shot_demo_key_value_list: str = None,
    table_task_description: str = None,
    table_output_format: str = None,
    table_rules_str: str = None,
    table_few_shot_demo_text_content: str = None,
    table_few_shot_demo_key_value_list: str = None,
    mllm_predict_info: dict = None,
    mllm_integration_strategy: str = "integration",
    chat_bot_config: dict = None,
    retriever_config: dict = None,
) -> dict:
    """
    Answer the requested keys from the visual information with the LLM.

    The keys are first asked against the plain text, then the keys still
    unanswered are asked against each table in turn. Finally the result may
    be combined with MLLM predictions.

    Args:
        key_list (Union[str, list[str]]): A single key or a list of keys to extract information.
        visual_info (dict): The visual information result; a list of such results is accepted too.
        use_vector_retrieval (bool): Whether to use vector retrieval.
        vector_info (dict): The vector information for retrieval.
        min_characters (int): Character budget, defaults to 3500. It is forwarded to the
            text retrieval step, and a table is only queried when its HTML length does
            not exceed `min_characters - self.table_structure_len_max`.
        text_task_description (str): The description of the text task.
        text_output_format (str): The output format for text results.
        text_rules_str (str): The rules for generating text results.
        text_few_shot_demo_text_content (str): The text content for few-shot demos.
        text_few_shot_demo_key_value_list (str): The key-value list for few-shot demos.
        table_task_description (str): The description of the table task.
        table_output_format (str): The output format for table results.
        table_rules_str (str): The rules for generating table results.
        table_few_shot_demo_text_content (str): The text content for table few-shot demos.
        table_few_shot_demo_key_value_list (str): The key-value list for table few-shot demos.
        mllm_predict_info (dict): The dictionary of mLLM predicts.
        mllm_integration_strategy (str): The integration strategy of mLLM and LLM, defaults to
            "integration", options are "integration", "llm_only" and "mllm_only".
        chat_bot_config (dict): The parameters for LLM chatbot, including api_type, api_key... refer to config file for more details.
        retriever_config (dict): The parameters for LLM retriever, including api_type, api_key... refer to config file for more details.

    Returns:
        dict: `{"chat_res": ...}` holding the answers, or an error string.
    """
    key_list = self.format_key(key_list)
    # Pending keys are removed from `key_list` as they get answered; the
    # untouched copy is what the MLLM ensemble step iterates over.
    key_list_ori = key_list.copy()
    if not key_list:
        return {"chat_res": "Error:输入的key_list无效!"}

    visual_info_list = visual_info if isinstance(visual_info, list) else [visual_info]

    if self.chat_bot is None:
        logging.warning(
            "The LLM chat bot is not initialized,will initialize it now."
        )
        self.inintial_chat_predictor(self.config)

    if chat_bot_config is None:
        chat_bot = self.chat_bot
    else:
        from .. import create_chat_bot

        chat_bot = create_chat_bot(chat_bot_config)

    (
        all_normal_text_list,
        all_table_text_list,
        all_table_html_list,
        all_table_nei_text_list,
    ) = self.merge_visual_info_list(visual_info_list)

    final_results = {}
    failed_results = ["大模型调用失败", "未知", "未找到关键信息", "None", ""]

    # Stage 1: plain text (key_list is known to be non-empty here).
    related_text = self.get_related_normal_text(
        retriever_config,
        use_vector_retrieval,
        vector_info,
        key_list,
        all_normal_text_list,
        min_characters,
    )
    if len(related_text) > 0:
        text_prompt = self.text_pe.generate_prompt(
            related_text,
            key_list,
            task_description=text_task_description,
            output_format=text_output_format,
            rules_str=text_rules_str,
            few_shot_demo_text_content=text_few_shot_demo_text_content,
            few_shot_demo_key_value_list=text_few_shot_demo_key_value_list,
        )
        self.generate_and_merge_chat_results(
            chat_bot, text_prompt, key_list, final_results, failed_results
        )

    # Stage 2: tables, only for keys the text stage left unanswered.
    if key_list:
        table_triples = zip(
            all_table_html_list, all_table_text_list, all_table_nei_text_list
        )
        for table_html, _table_text, table_nei_text in table_triples:
            # Oversized tables are skipped.
            if len(table_html) > min_characters - self.table_structure_len_max:
                continue
            if not key_list:
                continue

            table_info = table_html
            if len(table_nei_text) > 0:
                table_info = table_info + "\n 表格周围文字:" + table_nei_text

            table_prompt = self.table_pe.generate_prompt(
                table_info,
                key_list,
                task_description=table_task_description,
                output_format=table_output_format,
                rules_str=table_rules_str,
                few_shot_demo_text_content=table_few_shot_demo_text_content,
                few_shot_demo_key_value_list=table_few_shot_demo_key_value_list,
            )
            self.generate_and_merge_chat_results(
                chat_bot,
                table_prompt,
                key_list,
                final_results,
                failed_results,
            )

    # Stage 3: optional combination with MLLM predictions.
    mllm_requested = (
        self.use_mllm_predict
        and mllm_integration_strategy != "llm_only"
        and mllm_predict_info is not None
    )
    if not mllm_requested:
        return {"chat_res": final_results}

    if mllm_integration_strategy == "integration":
        merged = self.ensemble_ocr_llm_mllm(
            chat_bot, key_list_ori, final_results, mllm_predict_info
        )
        return {"chat_res": merged}

    if mllm_integration_strategy == "mllm_only":
        return {"chat_res": mllm_predict_info}

    return {
        "chat_res": f"Error:Unsupported mllm_integration_strategy {mllm_integration_strategy}, only support 'integration', 'llm_only' and 'mllm_only'!"
    }
972
+
973
def predict(self, *args, **kwargs) -> None:
    """
    Reject direct prediction calls on this pipeline.

    All arguments are ignored; an error is logged that points the caller to
    `visual_predict`, `build_vector` and `chat`, and nothing is computed.

    Returns:
        None
    """
    message = "PP-ChatOCRv4-doc Pipeline do not support to call `predict()` directly! Please invoke `visual_predict`, `build_vector`, `chat` sequentially to obtain the result."
    logging.error(message)
    return None