paddlex 3.0.0rc0__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (785)
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +17 -34
  3. paddlex/__main__.py +1 -1
  4. paddlex/configs/modules/doc_vlm/PP-DocBee-2B.yaml +14 -0
  5. paddlex/configs/modules/doc_vlm/PP-DocBee-7B.yaml +14 -0
  6. paddlex/configs/modules/open_vocabulary_detection/YOLO-Worldv2-L.yaml +13 -0
  7. paddlex/configs/pipelines/anomaly_detection.yaml +1 -1
  8. paddlex/configs/pipelines/doc_understanding.yaml +9 -0
  9. paddlex/configs/pipelines/ts_anomaly_detection.yaml +1 -1
  10. paddlex/configs/pipelines/ts_classification.yaml +1 -1
  11. paddlex/configs/pipelines/ts_forecast.yaml +1 -1
  12. paddlex/constants.py +17 -0
  13. paddlex/engine.py +7 -5
  14. paddlex/hpip_links.html +23 -11
  15. paddlex/inference/__init__.py +3 -3
  16. paddlex/inference/common/__init__.py +1 -1
  17. paddlex/inference/common/batch_sampler/__init__.py +5 -4
  18. paddlex/inference/common/batch_sampler/audio_batch_sampler.py +5 -6
  19. paddlex/inference/common/batch_sampler/base_batch_sampler.py +20 -16
  20. paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +4 -7
  21. paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +64 -0
  22. paddlex/inference/common/batch_sampler/image_batch_sampler.py +12 -36
  23. paddlex/inference/common/batch_sampler/ts_batch_sampler.py +9 -10
  24. paddlex/inference/common/batch_sampler/video_batch_sampler.py +2 -22
  25. paddlex/inference/common/reader/__init__.py +4 -4
  26. paddlex/inference/common/reader/audio_reader.py +3 -3
  27. paddlex/inference/common/reader/det_3d_reader.py +7 -5
  28. paddlex/inference/common/reader/image_reader.py +16 -12
  29. paddlex/inference/common/reader/ts_reader.py +3 -2
  30. paddlex/inference/common/reader/video_reader.py +3 -3
  31. paddlex/inference/common/result/__init__.py +7 -7
  32. paddlex/inference/common/result/base_cv_result.py +12 -2
  33. paddlex/inference/common/result/base_result.py +7 -5
  34. paddlex/inference/common/result/base_ts_result.py +1 -2
  35. paddlex/inference/common/result/base_video_result.py +2 -2
  36. paddlex/inference/common/result/mixin.py +12 -13
  37. paddlex/inference/models/__init__.py +41 -85
  38. paddlex/inference/models/anomaly_detection/__init__.py +1 -1
  39. paddlex/inference/models/anomaly_detection/predictor.py +9 -19
  40. paddlex/inference/models/anomaly_detection/processors.py +9 -2
  41. paddlex/inference/models/anomaly_detection/result.py +3 -2
  42. paddlex/inference/models/base/__init__.py +2 -2
  43. paddlex/inference/models/base/predictor/__init__.py +1 -2
  44. paddlex/inference/models/base/predictor/base_predictor.py +284 -39
  45. paddlex/inference/models/common/__init__.py +6 -15
  46. paddlex/inference/models/common/static_infer.py +764 -243
  47. paddlex/inference/models/common/tokenizer/__init__.py +5 -3
  48. paddlex/inference/models/common/tokenizer/bert_tokenizer.py +1 -1
  49. paddlex/inference/models/common/tokenizer/clip_tokenizer.py +609 -0
  50. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +7 -5
  51. paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +432 -0
  52. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +72 -64
  53. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +337 -121
  54. paddlex/inference/models/common/tokenizer/utils.py +1 -1
  55. paddlex/inference/models/common/tokenizer/vocab.py +1 -1
  56. paddlex/inference/models/common/ts/__init__.py +1 -1
  57. paddlex/inference/models/common/ts/funcs.py +13 -6
  58. paddlex/inference/models/common/ts/processors.py +14 -5
  59. paddlex/inference/models/common/vision/__init__.py +3 -3
  60. paddlex/inference/models/common/vision/funcs.py +17 -12
  61. paddlex/inference/models/common/vision/processors.py +61 -46
  62. paddlex/inference/models/common/vlm/__init__.py +13 -0
  63. paddlex/inference/models/common/vlm/activations.py +189 -0
  64. paddlex/inference/models/common/vlm/bert_padding.py +127 -0
  65. paddlex/inference/models/common/vlm/distributed.py +229 -0
  66. paddlex/inference/models/common/vlm/flash_attn_utils.py +119 -0
  67. paddlex/inference/models/common/vlm/generation/__init__.py +34 -0
  68. paddlex/inference/models/common/vlm/generation/configuration_utils.py +533 -0
  69. paddlex/inference/models/common/vlm/generation/logits_process.py +730 -0
  70. paddlex/inference/models/common/vlm/generation/stopping_criteria.py +106 -0
  71. paddlex/inference/models/common/vlm/generation/utils.py +2162 -0
  72. paddlex/inference/models/common/vlm/transformers/__init__.py +16 -0
  73. paddlex/inference/models/common/vlm/transformers/configuration_utils.py +1037 -0
  74. paddlex/inference/models/common/vlm/transformers/conversion_utils.py +408 -0
  75. paddlex/inference/models/common/vlm/transformers/model_outputs.py +1612 -0
  76. paddlex/inference/models/common/vlm/transformers/model_utils.py +2038 -0
  77. paddlex/inference/models/common/vlm/transformers/utils.py +178 -0
  78. paddlex/inference/models/common/vlm/utils.py +109 -0
  79. paddlex/inference/models/doc_vlm/__init__.py +15 -0
  80. paddlex/inference/models/doc_vlm/modeling/__init__.py +15 -0
  81. paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +2600 -0
  82. paddlex/inference/models/doc_vlm/predictor.py +198 -0
  83. paddlex/inference/models/doc_vlm/processors/__init__.py +15 -0
  84. paddlex/inference/models/doc_vlm/processors/common.py +372 -0
  85. paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +698 -0
  86. paddlex/inference/models/doc_vlm/result.py +21 -0
  87. paddlex/inference/models/face_feature/__init__.py +1 -1
  88. paddlex/inference/models/face_feature/predictor.py +2 -1
  89. paddlex/inference/models/formula_recognition/__init__.py +1 -1
  90. paddlex/inference/models/formula_recognition/predictor.py +11 -27
  91. paddlex/inference/models/formula_recognition/processors.py +35 -19
  92. paddlex/inference/models/formula_recognition/result.py +19 -12
  93. paddlex/inference/models/image_classification/__init__.py +1 -1
  94. paddlex/inference/models/image_classification/predictor.py +9 -19
  95. paddlex/inference/models/image_classification/processors.py +4 -2
  96. paddlex/inference/models/image_classification/result.py +4 -3
  97. paddlex/inference/models/image_feature/__init__.py +1 -1
  98. paddlex/inference/models/image_feature/predictor.py +9 -19
  99. paddlex/inference/models/image_feature/processors.py +4 -1
  100. paddlex/inference/models/image_feature/result.py +2 -3
  101. paddlex/inference/models/image_multilabel_classification/__init__.py +1 -1
  102. paddlex/inference/models/image_multilabel_classification/predictor.py +7 -6
  103. paddlex/inference/models/image_multilabel_classification/processors.py +6 -2
  104. paddlex/inference/models/image_multilabel_classification/result.py +4 -3
  105. paddlex/inference/models/image_unwarping/__init__.py +1 -1
  106. paddlex/inference/models/image_unwarping/predictor.py +8 -16
  107. paddlex/inference/models/image_unwarping/processors.py +6 -2
  108. paddlex/inference/models/image_unwarping/result.py +4 -2
  109. paddlex/inference/models/instance_segmentation/__init__.py +1 -1
  110. paddlex/inference/models/instance_segmentation/predictor.py +7 -15
  111. paddlex/inference/models/instance_segmentation/processors.py +4 -7
  112. paddlex/inference/models/instance_segmentation/result.py +11 -10
  113. paddlex/inference/models/keypoint_detection/__init__.py +1 -1
  114. paddlex/inference/models/keypoint_detection/predictor.py +2 -3
  115. paddlex/inference/models/keypoint_detection/processors.py +11 -3
  116. paddlex/inference/models/keypoint_detection/result.py +9 -4
  117. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
  118. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/predictor.py +15 -26
  119. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/processors.py +26 -14
  120. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/result.py +15 -12
  121. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/visualizer_3d.py +77 -39
  122. paddlex/inference/models/multilingual_speech_recognition/__init__.py +1 -1
  123. paddlex/inference/models/multilingual_speech_recognition/predictor.py +11 -15
  124. paddlex/inference/models/multilingual_speech_recognition/processors.py +45 -53
  125. paddlex/inference/models/multilingual_speech_recognition/result.py +1 -1
  126. paddlex/inference/models/object_detection/__init__.py +1 -1
  127. paddlex/inference/models/object_detection/predictor.py +6 -12
  128. paddlex/inference/models/object_detection/processors.py +36 -31
  129. paddlex/inference/models/object_detection/result.py +5 -4
  130. paddlex/inference/models/object_detection/utils.py +1 -1
  131. paddlex/inference/models/open_vocabulary_detection/__init__.py +1 -1
  132. paddlex/inference/models/open_vocabulary_detection/predictor.py +31 -14
  133. paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +3 -2
  134. paddlex/inference/models/open_vocabulary_detection/processors/common.py +114 -0
  135. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +19 -8
  136. paddlex/inference/models/open_vocabulary_detection/processors/yoloworld_processors.py +209 -0
  137. paddlex/inference/models/open_vocabulary_segmentation/__init__.py +1 -1
  138. paddlex/inference/models/open_vocabulary_segmentation/predictor.py +6 -13
  139. paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +1 -1
  140. paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +12 -12
  141. paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +1 -1
  142. paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +11 -9
  143. paddlex/inference/models/semantic_segmentation/__init__.py +1 -1
  144. paddlex/inference/models/semantic_segmentation/predictor.py +9 -18
  145. paddlex/inference/models/semantic_segmentation/processors.py +11 -8
  146. paddlex/inference/models/semantic_segmentation/result.py +4 -3
  147. paddlex/inference/models/table_structure_recognition/__init__.py +1 -1
  148. paddlex/inference/models/table_structure_recognition/predictor.py +8 -18
  149. paddlex/inference/models/table_structure_recognition/processors.py +23 -29
  150. paddlex/inference/models/table_structure_recognition/result.py +9 -6
  151. paddlex/inference/models/text_detection/__init__.py +1 -1
  152. paddlex/inference/models/text_detection/predictor.py +16 -24
  153. paddlex/inference/models/text_detection/processors.py +74 -36
  154. paddlex/inference/models/text_detection/result.py +9 -4
  155. paddlex/inference/models/text_recognition/__init__.py +1 -1
  156. paddlex/inference/models/text_recognition/predictor.py +11 -19
  157. paddlex/inference/models/text_recognition/processors.py +27 -13
  158. paddlex/inference/models/text_recognition/result.py +3 -2
  159. paddlex/inference/models/ts_anomaly_detection/__init__.py +1 -1
  160. paddlex/inference/models/ts_anomaly_detection/predictor.py +12 -17
  161. paddlex/inference/models/ts_anomaly_detection/processors.py +6 -2
  162. paddlex/inference/models/ts_anomaly_detection/result.py +21 -10
  163. paddlex/inference/models/ts_classification/__init__.py +1 -1
  164. paddlex/inference/models/ts_classification/predictor.py +14 -27
  165. paddlex/inference/models/ts_classification/processors.py +7 -2
  166. paddlex/inference/models/ts_classification/result.py +21 -12
  167. paddlex/inference/models/ts_forecasting/__init__.py +1 -1
  168. paddlex/inference/models/ts_forecasting/predictor.py +13 -18
  169. paddlex/inference/models/ts_forecasting/processors.py +12 -3
  170. paddlex/inference/models/ts_forecasting/result.py +24 -11
  171. paddlex/inference/models/video_classification/__init__.py +1 -1
  172. paddlex/inference/models/video_classification/predictor.py +9 -15
  173. paddlex/inference/models/video_classification/processors.py +24 -24
  174. paddlex/inference/models/video_classification/result.py +7 -3
  175. paddlex/inference/models/video_detection/__init__.py +1 -1
  176. paddlex/inference/models/video_detection/predictor.py +8 -15
  177. paddlex/inference/models/video_detection/processors.py +24 -11
  178. paddlex/inference/models/video_detection/result.py +10 -5
  179. paddlex/inference/pipelines/__init__.py +44 -37
  180. paddlex/inference/pipelines/anomaly_detection/__init__.py +1 -1
  181. paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
  182. paddlex/inference/pipelines/attribute_recognition/__init__.py +1 -1
  183. paddlex/inference/pipelines/attribute_recognition/pipeline.py +13 -8
  184. paddlex/inference/pipelines/attribute_recognition/result.py +10 -8
  185. paddlex/inference/pipelines/base.py +31 -11
  186. paddlex/inference/pipelines/components/__init__.py +14 -8
  187. paddlex/inference/pipelines/components/chat_server/__init__.py +1 -1
  188. paddlex/inference/pipelines/components/chat_server/base.py +2 -2
  189. paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +8 -8
  190. paddlex/inference/pipelines/components/common/__init__.py +5 -4
  191. paddlex/inference/pipelines/components/common/base_operator.py +2 -1
  192. paddlex/inference/pipelines/components/common/base_result.py +3 -2
  193. paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +1 -2
  194. paddlex/inference/pipelines/components/common/crop_image_regions.py +11 -5
  195. paddlex/inference/pipelines/components/common/seal_det_warp.py +44 -13
  196. paddlex/inference/pipelines/components/common/sort_boxes.py +4 -2
  197. paddlex/inference/pipelines/components/common/warp_image.py +50 -0
  198. paddlex/inference/pipelines/components/faisser.py +9 -4
  199. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +2 -2
  200. paddlex/inference/pipelines/components/prompt_engineering/base.py +2 -2
  201. paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +2 -1
  202. paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +2 -2
  203. paddlex/inference/pipelines/components/retriever/__init__.py +2 -2
  204. paddlex/inference/pipelines/components/retriever/base.py +18 -16
  205. paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +2 -2
  206. paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +87 -84
  207. paddlex/inference/pipelines/components/utils/__init__.py +1 -1
  208. paddlex/inference/pipelines/components/utils/mixin.py +7 -7
  209. paddlex/inference/pipelines/doc_preprocessor/__init__.py +1 -1
  210. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +21 -28
  211. paddlex/inference/pipelines/doc_preprocessor/result.py +5 -10
  212. paddlex/inference/pipelines/doc_understanding/__init__.py +15 -0
  213. paddlex/inference/pipelines/doc_understanding/pipeline.py +71 -0
  214. paddlex/inference/pipelines/face_recognition/__init__.py +1 -1
  215. paddlex/inference/pipelines/face_recognition/pipeline.py +3 -1
  216. paddlex/inference/pipelines/face_recognition/result.py +3 -2
  217. paddlex/inference/pipelines/formula_recognition/__init__.py +1 -1
  218. paddlex/inference/pipelines/formula_recognition/pipeline.py +22 -16
  219. paddlex/inference/pipelines/formula_recognition/result.py +20 -19
  220. paddlex/inference/pipelines/image_classification/__init__.py +1 -1
  221. paddlex/inference/pipelines/image_classification/pipeline.py +17 -8
  222. paddlex/inference/pipelines/image_multilabel_classification/__init__.py +1 -1
  223. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +18 -9
  224. paddlex/inference/pipelines/instance_segmentation/__init__.py +1 -1
  225. paddlex/inference/pipelines/instance_segmentation/pipeline.py +17 -6
  226. paddlex/inference/pipelines/keypoint_detection/__init__.py +1 -1
  227. paddlex/inference/pipelines/keypoint_detection/pipeline.py +17 -6
  228. paddlex/inference/pipelines/layout_parsing/__init__.py +1 -1
  229. paddlex/inference/pipelines/layout_parsing/pipeline.py +23 -12
  230. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +16 -6
  231. paddlex/inference/pipelines/layout_parsing/result.py +5 -4
  232. paddlex/inference/pipelines/layout_parsing/result_v2.py +5 -8
  233. paddlex/inference/pipelines/layout_parsing/utils.py +7 -8
  234. paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
  235. paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/pipeline.py +17 -10
  236. paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +1 -1
  237. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +17 -6
  238. paddlex/inference/pipelines/object_detection/__init__.py +1 -1
  239. paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
  240. paddlex/inference/pipelines/ocr/__init__.py +1 -1
  241. paddlex/inference/pipelines/ocr/pipeline.py +28 -11
  242. paddlex/inference/pipelines/ocr/result.py +13 -9
  243. paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +1 -1
  244. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +17 -6
  245. paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +1 -1
  246. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +17 -6
  247. paddlex/inference/pipelines/pp_chatocr/__init__.py +1 -1
  248. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +14 -5
  249. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +22 -11
  250. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +31 -13
  251. paddlex/inference/pipelines/pp_shitu_v2/__init__.py +1 -1
  252. paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +12 -8
  253. paddlex/inference/pipelines/pp_shitu_v2/result.py +4 -4
  254. paddlex/inference/pipelines/rotated_object_detection/__init__.py +1 -1
  255. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +17 -6
  256. paddlex/inference/pipelines/seal_recognition/__init__.py +1 -1
  257. paddlex/inference/pipelines/seal_recognition/pipeline.py +21 -13
  258. paddlex/inference/pipelines/seal_recognition/result.py +4 -2
  259. paddlex/inference/pipelines/semantic_segmentation/__init__.py +1 -1
  260. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +17 -6
  261. paddlex/inference/pipelines/small_object_detection/__init__.py +1 -1
  262. paddlex/inference/pipelines/small_object_detection/pipeline.py +17 -6
  263. paddlex/inference/pipelines/table_recognition/__init__.py +1 -1
  264. paddlex/inference/pipelines/table_recognition/pipeline.py +41 -25
  265. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +65 -33
  266. paddlex/inference/pipelines/table_recognition/result.py +11 -9
  267. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +12 -8
  268. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +46 -32
  269. paddlex/inference/pipelines/table_recognition/utils.py +1 -1
  270. paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +1 -1
  271. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +16 -6
  272. paddlex/inference/pipelines/ts_classification/__init__.py +1 -1
  273. paddlex/inference/pipelines/ts_classification/pipeline.py +16 -6
  274. paddlex/inference/pipelines/ts_forecasting/__init__.py +1 -1
  275. paddlex/inference/pipelines/ts_forecasting/pipeline.py +16 -6
  276. paddlex/inference/pipelines/video_classification/__init__.py +1 -1
  277. paddlex/inference/pipelines/video_classification/pipeline.py +17 -6
  278. paddlex/inference/pipelines/video_detection/__init__.py +1 -1
  279. paddlex/inference/pipelines/video_detection/pipeline.py +20 -7
  280. paddlex/inference/serving/__init__.py +5 -1
  281. paddlex/inference/serving/basic_serving/__init__.py +1 -1
  282. paddlex/inference/serving/basic_serving/_app.py +31 -19
  283. paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +7 -4
  284. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +1 -1
  285. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +7 -3
  286. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +1 -1
  287. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +7 -2
  288. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +10 -7
  289. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +10 -7
  290. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_understanding.py +153 -0
  291. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +16 -13
  292. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +10 -7
  293. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +10 -7
  294. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +10 -7
  295. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +10 -7
  296. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +13 -7
  297. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +10 -7
  298. paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +10 -7
  299. paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +10 -7
  300. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +10 -7
  301. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +10 -7
  302. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +10 -7
  303. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +13 -7
  304. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +10 -7
  305. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -11
  306. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +16 -13
  307. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +16 -13
  308. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +10 -7
  309. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +10 -7
  310. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +10 -7
  311. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +10 -7
  312. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +10 -7
  313. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +10 -7
  314. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +10 -7
  315. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +10 -7
  316. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +10 -7
  317. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +10 -7
  318. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +10 -7
  319. paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +10 -7
  320. paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +10 -7
  321. paddlex/inference/serving/basic_serving/_server.py +9 -4
  322. paddlex/inference/serving/infra/__init__.py +1 -1
  323. paddlex/inference/serving/infra/config.py +1 -1
  324. paddlex/inference/serving/infra/models.py +13 -6
  325. paddlex/inference/serving/infra/storage.py +9 -4
  326. paddlex/inference/serving/infra/utils.py +37 -9
  327. paddlex/inference/serving/schemas/__init__.py +1 -1
  328. paddlex/inference/serving/schemas/anomaly_detection.py +1 -1
  329. paddlex/inference/serving/schemas/doc_preprocessor.py +1 -1
  330. paddlex/inference/serving/schemas/doc_understanding.py +78 -0
  331. paddlex/inference/serving/schemas/face_recognition.py +1 -1
  332. paddlex/inference/serving/schemas/formula_recognition.py +1 -1
  333. paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -1
  334. paddlex/inference/serving/schemas/image_classification.py +1 -1
  335. paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -1
  336. paddlex/inference/serving/schemas/instance_segmentation.py +1 -1
  337. paddlex/inference/serving/schemas/layout_parsing.py +1 -1
  338. paddlex/inference/serving/schemas/m_3d_bev_detection.py +1 -1
  339. paddlex/inference/serving/schemas/multilingual_speech_recognition.py +1 -1
  340. paddlex/inference/serving/schemas/object_detection.py +1 -1
  341. paddlex/inference/serving/schemas/ocr.py +1 -1
  342. paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -1
  343. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -1
  344. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -1
  345. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -1
  346. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +1 -1
  347. paddlex/inference/serving/schemas/pp_shituv2.py +1 -1
  348. paddlex/inference/serving/schemas/pp_structurev3.py +1 -1
  349. paddlex/inference/serving/schemas/rotated_object_detection.py +1 -1
  350. paddlex/inference/serving/schemas/seal_recognition.py +1 -1
  351. paddlex/inference/serving/schemas/semantic_segmentation.py +1 -1
  352. paddlex/inference/serving/schemas/shared/__init__.py +1 -1
  353. paddlex/inference/serving/schemas/shared/classification.py +1 -1
  354. paddlex/inference/serving/schemas/shared/image_segmentation.py +1 -1
  355. paddlex/inference/serving/schemas/shared/object_detection.py +1 -1
  356. paddlex/inference/serving/schemas/shared/ocr.py +1 -1
  357. paddlex/inference/serving/schemas/small_object_detection.py +1 -1
  358. paddlex/inference/serving/schemas/table_recognition.py +1 -1
  359. paddlex/inference/serving/schemas/table_recognition_v2.py +1 -1
  360. paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -1
  361. paddlex/inference/serving/schemas/ts_classification.py +1 -1
  362. paddlex/inference/serving/schemas/ts_forecast.py +1 -1
  363. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -1
  364. paddlex/inference/serving/schemas/video_classification.py +1 -1
  365. paddlex/inference/serving/schemas/video_detection.py +1 -1
  366. paddlex/inference/utils/__init__.py +1 -1
  367. paddlex/inference/utils/benchmark.py +332 -179
  368. paddlex/inference/utils/color_map.py +1 -1
  369. paddlex/inference/utils/get_pipeline_path.py +1 -1
  370. paddlex/inference/utils/hpi.py +251 -0
  371. paddlex/inference/utils/hpi_model_info_collection.json +2252 -0
  372. paddlex/inference/utils/io/__init__.py +11 -11
  373. paddlex/inference/utils/io/readers.py +22 -18
  374. paddlex/inference/utils/io/style.py +21 -14
  375. paddlex/inference/utils/io/tablepyxl.py +13 -5
  376. paddlex/inference/utils/io/writers.py +9 -10
  377. paddlex/inference/utils/model_paths.py +48 -0
  378. paddlex/inference/utils/{new_ir_blacklist.py → new_ir_blocklist.py} +1 -2
  379. paddlex/inference/utils/official_models.py +264 -262
  380. paddlex/inference/utils/pp_option.py +164 -93
  381. paddlex/inference/utils/trt_blocklist.py +43 -0
  382. paddlex/inference/utils/trt_config.py +420 -0
  383. paddlex/model.py +28 -10
  384. paddlex/modules/__init__.py +57 -80
  385. paddlex/modules/anomaly_detection/__init__.py +2 -2
  386. paddlex/modules/anomaly_detection/dataset_checker/__init__.py +2 -3
  387. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
  388. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +6 -3
  389. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/check_dataset.py +8 -4
  390. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +7 -4
  391. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/split_dataset.py +2 -2
  392. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  393. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/visualizer.py +7 -2
  394. paddlex/modules/anomaly_detection/evaluator.py +1 -1
  395. paddlex/modules/anomaly_detection/exportor.py +1 -1
  396. paddlex/modules/anomaly_detection/model_list.py +1 -1
  397. paddlex/modules/anomaly_detection/trainer.py +3 -4
  398. paddlex/modules/base/__init__.py +5 -5
  399. paddlex/modules/base/build_model.py +1 -2
  400. paddlex/modules/base/dataset_checker/__init__.py +2 -2
  401. paddlex/modules/base/dataset_checker/dataset_checker.py +4 -4
  402. paddlex/modules/base/dataset_checker/utils.py +1 -3
  403. paddlex/modules/base/evaluator.py +8 -8
  404. paddlex/modules/base/exportor.py +12 -13
  405. paddlex/modules/base/trainer.py +21 -11
  406. paddlex/modules/base/utils/__init__.py +13 -0
  407. paddlex/modules/base/utils/cinn_setting.py +89 -0
  408. paddlex/modules/base/utils/coco_eval.py +94 -0
  409. paddlex/modules/base/utils/topk_eval.py +118 -0
  410. paddlex/modules/doc_vlm/__init__.py +18 -0
  411. paddlex/modules/doc_vlm/dataset_checker.py +29 -0
  412. paddlex/modules/doc_vlm/evaluator.py +29 -0
  413. paddlex/modules/doc_vlm/exportor.py +29 -0
  414. paddlex/modules/doc_vlm/model_list.py +16 -0
  415. paddlex/modules/doc_vlm/trainer.py +41 -0
  416. paddlex/modules/face_recognition/__init__.py +2 -2
  417. paddlex/modules/face_recognition/dataset_checker/__init__.py +2 -2
  418. paddlex/modules/face_recognition/dataset_checker/dataset_src/__init__.py +1 -1
  419. paddlex/modules/face_recognition/dataset_checker/dataset_src/check_dataset.py +3 -5
  420. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
  421. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  422. paddlex/modules/face_recognition/evaluator.py +1 -1
  423. paddlex/modules/face_recognition/exportor.py +1 -1
  424. paddlex/modules/face_recognition/model_list.py +1 -1
  425. paddlex/modules/face_recognition/trainer.py +1 -1
  426. paddlex/modules/formula_recognition/__init__.py +2 -2
  427. paddlex/modules/formula_recognition/dataset_checker/__init__.py +3 -3
  428. paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  429. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
  430. paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +2 -6
  431. paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
  432. paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
  433. paddlex/modules/formula_recognition/evaluator.py +1 -1
  434. paddlex/modules/formula_recognition/exportor.py +1 -1
  435. paddlex/modules/formula_recognition/model_list.py +1 -1
  436. paddlex/modules/formula_recognition/trainer.py +2 -3
  437. paddlex/modules/general_recognition/__init__.py +2 -2
  438. paddlex/modules/general_recognition/dataset_checker/__init__.py +2 -2
  439. paddlex/modules/general_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  440. paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +7 -9
  441. paddlex/modules/general_recognition/dataset_checker/dataset_src/check_dataset.py +4 -5
  442. paddlex/modules/general_recognition/dataset_checker/dataset_src/convert_dataset.py +6 -5
  443. paddlex/modules/general_recognition/dataset_checker/dataset_src/split_dataset.py +1 -1
  444. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
  445. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  446. paddlex/modules/general_recognition/evaluator.py +1 -1
  447. paddlex/modules/general_recognition/exportor.py +1 -1
  448. paddlex/modules/general_recognition/model_list.py +1 -1
  449. paddlex/modules/general_recognition/trainer.py +1 -1
  450. paddlex/modules/image_classification/__init__.py +2 -2
  451. paddlex/modules/image_classification/dataset_checker/__init__.py +2 -2
  452. paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py +2 -2
  453. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  454. paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
  455. paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py +4 -4
  456. paddlex/modules/image_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  457. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
  458. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  459. paddlex/modules/image_classification/evaluator.py +1 -1
  460. paddlex/modules/image_classification/exportor.py +1 -1
  461. paddlex/modules/image_classification/model_list.py +1 -1
  462. paddlex/modules/image_classification/trainer.py +3 -3
  463. paddlex/modules/image_unwarping/__init__.py +1 -1
  464. paddlex/modules/image_unwarping/model_list.py +1 -1
  465. paddlex/modules/instance_segmentation/__init__.py +2 -2
  466. paddlex/modules/instance_segmentation/dataset_checker/__init__.py +2 -3
  467. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
  468. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +9 -5
  469. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/check_dataset.py +8 -5
  470. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/convert_dataset.py +8 -8
  471. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/split_dataset.py +7 -4
  472. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
  473. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +10 -8
  474. paddlex/modules/instance_segmentation/evaluator.py +1 -1
  475. paddlex/modules/instance_segmentation/exportor.py +1 -1
  476. paddlex/modules/instance_segmentation/model_list.py +1 -1
  477. paddlex/modules/instance_segmentation/trainer.py +1 -1
  478. paddlex/modules/keypoint_detection/__init__.py +2 -2
  479. paddlex/modules/keypoint_detection/dataset_checker/__init__.py +2 -2
  480. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +1 -1
  481. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
  482. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  483. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +8 -3
  484. paddlex/modules/keypoint_detection/evaluator.py +1 -1
  485. paddlex/modules/keypoint_detection/exportor.py +1 -1
  486. paddlex/modules/keypoint_detection/model_list.py +1 -1
  487. paddlex/modules/keypoint_detection/trainer.py +2 -2
  488. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/__init__.py +2 -2
  489. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/__init__.py +3 -3
  490. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/__init__.py +2 -2
  491. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/analyse_dataset.py +8 -8
  492. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/check_dataset.py +1 -2
  493. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/evaluator.py +1 -1
  494. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/exportor.py +1 -1
  495. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/model_list.py +1 -1
  496. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/trainer.py +5 -7
  497. paddlex/modules/multilabel_classification/__init__.py +2 -2
  498. paddlex/modules/multilabel_classification/dataset_checker/__init__.py +2 -2
  499. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/__init__.py +2 -2
  500. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  501. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
  502. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/convert_dataset.py +10 -7
  503. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  504. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
  505. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +1 -5
  506. paddlex/modules/multilabel_classification/evaluator.py +1 -1
  507. paddlex/modules/multilabel_classification/exportor.py +1 -1
  508. paddlex/modules/multilabel_classification/model_list.py +1 -1
  509. paddlex/modules/multilabel_classification/trainer.py +3 -3
  510. paddlex/modules/multilingual_speech_recognition/__init__.py +2 -2
  511. paddlex/modules/multilingual_speech_recognition/dataset_checker.py +3 -3
  512. paddlex/modules/multilingual_speech_recognition/evaluator.py +3 -3
  513. paddlex/modules/multilingual_speech_recognition/exportor.py +3 -3
  514. paddlex/modules/multilingual_speech_recognition/model_list.py +1 -1
  515. paddlex/modules/multilingual_speech_recognition/trainer.py +7 -5
  516. paddlex/modules/object_detection/__init__.py +2 -2
  517. paddlex/modules/object_detection/dataset_checker/__init__.py +2 -11
  518. paddlex/modules/object_detection/dataset_checker/dataset_src/__init__.py +2 -2
  519. paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +10 -8
  520. paddlex/modules/object_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
  521. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +13 -8
  522. paddlex/modules/object_detection/dataset_checker/dataset_src/split_dataset.py +8 -4
  523. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  524. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +9 -8
  525. paddlex/modules/object_detection/evaluator.py +9 -4
  526. paddlex/modules/object_detection/exportor.py +1 -1
  527. paddlex/modules/object_detection/model_list.py +1 -1
  528. paddlex/modules/object_detection/trainer.py +4 -5
  529. paddlex/modules/open_vocabulary_detection/__init__.py +2 -2
  530. paddlex/modules/open_vocabulary_detection/dataset_checker.py +3 -3
  531. paddlex/modules/open_vocabulary_detection/evaluator.py +3 -3
  532. paddlex/modules/open_vocabulary_detection/exportor.py +3 -3
  533. paddlex/modules/open_vocabulary_detection/model_list.py +2 -4
  534. paddlex/modules/open_vocabulary_detection/trainer.py +7 -5
  535. paddlex/modules/open_vocabulary_segmentation/__init__.py +2 -2
  536. paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +3 -3
  537. paddlex/modules/open_vocabulary_segmentation/evaluator.py +3 -3
  538. paddlex/modules/open_vocabulary_segmentation/exportor.py +3 -3
  539. paddlex/modules/open_vocabulary_segmentation/model_list.py +1 -1
  540. paddlex/modules/open_vocabulary_segmentation/trainer.py +7 -5
  541. paddlex/modules/semantic_segmentation/__init__.py +2 -2
  542. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +2 -3
  543. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
  544. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/analyse_dataset.py +6 -3
  545. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/check_dataset.py +2 -2
  546. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/convert_dataset.py +7 -4
  547. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/split_dataset.py +2 -2
  548. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
  549. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/visualizer.py +6 -2
  550. paddlex/modules/semantic_segmentation/evaluator.py +1 -1
  551. paddlex/modules/semantic_segmentation/exportor.py +1 -1
  552. paddlex/modules/semantic_segmentation/model_list.py +1 -1
  553. paddlex/modules/semantic_segmentation/trainer.py +3 -4
  554. paddlex/modules/table_recognition/__init__.py +2 -2
  555. paddlex/modules/table_recognition/dataset_checker/__init__.py +5 -5
  556. paddlex/modules/table_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  557. paddlex/modules/table_recognition/dataset_checker/dataset_src/analyse_dataset.py +3 -2
  558. paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +8 -7
  559. paddlex/modules/table_recognition/dataset_checker/dataset_src/split_dataset.py +2 -1
  560. paddlex/modules/table_recognition/evaluator.py +1 -1
  561. paddlex/modules/table_recognition/exportor.py +1 -1
  562. paddlex/modules/table_recognition/model_list.py +1 -1
  563. paddlex/modules/table_recognition/trainer.py +2 -5
  564. paddlex/modules/text_detection/__init__.py +2 -2
  565. paddlex/modules/text_detection/dataset_checker/__init__.py +4 -6
  566. paddlex/modules/text_detection/dataset_checker/dataset_src/__init__.py +2 -2
  567. paddlex/modules/text_detection/dataset_checker/dataset_src/analyse_dataset.py +12 -9
  568. paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +3 -3
  569. paddlex/modules/text_detection/dataset_checker/dataset_src/split_dataset.py +3 -3
  570. paddlex/modules/text_detection/evaluator.py +1 -1
  571. paddlex/modules/text_detection/exportor.py +1 -1
  572. paddlex/modules/text_detection/model_list.py +1 -1
  573. paddlex/modules/text_detection/trainer.py +2 -5
  574. paddlex/modules/text_recognition/__init__.py +2 -2
  575. paddlex/modules/text_recognition/dataset_checker/__init__.py +4 -5
  576. paddlex/modules/text_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  577. paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
  578. paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +2 -5
  579. paddlex/modules/text_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
  580. paddlex/modules/text_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
  581. paddlex/modules/text_recognition/evaluator.py +1 -1
  582. paddlex/modules/text_recognition/exportor.py +1 -1
  583. paddlex/modules/text_recognition/model_list.py +1 -1
  584. paddlex/modules/text_recognition/trainer.py +2 -3
  585. paddlex/modules/ts_anomaly_detection/__init__.py +2 -2
  586. paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +4 -5
  587. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
  588. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +1 -9
  589. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/check_dataset.py +2 -2
  590. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -6
  591. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/split_dataset.py +4 -4
  592. paddlex/modules/ts_anomaly_detection/evaluator.py +1 -1
  593. paddlex/modules/ts_anomaly_detection/exportor.py +2 -3
  594. paddlex/modules/ts_anomaly_detection/model_list.py +1 -1
  595. paddlex/modules/ts_anomaly_detection/trainer.py +8 -8
  596. paddlex/modules/ts_classification/__init__.py +2 -2
  597. paddlex/modules/ts_classification/dataset_checker/__init__.py +4 -5
  598. paddlex/modules/ts_classification/dataset_checker/dataset_src/__init__.py +2 -2
  599. paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -5
  600. paddlex/modules/ts_classification/dataset_checker/dataset_src/check_dataset.py +2 -2
  601. paddlex/modules/ts_classification/dataset_checker/dataset_src/convert_dataset.py +2 -6
  602. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +4 -4
  603. paddlex/modules/ts_classification/evaluator.py +1 -1
  604. paddlex/modules/ts_classification/exportor.py +2 -3
  605. paddlex/modules/ts_classification/model_list.py +1 -1
  606. paddlex/modules/ts_classification/trainer.py +7 -7
  607. paddlex/modules/ts_forecast/__init__.py +2 -2
  608. paddlex/modules/ts_forecast/dataset_checker/__init__.py +4 -5
  609. paddlex/modules/ts_forecast/dataset_checker/dataset_src/__init__.py +2 -2
  610. paddlex/modules/ts_forecast/dataset_checker/dataset_src/analyse_dataset.py +1 -9
  611. paddlex/modules/ts_forecast/dataset_checker/dataset_src/check_dataset.py +2 -2
  612. paddlex/modules/ts_forecast/dataset_checker/dataset_src/convert_dataset.py +2 -6
  613. paddlex/modules/ts_forecast/dataset_checker/dataset_src/split_dataset.py +4 -4
  614. paddlex/modules/ts_forecast/evaluator.py +1 -1
  615. paddlex/modules/ts_forecast/exportor.py +2 -3
  616. paddlex/modules/ts_forecast/model_list.py +1 -1
  617. paddlex/modules/ts_forecast/trainer.py +7 -7
  618. paddlex/modules/video_classification/__init__.py +2 -2
  619. paddlex/modules/video_classification/dataset_checker/__init__.py +2 -2
  620. paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +2 -2
  621. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +9 -9
  622. paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +2 -3
  623. paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  624. paddlex/modules/video_classification/evaluator.py +1 -1
  625. paddlex/modules/video_classification/exportor.py +1 -1
  626. paddlex/modules/video_classification/model_list.py +1 -1
  627. paddlex/modules/video_classification/trainer.py +3 -3
  628. paddlex/modules/video_detection/__init__.py +2 -2
  629. paddlex/modules/video_detection/dataset_checker/__init__.py +2 -2
  630. paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +2 -2
  631. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  632. paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +3 -5
  633. paddlex/modules/video_detection/evaluator.py +1 -1
  634. paddlex/modules/video_detection/exportor.py +1 -1
  635. paddlex/modules/video_detection/model_list.py +1 -1
  636. paddlex/modules/video_detection/trainer.py +3 -3
  637. paddlex/ops/__init__.py +5 -2
  638. paddlex/ops/iou3d_nms/iou3d_cpu.cpp +8 -6
  639. paddlex/ops/iou3d_nms/iou3d_cpu.h +3 -2
  640. paddlex/ops/iou3d_nms/iou3d_nms.cpp +8 -6
  641. paddlex/ops/iou3d_nms/iou3d_nms.h +6 -4
  642. paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +24 -18
  643. paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +9 -7
  644. paddlex/ops/setup.py +3 -3
  645. paddlex/ops/voxel/voxelize_op.cc +22 -19
  646. paddlex/ops/voxel/voxelize_op.cu +25 -25
  647. paddlex/paddlex_cli.py +86 -75
  648. paddlex/repo_apis/Paddle3D_api/__init__.py +1 -1
  649. paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +1 -1
  650. paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +1 -1
  651. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +4 -4
  652. paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +2 -2
  653. paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +1 -1
  654. paddlex/repo_apis/Paddle3D_api/pp3d_config.py +3 -2
  655. paddlex/repo_apis/PaddleClas_api/__init__.py +1 -1
  656. paddlex/repo_apis/PaddleClas_api/cls/__init__.py +3 -3
  657. paddlex/repo_apis/PaddleClas_api/cls/config.py +4 -3
  658. paddlex/repo_apis/PaddleClas_api/cls/model.py +3 -3
  659. paddlex/repo_apis/PaddleClas_api/cls/register.py +2 -3
  660. paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -2
  661. paddlex/repo_apis/PaddleClas_api/shitu_rec/__init__.py +2 -2
  662. paddlex/repo_apis/PaddleClas_api/shitu_rec/config.py +2 -2
  663. paddlex/repo_apis/PaddleClas_api/shitu_rec/model.py +1 -4
  664. paddlex/repo_apis/PaddleClas_api/shitu_rec/register.py +2 -2
  665. paddlex/repo_apis/PaddleClas_api/shitu_rec/runner.py +1 -6
  666. paddlex/repo_apis/PaddleDetection_api/__init__.py +2 -2
  667. paddlex/repo_apis/PaddleDetection_api/config_helper.py +3 -3
  668. paddlex/repo_apis/PaddleDetection_api/instance_seg/__init__.py +2 -2
  669. paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +2 -3
  670. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +3 -3
  671. paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py +2 -3
  672. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -2
  673. paddlex/repo_apis/PaddleDetection_api/object_det/__init__.py +3 -3
  674. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +4 -3
  675. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +5 -6
  676. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +1 -1
  677. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +2 -3
  678. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -2
  679. paddlex/repo_apis/PaddleNLP_api/__init__.py +1 -1
  680. paddlex/repo_apis/PaddleOCR_api/__init__.py +4 -3
  681. paddlex/repo_apis/PaddleOCR_api/config_utils.py +1 -1
  682. paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +1 -1
  683. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +4 -3
  684. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +4 -4
  685. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +2 -3
  686. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -2
  687. paddlex/repo_apis/PaddleOCR_api/table_rec/__init__.py +1 -1
  688. paddlex/repo_apis/PaddleOCR_api/table_rec/config.py +1 -1
  689. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +3 -3
  690. paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +2 -3
  691. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +2 -2
  692. paddlex/repo_apis/PaddleOCR_api/text_det/__init__.py +1 -1
  693. paddlex/repo_apis/PaddleOCR_api/text_det/config.py +1 -1
  694. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +3 -3
  695. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +2 -3
  696. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +2 -2
  697. paddlex/repo_apis/PaddleOCR_api/text_rec/__init__.py +1 -1
  698. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +4 -3
  699. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +4 -4
  700. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +2 -3
  701. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -2
  702. paddlex/repo_apis/PaddleSeg_api/__init__.py +1 -1
  703. paddlex/repo_apis/PaddleSeg_api/base_seg_config.py +2 -2
  704. paddlex/repo_apis/PaddleSeg_api/seg/__init__.py +1 -1
  705. paddlex/repo_apis/PaddleSeg_api/seg/config.py +3 -6
  706. paddlex/repo_apis/PaddleSeg_api/seg/model.py +5 -5
  707. paddlex/repo_apis/PaddleSeg_api/seg/register.py +2 -3
  708. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -2
  709. paddlex/repo_apis/PaddleTS_api/__init__.py +4 -3
  710. paddlex/repo_apis/PaddleTS_api/ts_ad/__init__.py +1 -1
  711. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +2 -3
  712. paddlex/repo_apis/PaddleTS_api/ts_ad/register.py +2 -2
  713. paddlex/repo_apis/PaddleTS_api/ts_ad/runner.py +2 -2
  714. paddlex/repo_apis/PaddleTS_api/ts_base/__init__.py +1 -1
  715. paddlex/repo_apis/PaddleTS_api/ts_base/config.py +2 -4
  716. paddlex/repo_apis/PaddleTS_api/ts_base/model.py +4 -4
  717. paddlex/repo_apis/PaddleTS_api/ts_base/runner.py +2 -2
  718. paddlex/repo_apis/PaddleTS_api/ts_cls/__init__.py +1 -1
  719. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -3
  720. paddlex/repo_apis/PaddleTS_api/ts_cls/register.py +2 -2
  721. paddlex/repo_apis/PaddleTS_api/ts_cls/runner.py +2 -2
  722. paddlex/repo_apis/PaddleTS_api/ts_fc/__init__.py +1 -1
  723. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +2 -3
  724. paddlex/repo_apis/PaddleTS_api/ts_fc/register.py +1 -1
  725. paddlex/repo_apis/PaddleVideo_api/__init__.py +1 -1
  726. paddlex/repo_apis/PaddleVideo_api/config_utils.py +1 -1
  727. paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +3 -3
  728. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +4 -3
  729. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +3 -3
  730. paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +2 -3
  731. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -2
  732. paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +3 -3
  733. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +4 -3
  734. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +4 -4
  735. paddlex/repo_apis/PaddleVideo_api/video_det/register.py +2 -3
  736. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -2
  737. paddlex/repo_apis/__init__.py +1 -1
  738. paddlex/repo_apis/base/__init__.py +4 -5
  739. paddlex/repo_apis/base/config.py +2 -3
  740. paddlex/repo_apis/base/model.py +11 -19
  741. paddlex/repo_apis/base/register.py +1 -1
  742. paddlex/repo_apis/base/runner.py +11 -12
  743. paddlex/repo_apis/base/utils/__init__.py +1 -1
  744. paddlex/repo_apis/base/utils/arg.py +1 -1
  745. paddlex/repo_apis/base/utils/subprocess.py +1 -1
  746. paddlex/repo_manager/__init__.py +2 -9
  747. paddlex/repo_manager/core.py +9 -27
  748. paddlex/repo_manager/meta.py +37 -31
  749. paddlex/repo_manager/repo.py +169 -160
  750. paddlex/repo_manager/utils.py +13 -224
  751. paddlex/utils/__init__.py +1 -1
  752. paddlex/utils/cache.py +8 -10
  753. paddlex/utils/config.py +6 -5
  754. paddlex/utils/{custom_device_whitelist.py → custom_device_list.py} +29 -199
  755. paddlex/utils/deps.py +249 -0
  756. paddlex/utils/device.py +73 -29
  757. paddlex/utils/download.py +4 -4
  758. paddlex/utils/env.py +33 -7
  759. paddlex/utils/errors/__init__.py +1 -1
  760. paddlex/utils/errors/dataset_checker.py +1 -1
  761. paddlex/utils/errors/others.py +2 -16
  762. paddlex/utils/file_interface.py +4 -5
  763. paddlex/utils/flags.py +19 -12
  764. paddlex/utils/fonts/__init__.py +2 -1
  765. paddlex/utils/func_register.py +1 -1
  766. paddlex/utils/install.py +87 -0
  767. paddlex/utils/interactive_get_pipeline.py +3 -3
  768. paddlex/utils/lazy_loader.py +3 -3
  769. paddlex/utils/logging.py +10 -1
  770. paddlex/utils/misc.py +5 -5
  771. paddlex/utils/pipeline_arguments.py +15 -7
  772. paddlex/utils/result_saver.py +4 -5
  773. paddlex/utils/subclass_register.py +2 -4
  774. paddlex/version.py +2 -1
  775. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.0rc1.dist-info}/METADATA +212 -73
  776. paddlex-3.0.0rc1.dist-info/RECORD +1068 -0
  777. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.0rc1.dist-info}/WHEEL +1 -1
  778. paddlex/inference/models/base/predictor/basic_predictor.py +0 -139
  779. paddlex/paddle2onnx_requirements.txt +0 -1
  780. paddlex/repo_manager/requirements.txt +0 -21
  781. paddlex/serving_requirements.txt +0 -9
  782. paddlex-3.0.0rc0.dist-info/RECORD +0 -1015
  783. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.0rc1.dist-info}/entry_points.txt +0 -0
  784. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.0rc1.dist-info/licenses}/LICENSE +0 -0
  785. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,698 @@
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import base64
16
+ import math
17
+ from io import BytesIO
18
+ from typing import Dict, List, Optional, Tuple, Union
19
+
20
+ import numpy as np
21
+ import paddle
22
+ import requests
23
+ from PIL import Image
24
+
25
+ from .....utils import logging
26
+ from ....utils.benchmark import benchmark
27
+ from ...common.vision.funcs import resize
28
+ from .common import (
29
+ BatchFeature,
30
+ ChannelDimension,
31
+ ImageInput,
32
+ PILImageResampling,
33
+ TensorType,
34
+ TextInput,
35
+ convert_to_rgb,
36
+ get_image_size,
37
+ infer_channel_dimension_format,
38
+ is_valid_image,
39
+ make_list_of_images,
40
+ to_channel_dimension_format,
41
+ to_numpy_array,
42
+ valid_images,
43
+ )
44
+
45
# Per-channel mean / standard deviation for image normalization. These are the
# values the Qwen2VLImageProcessor docstring lists as its `image_mean` /
# `image_std` defaults.
OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
OPENAI_CLIP_STD = [0.26862954, 0.26130258, 0.27577711]

# Resize bounds, expressed in multiples of a 28 x 28 pixel cell.
# NOTE(review): 28 presumably equals patch_size (14) * merge_size (2) -- confirm
# against the image processor that consumes these constants.
IMAGE_FACTOR = 28
MIN_PIXELS = 4 * IMAGE_FACTOR * IMAGE_FACTOR  # 3136
MAX_PIXELS = 16384 * IMAGE_FACTOR * IMAGE_FACTOR  # 12845056
# NOTE(review): looks like an upper limit on an aspect ratio -- confirm at the use site.
MAX_RATIO = 200
52
+
53
+
54
def is_scaled_image(image: np.ndarray) -> bool:
    """
    Checks to see whether the pixel values have already been rescaled to [0, 1].

    Args:
        image (`np.ndarray`):
            The image array to inspect.

    Returns:
        `bool`: `False` for `uint8` arrays; otherwise `True` only when the minimum value is
        at least 0 and the maximum value is at most 1.
    """
    if image.dtype == np.uint8:
        return False

    # It's possible the image has pixel values in [0, 255] but is of floating type,
    # so the dtype alone is not enough: inspect the actual value range.
    # The comparison yields `np.bool_`; coerce it so the annotated `bool` is what
    # callers really receive (e.g. `is True` / `is False` checks keep working).
    return bool(np.min(image) >= 0 and np.max(image) <= 1)
63
+
64
+
65
class Qwen2VLProcessor(object):
    r"""
    Constructs a Qwen2-VL processor which wraps a Qwen2-VL image processor and a Qwen2 tokenizer into a single processor.

    [`Qwen2VLProcessor`] offers all the functionalities of [`Qwen2VLImageProcessor`] and [`Qwen2TokenizerFast`]. See the
    [`~Qwen2VLProcessor.__call__`] and [`~Qwen2VLProcessor.decode`] for more information.

    Args:
        image_processor ([`Qwen2VLImageProcessor`]):
            The image processor. This is a required input.
        tokenizer ([`MIXQwen2Tokenizer`]):
            The tokenizer. This is a required input.
        **kwargs:
            `min_pixels` (defaults to `3136`) and `max_pixels` (defaults to `12845056`) are read from here and
            written onto `image_processor`. Other keys are ignored.
    """

    def __init__(self, image_processor, tokenizer, **kwargs):
        self.image_processor = image_processor
        self.tokenizer = tokenizer
        # The pixel bounds are stored on the wrapped image processor, not on this object.
        # The defaults equal 4 * 28 * 28 and 16384 * 28 * 28 respectively.
        self.image_processor.min_pixels = kwargs.get("min_pixels", 3136)
        self.image_processor.max_pixels = kwargs.get("max_pixels", 12845056)

    def _preprocess(
        self,
        images: ImageInput = None,
        text: Union[TextInput, List[TextInput]] = None,
        padding: bool = False,
        truncation: Union[bool, str] = None,
        max_length: int = None,
        return_tensors: Optional[Union[str, TensorType]] = TensorType.PADDLE,
    ):
        """
        Main method to prepare one or several sequence(s) and image(s) for the model. `images`, when not `None`,
        is forwarded to the wrapped image processor; `text` is forwarded to the wrapped tokenizer together with
        `padding`, `truncation`, `max_length` and `return_tensors`.

        When images are given, each `<|image_pad|>` marker in `text` is expanded, in order of appearance, to
        `image_grid_thw[k].prod() // merge_size**2` consecutive `<|image_pad|>` markers, where `k` is the running
        index of the marker across all sequences. The list passed as `text` is not modified.

        Args:
            images (`PIL.Image.Image`, `np.ndarray`, `paddle.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[paddle.Tensor]`):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or Paddle
                tensor. Both channels-first and channels-last formats are supported.
            text (`str`, `List[str]`):
                The sequence or batch of sequences to be encoded. A value that is not a `list` is wrapped into a
                single-element list.
            padding (`bool`, *optional*, defaults to `False`):
                Select a strategy to pad the returned sequences (according to the model's padding side and padding
                index) among:
                - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
                  sequence is provided).
                - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
                  acceptable input length for the model if that argument is not provided.
                - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
                  lengths).
            truncation (`bool`, *optional*):
                Activates truncation to cut input sequences longer than `max_length` to `max_length`.
            max_length (`int`, *optional*):
                Maximum length of the returned list and optionally padding length (see above).
            return_tensors (`str` or [`~utils.TensorType`], *optional*):
                If set, will return tensors of a particular framework. Acceptable values are:

                - `'pd'`: Return Paddle `paddle.Tensor` objects.
                - `'np'`: Return NumPy `np.ndarray` objects.

        Returns:
            The `data` of a [`BatchFeature`] built from the tokenizer output merged with the image processor
            output (image processor entries win on a key clash). Expected fields:

            - **input_ids** -- List of token ids to be fed to a model.
            - **attention_mask** -- List of indices specifying which tokens should be attended to by the model
              (when the tokenizer returns it).
            - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
            - **image_grid_thw** -- List of image 3D grid in LLM. Returned when `images` is not `None`.
        """
        if images is not None:
            image_inputs = self.image_processor(
                images=images, return_tensors=return_tensors
            )
            image_grid_thw = image_inputs["image_grid_thw"]
        else:
            image_inputs = {}
            image_grid_thw = None

        if isinstance(text, list):
            # The expansion below assigns to text[i]; work on a shallow copy so the
            # caller's list is not silently rewritten with expanded placeholders.
            text = list(text)
        else:
            text = [text]

        if image_grid_thw is not None:
            merge_length = self.image_processor.merge_size**2
            # Running index into image_grid_thw, shared across all sequences.
            index = 0
            for i in range(len(text)):
                while "<|image_pad|>" in text[i]:
                    text[i] = text[i].replace(
                        "<|image_pad|>",
                        "<|placeholder|>"
                        * int(image_grid_thw[index].prod() // merge_length),
                        1,  # replace a single <|image_pad|> with that image's number of visual tokens
                    )
                    index += 1
                # A temporary marker was used above so that already-expanded tokens do not
                # re-trigger the while loop; now restore the real token.
                text[i] = text[i].replace("<|placeholder|>", "<|image_pad|>")
        text_inputs = self.tokenizer(
            text,
            return_tensors=return_tensors,
            padding=padding,
            truncation=truncation,
            max_length=max_length,
        )

        return BatchFeature(data={**text_inputs, **image_inputs}).data

    def batch_decode(self, *args, **kwargs):
        """
        This method forwards all its arguments to Qwen2TokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        """
        return self.tokenizer.batch_decode(*args, **kwargs)

    def decode(self, *args, **kwargs):
        """
        This method forwards all its arguments to Qwen2TokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        """
        return self.tokenizer.decode(*args, **kwargs)
183
+
184
+
185
def make_batched_images(images) -> List[ImageInput]:
    """
    Accepts images in list or nested list format, and makes a flat list of images for preprocessing.

    Args:
        images (`Union[List[List[ImageInput]], List[ImageInput], ImageInput]`):
            The input image(s): a single image, a list/tuple of images, or a list/tuple of lists/tuples of images.

    Returns:
        list: A flat list of images. A nested input is flattened, a flat list/tuple is returned unchanged and a
        single image is wrapped into a one-element list.

    Raises:
        ValueError: If `images` matches none of the supported layouts, including an empty list/tuple or a
            nested list/tuple whose first entry is empty.
    """
    # Guard every `[0]` access: an empty container must end in the ValueError below,
    # not in a bare IndexError raised while probing the first element.
    has_items = isinstance(images, (list, tuple)) and len(images) > 0

    if (
        has_items
        and isinstance(images[0], (list, tuple))
        and len(images[0]) > 0
        and is_valid_image(images[0][0])
    ):
        # Nested input: only the first inner element is probed before flattening.
        return [img for img_list in images for img in img_list]

    elif has_items and is_valid_image(images[0]):
        return images

    elif is_valid_image(images):
        return [images]

    raise ValueError(f"Could not make batched images from {images}")
210
+
211
+
212
class Qwen2VLImageProcessor(object):
    r"""
    Constructs a Qwen2-VL image processor that dynamically resizes images based on the original images.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions.
        resample (*optional*, defaults to `cv2.INTER_CUBIC`):
            Resampling filter stored on the processor as `self.resample`.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image.
        image_mean (`float` or `List[float]`, *optional*, defaults to `OPENAI_CLIP_MEAN`):
            Mean to use if normalizing the image. This is a float or list of floats for each channel in the image.
        image_std (`float` or `List[float]`, *optional*, defaults to `OPENAI_CLIP_STD`):
            Standard deviation to use if normalizing the image. This is a float or list of floats for each channel in the image.
        do_convert_rgb (`bool`, *optional*, defaults to `True`):
            Whether to convert the image to RGB.
        min_pixels (`int`, *optional*, defaults to `56 * 56`):
            The min pixels of the image to resize the image.
        max_pixels (`int`, *optional*, defaults to `28 * 28 * 1280`):
            The max pixels of the image to resize the image.
        patch_size (`int`, *optional*, defaults to 14):
            The spatial patch size of the vision encoder.
        temporal_patch_size (`int`, *optional*, defaults to 2):
            The temporal patch size of the vision encoder.
        merge_size (`int`, *optional*, defaults to 2):
            The merge size of the vision encoder to llm encoder.
        **kwargs:
            Extra keyword arguments are accepted and ignored.
    """

    def __init__(
        self,
        do_resize: bool = True,
        resample=None,
        do_rescale: bool = True,
        rescale_factor: float = 1 / 255.0,
        do_normalize: bool = True,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_convert_rgb: bool = True,
        min_pixels: int = 56 * 56,
        max_pixels: int = 28 * 28 * 1280,
        patch_size: int = 14,
        temporal_patch_size: int = 2,
        merge_size: int = 2,
        **kwargs,
    ) -> None:
        # The base class is `object`, whose __init__ rejects keyword arguments.
        # Forwarding **kwargs would turn any extra key into a TypeError, so the
        # extras are deliberately dropped here.
        super().__init__()
        import cv2

        self.do_resize = do_resize
        self.resample = cv2.INTER_CUBIC if resample is None else resample
        self.do_rescale = do_rescale
        self.rescale_factor = rescale_factor
        self.do_normalize = do_normalize
        self.do_convert_rgb = do_convert_rgb
        self.min_pixels = min_pixels
        self.max_pixels = max_pixels
        self.patch_size = patch_size
        self.temporal_patch_size = temporal_patch_size
        self.merge_size = merge_size
        self.size = {"min_pixels": min_pixels, "max_pixels": max_pixels}

        image_mean_ = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
        image_std_ = image_std if image_std is not None else OPENAI_CLIP_STD
        # Two leading singleton axes let the statistics broadcast over
        # (height, width, channels) arrays.
        self.image_mean = np.array(image_mean_)[None, None, ...]
        self.image_std = np.array(image_std_)[None, None, ...]

    def _preprocess(
        self,
        images,
        do_resize: bool = None,
        resample: PILImageResampling = None,
        do_rescale: bool = None,
        rescale_factor: float = None,
        do_normalize: bool = None,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_convert_rgb: bool = None,
        data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
    ):
        """
        Preprocess one image (or a list of same-sized frames) into flattened vision patches.

        Args:
            images (`ImageInput`):
                Image or batch of images to preprocess. Expects pixel values ranging from 0 to 255. If pixel values range from 0 to 1, set `do_rescale=False`.
                The target size is derived from the first image only and applied to every image.
            do_resize (`bool`, *optional*):
                Whether to resize the image. A value of `None` is treated as `False` here.
            resample (`PILImageResampling`, *optional*):
                Accepted for interface compatibility. It is not forwarded to `resize`, which is called with `interp=None`.
            do_rescale (`bool`, *optional*):
                Whether to rescale the image. A value of `None` is treated as `False` here.
            rescale_factor (`float`, *optional*):
                Scale factor to use if rescaling the image.
            do_normalize (`bool`, *optional*):
                Whether to normalize the image. Normalization requires channels-last input. A value of `None` is treated as `False` here.
            image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
                Mean to use if normalizing the image. Can be a float or a list of floats corresponding to the number of channels in the image.
            image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
                Standard deviation to use if normalizing the image. Can be a float or a list of floats corresponding to the number of channels in the image.
            do_convert_rgb (`bool`, *optional*):
                Whether to convert the image to RGB. A value of `None` is treated as `False` here.
            data_format (`ChannelDimension`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, it is inferred from the first image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.

        Returns:
            tuple: `(flatten_patches, (grid_t, grid_h, grid_w))`, where `flatten_patches` has shape
            `[grid_t * grid_h * grid_w, channel * temporal_patch_size * patch_size * patch_size]`.

        Raises:
            ValueError: If `do_normalize` is set and the input is not channels-last.
        """
        images = make_list_of_images(images)

        if do_convert_rgb:
            images = [convert_to_rgb(image) for image in images]

        # All transformations expect numpy arrays.
        images = [to_numpy_array(image) for image in images]

        if is_scaled_image(images[0]) and do_rescale:
            logging.warning(
                "It looks like you are trying to rescale already rescaled images. If the input"
                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
            )
        if input_data_format is None:
            # We assume that all images have the same channel dimension format.
            input_data_format = infer_channel_dimension_format(images[0])

        if do_normalize and input_data_format != ChannelDimension.LAST:
            # The statistics broadcast along the trailing axis, so only
            # channels-last input normalizes correctly. Raised explicitly so the
            # check survives `python -O`.
            raise ValueError(
                "Normalization requires channels-last input images, "
                f"got input_data_format={input_data_format}"
            )

        # Honor per-call statistics; fall back to the instance defaults.
        mean = self.image_mean if image_mean is None else np.asarray(image_mean)
        std = self.image_std if image_std is None else np.asarray(image_std)

        height, width = get_image_size(images[0], channel_dim=input_data_format)
        resized_height, resized_width = height, width
        if do_resize:
            # The target size depends only on the first image's size, so it is
            # computed once rather than once per image.
            resized_height, resized_width = smart_resize(
                height,
                width,
                factor=self.patch_size * self.merge_size,
                min_pixels=self.min_pixels,
                max_pixels=self.max_pixels,
            )

        processed_images = []
        for image in images:
            if do_resize:
                image = image.astype("uint8")
                # NOTE(review): `resample` is not passed on; `interp=None` is kept
                # so the numeric output is unchanged — confirm this is intended.
                image = resize(
                    image,
                    (resized_width, resized_height),
                    interp=None,
                    backend="cv2",
                )

            if do_rescale:
                image = image.astype("float32")
                image *= rescale_factor

            if do_normalize:
                image = (image - mean) / std

            image = to_channel_dimension_format(
                image, data_format, input_channel_dim=input_data_format
            )
            processed_images.append(image)

        patches = np.array(processed_images)
        if data_format == ChannelDimension.LAST:
            patches = patches.transpose([0, 3, 1, 2])
        if patches.shape[0] == 1:
            # A single image is repeated along the temporal axis so that it fills
            # one temporal patch.
            patches = np.tile(patches, (self.temporal_patch_size, 1, 1, 1))
        channel = patches.shape[1]
        grid_t = patches.shape[0] // self.temporal_patch_size
        grid_h, grid_w = (
            resized_height // self.patch_size,
            resized_width // self.patch_size,
        )
        patches = patches.reshape(
            [
                grid_t,
                self.temporal_patch_size,
                channel,
                grid_h // self.merge_size,
                self.merge_size,
                self.patch_size,
                grid_w // self.merge_size,
                self.merge_size,
                self.patch_size,
            ]
        )
        # Bring the grid axes to the front and the per-patch axes
        # (channel, temporal, patch height, patch width) to the back.
        patches = patches.transpose([0, 3, 6, 4, 7, 2, 1, 5, 8])
        flatten_patches = patches.reshape(
            [
                grid_t * grid_h * grid_w,
                channel * self.temporal_patch_size * self.patch_size * self.patch_size,
            ]
        )

        return flatten_patches, (grid_t, grid_h, grid_w)

    def preprocess(
        self,
        images: ImageInput,
        do_resize: bool = None,
        size: Dict[str, int] = None,
        resample: PILImageResampling = None,
        do_rescale: bool = None,
        rescale_factor: float = None,
        do_normalize: bool = None,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        do_convert_rgb: bool = None,
        return_tensors: Optional[Union[str, TensorType]] = None,
        data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
        input_data_format: Optional[Union[str, ChannelDimension]] = None,
    ):
        """
        Preprocess one or more images into `pixel_values` patches and their `image_grid_thw` grids.

        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`Dict[str, int]`, *optional*, defaults to `self.size`):
                Accepted for interface compatibility. It is resolved against `self.size` but not used by the
                resizing logic, which reads `self.min_pixels` and `self.max_pixels`.
            resample (`int`, *optional*, defaults to `self.resample`):
                Resampling filter. It is passed to `_preprocess`, which does not forward it to `resize`.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the image.
            rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
                Rescale factor to rescale the image by if `do_rescale` is set to `True`.
            do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
                Whether to normalize the image.
            image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
                Image mean to use for normalization. Only has an effect if `do_normalize` is set to `True`.
            image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
                Image standard deviation to use for normalization. Only has an effect if `do_normalize` is set to
                `True`.
            do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`):
                Whether to convert the image to RGB.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                - Unset: Return a list of `np.ndarray`.
                - `TensorType.PADDLE` or `'pd'`: Return a batch of type `paddle.Tensor`.
                - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.

        Returns:
            `BatchFeature` holding `pixel_values` and `image_grid_thw`, or an empty `BatchFeature`
            when `images` is `None`.

        Raises:
            ValueError: If the images cannot be batched or are of an unsupported type.
        """
        do_resize = do_resize if do_resize is not None else self.do_resize
        size = size if size is not None else self.size
        resample = resample if resample is not None else self.resample
        do_rescale = do_rescale if do_rescale is not None else self.do_rescale
        rescale_factor = (
            rescale_factor if rescale_factor is not None else self.rescale_factor
        )
        do_normalize = do_normalize if do_normalize is not None else self.do_normalize
        image_mean = image_mean if image_mean is not None else self.image_mean
        image_std = image_std if image_std is not None else self.image_std
        do_convert_rgb = (
            do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb
        )

        # Start from an empty payload so that `images=None` yields an empty
        # BatchFeature instead of referencing an unbound variable.
        data = {}

        if images is not None:
            images = make_batched_images(images)

            if not valid_images(images):
                raise ValueError(
                    "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
                    "paddle.Tensor."
                )

            pixel_values, vision_grid_thws = [], []
            for image in images:
                patches, image_grid_thw = self._preprocess(
                    image,
                    do_resize=do_resize,
                    resample=resample,
                    do_rescale=do_rescale,
                    rescale_factor=rescale_factor,
                    do_normalize=do_normalize,
                    image_mean=image_mean,
                    image_std=image_std,
                    data_format=data_format,
                    do_convert_rgb=do_convert_rgb,
                    input_data_format=input_data_format,
                )
                pixel_values.append(patches)
                vision_grid_thws.append(image_grid_thw)
            # Each entry is a 2-D (num_patches, patch_dim) array; stacking them
            # along axis 0 gives the same matrix as collecting the rows one by one.
            pixel_values = np.concatenate(pixel_values, axis=0)
            vision_grid_thws = np.array(vision_grid_thws)
            data = {"pixel_values": pixel_values, "image_grid_thw": vision_grid_thws}

        return BatchFeature(data=data, tensor_type=return_tensors)

    def __call__(self, images, **kwargs):
        """Shorthand for `self.preprocess(images, **kwargs)`."""
        return self.preprocess(images, **kwargs)
528
+
529
+
530
def round_by_factor(number: int, factor: int) -> int:
    """Return the multiple of `factor` nearest to `number`.

    The quotient is rounded with the built-in `round`, so an exact half goes to
    the even multiple.
    """
    multiples = round(number / factor)
    return multiples * factor
533
+
534
+
535
def ceil_by_factor(number: int, factor: int) -> int:
    """Return the smallest multiple of `factor` that is greater than or equal to `number`."""
    multiples = math.ceil(number / factor)
    return multiples * factor
538
+
539
+
540
def floor_by_factor(number: int, factor: int) -> int:
    """Return the largest multiple of `factor` that is less than or equal to `number`."""
    multiples = math.floor(number / factor)
    return multiples * factor
543
+
544
+
545
def smart_resize(
    height: int,
    width: int,
    factor: int = IMAGE_FACTOR,
    min_pixels: int = MIN_PIXELS,
    max_pixels: int = MAX_PIXELS,
) -> Tuple[int, int]:
    """
    Rescales the image so that the following conditions are met:

    1. Both dimensions (height and width) are divisible by 'factor'.

    2. The total number of pixels is within the range ['min_pixels', 'max_pixels'].

    3. The aspect ratio of the image is maintained as closely as possible.

    Each returned side is at least `factor`. When `max_pixels` is too small for that,
    the product of the returned sides can exceed `max_pixels`.

    Args:
        height: Original image height in pixels; must be positive.
        width: Original image width in pixels; must be positive.
        factor: Value both returned dimensions must be divisible by.
        min_pixels: Lower bound on `height * width` of the result.
        max_pixels: Upper bound on `height * width` of the result.

    Returns:
        The `(height, width)` pair to resize to.

    Raises:
        ValueError: If a dimension is not positive, or the aspect ratio exceeds `MAX_RATIO`.
    """
    if height <= 0 or width <= 0:
        raise ValueError(
            f"height and width must be positive, got height={height}, width={width}"
        )
    if max(height, width) / min(height, width) > MAX_RATIO:
        raise ValueError(
            f"absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(height, width) / min(height, width)}"
        )
    h_bar = max(factor, round_by_factor(height, factor))
    w_bar = max(factor, round_by_factor(width, factor))
    if h_bar * w_bar > max_pixels:
        beta = math.sqrt((height * width) / max_pixels)
        # Flooring can reach 0 for a short side; clamp so no side collapses.
        h_bar = max(factor, floor_by_factor(height / beta, factor))
        w_bar = max(factor, floor_by_factor(width / beta, factor))
    elif h_bar * w_bar < min_pixels:
        beta = math.sqrt(min_pixels / (height * width))
        h_bar = ceil_by_factor(height * beta, factor)
        w_bar = ceil_by_factor(width * beta, factor)
    return h_bar, w_bar
576
+
577
+
578
def fetch_image(
    ele: Dict[str, Union[str, Image.Image]], size_factor: int = IMAGE_FACTOR
) -> Image.Image:
    """
    Load an image from one of several sources, convert it to RGB and resize it with `smart_resize`.

    Args:
        ele: Either the image itself or a dict holding it under the `"image"` key
            (or `"image_url"` when `"image"` is absent). The image may be a
            `PIL.Image.Image`, a `numpy.ndarray`, an `http(s)://` URL, a `file://` URL,
            a base64 `data:image` URI, or a local path. The dict may also carry
            `"resized_height"`/`"resized_width"`, or `"min_pixels"`/`"max_pixels"`,
            to steer resizing.
        size_factor: Value both output dimensions must be divisible by.

    Returns:
        The resized RGB `PIL.Image.Image`.

    Raises:
        ValueError: If the image input is of an unrecognized kind.
    """
    if not isinstance(ele, dict):
        ele = {"image": ele}
    image = ele["image"] if "image" in ele else ele["image_url"]

    image_obj = None
    if isinstance(image, Image.Image):
        image_obj = image
    elif isinstance(image, np.ndarray):
        image_obj = Image.fromarray(image)
    elif isinstance(image, str):
        if image.startswith(("http://", "https://")):
            # Read the decoded body instead of the raw socket stream. This applies
            # any content-encoding, surfaces HTTP errors, and releases the
            # connection when the block exits.
            with requests.get(image, stream=True) as response:
                response.raise_for_status()
                image_obj = Image.open(BytesIO(response.content))
        elif image.startswith("file://"):
            image_obj = Image.open(image[7:])
        elif image.startswith("data:image"):
            # partition() tolerates a URI without ";": the payload is then empty
            # and the input is rejected by the ValueError below.
            _, _, payload = image.partition(";")
            if payload.startswith("base64,"):
                image_obj = Image.open(BytesIO(base64.b64decode(payload[7:])))
        else:
            image_obj = Image.open(image)
    if image_obj is None:
        raise ValueError(
            f"Unrecognized image input, support local path, http url, base64 and PIL.Image, got {image}"
        )
    image = image_obj.convert("RGB")

    # resize
    if "resized_height" in ele and "resized_width" in ele:
        resized_height, resized_width = smart_resize(
            ele["resized_height"],
            ele["resized_width"],
            factor=size_factor,
        )
    else:
        width, height = image.size  # PIL reports (width, height)
        min_pixels = ele.get("min_pixels", MIN_PIXELS)
        max_pixels = ele.get("max_pixels", MAX_PIXELS)
        resized_height, resized_width = smart_resize(
            height,
            width,
            factor=size_factor,
            min_pixels=min_pixels,
            max_pixels=max_pixels,
        )
    image = image.resize((resized_width, resized_height))

    return image
629
+
630
+
631
def extract_vision_info(
    conversations: Union[List[dict], List[List[dict]]]
) -> List[dict]:
    """
    Collect the image-bearing content items from one or more conversations.

    Args:
        conversations: A single conversation (list of message dicts) or a list of
            conversations. Each message's `"content"` is inspected only when it is a list.

    Returns:
        The content items that have an `"image"` or `"image_url"` key, or whose
        `"type"` is `"image"` or `"image_url"`, in encounter order. An empty input
        yields an empty list.
    """
    vision_infos = []
    if not conversations:
        return vision_infos
    if isinstance(conversations[0], dict):
        # A single conversation was passed; treat it as a batch of one.
        conversations = [conversations]
    for conversation in conversations:
        for message in conversation:
            content = message["content"]
            if not isinstance(content, list):
                continue
            for ele in content:
                # `.get` keeps items without a "type" key (e.g. {"text": ...})
                # from raising KeyError.
                if (
                    "image" in ele
                    or "image_url" in ele
                    or ele.get("type") in ("image", "image_url")
                ):
                    vision_infos.append(ele)
    return vision_infos
648
+
649
+
650
def process_vision_info(
    conversations: Union[List[dict], List[List[dict]]],
) -> Optional[List[Image.Image]]:
    """
    Load every image referenced in the given conversation(s).

    Args:
        conversations: A single conversation (list of message dicts) or a list of conversations.

    Returns:
        The images returned by `fetch_image`, one per image-bearing content item
        found by `extract_vision_info`, or `None` when there are none.

    Raises:
        ValueError: If an item was selected by `extract_vision_info` but has neither
            an `"image"` nor an `"image_url"` key.
    """
    image_inputs = []
    for vision_info in extract_vision_info(conversations):
        if "image" not in vision_info and "image_url" not in vision_info:
            raise ValueError("image, image_url should in content.")
        image_inputs.append(fetch_image(vision_info))
    # An empty result is reported as None rather than an empty list.
    return image_inputs if image_inputs else None
665
+
666
+
667
class PPDocBeeProcessor(Qwen2VLProcessor):
    """
    Processor for the PP-DocBee series, built on top of Qwen2VLProcessor.
    """

    @benchmark.timeit
    def preprocess(self, image: Union[str, Image.Image, np.ndarray], query: str):
        """
        Build the model inputs for one image and one text query.

        The image is loaded with `fetch_image`, the query is placed after a
        single image placeholder in the user turn of a fixed chat prompt, and
        both are passed to `self._preprocess`, which returns "pd" tensors.
        """
        loaded_image = fetch_image(image)
        image_pad_token = "<|vision_start|><|image_pad|><|vision_end|>"
        prompt = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{image_pad_token}{query}<|im_end|>\n<|im_start|>assistant\n"

        return self._preprocess(
            text=[prompt],
            images=[loaded_image],
            padding=False,
            return_tensors="pd",
        )

    @benchmark.timeit
    def postprocess(self, model_pred, *args, **kwargs):
        """
        Decode the first element of `model_pred` into text for PaddleX.

        Special tokens are skipped and tokenization spaces are left untouched.
        Extra positional and keyword arguments are accepted and ignored.
        """
        generated_ids = model_pred[0]
        decoded_texts = self.tokenizer.batch_decode(
            generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )
        return decoded_texts
698
+ )