paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (824) hide show
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +17 -34
  3. paddlex/__main__.py +1 -1
  4. paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
  5. paddlex/configs/modules/doc_vlm/PP-DocBee-2B.yaml +14 -0
  6. paddlex/configs/modules/doc_vlm/PP-DocBee-7B.yaml +14 -0
  7. paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
  8. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
  9. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
  10. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
  11. paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
  12. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
  13. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
  14. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
  15. paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
  16. paddlex/configs/modules/open_vocabulary_detection/YOLO-Worldv2-L.yaml +13 -0
  17. paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
  18. paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
  19. paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
  20. paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
  21. paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
  22. paddlex/configs/pipelines/OCR.yaml +7 -6
  23. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
  24. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
  25. paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
  26. paddlex/configs/pipelines/anomaly_detection.yaml +1 -1
  27. paddlex/configs/pipelines/doc_understanding.yaml +9 -0
  28. paddlex/configs/pipelines/formula_recognition.yaml +2 -2
  29. paddlex/configs/pipelines/layout_parsing.yaml +3 -2
  30. paddlex/configs/pipelines/seal_recognition.yaml +1 -0
  31. paddlex/configs/pipelines/table_recognition.yaml +2 -1
  32. paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
  33. paddlex/configs/pipelines/ts_anomaly_detection.yaml +1 -1
  34. paddlex/configs/pipelines/ts_classification.yaml +1 -1
  35. paddlex/configs/pipelines/ts_forecast.yaml +1 -1
  36. paddlex/constants.py +17 -0
  37. paddlex/engine.py +7 -5
  38. paddlex/hpip_links.html +23 -11
  39. paddlex/inference/__init__.py +3 -3
  40. paddlex/inference/common/__init__.py +1 -1
  41. paddlex/inference/common/batch_sampler/__init__.py +5 -4
  42. paddlex/inference/common/batch_sampler/audio_batch_sampler.py +5 -6
  43. paddlex/inference/common/batch_sampler/base_batch_sampler.py +20 -16
  44. paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +4 -7
  45. paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +87 -0
  46. paddlex/inference/common/batch_sampler/image_batch_sampler.py +45 -60
  47. paddlex/inference/common/batch_sampler/ts_batch_sampler.py +9 -10
  48. paddlex/inference/common/batch_sampler/video_batch_sampler.py +2 -22
  49. paddlex/inference/common/reader/__init__.py +4 -4
  50. paddlex/inference/common/reader/audio_reader.py +3 -3
  51. paddlex/inference/common/reader/det_3d_reader.py +7 -5
  52. paddlex/inference/common/reader/image_reader.py +16 -12
  53. paddlex/inference/common/reader/ts_reader.py +3 -2
  54. paddlex/inference/common/reader/video_reader.py +3 -3
  55. paddlex/inference/common/result/__init__.py +7 -7
  56. paddlex/inference/common/result/base_cv_result.py +12 -2
  57. paddlex/inference/common/result/base_result.py +7 -5
  58. paddlex/inference/common/result/base_ts_result.py +1 -2
  59. paddlex/inference/common/result/base_video_result.py +2 -2
  60. paddlex/inference/common/result/mixin.py +31 -25
  61. paddlex/inference/models/__init__.py +41 -85
  62. paddlex/inference/models/anomaly_detection/__init__.py +1 -1
  63. paddlex/inference/models/anomaly_detection/predictor.py +9 -19
  64. paddlex/inference/models/anomaly_detection/processors.py +9 -2
  65. paddlex/inference/models/anomaly_detection/result.py +3 -2
  66. paddlex/inference/models/base/__init__.py +2 -2
  67. paddlex/inference/models/base/predictor/__init__.py +1 -2
  68. paddlex/inference/models/base/predictor/base_predictor.py +278 -39
  69. paddlex/inference/models/common/__init__.py +6 -15
  70. paddlex/inference/models/common/static_infer.py +724 -251
  71. paddlex/inference/models/common/tokenizer/__init__.py +7 -3
  72. paddlex/inference/models/common/tokenizer/bert_tokenizer.py +1 -1
  73. paddlex/inference/models/common/tokenizer/clip_tokenizer.py +609 -0
  74. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +9 -7
  75. paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
  76. paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +438 -0
  77. paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
  78. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +85 -77
  79. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +339 -123
  80. paddlex/inference/models/common/tokenizer/utils.py +1 -1
  81. paddlex/inference/models/common/tokenizer/vocab.py +8 -8
  82. paddlex/inference/models/common/ts/__init__.py +1 -1
  83. paddlex/inference/models/common/ts/funcs.py +13 -6
  84. paddlex/inference/models/common/ts/processors.py +14 -5
  85. paddlex/inference/models/common/vision/__init__.py +3 -3
  86. paddlex/inference/models/common/vision/funcs.py +17 -12
  87. paddlex/inference/models/common/vision/processors.py +61 -46
  88. paddlex/inference/models/common/vlm/__init__.py +13 -0
  89. paddlex/inference/models/common/vlm/activations.py +189 -0
  90. paddlex/inference/models/common/vlm/bert_padding.py +127 -0
  91. paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
  92. paddlex/inference/models/common/vlm/distributed.py +229 -0
  93. paddlex/inference/models/common/vlm/flash_attn_utils.py +119 -0
  94. paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
  95. paddlex/inference/models/common/vlm/generation/__init__.py +34 -0
  96. paddlex/inference/models/common/vlm/generation/configuration_utils.py +533 -0
  97. paddlex/inference/models/common/vlm/generation/logits_process.py +730 -0
  98. paddlex/inference/models/common/vlm/generation/stopping_criteria.py +106 -0
  99. paddlex/inference/models/common/vlm/generation/utils.py +2162 -0
  100. paddlex/inference/models/common/vlm/transformers/__init__.py +16 -0
  101. paddlex/inference/models/common/vlm/transformers/configuration_utils.py +1037 -0
  102. paddlex/inference/models/common/vlm/transformers/conversion_utils.py +408 -0
  103. paddlex/inference/models/common/vlm/transformers/model_outputs.py +1612 -0
  104. paddlex/inference/models/common/vlm/transformers/model_utils.py +2014 -0
  105. paddlex/inference/models/common/vlm/transformers/utils.py +178 -0
  106. paddlex/inference/models/common/vlm/utils.py +109 -0
  107. paddlex/inference/models/doc_vlm/__init__.py +15 -0
  108. paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
  109. paddlex/inference/models/doc_vlm/modeling/__init__.py +17 -0
  110. paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
  111. paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
  112. paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +2495 -0
  113. paddlex/inference/models/doc_vlm/predictor.py +253 -0
  114. paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
  115. paddlex/inference/models/doc_vlm/processors/__init__.py +17 -0
  116. paddlex/inference/models/doc_vlm/processors/common.py +561 -0
  117. paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
  118. paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +543 -0
  119. paddlex/inference/models/doc_vlm/result.py +21 -0
  120. paddlex/inference/models/face_feature/__init__.py +1 -1
  121. paddlex/inference/models/face_feature/predictor.py +2 -1
  122. paddlex/inference/models/formula_recognition/__init__.py +1 -1
  123. paddlex/inference/models/formula_recognition/predictor.py +18 -28
  124. paddlex/inference/models/formula_recognition/processors.py +126 -97
  125. paddlex/inference/models/formula_recognition/result.py +43 -35
  126. paddlex/inference/models/image_classification/__init__.py +1 -1
  127. paddlex/inference/models/image_classification/predictor.py +9 -19
  128. paddlex/inference/models/image_classification/processors.py +4 -2
  129. paddlex/inference/models/image_classification/result.py +4 -3
  130. paddlex/inference/models/image_feature/__init__.py +1 -1
  131. paddlex/inference/models/image_feature/predictor.py +9 -19
  132. paddlex/inference/models/image_feature/processors.py +7 -5
  133. paddlex/inference/models/image_feature/result.py +2 -3
  134. paddlex/inference/models/image_multilabel_classification/__init__.py +1 -1
  135. paddlex/inference/models/image_multilabel_classification/predictor.py +7 -6
  136. paddlex/inference/models/image_multilabel_classification/processors.py +6 -2
  137. paddlex/inference/models/image_multilabel_classification/result.py +4 -3
  138. paddlex/inference/models/image_unwarping/__init__.py +1 -1
  139. paddlex/inference/models/image_unwarping/predictor.py +8 -16
  140. paddlex/inference/models/image_unwarping/processors.py +6 -2
  141. paddlex/inference/models/image_unwarping/result.py +4 -2
  142. paddlex/inference/models/instance_segmentation/__init__.py +1 -1
  143. paddlex/inference/models/instance_segmentation/predictor.py +7 -15
  144. paddlex/inference/models/instance_segmentation/processors.py +4 -7
  145. paddlex/inference/models/instance_segmentation/result.py +11 -10
  146. paddlex/inference/models/keypoint_detection/__init__.py +1 -1
  147. paddlex/inference/models/keypoint_detection/predictor.py +5 -3
  148. paddlex/inference/models/keypoint_detection/processors.py +11 -3
  149. paddlex/inference/models/keypoint_detection/result.py +9 -4
  150. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
  151. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/predictor.py +15 -26
  152. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/processors.py +26 -14
  153. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/result.py +15 -12
  154. paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/visualizer_3d.py +77 -39
  155. paddlex/inference/models/multilingual_speech_recognition/__init__.py +1 -1
  156. paddlex/inference/models/multilingual_speech_recognition/predictor.py +11 -15
  157. paddlex/inference/models/multilingual_speech_recognition/processors.py +45 -53
  158. paddlex/inference/models/multilingual_speech_recognition/result.py +1 -1
  159. paddlex/inference/models/object_detection/__init__.py +1 -1
  160. paddlex/inference/models/object_detection/predictor.py +8 -12
  161. paddlex/inference/models/object_detection/processors.py +63 -33
  162. paddlex/inference/models/object_detection/result.py +5 -4
  163. paddlex/inference/models/object_detection/utils.py +3 -1
  164. paddlex/inference/models/open_vocabulary_detection/__init__.py +1 -1
  165. paddlex/inference/models/open_vocabulary_detection/predictor.py +31 -14
  166. paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +3 -2
  167. paddlex/inference/models/open_vocabulary_detection/processors/common.py +114 -0
  168. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +19 -8
  169. paddlex/inference/models/open_vocabulary_detection/processors/yoloworld_processors.py +209 -0
  170. paddlex/inference/models/open_vocabulary_segmentation/__init__.py +1 -1
  171. paddlex/inference/models/open_vocabulary_segmentation/predictor.py +6 -13
  172. paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +1 -1
  173. paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +12 -12
  174. paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +1 -1
  175. paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +11 -9
  176. paddlex/inference/models/semantic_segmentation/__init__.py +1 -1
  177. paddlex/inference/models/semantic_segmentation/predictor.py +9 -18
  178. paddlex/inference/models/semantic_segmentation/processors.py +11 -8
  179. paddlex/inference/models/semantic_segmentation/result.py +4 -3
  180. paddlex/inference/models/table_structure_recognition/__init__.py +1 -1
  181. paddlex/inference/models/table_structure_recognition/predictor.py +8 -18
  182. paddlex/inference/models/table_structure_recognition/processors.py +23 -29
  183. paddlex/inference/models/table_structure_recognition/result.py +8 -15
  184. paddlex/inference/models/text_detection/__init__.py +1 -1
  185. paddlex/inference/models/text_detection/predictor.py +24 -24
  186. paddlex/inference/models/text_detection/processors.py +116 -44
  187. paddlex/inference/models/text_detection/result.py +8 -13
  188. paddlex/inference/models/text_recognition/__init__.py +1 -1
  189. paddlex/inference/models/text_recognition/predictor.py +11 -19
  190. paddlex/inference/models/text_recognition/processors.py +27 -13
  191. paddlex/inference/models/text_recognition/result.py +3 -2
  192. paddlex/inference/models/ts_anomaly_detection/__init__.py +1 -1
  193. paddlex/inference/models/ts_anomaly_detection/predictor.py +12 -17
  194. paddlex/inference/models/ts_anomaly_detection/processors.py +6 -2
  195. paddlex/inference/models/ts_anomaly_detection/result.py +21 -10
  196. paddlex/inference/models/ts_classification/__init__.py +1 -1
  197. paddlex/inference/models/ts_classification/predictor.py +14 -27
  198. paddlex/inference/models/ts_classification/processors.py +7 -2
  199. paddlex/inference/models/ts_classification/result.py +21 -12
  200. paddlex/inference/models/ts_forecasting/__init__.py +1 -1
  201. paddlex/inference/models/ts_forecasting/predictor.py +13 -18
  202. paddlex/inference/models/ts_forecasting/processors.py +12 -3
  203. paddlex/inference/models/ts_forecasting/result.py +24 -11
  204. paddlex/inference/models/video_classification/__init__.py +1 -1
  205. paddlex/inference/models/video_classification/predictor.py +9 -15
  206. paddlex/inference/models/video_classification/processors.py +24 -24
  207. paddlex/inference/models/video_classification/result.py +7 -3
  208. paddlex/inference/models/video_detection/__init__.py +1 -1
  209. paddlex/inference/models/video_detection/predictor.py +8 -15
  210. paddlex/inference/models/video_detection/processors.py +24 -11
  211. paddlex/inference/models/video_detection/result.py +10 -5
  212. paddlex/inference/pipelines/__init__.py +48 -37
  213. paddlex/inference/pipelines/_parallel.py +172 -0
  214. paddlex/inference/pipelines/anomaly_detection/__init__.py +1 -1
  215. paddlex/inference/pipelines/anomaly_detection/pipeline.py +29 -9
  216. paddlex/inference/pipelines/attribute_recognition/__init__.py +1 -1
  217. paddlex/inference/pipelines/attribute_recognition/pipeline.py +24 -9
  218. paddlex/inference/pipelines/attribute_recognition/result.py +10 -8
  219. paddlex/inference/pipelines/base.py +43 -13
  220. paddlex/inference/pipelines/components/__init__.py +14 -8
  221. paddlex/inference/pipelines/components/chat_server/__init__.py +1 -1
  222. paddlex/inference/pipelines/components/chat_server/base.py +2 -2
  223. paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +8 -8
  224. paddlex/inference/pipelines/components/common/__init__.py +5 -4
  225. paddlex/inference/pipelines/components/common/base_operator.py +2 -1
  226. paddlex/inference/pipelines/components/common/base_result.py +3 -2
  227. paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +1 -2
  228. paddlex/inference/pipelines/components/common/crop_image_regions.py +11 -5
  229. paddlex/inference/pipelines/components/common/seal_det_warp.py +44 -13
  230. paddlex/inference/pipelines/components/common/sort_boxes.py +4 -2
  231. paddlex/inference/pipelines/components/common/warp_image.py +50 -0
  232. paddlex/inference/pipelines/components/faisser.py +10 -5
  233. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +2 -2
  234. paddlex/inference/pipelines/components/prompt_engineering/base.py +2 -2
  235. paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +2 -1
  236. paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +2 -2
  237. paddlex/inference/pipelines/components/retriever/__init__.py +2 -2
  238. paddlex/inference/pipelines/components/retriever/base.py +18 -16
  239. paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +2 -2
  240. paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +87 -84
  241. paddlex/inference/pipelines/components/utils/__init__.py +1 -1
  242. paddlex/inference/pipelines/components/utils/mixin.py +7 -7
  243. paddlex/inference/pipelines/doc_preprocessor/__init__.py +1 -1
  244. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +70 -51
  245. paddlex/inference/pipelines/doc_preprocessor/result.py +5 -10
  246. paddlex/inference/pipelines/doc_understanding/__init__.py +15 -0
  247. paddlex/inference/pipelines/doc_understanding/pipeline.py +71 -0
  248. paddlex/inference/pipelines/face_recognition/__init__.py +1 -1
  249. paddlex/inference/pipelines/face_recognition/pipeline.py +3 -1
  250. paddlex/inference/pipelines/face_recognition/result.py +3 -2
  251. paddlex/inference/pipelines/formula_recognition/__init__.py +1 -1
  252. paddlex/inference/pipelines/formula_recognition/pipeline.py +137 -93
  253. paddlex/inference/pipelines/formula_recognition/result.py +20 -29
  254. paddlex/inference/pipelines/image_classification/__init__.py +1 -1
  255. paddlex/inference/pipelines/image_classification/pipeline.py +30 -11
  256. paddlex/inference/pipelines/image_multilabel_classification/__init__.py +1 -1
  257. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +31 -12
  258. paddlex/inference/pipelines/instance_segmentation/__init__.py +1 -1
  259. paddlex/inference/pipelines/instance_segmentation/pipeline.py +30 -9
  260. paddlex/inference/pipelines/keypoint_detection/__init__.py +1 -1
  261. paddlex/inference/pipelines/keypoint_detection/pipeline.py +30 -9
  262. paddlex/inference/pipelines/layout_parsing/__init__.py +1 -1
  263. paddlex/inference/pipelines/layout_parsing/pipeline.py +54 -56
  264. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +904 -261
  265. paddlex/inference/pipelines/layout_parsing/result.py +9 -21
  266. paddlex/inference/pipelines/layout_parsing/result_v2.py +525 -250
  267. paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
  268. paddlex/inference/pipelines/layout_parsing/utils.py +570 -2004
  269. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
  270. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
  271. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
  272. paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
  273. paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/pipeline.py +17 -10
  274. paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +1 -1
  275. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +17 -6
  276. paddlex/inference/pipelines/object_detection/__init__.py +1 -1
  277. paddlex/inference/pipelines/object_detection/pipeline.py +29 -9
  278. paddlex/inference/pipelines/ocr/__init__.py +1 -1
  279. paddlex/inference/pipelines/ocr/pipeline.py +151 -77
  280. paddlex/inference/pipelines/ocr/result.py +31 -24
  281. paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +1 -1
  282. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +17 -6
  283. paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +1 -1
  284. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +17 -6
  285. paddlex/inference/pipelines/pp_chatocr/__init__.py +1 -1
  286. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +14 -5
  287. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +22 -14
  288. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +34 -16
  289. paddlex/inference/pipelines/pp_shitu_v2/__init__.py +1 -1
  290. paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +12 -8
  291. paddlex/inference/pipelines/pp_shitu_v2/result.py +4 -4
  292. paddlex/inference/pipelines/rotated_object_detection/__init__.py +1 -1
  293. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +30 -9
  294. paddlex/inference/pipelines/seal_recognition/__init__.py +1 -1
  295. paddlex/inference/pipelines/seal_recognition/pipeline.py +127 -63
  296. paddlex/inference/pipelines/seal_recognition/result.py +4 -2
  297. paddlex/inference/pipelines/semantic_segmentation/__init__.py +1 -1
  298. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +30 -9
  299. paddlex/inference/pipelines/small_object_detection/__init__.py +1 -1
  300. paddlex/inference/pipelines/small_object_detection/pipeline.py +30 -9
  301. paddlex/inference/pipelines/table_recognition/__init__.py +1 -1
  302. paddlex/inference/pipelines/table_recognition/pipeline.py +61 -37
  303. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +668 -65
  304. paddlex/inference/pipelines/table_recognition/result.py +12 -10
  305. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +12 -8
  306. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +55 -37
  307. paddlex/inference/pipelines/table_recognition/utils.py +1 -1
  308. paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +1 -1
  309. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +16 -6
  310. paddlex/inference/pipelines/ts_classification/__init__.py +1 -1
  311. paddlex/inference/pipelines/ts_classification/pipeline.py +16 -6
  312. paddlex/inference/pipelines/ts_forecasting/__init__.py +1 -1
  313. paddlex/inference/pipelines/ts_forecasting/pipeline.py +16 -6
  314. paddlex/inference/pipelines/video_classification/__init__.py +1 -1
  315. paddlex/inference/pipelines/video_classification/pipeline.py +17 -6
  316. paddlex/inference/pipelines/video_detection/__init__.py +1 -1
  317. paddlex/inference/pipelines/video_detection/pipeline.py +20 -7
  318. paddlex/inference/serving/__init__.py +5 -1
  319. paddlex/inference/serving/basic_serving/__init__.py +1 -1
  320. paddlex/inference/serving/basic_serving/_app.py +31 -19
  321. paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +7 -4
  322. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +1 -1
  323. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +12 -4
  324. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +1 -1
  325. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +7 -2
  326. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +10 -7
  327. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +10 -7
  328. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_understanding.py +153 -0
  329. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +16 -13
  330. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +10 -7
  331. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +10 -7
  332. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +10 -7
  333. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +10 -7
  334. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +13 -7
  335. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +10 -8
  336. paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +10 -7
  337. paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +10 -7
  338. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +10 -7
  339. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +10 -7
  340. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +10 -7
  341. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +13 -7
  342. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +10 -7
  343. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -12
  344. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +17 -14
  345. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +16 -13
  346. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +16 -9
  347. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +10 -7
  348. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +10 -7
  349. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +10 -7
  350. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +10 -7
  351. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +11 -12
  352. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +14 -12
  353. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +10 -7
  354. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +10 -7
  355. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +10 -7
  356. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +10 -7
  357. paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +10 -7
  358. paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +10 -7
  359. paddlex/inference/serving/basic_serving/_server.py +9 -4
  360. paddlex/inference/serving/infra/__init__.py +1 -1
  361. paddlex/inference/serving/infra/config.py +1 -1
  362. paddlex/inference/serving/infra/models.py +13 -6
  363. paddlex/inference/serving/infra/storage.py +9 -4
  364. paddlex/inference/serving/infra/utils.py +54 -28
  365. paddlex/inference/serving/schemas/__init__.py +1 -1
  366. paddlex/inference/serving/schemas/anomaly_detection.py +1 -1
  367. paddlex/inference/serving/schemas/doc_preprocessor.py +1 -1
  368. paddlex/inference/serving/schemas/doc_understanding.py +78 -0
  369. paddlex/inference/serving/schemas/face_recognition.py +1 -1
  370. paddlex/inference/serving/schemas/formula_recognition.py +2 -2
  371. paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -1
  372. paddlex/inference/serving/schemas/image_classification.py +1 -1
  373. paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -1
  374. paddlex/inference/serving/schemas/instance_segmentation.py +1 -1
  375. paddlex/inference/serving/schemas/layout_parsing.py +2 -3
  376. paddlex/inference/serving/schemas/m_3d_bev_detection.py +1 -1
  377. paddlex/inference/serving/schemas/multilingual_speech_recognition.py +1 -1
  378. paddlex/inference/serving/schemas/object_detection.py +1 -1
  379. paddlex/inference/serving/schemas/ocr.py +1 -1
  380. paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -1
  381. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -1
  382. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -1
  383. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +2 -3
  384. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +3 -3
  385. paddlex/inference/serving/schemas/pp_shituv2.py +1 -1
  386. paddlex/inference/serving/schemas/pp_structurev3.py +11 -7
  387. paddlex/inference/serving/schemas/rotated_object_detection.py +1 -1
  388. paddlex/inference/serving/schemas/seal_recognition.py +2 -2
  389. paddlex/inference/serving/schemas/semantic_segmentation.py +1 -1
  390. paddlex/inference/serving/schemas/shared/__init__.py +1 -1
  391. paddlex/inference/serving/schemas/shared/classification.py +1 -1
  392. paddlex/inference/serving/schemas/shared/image_segmentation.py +1 -1
  393. paddlex/inference/serving/schemas/shared/object_detection.py +1 -1
  394. paddlex/inference/serving/schemas/shared/ocr.py +1 -1
  395. paddlex/inference/serving/schemas/small_object_detection.py +1 -1
  396. paddlex/inference/serving/schemas/table_recognition.py +3 -7
  397. paddlex/inference/serving/schemas/table_recognition_v2.py +6 -7
  398. paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -1
  399. paddlex/inference/serving/schemas/ts_classification.py +1 -1
  400. paddlex/inference/serving/schemas/ts_forecast.py +1 -1
  401. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -1
  402. paddlex/inference/serving/schemas/video_classification.py +1 -1
  403. paddlex/inference/serving/schemas/video_detection.py +1 -1
  404. paddlex/inference/utils/__init__.py +1 -1
  405. paddlex/inference/utils/benchmark.py +332 -179
  406. paddlex/inference/utils/color_map.py +1 -1
  407. paddlex/inference/utils/get_pipeline_path.py +1 -1
  408. paddlex/inference/utils/hpi.py +258 -0
  409. paddlex/inference/utils/hpi_model_info_collection.json +2331 -0
  410. paddlex/inference/utils/io/__init__.py +11 -11
  411. paddlex/inference/utils/io/readers.py +31 -27
  412. paddlex/inference/utils/io/style.py +21 -14
  413. paddlex/inference/utils/io/tablepyxl.py +13 -5
  414. paddlex/inference/utils/io/writers.py +9 -10
  415. paddlex/inference/utils/mkldnn_blocklist.py +25 -0
  416. paddlex/inference/utils/model_paths.py +48 -0
  417. paddlex/inference/utils/{new_ir_blacklist.py → new_ir_blocklist.py} +1 -2
  418. paddlex/inference/utils/official_models.py +278 -262
  419. paddlex/inference/utils/pp_option.py +184 -92
  420. paddlex/inference/utils/trt_blocklist.py +43 -0
  421. paddlex/inference/utils/trt_config.py +420 -0
  422. paddlex/model.py +30 -12
  423. paddlex/modules/__init__.py +57 -80
  424. paddlex/modules/anomaly_detection/__init__.py +2 -2
  425. paddlex/modules/anomaly_detection/dataset_checker/__init__.py +2 -3
  426. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
  427. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +6 -3
  428. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/check_dataset.py +8 -4
  429. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +7 -4
  430. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/split_dataset.py +2 -2
  431. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  432. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/visualizer.py +7 -2
  433. paddlex/modules/anomaly_detection/evaluator.py +3 -3
  434. paddlex/modules/anomaly_detection/exportor.py +1 -1
  435. paddlex/modules/anomaly_detection/model_list.py +1 -1
  436. paddlex/modules/anomaly_detection/trainer.py +3 -4
  437. paddlex/modules/base/__init__.py +5 -5
  438. paddlex/modules/base/build_model.py +1 -2
  439. paddlex/modules/base/dataset_checker/__init__.py +2 -2
  440. paddlex/modules/base/dataset_checker/dataset_checker.py +4 -4
  441. paddlex/modules/base/dataset_checker/utils.py +1 -3
  442. paddlex/modules/base/evaluator.py +13 -13
  443. paddlex/modules/base/exportor.py +12 -13
  444. paddlex/modules/base/trainer.py +21 -11
  445. paddlex/modules/base/utils/__init__.py +13 -0
  446. paddlex/modules/base/utils/cinn_setting.py +89 -0
  447. paddlex/modules/base/utils/coco_eval.py +94 -0
  448. paddlex/modules/base/utils/topk_eval.py +118 -0
  449. paddlex/modules/doc_vlm/__init__.py +18 -0
  450. paddlex/modules/doc_vlm/dataset_checker.py +29 -0
  451. paddlex/modules/doc_vlm/evaluator.py +29 -0
  452. paddlex/modules/doc_vlm/exportor.py +29 -0
  453. paddlex/modules/doc_vlm/model_list.py +16 -0
  454. paddlex/modules/doc_vlm/trainer.py +41 -0
  455. paddlex/modules/face_recognition/__init__.py +2 -2
  456. paddlex/modules/face_recognition/dataset_checker/__init__.py +2 -2
  457. paddlex/modules/face_recognition/dataset_checker/dataset_src/__init__.py +1 -1
  458. paddlex/modules/face_recognition/dataset_checker/dataset_src/check_dataset.py +3 -5
  459. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
  460. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  461. paddlex/modules/face_recognition/evaluator.py +3 -3
  462. paddlex/modules/face_recognition/exportor.py +1 -1
  463. paddlex/modules/face_recognition/model_list.py +1 -1
  464. paddlex/modules/face_recognition/trainer.py +1 -1
  465. paddlex/modules/formula_recognition/__init__.py +2 -2
  466. paddlex/modules/formula_recognition/dataset_checker/__init__.py +3 -3
  467. paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  468. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
  469. paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +2 -6
  470. paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
  471. paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
  472. paddlex/modules/formula_recognition/evaluator.py +6 -3
  473. paddlex/modules/formula_recognition/exportor.py +1 -1
  474. paddlex/modules/formula_recognition/model_list.py +4 -1
  475. paddlex/modules/formula_recognition/trainer.py +5 -3
  476. paddlex/modules/general_recognition/__init__.py +2 -2
  477. paddlex/modules/general_recognition/dataset_checker/__init__.py +2 -2
  478. paddlex/modules/general_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  479. paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +7 -9
  480. paddlex/modules/general_recognition/dataset_checker/dataset_src/check_dataset.py +4 -5
  481. paddlex/modules/general_recognition/dataset_checker/dataset_src/convert_dataset.py +6 -5
  482. paddlex/modules/general_recognition/dataset_checker/dataset_src/split_dataset.py +1 -1
  483. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
  484. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  485. paddlex/modules/general_recognition/evaluator.py +2 -2
  486. paddlex/modules/general_recognition/exportor.py +1 -1
  487. paddlex/modules/general_recognition/model_list.py +1 -1
  488. paddlex/modules/general_recognition/trainer.py +1 -1
  489. paddlex/modules/image_classification/__init__.py +2 -2
  490. paddlex/modules/image_classification/dataset_checker/__init__.py +2 -2
  491. paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py +2 -2
  492. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  493. paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
  494. paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py +4 -4
  495. paddlex/modules/image_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  496. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
  497. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -5
  498. paddlex/modules/image_classification/evaluator.py +3 -3
  499. paddlex/modules/image_classification/exportor.py +1 -1
  500. paddlex/modules/image_classification/model_list.py +2 -1
  501. paddlex/modules/image_classification/trainer.py +3 -3
  502. paddlex/modules/image_unwarping/__init__.py +1 -1
  503. paddlex/modules/image_unwarping/model_list.py +1 -1
  504. paddlex/modules/instance_segmentation/__init__.py +2 -2
  505. paddlex/modules/instance_segmentation/dataset_checker/__init__.py +2 -3
  506. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
  507. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +9 -5
  508. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/check_dataset.py +8 -5
  509. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/convert_dataset.py +8 -8
  510. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/split_dataset.py +7 -4
  511. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
  512. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +10 -8
  513. paddlex/modules/instance_segmentation/evaluator.py +2 -2
  514. paddlex/modules/instance_segmentation/exportor.py +1 -1
  515. paddlex/modules/instance_segmentation/model_list.py +1 -1
  516. paddlex/modules/instance_segmentation/trainer.py +1 -1
  517. paddlex/modules/keypoint_detection/__init__.py +2 -2
  518. paddlex/modules/keypoint_detection/dataset_checker/__init__.py +2 -2
  519. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +1 -1
  520. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
  521. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  522. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +8 -3
  523. paddlex/modules/keypoint_detection/evaluator.py +2 -2
  524. paddlex/modules/keypoint_detection/exportor.py +1 -1
  525. paddlex/modules/keypoint_detection/model_list.py +1 -1
  526. paddlex/modules/keypoint_detection/trainer.py +2 -2
  527. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/__init__.py +2 -2
  528. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/__init__.py +3 -3
  529. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/__init__.py +2 -2
  530. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/analyse_dataset.py +8 -8
  531. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/check_dataset.py +1 -2
  532. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/evaluator.py +3 -3
  533. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/exportor.py +1 -1
  534. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/model_list.py +1 -1
  535. paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/trainer.py +5 -7
  536. paddlex/modules/multilabel_classification/__init__.py +2 -2
  537. paddlex/modules/multilabel_classification/dataset_checker/__init__.py +2 -2
  538. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/__init__.py +2 -2
  539. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  540. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
  541. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/convert_dataset.py +10 -7
  542. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  543. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
  544. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +1 -5
  545. paddlex/modules/multilabel_classification/evaluator.py +3 -3
  546. paddlex/modules/multilabel_classification/exportor.py +1 -1
  547. paddlex/modules/multilabel_classification/model_list.py +1 -1
  548. paddlex/modules/multilabel_classification/trainer.py +3 -3
  549. paddlex/modules/multilingual_speech_recognition/__init__.py +2 -2
  550. paddlex/modules/multilingual_speech_recognition/dataset_checker.py +3 -3
  551. paddlex/modules/multilingual_speech_recognition/evaluator.py +3 -3
  552. paddlex/modules/multilingual_speech_recognition/exportor.py +3 -3
  553. paddlex/modules/multilingual_speech_recognition/model_list.py +1 -1
  554. paddlex/modules/multilingual_speech_recognition/trainer.py +7 -5
  555. paddlex/modules/object_detection/__init__.py +2 -2
  556. paddlex/modules/object_detection/dataset_checker/__init__.py +2 -11
  557. paddlex/modules/object_detection/dataset_checker/dataset_src/__init__.py +2 -2
  558. paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +10 -8
  559. paddlex/modules/object_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
  560. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +17 -12
  561. paddlex/modules/object_detection/dataset_checker/dataset_src/split_dataset.py +8 -4
  562. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
  563. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +9 -8
  564. paddlex/modules/object_detection/evaluator.py +11 -6
  565. paddlex/modules/object_detection/exportor.py +1 -1
  566. paddlex/modules/object_detection/model_list.py +3 -1
  567. paddlex/modules/object_detection/trainer.py +4 -5
  568. paddlex/modules/open_vocabulary_detection/__init__.py +2 -2
  569. paddlex/modules/open_vocabulary_detection/dataset_checker.py +3 -3
  570. paddlex/modules/open_vocabulary_detection/evaluator.py +3 -3
  571. paddlex/modules/open_vocabulary_detection/exportor.py +3 -3
  572. paddlex/modules/open_vocabulary_detection/model_list.py +2 -4
  573. paddlex/modules/open_vocabulary_detection/trainer.py +7 -5
  574. paddlex/modules/open_vocabulary_segmentation/__init__.py +2 -2
  575. paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +3 -3
  576. paddlex/modules/open_vocabulary_segmentation/evaluator.py +3 -3
  577. paddlex/modules/open_vocabulary_segmentation/exportor.py +3 -3
  578. paddlex/modules/open_vocabulary_segmentation/model_list.py +1 -1
  579. paddlex/modules/open_vocabulary_segmentation/trainer.py +7 -5
  580. paddlex/modules/semantic_segmentation/__init__.py +2 -2
  581. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +2 -3
  582. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
  583. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/analyse_dataset.py +6 -3
  584. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/check_dataset.py +2 -2
  585. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/convert_dataset.py +7 -4
  586. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/split_dataset.py +2 -2
  587. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
  588. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/visualizer.py +6 -2
  589. paddlex/modules/semantic_segmentation/evaluator.py +3 -3
  590. paddlex/modules/semantic_segmentation/exportor.py +1 -1
  591. paddlex/modules/semantic_segmentation/model_list.py +1 -1
  592. paddlex/modules/semantic_segmentation/trainer.py +3 -4
  593. paddlex/modules/table_recognition/__init__.py +2 -2
  594. paddlex/modules/table_recognition/dataset_checker/__init__.py +5 -5
  595. paddlex/modules/table_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  596. paddlex/modules/table_recognition/dataset_checker/dataset_src/analyse_dataset.py +3 -2
  597. paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +8 -7
  598. paddlex/modules/table_recognition/dataset_checker/dataset_src/split_dataset.py +2 -1
  599. paddlex/modules/table_recognition/evaluator.py +3 -3
  600. paddlex/modules/table_recognition/exportor.py +1 -1
  601. paddlex/modules/table_recognition/model_list.py +1 -1
  602. paddlex/modules/table_recognition/trainer.py +2 -5
  603. paddlex/modules/text_detection/__init__.py +2 -2
  604. paddlex/modules/text_detection/dataset_checker/__init__.py +4 -6
  605. paddlex/modules/text_detection/dataset_checker/dataset_src/__init__.py +2 -2
  606. paddlex/modules/text_detection/dataset_checker/dataset_src/analyse_dataset.py +12 -9
  607. paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +3 -3
  608. paddlex/modules/text_detection/dataset_checker/dataset_src/split_dataset.py +3 -3
  609. paddlex/modules/text_detection/evaluator.py +3 -3
  610. paddlex/modules/text_detection/exportor.py +1 -1
  611. paddlex/modules/text_detection/model_list.py +3 -1
  612. paddlex/modules/text_detection/trainer.py +2 -5
  613. paddlex/modules/text_recognition/__init__.py +2 -2
  614. paddlex/modules/text_recognition/dataset_checker/__init__.py +4 -5
  615. paddlex/modules/text_recognition/dataset_checker/dataset_src/__init__.py +2 -2
  616. paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
  617. paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +2 -5
  618. paddlex/modules/text_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
  619. paddlex/modules/text_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
  620. paddlex/modules/text_recognition/evaluator.py +3 -3
  621. paddlex/modules/text_recognition/exportor.py +1 -1
  622. paddlex/modules/text_recognition/model_list.py +3 -1
  623. paddlex/modules/text_recognition/trainer.py +2 -3
  624. paddlex/modules/ts_anomaly_detection/__init__.py +2 -2
  625. paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +4 -5
  626. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
  627. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +1 -9
  628. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/check_dataset.py +2 -2
  629. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -6
  630. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/split_dataset.py +4 -4
  631. paddlex/modules/ts_anomaly_detection/evaluator.py +3 -3
  632. paddlex/modules/ts_anomaly_detection/exportor.py +2 -3
  633. paddlex/modules/ts_anomaly_detection/model_list.py +1 -1
  634. paddlex/modules/ts_anomaly_detection/trainer.py +8 -8
  635. paddlex/modules/ts_classification/__init__.py +2 -2
  636. paddlex/modules/ts_classification/dataset_checker/__init__.py +4 -5
  637. paddlex/modules/ts_classification/dataset_checker/dataset_src/__init__.py +2 -2
  638. paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -5
  639. paddlex/modules/ts_classification/dataset_checker/dataset_src/check_dataset.py +2 -2
  640. paddlex/modules/ts_classification/dataset_checker/dataset_src/convert_dataset.py +2 -6
  641. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +5 -5
  642. paddlex/modules/ts_classification/evaluator.py +3 -3
  643. paddlex/modules/ts_classification/exportor.py +2 -3
  644. paddlex/modules/ts_classification/model_list.py +1 -1
  645. paddlex/modules/ts_classification/trainer.py +7 -7
  646. paddlex/modules/ts_forecast/__init__.py +2 -2
  647. paddlex/modules/ts_forecast/dataset_checker/__init__.py +4 -5
  648. paddlex/modules/ts_forecast/dataset_checker/dataset_src/__init__.py +2 -2
  649. paddlex/modules/ts_forecast/dataset_checker/dataset_src/analyse_dataset.py +1 -9
  650. paddlex/modules/ts_forecast/dataset_checker/dataset_src/check_dataset.py +2 -2
  651. paddlex/modules/ts_forecast/dataset_checker/dataset_src/convert_dataset.py +2 -6
  652. paddlex/modules/ts_forecast/dataset_checker/dataset_src/split_dataset.py +4 -4
  653. paddlex/modules/ts_forecast/evaluator.py +3 -3
  654. paddlex/modules/ts_forecast/exportor.py +2 -3
  655. paddlex/modules/ts_forecast/model_list.py +1 -1
  656. paddlex/modules/ts_forecast/trainer.py +7 -7
  657. paddlex/modules/video_classification/__init__.py +2 -2
  658. paddlex/modules/video_classification/dataset_checker/__init__.py +2 -2
  659. paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +2 -2
  660. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +9 -9
  661. paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +2 -3
  662. paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  663. paddlex/modules/video_classification/evaluator.py +3 -3
  664. paddlex/modules/video_classification/exportor.py +1 -1
  665. paddlex/modules/video_classification/model_list.py +1 -1
  666. paddlex/modules/video_classification/trainer.py +3 -3
  667. paddlex/modules/video_detection/__init__.py +2 -2
  668. paddlex/modules/video_detection/dataset_checker/__init__.py +2 -2
  669. paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +2 -2
  670. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +8 -9
  671. paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +3 -5
  672. paddlex/modules/video_detection/evaluator.py +3 -3
  673. paddlex/modules/video_detection/exportor.py +1 -1
  674. paddlex/modules/video_detection/model_list.py +1 -1
  675. paddlex/modules/video_detection/trainer.py +3 -3
  676. paddlex/ops/__init__.py +7 -4
  677. paddlex/ops/iou3d_nms/iou3d_cpu.cpp +8 -6
  678. paddlex/ops/iou3d_nms/iou3d_cpu.h +3 -2
  679. paddlex/ops/iou3d_nms/iou3d_nms.cpp +8 -6
  680. paddlex/ops/iou3d_nms/iou3d_nms.h +6 -4
  681. paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +24 -18
  682. paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +9 -7
  683. paddlex/ops/setup.py +3 -3
  684. paddlex/ops/voxel/voxelize_op.cc +22 -19
  685. paddlex/ops/voxel/voxelize_op.cu +25 -25
  686. paddlex/paddlex_cli.py +104 -87
  687. paddlex/repo_apis/Paddle3D_api/__init__.py +1 -1
  688. paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +1 -1
  689. paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +1 -1
  690. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +6 -6
  691. paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +2 -2
  692. paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +1 -1
  693. paddlex/repo_apis/Paddle3D_api/pp3d_config.py +3 -2
  694. paddlex/repo_apis/PaddleClas_api/__init__.py +1 -1
  695. paddlex/repo_apis/PaddleClas_api/cls/__init__.py +3 -3
  696. paddlex/repo_apis/PaddleClas_api/cls/config.py +5 -4
  697. paddlex/repo_apis/PaddleClas_api/cls/model.py +4 -4
  698. paddlex/repo_apis/PaddleClas_api/cls/register.py +12 -3
  699. paddlex/repo_apis/PaddleClas_api/cls/runner.py +2 -3
  700. paddlex/repo_apis/PaddleClas_api/shitu_rec/__init__.py +2 -2
  701. paddlex/repo_apis/PaddleClas_api/shitu_rec/config.py +2 -2
  702. paddlex/repo_apis/PaddleClas_api/shitu_rec/model.py +1 -4
  703. paddlex/repo_apis/PaddleClas_api/shitu_rec/register.py +2 -2
  704. paddlex/repo_apis/PaddleClas_api/shitu_rec/runner.py +1 -6
  705. paddlex/repo_apis/PaddleDetection_api/__init__.py +2 -2
  706. paddlex/repo_apis/PaddleDetection_api/config_helper.py +3 -3
  707. paddlex/repo_apis/PaddleDetection_api/instance_seg/__init__.py +2 -2
  708. paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +2 -3
  709. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +4 -4
  710. paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py +2 -3
  711. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +2 -3
  712. paddlex/repo_apis/PaddleDetection_api/object_det/__init__.py +3 -3
  713. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +5 -4
  714. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +6 -7
  715. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +26 -1
  716. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +32 -3
  717. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +2 -3
  718. paddlex/repo_apis/PaddleNLP_api/__init__.py +1 -1
  719. paddlex/repo_apis/PaddleOCR_api/__init__.py +4 -3
  720. paddlex/repo_apis/PaddleOCR_api/config_utils.py +1 -1
  721. paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +1 -1
  722. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +7 -6
  723. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +9 -13
  724. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +29 -3
  725. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +2 -3
  726. paddlex/repo_apis/PaddleOCR_api/table_rec/__init__.py +1 -1
  727. paddlex/repo_apis/PaddleOCR_api/table_rec/config.py +1 -1
  728. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +4 -4
  729. paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +2 -3
  730. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +3 -3
  731. paddlex/repo_apis/PaddleOCR_api/text_det/__init__.py +1 -1
  732. paddlex/repo_apis/PaddleOCR_api/text_det/config.py +1 -1
  733. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +4 -4
  734. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +20 -3
  735. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +3 -3
  736. paddlex/repo_apis/PaddleOCR_api/text_rec/__init__.py +1 -1
  737. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +7 -6
  738. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +9 -13
  739. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +20 -3
  740. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +2 -3
  741. paddlex/repo_apis/PaddleSeg_api/__init__.py +1 -1
  742. paddlex/repo_apis/PaddleSeg_api/base_seg_config.py +2 -2
  743. paddlex/repo_apis/PaddleSeg_api/seg/__init__.py +1 -1
  744. paddlex/repo_apis/PaddleSeg_api/seg/config.py +3 -6
  745. paddlex/repo_apis/PaddleSeg_api/seg/model.py +6 -6
  746. paddlex/repo_apis/PaddleSeg_api/seg/register.py +2 -3
  747. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +2 -3
  748. paddlex/repo_apis/PaddleTS_api/__init__.py +4 -3
  749. paddlex/repo_apis/PaddleTS_api/ts_ad/__init__.py +1 -1
  750. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +5 -6
  751. paddlex/repo_apis/PaddleTS_api/ts_ad/register.py +2 -2
  752. paddlex/repo_apis/PaddleTS_api/ts_ad/runner.py +2 -2
  753. paddlex/repo_apis/PaddleTS_api/ts_base/__init__.py +1 -1
  754. paddlex/repo_apis/PaddleTS_api/ts_base/config.py +2 -4
  755. paddlex/repo_apis/PaddleTS_api/ts_base/model.py +4 -4
  756. paddlex/repo_apis/PaddleTS_api/ts_base/runner.py +2 -2
  757. paddlex/repo_apis/PaddleTS_api/ts_cls/__init__.py +1 -1
  758. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +4 -5
  759. paddlex/repo_apis/PaddleTS_api/ts_cls/register.py +2 -2
  760. paddlex/repo_apis/PaddleTS_api/ts_cls/runner.py +2 -2
  761. paddlex/repo_apis/PaddleTS_api/ts_fc/__init__.py +1 -1
  762. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +6 -7
  763. paddlex/repo_apis/PaddleTS_api/ts_fc/register.py +1 -1
  764. paddlex/repo_apis/PaddleVideo_api/__init__.py +1 -1
  765. paddlex/repo_apis/PaddleVideo_api/config_utils.py +1 -1
  766. paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +3 -3
  767. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +5 -4
  768. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +4 -4
  769. paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +2 -3
  770. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +2 -3
  771. paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +3 -3
  772. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +5 -4
  773. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +5 -5
  774. paddlex/repo_apis/PaddleVideo_api/video_det/register.py +2 -3
  775. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +2 -3
  776. paddlex/repo_apis/__init__.py +1 -1
  777. paddlex/repo_apis/base/__init__.py +4 -5
  778. paddlex/repo_apis/base/config.py +3 -4
  779. paddlex/repo_apis/base/model.py +11 -19
  780. paddlex/repo_apis/base/register.py +1 -1
  781. paddlex/repo_apis/base/runner.py +11 -12
  782. paddlex/repo_apis/base/utils/__init__.py +1 -1
  783. paddlex/repo_apis/base/utils/arg.py +1 -1
  784. paddlex/repo_apis/base/utils/subprocess.py +1 -1
  785. paddlex/repo_manager/__init__.py +2 -9
  786. paddlex/repo_manager/core.py +12 -30
  787. paddlex/repo_manager/meta.py +41 -31
  788. paddlex/repo_manager/repo.py +171 -161
  789. paddlex/repo_manager/utils.py +13 -224
  790. paddlex/utils/__init__.py +1 -1
  791. paddlex/utils/cache.py +8 -10
  792. paddlex/utils/config.py +6 -5
  793. paddlex/utils/{custom_device_whitelist.py → custom_device_list.py} +53 -199
  794. paddlex/utils/deps.py +249 -0
  795. paddlex/utils/device.py +87 -36
  796. paddlex/utils/download.py +4 -4
  797. paddlex/utils/env.py +37 -7
  798. paddlex/utils/errors/__init__.py +1 -1
  799. paddlex/utils/errors/dataset_checker.py +1 -1
  800. paddlex/utils/errors/others.py +2 -16
  801. paddlex/utils/file_interface.py +4 -5
  802. paddlex/utils/flags.py +17 -12
  803. paddlex/utils/fonts/__init__.py +36 -5
  804. paddlex/utils/func_register.py +1 -1
  805. paddlex/utils/install.py +87 -0
  806. paddlex/utils/interactive_get_pipeline.py +3 -3
  807. paddlex/utils/lazy_loader.py +3 -3
  808. paddlex/utils/logging.py +10 -1
  809. paddlex/utils/misc.py +6 -6
  810. paddlex/utils/pipeline_arguments.py +15 -7
  811. paddlex/utils/result_saver.py +4 -5
  812. paddlex/utils/subclass_register.py +2 -4
  813. paddlex/version.py +2 -1
  814. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/METADATA +237 -102
  815. paddlex-3.0.1.dist-info/RECORD +1095 -0
  816. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
  817. paddlex/inference/models/base/predictor/basic_predictor.py +0 -139
  818. paddlex/paddle2onnx_requirements.txt +0 -1
  819. paddlex/repo_manager/requirements.txt +0 -21
  820. paddlex/serving_requirements.txt +0 -9
  821. paddlex-3.0.0rc0.dist-info/RECORD +0 -1015
  822. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
  823. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info/licenses}/LICENSE +0 -0
  824. {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,830 @@
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from functools import partial
16
+ from typing import List, Optional, Tuple, Type
17
+
18
+ import paddle
19
+ import paddle.nn as nn
20
+ import paddle.nn.functional as F
21
+
22
+ from ...common.vlm.transformers.model_outputs import CausalLMOutputWithPast
23
+ from .qwen2 import Qwen2Config, Qwen2ForCausalLM, Qwen2Model
24
+
25
+
26
class MLPBlock(paddle.nn.Layer):
    """Feed-forward block: linear projection, activation, linear projection."""

    def __init__(
        self,
        embedding_dim: int,
        mlp_dim: int,
        act: Type[paddle.nn.Layer] = paddle.nn.GELU,
    ) -> None:
        """
        Args:
            embedding_dim (int): Feature size of the block's input and output.
            mlp_dim (int): Feature size of the hidden projection.
            act (Type[paddle.nn.Layer]): Activation layer class; it is
                instantiated here without arguments.
        """
        super().__init__()
        # Expand to the hidden width, then project back to the embedding width.
        self.lin1 = nn.Linear(in_features=embedding_dim, out_features=mlp_dim)
        self.lin2 = nn.Linear(in_features=mlp_dim, out_features=embedding_dim)
        self.act = act()

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        """Apply ``lin1``, the activation, then ``lin2`` to ``x``."""
        hidden = self.lin1(x)
        activated = self.act(hidden)
        return self.lin2(activated)
40
+
41
+
42
class LayerNorm2d(paddle.nn.Layer):
    """Normalize a tensor over axis 1, then apply a learnable scale and shift.

    The scale (``weight``) and shift (``bias``) each hold ``num_channels``
    values and are broadcast as ``[:, None, None]``, i.e. over the two axes
    that follow axis 1.
    NOTE(review): this presumably expects channels-first image features of
    shape (batch, channels, height, width) -- confirm against the callers.
    """

    def __init__(self, num_channels: int, epsilon: float = 1e-06) -> None:
        """
        Args:
            num_channels (int): Number of entries in ``weight`` and ``bias``;
                must match the size of axis 1 of the input for broadcasting.
            epsilon (float): Constant added to the variance before the square
                root to avoid division by zero.
        """
        super().__init__()
        # The shape is passed as a list, which is the documented form for
        # paddle.ones / paddle.zeros (list, tuple or Tensor), instead of
        # relying on a bare int being accepted.
        self.weight = paddle.base.framework.EagerParamBase.from_tensor(
            tensor=paddle.ones(shape=[num_channels])
        )
        self.bias = paddle.base.framework.EagerParamBase.from_tensor(
            tensor=paddle.zeros(shape=[num_channels])
        )
        self.epsilon = epsilon

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        """Return ``x`` normalized over axis 1, scaled by ``weight`` and shifted by ``bias``."""
        # Mean and (biased) variance are taken over axis 1 only.
        u = x.mean(axis=1, keepdim=True)
        centered = x - u
        s = centered.pow(y=2).mean(axis=1, keepdim=True)
        x = centered / paddle.sqrt(x=s + self.epsilon)
        # Reshape the per-channel parameters to (C, 1, 1) for broadcasting.
        x = self.weight[:, None, None] * x + self.bias[:, None, None]
        return x
59
+
60
+
61
class ImageEncoderViT(paddle.nn.Layer):
    """ViT image encoder followed by a convolutional neck and two downsampling convs.

    The pipeline is: patch embedding, optional absolute positional embedding,
    ``depth`` transformer blocks, a neck that projects to ``out_chans``
    channels, then two stride-2 convolutions producing 512 and 1024 channels.
    """

    def __init__(
        self,
        img_size: int = 1024,
        patch_size: int = 16,
        in_chans: int = 3,
        embed_dim: int = 768,
        depth: int = 12,
        num_heads: int = 12,
        mlp_ratio: float = 4.0,
        out_chans: int = 256,
        qkv_bias: bool = True,
        norm_layer: Type[nn.Layer] = nn.LayerNorm,
        act_layer: Type[nn.Layer] = nn.GELU,
        use_abs_pos: bool = True,
        use_rel_pos: bool = False,
        rel_pos_zero_init: bool = True,
        window_size: int = 0,
        global_attn_indexes: Tuple[int, ...] = (),
    ) -> None:
        """
        Args:
            img_size (int): Input image size.
            patch_size (int): Patch size.
            in_chans (int): Number of input image channels.
            embed_dim (int): Patch embedding dimension.
            depth (int): Depth of ViT.
            num_heads (int): Number of attention heads in each ViT block.
            mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
            out_chans (int): Number of channels produced by the neck.
            qkv_bias (bool): If True, add a learnable bias to query, key, value.
            norm_layer (nn.Layer): Normalization layer.
            act_layer (nn.Layer): Activation layer.
            use_abs_pos (bool): If True, use absolute positional embeddings.
            use_rel_pos (bool): If True, add relative positional embeddings to the attention map.
            rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
            window_size (int): Window size for window attention blocks.
            global_attn_indexes (tuple): Indexes for blocks using global attention.
        """
        super().__init__()
        self.img_size = img_size

        # Number of patches along each spatial side of the input image.
        grid_size = img_size // patch_size

        self.patch_embed = PatchEmbed(
            kernel_size=(patch_size, patch_size),
            stride=(patch_size, patch_size),
            in_chans=in_chans,
            embed_dim=embed_dim,
        )

        # Annotated with the parameter class itself (not its factory method).
        self.pos_embed: Optional[paddle.base.framework.EagerParamBase] = None
        if use_abs_pos:
            # Zero-initialized absolute positional embedding, one vector per patch.
            self.pos_embed = paddle.base.framework.EagerParamBase.from_tensor(
                tensor=paddle.zeros(shape=[1, grid_size, grid_size, embed_dim])
            )

        self.blocks = paddle.nn.LayerList()
        for i in range(depth):
            block = Block(
                dim=embed_dim,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                norm_layer=norm_layer,
                act_layer=act_layer,
                use_rel_pos=use_rel_pos,
                rel_pos_zero_init=rel_pos_zero_init,
                # Blocks listed in global_attn_indexes get window_size 0.
                window_size=window_size if i not in global_attn_indexes else 0,
                input_size=(grid_size, grid_size),
            )
            self.blocks.append(block)

        self.neck = nn.Sequential(
            nn.Conv2D(
                embed_dim,
                out_chans,
                kernel_size=1,
                bias_attr=False,
            ),
            LayerNorm2d(out_chans),
            nn.Conv2D(
                out_chans,
                out_chans,
                kernel_size=3,
                padding=1,
                bias_attr=False,
            ),
            LayerNorm2d(out_chans),
        )

        # The first downsampling conv consumes the neck output, so its input
        # width follows out_chans (identical to the previous hard-coded 256
        # under the default configuration).
        self.net_2 = nn.Conv2D(
            out_chans, 512, kernel_size=3, stride=2, padding=1, bias_attr=False
        )
        self.net_3 = nn.Conv2D(
            512, 1024, kernel_size=3, stride=2, padding=1, bias_attr=False
        )

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        """Encode ``x`` and return the output of the last downsampling conv."""
        x = self.patch_embed(x)
        if self.pos_embed is not None:
            x = x + self.pos_embed
        for blk in self.blocks:
            x = blk(x)
        # Move the last axis to position 1 before the convolutional neck.
        x = self.neck(x.transpose([0, 3, 1, 2]))
        x = self.net_2(x)
        x = self.net_3(x)
        return x
170
+
171
+
172
class Block(paddle.nn.Layer):
    """Pre-norm transformer block with optional windowed attention.

    Both the attention branch and the MLP branch are wrapped in a residual
    connection. When ``window_size`` is positive the attention runs inside
    non-overlapping windows; when it is 0 the attention is global.
    """

    def __init__(
        self,
        dim: int,
        num_heads: int,
        mlp_ratio: float = 4.0,
        qkv_bias: bool = True,
        norm_layer: Type[nn.Layer] = nn.LayerNorm,
        act_layer: Type[nn.Layer] = nn.GELU,
        use_rel_pos: bool = False,
        rel_pos_zero_init: bool = True,
        window_size: int = 0,
        input_size: Optional[Tuple[int, int]] = None,
    ) -> None:
        """
        Args:
            dim (int): Number of input channels.
            num_heads (int): Number of attention heads.
            mlp_ratio (float): Hidden width of the MLP as a multiple of ``dim``.
            qkv_bias (bool): Whether the query/key/value projection has a bias.
            norm_layer (nn.Layer): Normalization layer class, called as ``norm_layer(dim)``.
            act_layer (nn.Layer): Activation layer class handed to the MLP.
            use_rel_pos (bool): Whether attention adds relative positional embeddings.
            rel_pos_zero_init (bool): Forwarded unchanged to ``Attention``.
            window_size (int): Attention window size; 0 selects global attention.
            input_size (tuple(int, int) or None): Token grid size, used to size the
                relative positional tables when attention is global.
        """
        super().__init__()
        self.window_size = window_size

        # Windowed attention only ever sees window-sized grids, so its relative
        # position tables are sized for the window rather than the full input.
        if window_size == 0:
            attn_grid = input_size
        else:
            attn_grid = (window_size, window_size)

        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            use_rel_pos=use_rel_pos,
            rel_pos_zero_init=rel_pos_zero_init,
            input_size=attn_grid,
        )

        self.norm2 = norm_layer(dim)
        self.mlp = MLPBlock(
            embedding_dim=dim, mlp_dim=int(dim * mlp_ratio), act=act_layer
        )

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        """Apply attention and MLP, each with a residual connection."""
        normed = self.norm1(x)

        if self.window_size > 0:
            # Split into windows, attend inside each, then stitch back together.
            height, width = normed.shape[1], normed.shape[2]
            windows, padded_hw = window_partition(normed, self.window_size)
            attended = window_unpartition(
                self.attn(windows), self.window_size, padded_hw, (height, width)
            )
        else:
            attended = self.attn(normed)

        hidden = x + attended
        return hidden + self.mlp(self.norm2(hidden))
238
+
239
+
240
class Attention(paddle.nn.Layer):
    """Multi-head self-attention over a 2-D token grid, with optional
    decomposed relative position embeddings."""

    def __init__(
        self,
        dim: int,
        num_heads: int = 8,
        qkv_bias: bool = True,
        use_rel_pos: bool = False,
        rel_pos_zero_init: bool = True,
        input_size: Optional[Tuple[int, int]] = None,
    ) -> None:
        """
        Args:
            dim (int): Number of input channels.
            num_heads (int): Number of attention heads.
            qkv_bias (bool): Whether the query/key/value projection has a bias.
            use_rel_pos (bool): Whether to add relative positional embeddings to
                the attention map.
            rel_pos_zero_init (bool): Accepted for interface compatibility; this
                implementation does not read it (the tables are always created
                from zeros).
            input_size (tuple(int, int) or None): Token grid size used to size the
                relative positional tables. Required when ``use_rel_pos`` is True.
        """
        super().__init__()
        per_head_dim = dim // num_heads
        self.num_heads = num_heads
        self.scale = per_head_dim**-0.5

        self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias)
        self.proj = nn.Linear(dim, dim)

        self.use_rel_pos = use_rel_pos
        if not self.use_rel_pos:
            return

        assert (
            input_size is not None
        ), "Input size must be provided if using relative positional encoding."
        # One table row per possible relative offset along each axis.
        height_offsets = 2 * input_size[0] - 1
        width_offsets = 2 * input_size[1] - 1
        self.rel_pos_h = paddle.base.framework.EagerParamBase.from_tensor(
            tensor=paddle.zeros(shape=[height_offsets, per_head_dim])
        )
        self.rel_pos_w = paddle.base.framework.EagerParamBase.from_tensor(
            tensor=paddle.zeros(shape=[width_offsets, per_head_dim])
        )

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        """Attend over all grid positions of ``x`` ([B, H, W, C]) and return a
        tensor with the same leading [B, H, W] layout."""
        B, H, W, _ = tuple(x.shape)
        num_tokens = H * W

        # [B, HW, 3, heads, head_dim] -> [3, B, heads, HW, head_dim]
        packed = self.qkv(x).reshape([B, num_tokens, 3, self.num_heads, -1])
        packed = packed.transpose([2, 0, 3, 1, 4])
        # Fold heads into the batch axis so each head is an independent matmul.
        q, k, v = packed.reshape([3, B * self.num_heads, num_tokens, -1]).unbind(
            axis=0
        )

        scores = (q * self.scale) @ k.transpose([0, 2, 1])
        if self.use_rel_pos:
            scores = add_decomposed_rel_pos(
                scores, q, self.rel_pos_h, self.rel_pos_w, (H, W), (H, W)
            )
        weights = F.softmax(scores, axis=-1)

        # Undo the head folding and merge heads back into the channel axis.
        context = weights @ v
        context = context.reshape([B, self.num_heads, H, W, -1])
        context = context.transpose([0, 2, 3, 1, 4]).reshape([B, H, W, -1])
        return self.proj(context)
308
+
309
+
310
def window_partition(
    x: paddle.Tensor, window_size: int
) -> Tuple[paddle.Tensor, Tuple[int, int]]:
    """
    Split a token grid into non-overlapping square windows.

    The grid is padded at the bottom and right when its height or width is
    not a multiple of ``window_size``.

    Args:
        x (tensor): input tokens with [B, H, W, C].
        window_size (int): side length of each window.

    Returns:
        windows: tensor of shape [B * num_windows, window_size, window_size, C].
        (Hp, Wp): height and width after padding, before partitioning.
    """
    B, H, W, C = tuple(x.shape)

    # Amount needed to reach the next multiple of window_size (0 if aligned).
    extra_h = (window_size - H % window_size) % window_size
    extra_w = (window_size - W % window_size) % window_size
    if extra_h or extra_w:
        x = F.pad(x, pad=(0, extra_w, 0, extra_h), data_format="NHWC")
    Hp = H + extra_h
    Wp = W + extra_w

    rows = Hp // window_size
    cols = Wp // window_size
    grid = x.reshape([B, rows, window_size, cols, window_size, C])
    # Bring the two window-index axes together, then flatten them into batch.
    grid = grid.transpose([0, 1, 3, 2, 4, 5])
    windows = grid.reshape([-1, window_size, window_size, C])
    return windows, (Hp, Wp)
336
+
337
+
338
def window_unpartition(
    windows: paddle.Tensor,
    window_size: int,
    pad_hw: Tuple[int, int],
    hw: Tuple[int, int],
) -> paddle.Tensor:
    """
    Reassemble windows into the original token grid and strip any padding.

    Args:
        windows (tensor): input tokens with [B * num_windows, window_size, window_size, C].
        window_size (int): side length of each window.
        pad_hw (Tuple): padded height and width (Hp, Wp).
        hw (Tuple): original height and width (H, W) before padding.

    Returns:
        x: tensor of shape [B, H, W, C].
    """
    Hp, Wp = pad_hw
    H, W = hw

    windows_per_image = Hp * Wp // window_size // window_size
    batch = tuple(windows.shape)[0] // windows_per_image

    grid = windows.reshape(
        [batch, Hp // window_size, Wp // window_size, window_size, window_size, -1]
    )
    # Interleave window rows/cols with in-window rows/cols, then merge them.
    grid = grid.transpose([0, 1, 3, 2, 4, 5]).reshape([batch, Hp, Wp, -1])

    if Hp <= H and Wp <= W:
        return grid
    # Padding was added during partitioning; crop it away.
    return grid[:, :H, :W, :]
365
+
366
+
367
def get_rel_pos(q_size: int, k_size: int, rel_pos: paddle.Tensor) -> paddle.Tensor:
    """
    Look up relative positional embeddings for every (query, key) position
    pair along one axis.

    Args:
        q_size (int): number of query positions along the axis.
        k_size (int): number of key positions along the axis.
        rel_pos (Tensor): relative position embeddings (L, C).

    Returns:
        Embeddings gathered from the (possibly resized) table, indexed by the
        relative offset between each query and key position.
    """
    max_rel_dist = int(2 * max(q_size, k_size) - 1)
    table_len = tuple(rel_pos.shape)[0]

    if table_len == max_rel_dist:
        table = rel_pos
    else:
        # Linearly resample the table along its length so that it has exactly
        # one row per possible relative offset.
        as_signal = rel_pos.reshape([1, table_len, -1]).transpose([0, 2, 1])
        resampled = paddle.nn.functional.interpolate(
            as_signal,
            size=max_rel_dist,
            mode="linear",
        )
        table = resampled.reshape([-1, max_rel_dist]).transpose([1, 0])

    # When q and k differ in length, stretch the shorter axis's coordinates.
    q_scale = max(k_size / q_size, 1.0)
    k_scale = max(q_size / k_size, 1.0)
    q_coords = paddle.arange(end=q_size)[:, None] * q_scale
    k_coords = paddle.arange(end=k_size)[None, :] * k_scale
    # Shift so that the smallest offset maps to index 0.
    relative_coords = q_coords - k_coords + (k_size - 1) * k_scale

    return table[relative_coords.astype(dtype="int64")]
394
+
395
+
396
def add_decomposed_rel_pos(
    attn: paddle.Tensor,
    q: paddle.Tensor,
    rel_pos_h: paddle.Tensor,
    rel_pos_w: paddle.Tensor,
    q_size: Tuple[int, int],
    k_size: Tuple[int, int],
) -> paddle.Tensor:
    """
    Add decomposed relative positional embeddings (as in :paper:`mvitv2`) to
    an attention map. The height and width contributions are computed
    separately and broadcast over the other key axis.
    https://github.com/facebookresearch/mvit/blob/19786631e330df9f3622e5402b4a419a263a2c80/mvit/models/attention.py # noqa B950

    Args:
        attn (Tensor): attention map.
        q (Tensor): query q in the attention layer with shape (B, q_h * q_w, C).
        rel_pos_h (Tensor): relative position embeddings (Lh, C) for height axis.
        rel_pos_w (Tensor): relative position embeddings (Lw, C) for width axis.
        q_size (Tuple): spatial sequence size of query q with (q_h, q_w).
        k_size (Tuple): spatial sequence size of key k with (k_h, k_w).

    Returns:
        attn (Tensor): attention map with the positional terms added, shaped
            (B, q_h * q_w, k_h * k_w).
    """
    q_h, q_w = q_size
    k_h, k_w = k_size
    height_table = get_rel_pos(q_h, k_h, rel_pos_h)
    width_table = get_rel_pos(q_w, k_w, rel_pos_w)

    batch, _, channels = tuple(q.shape)
    q_grid = q.reshape([batch, q_h, q_w, channels])
    height_term = paddle.einsum("bhwc,hkc->bhwk", q_grid, height_table)
    width_term = paddle.einsum("bhwc,wkc->bhwk", q_grid, width_table)

    # View the map as (B, q_h, q_w, k_h, k_w) so each term broadcasts along
    # the key axis it does not depend on.
    biased = attn.reshape([batch, q_h, q_w, k_h, k_w])
    biased = biased + height_term[:, :, :, :, None]
    biased = biased + width_term[:, :, :, None, :]

    return biased.reshape([batch, q_h * q_w, k_h * k_w])
435
+
436
+
437
class PatchEmbed(paddle.nn.Layer):
    """
    Convolutional image-to-patch embedding that returns a channels-last grid.
    """

    def __init__(
        self,
        kernel_size: Tuple[int, int] = (16, 16),
        stride: Tuple[int, int] = (16, 16),
        padding: Tuple[int, int] = (0, 0),
        in_chans: int = 3,
        embed_dim: int = 768,
    ) -> None:
        """
        Args:
            kernel_size (Tuple): kernel size of the projection convolution.
            stride (Tuple): stride of the projection convolution.
            padding (Tuple): padding of the projection convolution.
            in_chans (int): Number of input image channels.
            embed_dim (int): Number of output (embedding) channels.
        """
        super().__init__()
        self.proj = nn.Conv2D(
            in_chans,
            embed_dim,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        )

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        """Project ``x`` with the convolution and move channels last."""
        embedded = self.proj(x)
        # [B, C, H, W] -> [B, H, W, C]
        return embedded.transpose([0, 2, 3, 1])
468
+
469
+
470
# Special-token strings for image placeholders. None of the definitions
# visible in this part of the file reference them; presumably the tokenizer /
# preprocessing code consumes them -- TODO confirm against callers.
DEFAULT_IMAGE_TOKEN = "<image>"
DEFAULT_IMAGE_PATCH_TOKEN = "<imgpad>"
DEFAULT_IM_START_TOKEN = "<img>"
DEFAULT_IM_END_TOKEN = "</img>"
474
+
475
+
476
class Qwen2LMHead(nn.Layer):
    """Linear head that maps hidden states to vocabulary logits.

    With ``transpose_y=True`` the weight is laid out as [vocab_size,
    hidden_size] and may be supplied by the caller through
    ``embedding_weights``; otherwise a fresh [hidden_size, vocab_size]
    parameter is created.
    """

    def __init__(
        self,
        config,
        embedding_weights=None,
        transpose_y=False,
        tensor_parallel_output=1,
    ):
        """
        Args:
            config: Model configuration; ``vocab_size`` and ``hidden_size`` are read.
            embedding_weights: Existing weight to reuse. Only honoured when
                ``transpose_y`` is True.
            transpose_y (bool): Whether the weight is transposed in the matmul.
            tensor_parallel_output: Accepted but not used by this implementation.
        """
        super().__init__()
        self.config = config
        self.transpose_y = transpose_y
        vocab_size = config.vocab_size

        if transpose_y and embedding_weights is not None:
            # Reuse the caller's weight rather than allocating a new one.
            self.weight = embedding_weights
            return

        if transpose_y:
            weight_shape = [vocab_size, config.hidden_size]
        else:
            weight_shape = [config.hidden_size, vocab_size]
        self.weight = self.create_parameter(
            shape=weight_shape,
            dtype=paddle.get_default_dtype(),
        )

    def forward(self, hidden_states, tensor_parallel_output=1):
        """Return ``hidden_states @ weight`` (weight transposed if configured).

        ``tensor_parallel_output`` is accepted but not used.
        """
        return paddle.matmul(
            hidden_states, self.weight, transpose_y=self.transpose_y
        )
508
+
509
+
510
class GOTConfig(Qwen2Config):
    """Qwen2 configuration subclass whose only override is ``model_type = "GOT"``."""

    model_type = "GOT"
512
+
513
+
514
class GOTQwenModel(Qwen2Model):
    """Qwen2 backbone extended with a ViT image encoder.

    Images are encoded by ``vision_tower_high`` (with gradients disabled),
    passed through ``mm_projector_vary`` and spliced into the token embeddings
    at the positions that follow each image start token.
    """

    config_class = GOTConfig

    # Vocabulary ids of the image markers. They are fixed here and are not
    # read from the model config.
    _IM_START_TOKEN_ID = 151857
    _IM_END_TOKEN_ID = 151858
    _IM_PATCH_TOKEN_ID = 151859

    def __init__(self, config: Qwen2Config):
        super(GOTQwenModel, self).__init__(config)
        self.vision_tower_high = ImageEncoderViT(
            depth=12,
            embed_dim=768,
            img_size=1024,
            mlp_ratio=4,
            norm_layer=partial(paddle.nn.LayerNorm, epsilon=1e-6),
            num_heads=12,
            patch_size=16,
            qkv_bias=True,
            use_rel_pos=True,
            global_attn_indexes=[2, 5, 8, 11],
            window_size=14,
            out_chans=256,
        )
        self.mm_projector_vary = nn.Linear(1024, 1024)

    def _encode_crops(self, vision_tower, crops):
        """Encode a batch of image crops and apply ``mm_projector_vary``."""
        # The vision tower never receives gradients; only the projector can.
        with paddle.set_grad_enabled(False):
            feature = vision_tower(crops)
            # [N, C, h, w] -> [N, h*w, C]
            feature = feature.flatten(2).transpose([0, 2, 1])
        return self.mm_projector_vary(feature)

    def _encode_images(self, vision_tower, images):
        """Return one projected feature tensor per entry of ``images``."""
        image_features = []
        for image in images:
            if self.training:
                # NOTE(review): in training each entry is indexed with [1] to
                # obtain the image tensor; the structure of the entry is not
                # visible here -- confirm against the data pipeline.
                image = image[1]
            num_crops, _, _, _ = image.shape
            if num_crops == 1:
                image_features.append(self._encode_crops(vision_tower, image))
                continue

            # Several crops: encode one at a time and concatenate their tokens
            # along the sequence axis.
            per_crop = [
                self._encode_crops(vision_tower, paddle.stack([crop]))
                for crop in paddle.unbind(image)
            ]
            image_features.append(paddle.concat(per_crop, axis=1))
        return image_features

    def _splice_image_features(self, input_ids, inputs_embeds, image_features):
        """Overwrite the placeholder embeddings of each sample with image features.

        Raises:
            ValueError: If the start/end marker counts differ, or an end marker
                does not sit right after the span reserved for an image.
        """
        merged = []
        for cur_input_ids, cur_input_embeds, cur_image_features in zip(
            input_ids, inputs_embeds, image_features
        ):
            if (cur_input_ids == self._IM_PATCH_TOKEN_ID).sum() == 0:
                # This sample carries no image placeholders; keep it as is.
                merged.append(cur_input_embeds)
                continue

            if (cur_input_ids == self._IM_START_TOKEN_ID).sum() != (
                cur_input_ids == self._IM_END_TOKEN_ID
            ).sum():
                raise ValueError(
                    "The number of image start tokens and image end tokens should be the same."
                )

            image_start_tokens = paddle.where(
                cur_input_ids == self._IM_START_TOKEN_ID
            )[0]
            for image_start_token_pos, per_cur_image_features in zip(
                image_start_tokens, cur_image_features
            ):
                num_patches = per_cur_image_features.shape[0]
                span_end = image_start_token_pos + num_patches + 1

                if cur_input_ids[span_end] != self._IM_END_TOKEN_ID:
                    raise ValueError(
                        "The image end token should follow the image start token."
                    )

                # Keep everything up to and including the start marker, insert
                # the image features, then resume at the end marker.
                cur_input_embeds = paddle.concat(
                    (
                        cur_input_embeds[: image_start_token_pos + 1],
                        per_cur_image_features,
                        cur_input_embeds[span_end:],
                    ),
                    axis=0,
                )

            merged.append(cur_input_embeds)

        return paddle.stack(merged, axis=0)

    def forward(
        self,
        input_ids: paddle.Tensor = None,
        attention_mask: Optional[paddle.Tensor] = None,
        position_ids: Optional[paddle.Tensor] = None,
        past_key_values: Optional[List[paddle.Tensor]] = None,
        inputs_embeds: Optional[paddle.Tensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        images: Optional[paddle.Tensor] = None,
        return_dict: Optional[bool] = None,
    ):
        """Embed the tokens, merge image features if given, and run the backbone.

        Image features are merged when ``images`` is provided and either the
        model is training or ``input_ids`` holds more than one token per
        sample (a single-token step skips the merge).

        Args:
            input_ids: Token ids. Required whenever ``images`` is provided,
                because the image marker positions are located in them.
            inputs_embeds: Precomputed embeddings; computed from ``input_ids``
                when omitted.
            images: Iterable with one image entry per sample, or None.
            attention_mask, position_ids, past_key_values, use_cache,
            output_attentions, output_hidden_states, return_dict: Forwarded
                unchanged to the parent ``forward``.

        Returns:
            Whatever the parent ``forward`` returns.

        Raises:
            ValueError: If ``images`` is given without ``input_ids``, or the
                image markers in ``input_ids`` are inconsistent.
        """
        # HACK: replace back original embeddings for LLaVA pretraining
        orig_embeds_params = getattr(self, "orig_embeds_params", None)
        if orig_embeds_params is not None:
            with paddle.no_grad():
                self.get_input_embeddings().weight[: -self.num_new_tokens] = (
                    orig_embeds_params[: -self.num_new_tokens].data
                )

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        vision_tower_high = getattr(self, "vision_tower_high", None)

        # Check for images first so that an image-free call made with only
        # `inputs_embeds` never touches `input_ids`.
        if vision_tower_high is not None and images is not None:
            if input_ids is None:
                raise ValueError(
                    "`input_ids` must be provided together with `images` so that "
                    "the image token positions can be located."
                )
            if input_ids.shape[1] != 1 or self.training:
                image_features = self._encode_images(vision_tower_high, images)
                inputs_embeds = self._splice_image_features(
                    input_ids, inputs_embeds, image_features
                )

        return super().forward(
            input_ids=None,
            attention_mask=attention_mask,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            position_ids=position_ids,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
675
+
676
+
677
class GOTQwenForCausalLM(Qwen2ForCausalLM):
    """Causal language model whose backbone is ``GOTQwenModel``, so image
    inputs can be passed alongside the token ids."""

    config_class = GOTConfig

    def __init__(self, config):
        # Start the MRO lookup *after* Qwen2ForCausalLM so that its own
        # __init__ is skipped; the backbone and head are built here instead.
        super(Qwen2ForCausalLM, self).__init__(config)
        self.qwen2 = GOTQwenModel(config)

        self.vocab_size = config.vocab_size

        if config.tie_word_embeddings:
            self.lm_head = Qwen2LMHead(
                config,
                embedding_weights=self.qwen2.embed_tokens.weight,
                transpose_y=True,
            )
            self.tie_weights()
        else:
            self.lm_head = Qwen2LMHead(config)

        # The model is put in evaluation mode as soon as it is constructed.
        self.eval()

    def get_model(self):
        """Return the underlying ``GOTQwenModel`` backbone."""
        return self.qwen2

    def forward(
        self,
        input_ids: paddle.Tensor = None,
        attention_mask: Optional[paddle.Tensor] = None,
        position_ids: Optional[paddle.Tensor] = None,
        past_key_values: Optional[List[paddle.Tensor]] = None,
        inputs_embeds: Optional[paddle.Tensor] = None,
        labels: Optional[paddle.Tensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        images: Optional[paddle.Tensor] = None,
        return_dict: Optional[bool] = None,
    ):
        """Run the backbone and the LM head, optionally computing the LM loss.

        Args:
            labels: Target token ids. When given, a next-token cross-entropy
                loss is computed, averaged over the labels that are not -100.
            images: Forwarded to the backbone.
            output_attentions, output_hidden_states, return_dict: Fall back to
                the corresponding config values when None.
            input_ids, attention_mask, position_ids, past_key_values,
            inputs_embeds, use_cache: Forwarded to the backbone.

        Returns:
            ``CausalLMOutputWithPast`` when ``return_dict`` is truthy, otherwise
            a tuple ``(loss?, logits, *backbone_outputs[1:])`` in which the loss
            is present only when labels were supplied.
        """
        output_attentions = (
            output_attentions
            if output_attentions is not None
            else self.config.output_attentions
        )
        output_hidden_states = (
            output_hidden_states
            if output_hidden_states is not None
            else self.config.output_hidden_states
        )
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        outputs = self.qwen2(
            input_ids=input_ids,
            past_key_values=past_key_values,
            attention_mask=attention_mask,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            images=images,
            return_dict=return_dict,
        )

        hidden_states = outputs[0]
        logits = self.lm_head(hidden_states)
        logits = logits.astype(dtype="float32")

        loss = None
        if labels is not None:
            # Shift so that tokens < n predict n
            shift_logits = logits[..., :-1, :]
            shift_labels = labels[..., 1:]
            loss_fct = nn.CrossEntropyLoss(reduction="sum")
            shift_logits = shift_logits.reshape([-1, self.config.vocab_size])
            shift_labels = shift_labels.reshape([-1])

            loss = loss_fct(shift_logits, shift_labels)
            # Average over the labels that actually contribute. The count is
            # cast to the loss dtype (avoiding a float / int64 division) and
            # clamped to at least 1 so that a batch whose labels are all -100
            # gives a loss of 0 instead of NaN.
            label_sum = paddle.sum(shift_labels != -100).astype(loss.dtype)
            loss = loss / paddle.clip(label_sum, min=1.0)

        if not return_dict:
            output = (logits,) + outputs[1:]
            return (loss,) + output if loss is not None else output

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=outputs.past_key_values,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def prepare_inputs_for_generation(
        self,
        input_ids,
        past_key_values=None,
        attention_mask=None,
        inputs_embeds=None,
        **kwargs
    ):
        """Assemble the keyword arguments for one generation step.

        The ``attention_mask`` argument is ignored: an all-ones boolean mask
        covering the full ``input_ids`` is always built and returned instead.

        Args:
            input_ids: Token ids generated so far, shaped [batch, seq].
            past_key_values: Cached key/values from earlier steps, if any.
            attention_mask: Ignored (see above).
            inputs_embeds: Used instead of ``input_ids`` only on the first step,
                i.e. when there is no cache yet.
            **kwargs: ``position_ids``, ``use_cache`` and ``images`` are read.

        Returns:
            dict of model inputs for ``forward``.
        """
        batch_size, seq_length = input_ids.shape
        attention_mask = paddle.ones((batch_size, seq_length), dtype=paddle.bool)

        # Omit tokens covered by past_key_values
        if past_key_values is not None:
            past_length = past_key_values[0][0].shape[1]
            if past_length < input_ids.shape[1]:
                input_ids = input_ids[:, past_length:]

        position_ids = kwargs.get("position_ids", None)
        if position_ids is None:
            # create position_ids on the fly for batch generation
            position_ids = attention_mask.astype(dtype="int64").cumsum(-1) - 1
            position_ids.masked_fill_(attention_mask == 0, 1)
            if past_key_values:
                # Keep only the positions of the tokens still to be processed.
                position_ids = position_ids[:, -input_ids.shape[1] :]

        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
        if inputs_embeds is not None and past_key_values is None:
            model_inputs = {"inputs_embeds": inputs_embeds}
        else:
            model_inputs = {"input_ids": input_ids}

        model_inputs.update(
            {
                "position_ids": position_ids,
                "past_key_values": past_key_values,
                "use_cache": kwargs.get("use_cache"),
                "attention_mask": attention_mask,
                "images": kwargs.get("images", None),
            }
        )
        return model_inputs
812
+
813
+
814
class PPChart2TableInference(GOTQwenForCausalLM):
    """Inference wrapper that runs greedy decoding with fixed settings."""

    def generate(self, inputs, **kwargs):
        """Generate token ids for ``inputs`` without tracking gradients.

        Args:
            inputs: Mapping with the keys ``"input_ids"`` and ``"images"``.
            **kwargs: Optional ``max_new_tokens`` (default 1024) and
                ``no_repeat_ngram_size`` (default 20); other keys are ignored.

        Returns:
            Whatever the parent ``generate`` returns.
        """
        max_new_tokens = kwargs.get("max_new_tokens", 1024)
        no_repeat_ngram_size = kwargs.get("no_repeat_ngram_size", 20)

        # Sampling off and a single beam, i.e. plain greedy decoding.
        decoding_options = {
            "images": inputs["images"],
            "do_sample": False,
            "num_beams": 1,
            "no_repeat_ngram_size": no_repeat_ngram_size,
            "max_new_tokens": max_new_tokens,
        }
        with paddle.no_grad():
            return super().generate(inputs["input_ids"], **decoding_options)