paddlex 2.1.0__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (1786)
  1. paddlex/.version +1 -0
  2. paddlex/__init__.py +35 -19
  3. paddlex/__main__.py +39 -0
  4. paddlex/configs/modules/3d_bev_detection/BEVFusion.yaml +38 -0
  5. paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
  6. paddlex/configs/modules/doc_text_orientation/PP-LCNet_x1_0_doc_ori.yaml +41 -0
  7. paddlex/configs/modules/doc_vlm/PP-DocBee-2B.yaml +14 -0
  8. paddlex/configs/modules/doc_vlm/PP-DocBee-7B.yaml +14 -0
  9. paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
  10. paddlex/configs/modules/face_detection/BlazeFace-FPN-SSH.yaml +40 -0
  11. paddlex/configs/modules/face_detection/BlazeFace.yaml +40 -0
  12. paddlex/configs/modules/face_detection/PP-YOLOE_plus-S_face.yaml +40 -0
  13. paddlex/configs/modules/face_detection/PicoDet_LCNet_x2_5_face.yaml +40 -0
  14. paddlex/configs/modules/face_feature/MobileFaceNet.yaml +41 -0
  15. paddlex/configs/modules/face_feature/ResNet50_face.yaml +41 -0
  16. paddlex/configs/modules/formula_recognition/LaTeX_OCR_rec.yaml +40 -0
  17. paddlex/configs/modules/formula_recognition/PP-FormulaNet-L.yaml +40 -0
  18. paddlex/configs/modules/formula_recognition/PP-FormulaNet-S.yaml +40 -0
  19. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
  20. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
  21. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
  22. paddlex/configs/modules/formula_recognition/UniMERNet.yaml +40 -0
  23. paddlex/configs/modules/human_detection/PP-YOLOE-L_human.yaml +42 -0
  24. paddlex/configs/modules/human_detection/PP-YOLOE-S_human.yaml +42 -0
  25. paddlex/configs/modules/image_anomaly_detection/STFPM.yaml +41 -0
  26. paddlex/configs/modules/image_classification/CLIP_vit_base_patch16_224.yaml +41 -0
  27. paddlex/configs/modules/image_classification/CLIP_vit_large_patch14_224.yaml +41 -0
  28. paddlex/configs/modules/image_classification/ConvNeXt_base_224.yaml +41 -0
  29. paddlex/configs/modules/image_classification/ConvNeXt_base_384.yaml +41 -0
  30. paddlex/configs/modules/image_classification/ConvNeXt_large_224.yaml +41 -0
  31. paddlex/configs/modules/image_classification/ConvNeXt_large_384.yaml +41 -0
  32. paddlex/configs/modules/image_classification/ConvNeXt_small.yaml +41 -0
  33. paddlex/configs/modules/image_classification/ConvNeXt_tiny.yaml +41 -0
  34. paddlex/configs/modules/image_classification/FasterNet-L.yaml +40 -0
  35. paddlex/configs/modules/image_classification/FasterNet-M.yaml +40 -0
  36. paddlex/configs/modules/image_classification/FasterNet-S.yaml +40 -0
  37. paddlex/configs/modules/image_classification/FasterNet-T0.yaml +40 -0
  38. paddlex/configs/modules/image_classification/FasterNet-T1.yaml +40 -0
  39. paddlex/configs/modules/image_classification/FasterNet-T2.yaml +40 -0
  40. paddlex/configs/modules/image_classification/MobileNetV1_x0_25.yaml +41 -0
  41. paddlex/configs/modules/image_classification/MobileNetV1_x0_5.yaml +41 -0
  42. paddlex/configs/modules/image_classification/MobileNetV1_x0_75.yaml +41 -0
  43. paddlex/configs/modules/image_classification/MobileNetV1_x1_0.yaml +41 -0
  44. paddlex/configs/modules/image_classification/MobileNetV2_x0_25.yaml +41 -0
  45. paddlex/configs/modules/image_classification/MobileNetV2_x0_5.yaml +41 -0
  46. paddlex/configs/modules/image_classification/MobileNetV2_x1_0.yaml +41 -0
  47. paddlex/configs/modules/image_classification/MobileNetV2_x1_5.yaml +41 -0
  48. paddlex/configs/modules/image_classification/MobileNetV2_x2_0.yaml +41 -0
  49. paddlex/configs/modules/image_classification/MobileNetV3_large_x0_35.yaml +41 -0
  50. paddlex/configs/modules/image_classification/MobileNetV3_large_x0_5.yaml +41 -0
  51. paddlex/configs/modules/image_classification/MobileNetV3_large_x0_75.yaml +41 -0
  52. paddlex/configs/modules/image_classification/MobileNetV3_large_x1_0.yaml +41 -0
  53. paddlex/configs/modules/image_classification/MobileNetV3_large_x1_25.yaml +41 -0
  54. paddlex/configs/modules/image_classification/MobileNetV3_small_x0_35.yaml +41 -0
  55. paddlex/configs/modules/image_classification/MobileNetV3_small_x0_5.yaml +41 -0
  56. paddlex/configs/modules/image_classification/MobileNetV3_small_x0_75.yaml +41 -0
  57. paddlex/configs/modules/image_classification/MobileNetV3_small_x1_0.yaml +41 -0
  58. paddlex/configs/modules/image_classification/MobileNetV3_small_x1_25.yaml +41 -0
  59. paddlex/configs/modules/image_classification/MobileNetV4_conv_large.yaml +41 -0
  60. paddlex/configs/modules/image_classification/MobileNetV4_conv_medium.yaml +41 -0
  61. paddlex/configs/modules/image_classification/MobileNetV4_conv_small.yaml +41 -0
  62. paddlex/configs/modules/image_classification/MobileNetV4_hybrid_large.yaml +41 -0
  63. paddlex/configs/modules/image_classification/MobileNetV4_hybrid_medium.yaml +41 -0
  64. paddlex/configs/modules/image_classification/PP-HGNetV2-B0.yaml +41 -0
  65. paddlex/configs/modules/image_classification/PP-HGNetV2-B1.yaml +41 -0
  66. paddlex/configs/modules/image_classification/PP-HGNetV2-B2.yaml +41 -0
  67. paddlex/configs/modules/image_classification/PP-HGNetV2-B3.yaml +41 -0
  68. paddlex/configs/modules/image_classification/PP-HGNetV2-B4.yaml +41 -0
  69. paddlex/configs/modules/image_classification/PP-HGNetV2-B5.yaml +41 -0
  70. paddlex/configs/modules/image_classification/PP-HGNetV2-B6.yaml +41 -0
  71. paddlex/configs/modules/image_classification/PP-HGNet_base.yaml +41 -0
  72. paddlex/configs/modules/image_classification/PP-HGNet_small.yaml +41 -0
  73. paddlex/configs/modules/image_classification/PP-HGNet_tiny.yaml +41 -0
  74. paddlex/configs/modules/image_classification/PP-LCNetV2_base.yaml +41 -0
  75. paddlex/configs/modules/image_classification/PP-LCNetV2_large.yaml +41 -0
  76. paddlex/configs/modules/image_classification/PP-LCNetV2_small.yaml +41 -0
  77. paddlex/configs/modules/image_classification/PP-LCNet_x0_25.yaml +41 -0
  78. paddlex/configs/modules/image_classification/PP-LCNet_x0_35.yaml +41 -0
  79. paddlex/configs/modules/image_classification/PP-LCNet_x0_5.yaml +41 -0
  80. paddlex/configs/modules/image_classification/PP-LCNet_x0_75.yaml +41 -0
  81. paddlex/configs/modules/image_classification/PP-LCNet_x1_0.yaml +41 -0
  82. paddlex/configs/modules/image_classification/PP-LCNet_x1_5.yaml +41 -0
  83. paddlex/configs/modules/image_classification/PP-LCNet_x2_0.yaml +41 -0
  84. paddlex/configs/modules/image_classification/PP-LCNet_x2_5.yaml +41 -0
  85. paddlex/configs/modules/image_classification/ResNet101.yaml +41 -0
  86. paddlex/configs/modules/image_classification/ResNet101_vd.yaml +41 -0
  87. paddlex/configs/modules/image_classification/ResNet152.yaml +41 -0
  88. paddlex/configs/modules/image_classification/ResNet152_vd.yaml +41 -0
  89. paddlex/configs/modules/image_classification/ResNet18.yaml +41 -0
  90. paddlex/configs/modules/image_classification/ResNet18_vd.yaml +41 -0
  91. paddlex/configs/modules/image_classification/ResNet200_vd.yaml +41 -0
  92. paddlex/configs/modules/image_classification/ResNet34.yaml +41 -0
  93. paddlex/configs/modules/image_classification/ResNet34_vd.yaml +41 -0
  94. paddlex/configs/modules/image_classification/ResNet50.yaml +41 -0
  95. paddlex/configs/modules/image_classification/ResNet50_vd.yaml +41 -0
  96. paddlex/configs/modules/image_classification/StarNet-S1.yaml +41 -0
  97. paddlex/configs/modules/image_classification/StarNet-S2.yaml +41 -0
  98. paddlex/configs/modules/image_classification/StarNet-S3.yaml +41 -0
  99. paddlex/configs/modules/image_classification/StarNet-S4.yaml +41 -0
  100. paddlex/configs/modules/image_classification/SwinTransformer_base_patch4_window12_384.yaml +41 -0
  101. paddlex/configs/modules/image_classification/SwinTransformer_base_patch4_window7_224.yaml +41 -0
  102. paddlex/configs/modules/image_classification/SwinTransformer_large_patch4_window12_384.yaml +41 -0
  103. paddlex/configs/modules/image_classification/SwinTransformer_large_patch4_window7_224.yaml +41 -0
  104. paddlex/configs/modules/image_classification/SwinTransformer_small_patch4_window7_224.yaml +41 -0
  105. paddlex/configs/modules/image_classification/SwinTransformer_tiny_patch4_window7_224.yaml +41 -0
  106. paddlex/configs/modules/image_feature/PP-ShiTuV2_rec.yaml +42 -0
  107. paddlex/configs/modules/image_feature/PP-ShiTuV2_rec_CLIP_vit_base.yaml +42 -0
  108. paddlex/configs/modules/image_feature/PP-ShiTuV2_rec_CLIP_vit_large.yaml +41 -0
  109. paddlex/configs/modules/image_multilabel_classification/CLIP_vit_base_patch16_448_ML.yaml +41 -0
  110. paddlex/configs/modules/image_multilabel_classification/PP-HGNetV2-B0_ML.yaml +41 -0
  111. paddlex/configs/modules/image_multilabel_classification/PP-HGNetV2-B4_ML.yaml +41 -0
  112. paddlex/configs/modules/image_multilabel_classification/PP-HGNetV2-B6_ML.yaml +41 -0
  113. paddlex/configs/modules/image_multilabel_classification/PP-LCNet_x1_0_ML.yaml +41 -0
  114. paddlex/configs/modules/image_multilabel_classification/ResNet50_ML.yaml +41 -0
  115. paddlex/configs/modules/image_unwarping/UVDoc.yaml +12 -0
  116. paddlex/configs/modules/instance_segmentation/Cascade-MaskRCNN-ResNet50-FPN.yaml +40 -0
  117. paddlex/configs/modules/instance_segmentation/Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN.yaml +40 -0
  118. paddlex/configs/modules/instance_segmentation/Mask-RT-DETR-H.yaml +40 -0
  119. paddlex/configs/modules/instance_segmentation/Mask-RT-DETR-L.yaml +40 -0
  120. paddlex/configs/modules/instance_segmentation/Mask-RT-DETR-M.yaml +40 -0
  121. paddlex/configs/modules/instance_segmentation/Mask-RT-DETR-S.yaml +40 -0
  122. paddlex/configs/modules/instance_segmentation/Mask-RT-DETR-X.yaml +40 -0
  123. paddlex/configs/modules/instance_segmentation/MaskRCNN-ResNeXt101-vd-FPN.yaml +39 -0
  124. paddlex/configs/modules/instance_segmentation/MaskRCNN-ResNet101-FPN.yaml +40 -0
  125. paddlex/configs/modules/instance_segmentation/MaskRCNN-ResNet101-vd-FPN.yaml +40 -0
  126. paddlex/configs/modules/instance_segmentation/MaskRCNN-ResNet50-FPN.yaml +40 -0
  127. paddlex/configs/modules/instance_segmentation/MaskRCNN-ResNet50-vd-FPN.yaml +40 -0
  128. paddlex/configs/modules/instance_segmentation/MaskRCNN-ResNet50.yaml +40 -0
  129. paddlex/configs/modules/instance_segmentation/PP-YOLOE_seg-S.yaml +40 -0
  130. paddlex/configs/modules/instance_segmentation/SOLOv2.yaml +40 -0
  131. paddlex/configs/modules/keypoint_detection/PP-TinyPose_128x96.yaml +40 -0
  132. paddlex/configs/modules/keypoint_detection/PP-TinyPose_256x192.yaml +40 -0
  133. paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
  134. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +40 -0
  135. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +40 -0
  136. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +40 -0
  137. paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
  138. paddlex/configs/modules/layout_detection/PicoDet-L_layout_17cls.yaml +40 -0
  139. paddlex/configs/modules/layout_detection/PicoDet-L_layout_3cls.yaml +40 -0
  140. paddlex/configs/modules/layout_detection/PicoDet-S_layout_17cls.yaml +40 -0
  141. paddlex/configs/modules/layout_detection/PicoDet-S_layout_3cls.yaml +40 -0
  142. paddlex/configs/modules/layout_detection/PicoDet_layout_1x.yaml +40 -0
  143. paddlex/configs/modules/layout_detection/PicoDet_layout_1x_table.yaml +40 -0
  144. paddlex/configs/modules/layout_detection/RT-DETR-H_layout_17cls.yaml +40 -0
  145. paddlex/configs/modules/layout_detection/RT-DETR-H_layout_3cls.yaml +40 -0
  146. paddlex/configs/modules/mainbody_detection/PP-ShiTuV2_det.yaml +41 -0
  147. paddlex/configs/modules/multilingual_speech_recognition/whisper_base.yaml +12 -0
  148. paddlex/configs/modules/multilingual_speech_recognition/whisper_large.yaml +12 -0
  149. paddlex/configs/modules/multilingual_speech_recognition/whisper_medium.yaml +12 -0
  150. paddlex/configs/modules/multilingual_speech_recognition/whisper_small.yaml +12 -0
  151. paddlex/configs/modules/multilingual_speech_recognition/whisper_tiny.yaml +12 -0
  152. paddlex/configs/modules/object_detection/Cascade-FasterRCNN-ResNet50-FPN.yaml +41 -0
  153. paddlex/configs/modules/object_detection/Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN.yaml +42 -0
  154. paddlex/configs/modules/object_detection/CenterNet-DLA-34.yaml +41 -0
  155. paddlex/configs/modules/object_detection/CenterNet-ResNet50.yaml +41 -0
  156. paddlex/configs/modules/object_detection/Co-DINO-R50.yaml +40 -0
  157. paddlex/configs/modules/object_detection/Co-DINO-Swin-L.yaml +40 -0
  158. paddlex/configs/modules/object_detection/Co-Deformable-DETR-R50.yaml +40 -0
  159. paddlex/configs/modules/object_detection/Co-Deformable-DETR-Swin-T.yaml +40 -0
  160. paddlex/configs/modules/object_detection/DETR-R50.yaml +42 -0
  161. paddlex/configs/modules/object_detection/FCOS-ResNet50.yaml +41 -0
  162. paddlex/configs/modules/object_detection/FasterRCNN-ResNeXt101-vd-FPN.yaml +42 -0
  163. paddlex/configs/modules/object_detection/FasterRCNN-ResNet101-FPN.yaml +42 -0
  164. paddlex/configs/modules/object_detection/FasterRCNN-ResNet101.yaml +42 -0
  165. paddlex/configs/modules/object_detection/FasterRCNN-ResNet34-FPN.yaml +42 -0
  166. paddlex/configs/modules/object_detection/FasterRCNN-ResNet50-FPN.yaml +42 -0
  167. paddlex/configs/modules/object_detection/FasterRCNN-ResNet50-vd-FPN.yaml +42 -0
  168. paddlex/configs/modules/object_detection/FasterRCNN-ResNet50-vd-SSLDv2-FPN.yaml +42 -0
  169. paddlex/configs/modules/object_detection/FasterRCNN-ResNet50.yaml +42 -0
  170. paddlex/configs/modules/object_detection/FasterRCNN-Swin-Tiny-FPN.yaml +42 -0
  171. paddlex/configs/modules/object_detection/PP-YOLOE_plus-L.yaml +40 -0
  172. paddlex/configs/modules/object_detection/PP-YOLOE_plus-M.yaml +40 -0
  173. paddlex/configs/modules/object_detection/PP-YOLOE_plus-S.yaml +40 -0
  174. paddlex/configs/modules/object_detection/PP-YOLOE_plus-X.yaml +40 -0
  175. paddlex/configs/modules/object_detection/PicoDet-L.yaml +40 -0
  176. paddlex/configs/modules/object_detection/PicoDet-M.yaml +42 -0
  177. paddlex/configs/modules/object_detection/PicoDet-S.yaml +40 -0
  178. paddlex/configs/modules/object_detection/PicoDet-XS.yaml +42 -0
  179. paddlex/configs/modules/object_detection/RT-DETR-H.yaml +40 -0
  180. paddlex/configs/modules/object_detection/RT-DETR-L.yaml +40 -0
  181. paddlex/configs/modules/object_detection/RT-DETR-R18.yaml +40 -0
  182. paddlex/configs/modules/object_detection/RT-DETR-R50.yaml +40 -0
  183. paddlex/configs/modules/object_detection/RT-DETR-X.yaml +40 -0
  184. paddlex/configs/modules/object_detection/YOLOX-L.yaml +40 -0
  185. paddlex/configs/modules/object_detection/YOLOX-M.yaml +40 -0
  186. paddlex/configs/modules/object_detection/YOLOX-N.yaml +40 -0
  187. paddlex/configs/modules/object_detection/YOLOX-S.yaml +40 -0
  188. paddlex/configs/modules/object_detection/YOLOX-T.yaml +40 -0
  189. paddlex/configs/modules/object_detection/YOLOX-X.yaml +40 -0
  190. paddlex/configs/modules/object_detection/YOLOv3-DarkNet53.yaml +40 -0
  191. paddlex/configs/modules/object_detection/YOLOv3-MobileNetV3.yaml +40 -0
  192. paddlex/configs/modules/object_detection/YOLOv3-ResNet50_vd_DCN.yaml +40 -0
  193. paddlex/configs/modules/open_vocabulary_detection/GroundingDINO-T.yaml +13 -0
  194. paddlex/configs/modules/open_vocabulary_detection/YOLO-Worldv2-L.yaml +13 -0
  195. paddlex/configs/modules/open_vocabulary_segmentation/SAM-H_box.yaml +17 -0
  196. paddlex/configs/modules/open_vocabulary_segmentation/SAM-H_point.yaml +15 -0
  197. paddlex/configs/modules/pedestrian_attribute_recognition/PP-LCNet_x1_0_pedestrian_attribute.yaml +41 -0
  198. paddlex/configs/modules/rotated_object_detection/PP-YOLOE-R-L.yaml +40 -0
  199. paddlex/configs/modules/seal_text_detection/PP-OCRv4_mobile_seal_det.yaml +40 -0
  200. paddlex/configs/modules/seal_text_detection/PP-OCRv4_server_seal_det.yaml +40 -0
  201. paddlex/configs/modules/semantic_segmentation/Deeplabv3-R101.yaml +40 -0
  202. paddlex/configs/modules/semantic_segmentation/Deeplabv3-R50.yaml +40 -0
  203. paddlex/configs/modules/semantic_segmentation/Deeplabv3_Plus-R101.yaml +40 -0
  204. paddlex/configs/modules/semantic_segmentation/Deeplabv3_Plus-R50.yaml +40 -0
  205. paddlex/configs/modules/semantic_segmentation/MaskFormer_small.yaml +42 -0
  206. paddlex/configs/modules/semantic_segmentation/MaskFormer_tiny.yaml +42 -0
  207. paddlex/configs/modules/semantic_segmentation/OCRNet_HRNet-W18.yaml +40 -0
  208. paddlex/configs/modules/semantic_segmentation/OCRNet_HRNet-W48.yaml +40 -0
  209. paddlex/configs/modules/semantic_segmentation/PP-LiteSeg-B.yaml +41 -0
  210. paddlex/configs/modules/semantic_segmentation/PP-LiteSeg-T.yaml +40 -0
  211. paddlex/configs/modules/semantic_segmentation/SeaFormer_base.yaml +40 -0
  212. paddlex/configs/modules/semantic_segmentation/SeaFormer_large.yaml +40 -0
  213. paddlex/configs/modules/semantic_segmentation/SeaFormer_small.yaml +40 -0
  214. paddlex/configs/modules/semantic_segmentation/SeaFormer_tiny.yaml +40 -0
  215. paddlex/configs/modules/semantic_segmentation/SegFormer-B0.yaml +40 -0
  216. paddlex/configs/modules/semantic_segmentation/SegFormer-B1.yaml +40 -0
  217. paddlex/configs/modules/semantic_segmentation/SegFormer-B2.yaml +40 -0
  218. paddlex/configs/modules/semantic_segmentation/SegFormer-B3.yaml +40 -0
  219. paddlex/configs/modules/semantic_segmentation/SegFormer-B4.yaml +40 -0
  220. paddlex/configs/modules/semantic_segmentation/SegFormer-B5.yaml +40 -0
  221. paddlex/configs/modules/small_object_detection/PP-YOLOE_plus_SOD-L.yaml +42 -0
  222. paddlex/configs/modules/small_object_detection/PP-YOLOE_plus_SOD-S.yaml +42 -0
  223. paddlex/configs/modules/small_object_detection/PP-YOLOE_plus_SOD-largesize-L.yaml +42 -0
  224. paddlex/configs/modules/table_cells_detection/RT-DETR-L_wired_table_cell_det.yaml +40 -0
  225. paddlex/configs/modules/table_cells_detection/RT-DETR-L_wireless_table_cell_det.yaml +40 -0
  226. paddlex/configs/modules/table_classification/PP-LCNet_x1_0_table_cls.yaml +41 -0
  227. paddlex/configs/modules/table_structure_recognition/SLANeXt_wired.yaml +39 -0
  228. paddlex/configs/modules/table_structure_recognition/SLANeXt_wireless.yaml +39 -0
  229. paddlex/configs/modules/table_structure_recognition/SLANet.yaml +39 -0
  230. paddlex/configs/modules/table_structure_recognition/SLANet_plus.yaml +39 -0
  231. paddlex/configs/modules/text_detection/PP-OCRv3_mobile_det.yaml +40 -0
  232. paddlex/configs/modules/text_detection/PP-OCRv3_server_det.yaml +40 -0
  233. paddlex/configs/modules/text_detection/PP-OCRv4_mobile_det.yaml +40 -0
  234. paddlex/configs/modules/text_detection/PP-OCRv4_server_det.yaml +40 -0
  235. paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
  236. paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
  237. paddlex/configs/modules/text_recognition/PP-OCRv3_mobile_rec.yaml +39 -0
  238. paddlex/configs/modules/text_recognition/PP-OCRv4_mobile_rec.yaml +39 -0
  239. paddlex/configs/modules/text_recognition/PP-OCRv4_server_rec.yaml +39 -0
  240. paddlex/configs/modules/text_recognition/PP-OCRv4_server_rec_doc.yaml +39 -0
  241. paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
  242. paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
  243. paddlex/configs/modules/text_recognition/arabic_PP-OCRv3_mobile_rec.yaml +39 -0
  244. paddlex/configs/modules/text_recognition/ch_RepSVTR_rec.yaml +39 -0
  245. paddlex/configs/modules/text_recognition/ch_SVTRv2_rec.yaml +39 -0
  246. paddlex/configs/modules/text_recognition/chinese_cht_PP-OCRv3_mobile_rec.yaml +39 -0
  247. paddlex/configs/modules/text_recognition/cyrillic_PP-OCRv3_mobile_rec.yaml +39 -0
  248. paddlex/configs/modules/text_recognition/devanagari_PP-OCRv3_mobile_rec.yaml +39 -0
  249. paddlex/configs/modules/text_recognition/en_PP-OCRv3_mobile_rec.yaml +39 -0
  250. paddlex/configs/modules/text_recognition/en_PP-OCRv4_mobile_rec.yaml +39 -0
  251. paddlex/configs/modules/text_recognition/japan_PP-OCRv3_mobile_rec.yaml +39 -0
  252. paddlex/configs/modules/text_recognition/ka_PP-OCRv3_mobile_rec.yaml +39 -0
  253. paddlex/configs/modules/text_recognition/korean_PP-OCRv3_mobile_rec.yaml +39 -0
  254. paddlex/configs/modules/text_recognition/latin_PP-OCRv3_mobile_rec.yaml +39 -0
  255. paddlex/configs/modules/text_recognition/ta_PP-OCRv3_mobile_rec.yaml +39 -0
  256. paddlex/configs/modules/text_recognition/te_PP-OCRv3_mobile_rec.yaml +39 -0
  257. paddlex/configs/modules/textline_orientation/PP-LCNet_x0_25_textline_ori.yaml +41 -0
  258. paddlex/configs/modules/ts_anomaly_detection/AutoEncoder_ad.yaml +37 -0
  259. paddlex/configs/modules/ts_anomaly_detection/DLinear_ad.yaml +37 -0
  260. paddlex/configs/modules/ts_anomaly_detection/Nonstationary_ad.yaml +37 -0
  261. paddlex/configs/modules/ts_anomaly_detection/PatchTST_ad.yaml +37 -0
  262. paddlex/configs/modules/ts_anomaly_detection/TimesNet_ad.yaml +37 -0
  263. paddlex/configs/modules/ts_classification/TimesNet_cls.yaml +37 -0
  264. paddlex/configs/modules/ts_forecast/DLinear.yaml +38 -0
  265. paddlex/configs/modules/ts_forecast/NLinear.yaml +38 -0
  266. paddlex/configs/modules/ts_forecast/Nonstationary.yaml +38 -0
  267. paddlex/configs/modules/ts_forecast/PatchTST.yaml +38 -0
  268. paddlex/configs/modules/ts_forecast/RLinear.yaml +38 -0
  269. paddlex/configs/modules/ts_forecast/TiDE.yaml +38 -0
  270. paddlex/configs/modules/ts_forecast/TimesNet.yaml +38 -0
  271. paddlex/configs/modules/vehicle_attribute_recognition/PP-LCNet_x1_0_vehicle_attribute.yaml +41 -0
  272. paddlex/configs/modules/vehicle_detection/PP-YOLOE-L_vehicle.yaml +41 -0
  273. paddlex/configs/modules/vehicle_detection/PP-YOLOE-S_vehicle.yaml +42 -0
  274. paddlex/configs/modules/video_classification/PP-TSM-R50_8frames_uniform.yaml +42 -0
  275. paddlex/configs/modules/video_classification/PP-TSMv2-LCNetV2_16frames_uniform.yaml +42 -0
  276. paddlex/configs/modules/video_classification/PP-TSMv2-LCNetV2_8frames_uniform.yaml +42 -0
  277. paddlex/configs/modules/video_detection/YOWO.yaml +40 -0
  278. paddlex/configs/pipelines/3d_bev_detection.yaml +9 -0
  279. paddlex/configs/pipelines/OCR.yaml +45 -0
  280. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +151 -0
  281. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +237 -0
  282. paddlex/configs/pipelines/PP-ShiTuV2.yaml +18 -0
  283. paddlex/configs/pipelines/PP-StructureV3.yaml +226 -0
  284. paddlex/configs/pipelines/anomaly_detection.yaml +8 -0
  285. paddlex/configs/pipelines/doc_preprocessor.yaml +15 -0
  286. paddlex/configs/pipelines/doc_understanding.yaml +9 -0
  287. paddlex/configs/pipelines/face_recognition.yaml +18 -0
  288. paddlex/configs/pipelines/formula_recognition.yaml +39 -0
  289. paddlex/configs/pipelines/human_keypoint_detection.yaml +17 -0
  290. paddlex/configs/pipelines/image_classification.yaml +10 -0
  291. paddlex/configs/pipelines/image_multilabel_classification.yaml +9 -0
  292. paddlex/configs/pipelines/instance_segmentation.yaml +10 -0
  293. paddlex/configs/pipelines/layout_parsing.yaml +102 -0
  294. paddlex/configs/pipelines/multilingual_speech_recognition.yaml +9 -0
  295. paddlex/configs/pipelines/object_detection.yaml +10 -0
  296. paddlex/configs/pipelines/open_vocabulary_detection.yaml +12 -0
  297. paddlex/configs/pipelines/open_vocabulary_segmentation.yaml +13 -0
  298. paddlex/configs/pipelines/pedestrian_attribute_recognition.yaml +15 -0
  299. paddlex/configs/pipelines/rotated_object_detection.yaml +10 -0
  300. paddlex/configs/pipelines/seal_recognition.yaml +52 -0
  301. paddlex/configs/pipelines/semantic_segmentation.yaml +10 -0
  302. paddlex/configs/pipelines/small_object_detection.yaml +10 -0
  303. paddlex/configs/pipelines/table_recognition.yaml +57 -0
  304. paddlex/configs/pipelines/table_recognition_v2.yaml +82 -0
  305. paddlex/configs/pipelines/ts_anomaly_detection.yaml +8 -0
  306. paddlex/configs/pipelines/ts_classification.yaml +8 -0
  307. paddlex/configs/pipelines/ts_forecast.yaml +8 -0
  308. paddlex/configs/pipelines/vehicle_attribute_recognition.yaml +15 -0
  309. paddlex/configs/pipelines/video_classification.yaml +9 -0
  310. paddlex/configs/pipelines/video_detection.yaml +10 -0
  311. paddlex/constants.py +17 -0
  312. paddlex/engine.py +56 -0
  313. paddlex/hpip_links.html +31 -0
  314. paddlex/inference/__init__.py +19 -0
  315. paddlex/inference/common/__init__.py +13 -0
  316. paddlex/inference/common/batch_sampler/__init__.py +21 -0
  317. paddlex/inference/common/batch_sampler/audio_batch_sampler.py +83 -0
  318. paddlex/inference/common/batch_sampler/base_batch_sampler.py +94 -0
  319. paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +144 -0
  320. paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +87 -0
  321. paddlex/inference/common/batch_sampler/image_batch_sampler.py +121 -0
  322. paddlex/inference/common/batch_sampler/ts_batch_sampler.py +109 -0
  323. paddlex/inference/common/batch_sampler/video_batch_sampler.py +74 -0
  324. paddlex/inference/common/reader/__init__.py +19 -0
  325. paddlex/inference/common/reader/audio_reader.py +46 -0
  326. paddlex/inference/common/reader/det_3d_reader.py +241 -0
  327. paddlex/inference/common/reader/image_reader.py +73 -0
  328. paddlex/inference/common/reader/ts_reader.py +46 -0
  329. paddlex/inference/common/reader/video_reader.py +42 -0
  330. paddlex/inference/common/result/__init__.py +29 -0
  331. paddlex/inference/common/result/base_cv_result.py +41 -0
  332. paddlex/inference/common/result/base_result.py +72 -0
  333. paddlex/inference/common/result/base_ts_result.py +41 -0
  334. paddlex/inference/common/result/base_video_result.py +36 -0
  335. paddlex/inference/common/result/mixin.py +709 -0
  336. paddlex/inference/models/__init__.py +86 -0
  337. paddlex/inference/models/anomaly_detection/__init__.py +15 -0
  338. paddlex/inference/models/anomaly_detection/predictor.py +135 -0
  339. paddlex/inference/models/anomaly_detection/processors.py +53 -0
  340. paddlex/inference/models/anomaly_detection/result.py +71 -0
  341. paddlex/inference/models/base/__init__.py +15 -0
  342. paddlex/inference/models/base/predictor/__init__.py +15 -0
  343. paddlex/inference/models/base/predictor/base_predictor.py +414 -0
  344. paddlex/inference/models/common/__init__.py +26 -0
  345. paddlex/inference/models/common/static_infer.py +801 -0
  346. paddlex/inference/models/common/tokenizer/__init__.py +21 -0
  347. paddlex/inference/models/common/tokenizer/bert_tokenizer.py +655 -0
  348. paddlex/inference/models/common/tokenizer/clip_tokenizer.py +609 -0
  349. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +453 -0
  350. paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
  351. paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +438 -0
  352. paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
  353. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +2149 -0
  354. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3720 -0
  355. paddlex/inference/models/common/tokenizer/utils.py +66 -0
  356. paddlex/inference/models/common/tokenizer/vocab.py +647 -0
  357. paddlex/inference/models/common/ts/__init__.py +15 -0
  358. paddlex/inference/models/common/ts/funcs.py +540 -0
  359. paddlex/inference/models/common/ts/processors.py +322 -0
  360. paddlex/inference/models/common/vision/__init__.py +23 -0
  361. paddlex/inference/models/common/vision/funcs.py +98 -0
  362. paddlex/inference/models/common/vision/processors.py +285 -0
  363. paddlex/inference/models/common/vlm/__init__.py +13 -0
  364. paddlex/inference/models/common/vlm/activations.py +189 -0
  365. paddlex/inference/models/common/vlm/bert_padding.py +127 -0
  366. paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
  367. paddlex/inference/models/common/vlm/distributed.py +229 -0
  368. paddlex/inference/models/common/vlm/flash_attn_utils.py +119 -0
  369. paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
  370. paddlex/inference/models/common/vlm/generation/__init__.py +34 -0
  371. paddlex/inference/models/common/vlm/generation/configuration_utils.py +533 -0
  372. paddlex/inference/models/common/vlm/generation/logits_process.py +730 -0
  373. paddlex/inference/models/common/vlm/generation/stopping_criteria.py +106 -0
  374. paddlex/inference/models/common/vlm/generation/utils.py +2162 -0
  375. paddlex/inference/models/common/vlm/transformers/__init__.py +16 -0
  376. paddlex/inference/models/common/vlm/transformers/configuration_utils.py +1037 -0
  377. paddlex/inference/models/common/vlm/transformers/conversion_utils.py +408 -0
  378. paddlex/inference/models/common/vlm/transformers/model_outputs.py +1612 -0
  379. paddlex/inference/models/common/vlm/transformers/model_utils.py +2014 -0
  380. paddlex/inference/models/common/vlm/transformers/utils.py +178 -0
  381. paddlex/inference/models/common/vlm/utils.py +109 -0
  382. paddlex/inference/models/doc_vlm/__init__.py +15 -0
  383. paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
  384. paddlex/inference/models/doc_vlm/modeling/__init__.py +17 -0
  385. paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
  386. paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
  387. paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +2495 -0
  388. paddlex/inference/models/doc_vlm/predictor.py +253 -0
  389. paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
  390. paddlex/inference/models/doc_vlm/processors/__init__.py +17 -0
  391. paddlex/inference/models/doc_vlm/processors/common.py +561 -0
  392. paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
  393. paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +543 -0
  394. paddlex/inference/models/doc_vlm/result.py +21 -0
  395. paddlex/inference/models/face_feature/__init__.py +15 -0
  396. paddlex/inference/models/face_feature/predictor.py +66 -0
  397. paddlex/inference/models/formula_recognition/__init__.py +15 -0
  398. paddlex/inference/models/formula_recognition/predictor.py +193 -0
  399. paddlex/inference/models/formula_recognition/processors.py +1015 -0
  400. paddlex/inference/models/formula_recognition/result.py +411 -0
  401. paddlex/inference/models/image_classification/__init__.py +15 -0
  402. paddlex/inference/models/image_classification/predictor.py +172 -0
  403. paddlex/inference/models/image_classification/processors.py +89 -0
  404. paddlex/inference/models/image_classification/result.py +93 -0
  405. paddlex/inference/models/image_feature/__init__.py +15 -0
  406. paddlex/inference/models/image_feature/predictor.py +146 -0
  407. paddlex/inference/models/image_feature/processors.py +31 -0
  408. paddlex/inference/models/image_feature/result.py +32 -0
  409. paddlex/inference/models/image_multilabel_classification/__init__.py +15 -0
  410. paddlex/inference/models/image_multilabel_classification/predictor.py +95 -0
  411. paddlex/inference/models/image_multilabel_classification/processors.py +89 -0
  412. paddlex/inference/models/image_multilabel_classification/result.py +96 -0
  413. paddlex/inference/models/image_unwarping/__init__.py +15 -0
  414. paddlex/inference/models/image_unwarping/predictor.py +97 -0
  415. paddlex/inference/models/image_unwarping/processors.py +92 -0
  416. paddlex/inference/models/image_unwarping/result.py +47 -0
  417. paddlex/inference/models/instance_segmentation/__init__.py +15 -0
  418. paddlex/inference/models/instance_segmentation/predictor.py +202 -0
  419. paddlex/inference/models/instance_segmentation/processors.py +102 -0
  420. paddlex/inference/models/instance_segmentation/result.py +162 -0
  421. paddlex/inference/models/keypoint_detection/__init__.py +15 -0
  422. paddlex/inference/models/keypoint_detection/predictor.py +190 -0
  423. paddlex/inference/models/keypoint_detection/processors.py +367 -0
  424. paddlex/inference/models/keypoint_detection/result.py +197 -0
  425. paddlex/inference/models/m_3d_bev_detection/__init__.py +15 -0
  426. paddlex/inference/models/m_3d_bev_detection/predictor.py +303 -0
  427. paddlex/inference/models/m_3d_bev_detection/processors.py +990 -0
  428. paddlex/inference/models/m_3d_bev_detection/result.py +68 -0
  429. paddlex/inference/models/m_3d_bev_detection/visualizer_3d.py +169 -0
  430. paddlex/inference/models/multilingual_speech_recognition/__init__.py +15 -0
  431. paddlex/inference/models/multilingual_speech_recognition/predictor.py +137 -0
  432. paddlex/inference/models/multilingual_speech_recognition/processors.py +1933 -0
  433. paddlex/inference/models/multilingual_speech_recognition/result.py +21 -0
  434. paddlex/inference/models/object_detection/__init__.py +15 -0
  435. paddlex/inference/models/object_detection/predictor.py +344 -0
  436. paddlex/inference/models/object_detection/processors.py +885 -0
  437. paddlex/inference/models/object_detection/result.py +114 -0
  438. paddlex/inference/models/object_detection/utils.py +70 -0
  439. paddlex/inference/models/open_vocabulary_detection/__init__.py +15 -0
  440. paddlex/inference/models/open_vocabulary_detection/predictor.py +172 -0
  441. paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +16 -0
  442. paddlex/inference/models/open_vocabulary_detection/processors/common.py +114 -0
  443. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +496 -0
  444. paddlex/inference/models/open_vocabulary_detection/processors/yoloworld_processors.py +209 -0
  445. paddlex/inference/models/open_vocabulary_segmentation/__init__.py +15 -0
  446. paddlex/inference/models/open_vocabulary_segmentation/predictor.py +113 -0
  447. paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +15 -0
  448. paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +249 -0
  449. paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +15 -0
  450. paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +149 -0
  451. paddlex/inference/models/semantic_segmentation/__init__.py +15 -0
  452. paddlex/inference/models/semantic_segmentation/predictor.py +158 -0
  453. paddlex/inference/models/semantic_segmentation/processors.py +117 -0
  454. paddlex/inference/models/semantic_segmentation/result.py +73 -0
  455. paddlex/inference/models/table_structure_recognition/__init__.py +15 -0
  456. paddlex/inference/models/table_structure_recognition/predictor.py +161 -0
  457. paddlex/inference/models/table_structure_recognition/processors.py +229 -0
  458. paddlex/inference/models/table_structure_recognition/result.py +63 -0
  459. paddlex/inference/models/text_detection/__init__.py +15 -0
  460. paddlex/inference/models/text_detection/predictor.py +191 -0
  461. paddlex/inference/models/text_detection/processors.py +538 -0
  462. paddlex/inference/models/text_detection/result.py +46 -0
  463. paddlex/inference/models/text_recognition/__init__.py +15 -0
  464. paddlex/inference/models/text_recognition/predictor.py +98 -0
  465. paddlex/inference/models/text_recognition/processors.py +245 -0
  466. paddlex/inference/models/text_recognition/result.py +76 -0
  467. paddlex/inference/models/ts_anomaly_detection/__init__.py +15 -0
  468. paddlex/inference/models/ts_anomaly_detection/predictor.py +141 -0
  469. paddlex/inference/models/ts_anomaly_detection/processors.py +98 -0
  470. paddlex/inference/models/ts_anomaly_detection/result.py +83 -0
  471. paddlex/inference/models/ts_classification/__init__.py +15 -0
  472. paddlex/inference/models/ts_classification/predictor.py +122 -0
  473. paddlex/inference/models/ts_classification/processors.py +122 -0
  474. paddlex/inference/models/ts_classification/result.py +87 -0
  475. paddlex/inference/models/ts_forecasting/__init__.py +15 -0
  476. paddlex/inference/models/ts_forecasting/predictor.py +154 -0
  477. paddlex/inference/models/ts_forecasting/processors.py +158 -0
  478. paddlex/inference/models/ts_forecasting/result.py +96 -0
  479. paddlex/inference/models/video_classification/__init__.py +15 -0
  480. paddlex/inference/models/video_classification/predictor.py +141 -0
  481. paddlex/inference/models/video_classification/processors.py +409 -0
  482. paddlex/inference/models/video_classification/result.py +96 -0
  483. paddlex/inference/models/video_detection/__init__.py +15 -0
  484. paddlex/inference/models/video_detection/predictor.py +129 -0
  485. paddlex/inference/models/video_detection/processors.py +463 -0
  486. paddlex/inference/models/video_detection/result.py +109 -0
  487. paddlex/inference/pipelines/__init__.py +239 -0
  488. paddlex/inference/pipelines/_parallel.py +172 -0
  489. paddlex/inference/pipelines/anomaly_detection/__init__.py +15 -0
  490. paddlex/inference/pipelines/anomaly_detection/pipeline.py +82 -0
  491. paddlex/inference/pipelines/attribute_recognition/__init__.py +15 -0
  492. paddlex/inference/pipelines/attribute_recognition/pipeline.py +120 -0
  493. paddlex/inference/pipelines/attribute_recognition/result.py +102 -0
  494. paddlex/inference/pipelines/base.py +156 -0
  495. paddlex/inference/pipelines/components/__init__.py +29 -0
  496. paddlex/inference/pipelines/components/chat_server/__init__.py +16 -0
  497. paddlex/inference/pipelines/components/chat_server/base.py +39 -0
  498. paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +236 -0
  499. paddlex/inference/pipelines/components/common/__init__.py +19 -0
  500. paddlex/inference/pipelines/components/common/base_operator.py +37 -0
  501. paddlex/inference/pipelines/components/common/base_result.py +66 -0
  502. paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +45 -0
  503. paddlex/inference/pipelines/components/common/crop_image_regions.py +556 -0
  504. paddlex/inference/pipelines/components/common/seal_det_warp.py +972 -0
  505. paddlex/inference/pipelines/components/common/sort_boxes.py +85 -0
  506. paddlex/inference/pipelines/components/common/warp_image.py +50 -0
  507. paddlex/inference/pipelines/components/faisser.py +357 -0
  508. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +16 -0
  509. paddlex/inference/pipelines/components/prompt_engineering/base.py +35 -0
  510. paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +128 -0
  511. paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +148 -0
  512. paddlex/inference/pipelines/components/retriever/__init__.py +16 -0
  513. paddlex/inference/pipelines/components/retriever/base.py +228 -0
  514. paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +70 -0
  515. paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +166 -0
  516. paddlex/inference/pipelines/components/utils/__init__.py +13 -0
  517. paddlex/inference/pipelines/components/utils/mixin.py +206 -0
  518. paddlex/inference/pipelines/doc_preprocessor/__init__.py +15 -0
  519. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +209 -0
  520. paddlex/inference/pipelines/doc_preprocessor/result.py +98 -0
  521. paddlex/inference/pipelines/doc_understanding/__init__.py +15 -0
  522. paddlex/inference/pipelines/doc_understanding/pipeline.py +71 -0
  523. paddlex/inference/pipelines/face_recognition/__init__.py +15 -0
  524. paddlex/inference/pipelines/face_recognition/pipeline.py +63 -0
  525. paddlex/inference/pipelines/face_recognition/result.py +44 -0
  526. paddlex/inference/pipelines/formula_recognition/__init__.py +15 -0
  527. paddlex/inference/pipelines/formula_recognition/pipeline.py +347 -0
  528. paddlex/inference/pipelines/formula_recognition/result.py +282 -0
  529. paddlex/inference/pipelines/image_classification/__init__.py +15 -0
  530. paddlex/inference/pipelines/image_classification/pipeline.py +90 -0
  531. paddlex/inference/pipelines/image_multilabel_classification/__init__.py +15 -0
  532. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +97 -0
  533. paddlex/inference/pipelines/instance_segmentation/__init__.py +15 -0
  534. paddlex/inference/pipelines/instance_segmentation/pipeline.py +91 -0
  535. paddlex/inference/pipelines/keypoint_detection/__init__.py +15 -0
  536. paddlex/inference/pipelines/keypoint_detection/pipeline.py +158 -0
  537. paddlex/inference/pipelines/layout_parsing/__init__.py +16 -0
  538. paddlex/inference/pipelines/layout_parsing/pipeline.py +568 -0
  539. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +1382 -0
  540. paddlex/inference/pipelines/layout_parsing/result.py +191 -0
  541. paddlex/inference/pipelines/layout_parsing/result_v2.py +745 -0
  542. paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
  543. paddlex/inference/pipelines/layout_parsing/utils.py +951 -0
  544. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
  545. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1143 -0
  546. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +562 -0
  547. paddlex/inference/pipelines/m_3d_bev_detection/__init__.py +15 -0
  548. paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +74 -0
  549. paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +15 -0
  550. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +78 -0
  551. paddlex/inference/pipelines/object_detection/__init__.py +15 -0
  552. paddlex/inference/pipelines/object_detection/pipeline.py +115 -0
  553. paddlex/inference/pipelines/ocr/__init__.py +15 -0
  554. paddlex/inference/pipelines/ocr/pipeline.py +463 -0
  555. paddlex/inference/pipelines/ocr/result.py +255 -0
  556. paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +15 -0
  557. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +86 -0
  558. paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +15 -0
  559. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +100 -0
  560. paddlex/inference/pipelines/pp_chatocr/__init__.py +16 -0
  561. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +111 -0
  562. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +781 -0
  563. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +992 -0
  564. paddlex/inference/pipelines/pp_shitu_v2/__init__.py +15 -0
  565. paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +156 -0
  566. paddlex/inference/pipelines/pp_shitu_v2/result.py +126 -0
  567. paddlex/inference/pipelines/rotated_object_detection/__init__.py +15 -0
  568. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +95 -0
  569. paddlex/inference/pipelines/seal_recognition/__init__.py +15 -0
  570. paddlex/inference/pipelines/seal_recognition/pipeline.py +335 -0
  571. paddlex/inference/pipelines/seal_recognition/result.py +89 -0
  572. paddlex/inference/pipelines/semantic_segmentation/__init__.py +15 -0
  573. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +95 -0
  574. paddlex/inference/pipelines/small_object_detection/__init__.py +15 -0
  575. paddlex/inference/pipelines/small_object_detection/pipeline.py +95 -0
  576. paddlex/inference/pipelines/table_recognition/__init__.py +16 -0
  577. paddlex/inference/pipelines/table_recognition/pipeline.py +486 -0
  578. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +1395 -0
  579. paddlex/inference/pipelines/table_recognition/result.py +218 -0
  580. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +366 -0
  581. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +488 -0
  582. paddlex/inference/pipelines/table_recognition/utils.py +44 -0
  583. paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +15 -0
  584. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +72 -0
  585. paddlex/inference/pipelines/ts_classification/__init__.py +15 -0
  586. paddlex/inference/pipelines/ts_classification/pipeline.py +72 -0
  587. paddlex/inference/pipelines/ts_forecasting/__init__.py +15 -0
  588. paddlex/inference/pipelines/ts_forecasting/pipeline.py +72 -0
  589. paddlex/inference/pipelines/video_classification/__init__.py +15 -0
  590. paddlex/inference/pipelines/video_classification/pipeline.py +79 -0
  591. paddlex/inference/pipelines/video_detection/__init__.py +15 -0
  592. paddlex/inference/pipelines/video_detection/pipeline.py +86 -0
  593. paddlex/inference/serving/__init__.py +17 -0
  594. paddlex/inference/serving/basic_serving/__init__.py +18 -0
  595. paddlex/inference/serving/basic_serving/_app.py +221 -0
  596. paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +44 -0
  597. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +13 -0
  598. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +104 -0
  599. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +36 -0
  600. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +95 -0
  601. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +67 -0
  602. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +100 -0
  603. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_understanding.py +153 -0
  604. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +226 -0
  605. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +100 -0
  606. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +81 -0
  607. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +69 -0
  608. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +73 -0
  609. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +87 -0
  610. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +117 -0
  611. paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +79 -0
  612. paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +92 -0
  613. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +77 -0
  614. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +102 -0
  615. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +81 -0
  616. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +91 -0
  617. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +84 -0
  618. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +193 -0
  619. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +223 -0
  620. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +221 -0
  621. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +143 -0
  622. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +81 -0
  623. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +106 -0
  624. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +67 -0
  625. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +72 -0
  626. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +108 -0
  627. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +113 -0
  628. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +65 -0
  629. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +64 -0
  630. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +65 -0
  631. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +84 -0
  632. paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +76 -0
  633. paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +92 -0
  634. paddlex/inference/serving/basic_serving/_server.py +40 -0
  635. paddlex/inference/serving/infra/__init__.py +13 -0
  636. paddlex/inference/serving/infra/config.py +36 -0
  637. paddlex/inference/serving/infra/models.py +79 -0
  638. paddlex/inference/serving/infra/storage.py +180 -0
  639. paddlex/inference/serving/infra/utils.py +285 -0
  640. paddlex/inference/serving/schemas/__init__.py +13 -0
  641. paddlex/inference/serving/schemas/anomaly_detection.py +39 -0
  642. paddlex/inference/serving/schemas/doc_preprocessor.py +54 -0
  643. paddlex/inference/serving/schemas/doc_understanding.py +78 -0
  644. paddlex/inference/serving/schemas/face_recognition.py +124 -0
  645. paddlex/inference/serving/schemas/formula_recognition.py +56 -0
  646. paddlex/inference/serving/schemas/human_keypoint_detection.py +55 -0
  647. paddlex/inference/serving/schemas/image_classification.py +45 -0
  648. paddlex/inference/serving/schemas/image_multilabel_classification.py +47 -0
  649. paddlex/inference/serving/schemas/instance_segmentation.py +53 -0
  650. paddlex/inference/serving/schemas/layout_parsing.py +71 -0
  651. paddlex/inference/serving/schemas/m_3d_bev_detection.py +48 -0
  652. paddlex/inference/serving/schemas/multilingual_speech_recognition.py +57 -0
  653. paddlex/inference/serving/schemas/object_detection.py +52 -0
  654. paddlex/inference/serving/schemas/ocr.py +60 -0
  655. paddlex/inference/serving/schemas/open_vocabulary_detection.py +52 -0
  656. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +52 -0
  657. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +61 -0
  658. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +133 -0
  659. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +150 -0
  660. paddlex/inference/serving/schemas/pp_shituv2.py +124 -0
  661. paddlex/inference/serving/schemas/pp_structurev3.py +88 -0
  662. paddlex/inference/serving/schemas/rotated_object_detection.py +52 -0
  663. paddlex/inference/serving/schemas/seal_recognition.py +62 -0
  664. paddlex/inference/serving/schemas/semantic_segmentation.py +45 -0
  665. paddlex/inference/serving/schemas/shared/__init__.py +13 -0
  666. paddlex/inference/serving/schemas/shared/classification.py +23 -0
  667. paddlex/inference/serving/schemas/shared/image_segmentation.py +28 -0
  668. paddlex/inference/serving/schemas/shared/object_detection.py +24 -0
  669. paddlex/inference/serving/schemas/shared/ocr.py +25 -0
  670. paddlex/inference/serving/schemas/small_object_detection.py +52 -0
  671. paddlex/inference/serving/schemas/table_recognition.py +64 -0
  672. paddlex/inference/serving/schemas/table_recognition_v2.py +69 -0
  673. paddlex/inference/serving/schemas/ts_anomaly_detection.py +37 -0
  674. paddlex/inference/serving/schemas/ts_classification.py +38 -0
  675. paddlex/inference/serving/schemas/ts_forecast.py +37 -0
  676. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +61 -0
  677. paddlex/inference/serving/schemas/video_classification.py +44 -0
  678. paddlex/inference/serving/schemas/video_detection.py +56 -0
  679. paddlex/inference/utils/__init__.py +13 -0
  680. paddlex/inference/utils/benchmark.py +379 -0
  681. paddlex/inference/utils/color_map.py +123 -0
  682. paddlex/inference/utils/get_pipeline_path.py +27 -0
  683. paddlex/inference/utils/hpi.py +254 -0
  684. paddlex/inference/utils/hpi_model_info_collection.json +2331 -0
  685. paddlex/inference/utils/io/__init__.py +36 -0
  686. paddlex/inference/utils/io/readers.py +504 -0
  687. paddlex/inference/utils/io/style.py +381 -0
  688. paddlex/inference/utils/io/tablepyxl.py +157 -0
  689. paddlex/inference/utils/io/writers.py +458 -0
  690. paddlex/inference/utils/model_paths.py +48 -0
  691. paddlex/inference/utils/new_ir_blocklist.py +27 -0
  692. paddlex/inference/utils/official_models.py +367 -0
  693. paddlex/inference/utils/pp_option.py +339 -0
  694. paddlex/inference/utils/trt_blocklist.py +43 -0
  695. paddlex/inference/utils/trt_config.py +420 -0
  696. paddlex/model.py +131 -0
  697. paddlex/modules/__init__.py +115 -0
  698. paddlex/modules/anomaly_detection/__init__.py +18 -0
  699. paddlex/modules/anomaly_detection/dataset_checker/__init__.py +94 -0
  700. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/__init__.py +19 -0
  701. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +82 -0
  702. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/check_dataset.py +91 -0
  703. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +233 -0
  704. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/split_dataset.py +87 -0
  705. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/__init__.py +13 -0
  706. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/visualizer.py +76 -0
  707. paddlex/modules/anomaly_detection/evaluator.py +58 -0
  708. paddlex/modules/anomaly_detection/exportor.py +22 -0
  709. paddlex/modules/anomaly_detection/model_list.py +16 -0
  710. paddlex/modules/anomaly_detection/trainer.py +70 -0
  711. paddlex/modules/base/__init__.py +18 -0
  712. paddlex/modules/base/build_model.py +33 -0
  713. paddlex/modules/base/dataset_checker/__init__.py +16 -0
  714. paddlex/modules/base/dataset_checker/dataset_checker.py +169 -0
  715. paddlex/modules/base/dataset_checker/utils.py +108 -0
  716. paddlex/modules/base/evaluator.py +170 -0
  717. paddlex/modules/base/exportor.py +145 -0
  718. paddlex/modules/base/trainer.py +144 -0
  719. paddlex/modules/base/utils/__init__.py +13 -0
  720. paddlex/modules/base/utils/cinn_setting.py +89 -0
  721. paddlex/modules/base/utils/coco_eval.py +94 -0
  722. paddlex/modules/base/utils/topk_eval.py +118 -0
  723. paddlex/modules/doc_vlm/__init__.py +18 -0
  724. paddlex/modules/doc_vlm/dataset_checker.py +29 -0
  725. paddlex/modules/doc_vlm/evaluator.py +29 -0
  726. paddlex/modules/doc_vlm/exportor.py +29 -0
  727. paddlex/modules/doc_vlm/model_list.py +16 -0
  728. paddlex/modules/doc_vlm/trainer.py +41 -0
  729. paddlex/modules/face_recognition/__init__.py +18 -0
  730. paddlex/modules/face_recognition/dataset_checker/__init__.py +71 -0
  731. paddlex/modules/face_recognition/dataset_checker/dataset_src/__init__.py +16 -0
  732. paddlex/modules/face_recognition/dataset_checker/dataset_src/check_dataset.py +172 -0
  733. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/__init__.py +13 -0
  734. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +153 -0
  735. paddlex/modules/face_recognition/evaluator.py +52 -0
  736. paddlex/modules/face_recognition/exportor.py +22 -0
  737. paddlex/modules/face_recognition/model_list.py +15 -0
  738. paddlex/modules/face_recognition/trainer.py +75 -0
  739. paddlex/modules/formula_recognition/__init__.py +18 -0
  740. paddlex/modules/formula_recognition/dataset_checker/__init__.py +113 -0
  741. paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +19 -0
  742. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +158 -0
  743. paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +76 -0
  744. paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +95 -0
  745. paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +80 -0
  746. paddlex/modules/formula_recognition/evaluator.py +80 -0
  747. paddlex/modules/formula_recognition/exportor.py +22 -0
  748. paddlex/modules/formula_recognition/model_list.py +23 -0
  749. paddlex/modules/formula_recognition/trainer.py +123 -0
  750. paddlex/modules/general_recognition/__init__.py +18 -0
  751. paddlex/modules/general_recognition/dataset_checker/__init__.py +107 -0
  752. paddlex/modules/general_recognition/dataset_checker/dataset_src/__init__.py +19 -0
  753. paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +96 -0
  754. paddlex/modules/general_recognition/dataset_checker/dataset_src/check_dataset.py +99 -0
  755. paddlex/modules/general_recognition/dataset_checker/dataset_src/convert_dataset.py +100 -0
  756. paddlex/modules/general_recognition/dataset_checker/dataset_src/split_dataset.py +82 -0
  757. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/__init__.py +13 -0
  758. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +147 -0
  759. paddlex/modules/general_recognition/evaluator.py +31 -0
  760. paddlex/modules/general_recognition/exportor.py +22 -0
  761. paddlex/modules/general_recognition/model_list.py +19 -0
  762. paddlex/modules/general_recognition/trainer.py +52 -0
  763. paddlex/modules/image_classification/__init__.py +18 -0
  764. paddlex/modules/image_classification/dataset_checker/__init__.py +104 -0
  765. paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py +19 -0
  766. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +92 -0
  767. paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py +132 -0
  768. paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py +51 -0
  769. paddlex/modules/image_classification/dataset_checker/dataset_src/split_dataset.py +81 -0
  770. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/__init__.py +13 -0
  771. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +153 -0
  772. paddlex/modules/image_classification/evaluator.py +43 -0
  773. paddlex/modules/image_classification/exportor.py +22 -0
  774. paddlex/modules/image_classification/model_list.py +99 -0
  775. paddlex/modules/image_classification/trainer.py +82 -0
  776. paddlex/modules/image_unwarping/__init__.py +13 -0
  777. paddlex/modules/image_unwarping/model_list.py +17 -0
  778. paddlex/modules/instance_segmentation/__init__.py +18 -0
  779. paddlex/modules/instance_segmentation/dataset_checker/__init__.py +107 -0
  780. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/__init__.py +19 -0
  781. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +82 -0
  782. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/check_dataset.py +95 -0
  783. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/convert_dataset.py +241 -0
  784. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/split_dataset.py +122 -0
  785. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/__init__.py +13 -0
  786. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +223 -0
  787. paddlex/modules/instance_segmentation/evaluator.py +32 -0
  788. paddlex/modules/instance_segmentation/exportor.py +22 -0
  789. paddlex/modules/instance_segmentation/model_list.py +33 -0
  790. paddlex/modules/instance_segmentation/trainer.py +31 -0
  791. paddlex/modules/keypoint_detection/__init__.py +18 -0
  792. paddlex/modules/keypoint_detection/dataset_checker/__init__.py +56 -0
  793. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +15 -0
  794. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +91 -0
  795. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +13 -0
  796. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +124 -0
  797. paddlex/modules/keypoint_detection/evaluator.py +41 -0
  798. paddlex/modules/keypoint_detection/exportor.py +22 -0
  799. paddlex/modules/keypoint_detection/model_list.py +16 -0
  800. paddlex/modules/keypoint_detection/trainer.py +39 -0
  801. paddlex/modules/m_3d_bev_detection/__init__.py +18 -0
  802. paddlex/modules/m_3d_bev_detection/dataset_checker/__init__.py +95 -0
  803. paddlex/modules/m_3d_bev_detection/dataset_checker/dataset_src/__init__.py +17 -0
  804. paddlex/modules/m_3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +106 -0
  805. paddlex/modules/m_3d_bev_detection/dataset_checker/dataset_src/check_dataset.py +101 -0
  806. paddlex/modules/m_3d_bev_detection/evaluator.py +46 -0
  807. paddlex/modules/m_3d_bev_detection/exportor.py +22 -0
  808. paddlex/modules/m_3d_bev_detection/model_list.py +18 -0
  809. paddlex/modules/m_3d_bev_detection/trainer.py +68 -0
  810. paddlex/modules/multilabel_classification/__init__.py +18 -0
  811. paddlex/modules/multilabel_classification/dataset_checker/__init__.py +106 -0
  812. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/__init__.py +19 -0
  813. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +94 -0
  814. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/check_dataset.py +132 -0
  815. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/convert_dataset.py +120 -0
  816. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/split_dataset.py +81 -0
  817. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/__init__.py +13 -0
  818. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +149 -0
  819. paddlex/modules/multilabel_classification/evaluator.py +43 -0
  820. paddlex/modules/multilabel_classification/exportor.py +22 -0
  821. paddlex/modules/multilabel_classification/model_list.py +24 -0
  822. paddlex/modules/multilabel_classification/trainer.py +85 -0
  823. paddlex/modules/multilingual_speech_recognition/__init__.py +18 -0
  824. paddlex/modules/multilingual_speech_recognition/dataset_checker.py +27 -0
  825. paddlex/modules/multilingual_speech_recognition/evaluator.py +27 -0
  826. paddlex/modules/multilingual_speech_recognition/exportor.py +27 -0
  827. paddlex/modules/multilingual_speech_recognition/model_list.py +22 -0
  828. paddlex/modules/multilingual_speech_recognition/trainer.py +42 -0
  829. paddlex/modules/object_detection/__init__.py +18 -0
  830. paddlex/modules/object_detection/dataset_checker/__init__.py +106 -0
  831. paddlex/modules/object_detection/dataset_checker/dataset_src/__init__.py +19 -0
  832. paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +82 -0
  833. paddlex/modules/object_detection/dataset_checker/dataset_src/check_dataset.py +91 -0
  834. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +438 -0
  835. paddlex/modules/object_detection/dataset_checker/dataset_src/split_dataset.py +123 -0
  836. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/__init__.py +13 -0
  837. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +193 -0
  838. paddlex/modules/object_detection/evaluator.py +57 -0
  839. paddlex/modules/object_detection/exportor.py +22 -0
  840. paddlex/modules/object_detection/model_list.py +86 -0
  841. paddlex/modules/object_detection/trainer.py +98 -0
  842. paddlex/modules/open_vocabulary_detection/__init__.py +18 -0
  843. paddlex/modules/open_vocabulary_detection/dataset_checker.py +29 -0
  844. paddlex/modules/open_vocabulary_detection/evaluator.py +29 -0
  845. paddlex/modules/open_vocabulary_detection/exportor.py +29 -0
  846. paddlex/modules/open_vocabulary_detection/model_list.py +16 -0
  847. paddlex/modules/open_vocabulary_detection/trainer.py +44 -0
  848. paddlex/modules/open_vocabulary_segmentation/__init__.py +18 -0
  849. paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +29 -0
  850. paddlex/modules/open_vocabulary_segmentation/evaluator.py +29 -0
  851. paddlex/modules/open_vocabulary_segmentation/exportor.py +29 -0
  852. paddlex/modules/open_vocabulary_segmentation/model_list.py +19 -0
  853. paddlex/modules/open_vocabulary_segmentation/trainer.py +44 -0
  854. paddlex/modules/semantic_segmentation/__init__.py +18 -0
  855. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +109 -0
  856. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/__init__.py +19 -0
  857. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/analyse_dataset.py +76 -0
  858. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/check_dataset.py +80 -0
  859. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/convert_dataset.py +165 -0
  860. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/split_dataset.py +87 -0
  861. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/__init__.py +13 -0
  862. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/visualizer.py +75 -0
  863. paddlex/modules/semantic_segmentation/evaluator.py +58 -0
  864. paddlex/modules/semantic_segmentation/exportor.py +31 -0
  865. paddlex/modules/semantic_segmentation/model_list.py +37 -0
  866. paddlex/modules/semantic_segmentation/trainer.py +72 -0
  867. paddlex/modules/table_recognition/__init__.py +18 -0
  868. paddlex/modules/table_recognition/dataset_checker/__init__.py +98 -0
  869. paddlex/modules/table_recognition/dataset_checker/dataset_src/__init__.py +18 -0
  870. paddlex/modules/table_recognition/dataset_checker/dataset_src/analyse_dataset.py +59 -0
  871. paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +87 -0
  872. paddlex/modules/table_recognition/dataset_checker/dataset_src/split_dataset.py +80 -0
  873. paddlex/modules/table_recognition/evaluator.py +43 -0
  874. paddlex/modules/table_recognition/exportor.py +22 -0
  875. paddlex/modules/table_recognition/model_list.py +21 -0
  876. paddlex/modules/table_recognition/trainer.py +67 -0
  877. paddlex/modules/text_detection/__init__.py +18 -0
  878. paddlex/modules/text_detection/dataset_checker/__init__.py +107 -0
  879. paddlex/modules/text_detection/dataset_checker/dataset_src/__init__.py +18 -0
  880. paddlex/modules/text_detection/dataset_checker/dataset_src/analyse_dataset.py +220 -0
  881. paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +106 -0
  882. paddlex/modules/text_detection/dataset_checker/dataset_src/split_dataset.py +140 -0
  883. paddlex/modules/text_detection/evaluator.py +41 -0
  884. paddlex/modules/text_detection/exportor.py +22 -0
  885. paddlex/modules/text_detection/model_list.py +26 -0
  886. paddlex/modules/text_detection/trainer.py +65 -0
  887. paddlex/modules/text_recognition/__init__.py +18 -0
  888. paddlex/modules/text_recognition/dataset_checker/__init__.py +125 -0
  889. paddlex/modules/text_recognition/dataset_checker/dataset_src/__init__.py +19 -0
  890. paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +162 -0
  891. paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +104 -0
  892. paddlex/modules/text_recognition/dataset_checker/dataset_src/convert_dataset.py +95 -0
  893. paddlex/modules/text_recognition/dataset_checker/dataset_src/split_dataset.py +80 -0
  894. paddlex/modules/text_recognition/evaluator.py +64 -0
  895. paddlex/modules/text_recognition/exportor.py +22 -0
  896. paddlex/modules/text_recognition/model_list.py +36 -0
  897. paddlex/modules/text_recognition/trainer.py +105 -0
  898. paddlex/modules/ts_anomaly_detection/__init__.py +19 -0
  899. paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +111 -0
  900. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/__init__.py +19 -0
  901. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +19 -0
  902. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/check_dataset.py +64 -0
  903. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +74 -0
  904. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/split_dataset.py +63 -0
  905. paddlex/modules/ts_anomaly_detection/evaluator.py +67 -0
  906. paddlex/modules/ts_anomaly_detection/exportor.py +44 -0
  907. paddlex/modules/ts_anomaly_detection/model_list.py +22 -0
  908. paddlex/modules/ts_anomaly_detection/trainer.py +113 -0
  909. paddlex/modules/ts_classification/__init__.py +19 -0
  910. paddlex/modules/ts_classification/dataset_checker/__init__.py +111 -0
  911. paddlex/modules/ts_classification/dataset_checker/dataset_src/__init__.py +19 -0
  912. paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +77 -0
  913. paddlex/modules/ts_classification/dataset_checker/dataset_src/check_dataset.py +64 -0
  914. paddlex/modules/ts_classification/dataset_checker/dataset_src/convert_dataset.py +74 -0
  915. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +88 -0
  916. paddlex/modules/ts_classification/evaluator.py +66 -0
  917. paddlex/modules/ts_classification/exportor.py +44 -0
  918. paddlex/modules/ts_classification/model_list.py +18 -0
  919. paddlex/modules/ts_classification/trainer.py +108 -0
  920. paddlex/modules/ts_forecast/__init__.py +19 -0
  921. paddlex/modules/ts_forecast/dataset_checker/__init__.py +111 -0
  922. paddlex/modules/ts_forecast/dataset_checker/dataset_src/__init__.py +19 -0
  923. paddlex/modules/ts_forecast/dataset_checker/dataset_src/analyse_dataset.py +19 -0
  924. paddlex/modules/ts_forecast/dataset_checker/dataset_src/check_dataset.py +64 -0
  925. paddlex/modules/ts_forecast/dataset_checker/dataset_src/convert_dataset.py +73 -0
  926. paddlex/modules/ts_forecast/dataset_checker/dataset_src/split_dataset.py +63 -0
  927. paddlex/modules/ts_forecast/evaluator.py +66 -0
  928. paddlex/modules/ts_forecast/exportor.py +44 -0
  929. paddlex/modules/ts_forecast/model_list.py +24 -0
  930. paddlex/modules/ts_forecast/trainer.py +108 -0
  931. paddlex/modules/video_classification/__init__.py +18 -0
  932. paddlex/modules/video_classification/dataset_checker/__init__.py +93 -0
  933. paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +18 -0
  934. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +93 -0
  935. paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +120 -0
  936. paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +82 -0
  937. paddlex/modules/video_classification/evaluator.py +44 -0
  938. paddlex/modules/video_classification/exportor.py +22 -0
  939. paddlex/modules/video_classification/model_list.py +19 -0
  940. paddlex/modules/video_classification/trainer.py +88 -0
  941. paddlex/modules/video_detection/__init__.py +18 -0
  942. paddlex/modules/video_detection/dataset_checker/__init__.py +86 -0
  943. paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +17 -0
  944. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +100 -0
  945. paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +132 -0
  946. paddlex/modules/video_detection/evaluator.py +42 -0
  947. paddlex/modules/video_detection/exportor.py +22 -0
  948. paddlex/modules/video_detection/model_list.py +15 -0
  949. paddlex/modules/video_detection/trainer.py +82 -0
  950. paddlex/ops/__init__.py +152 -0
  951. paddlex/ops/iou3d_nms/iou3d_cpu.cpp +266 -0
  952. paddlex/ops/iou3d_nms/iou3d_cpu.h +28 -0
  953. paddlex/ops/iou3d_nms/iou3d_nms.cpp +206 -0
  954. paddlex/ops/iou3d_nms/iou3d_nms.h +35 -0
  955. paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +114 -0
  956. paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +484 -0
  957. paddlex/ops/setup.py +37 -0
  958. paddlex/ops/voxel/voxelize_op.cc +194 -0
  959. paddlex/ops/voxel/voxelize_op.cu +346 -0
  960. paddlex/paddlex_cli.py +476 -0
  961. paddlex/repo_apis/Paddle3D_api/__init__.py +17 -0
  962. paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +18 -0
  963. paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +118 -0
  964. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +238 -0
  965. paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +55 -0
  966. paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +104 -0
  967. paddlex/repo_apis/Paddle3D_api/pp3d_config.py +145 -0
  968. paddlex/repo_apis/PaddleClas_api/__init__.py +17 -0
  969. paddlex/repo_apis/PaddleClas_api/cls/__init__.py +19 -0
  970. paddlex/repo_apis/PaddleClas_api/cls/config.py +595 -0
  971. paddlex/repo_apis/PaddleClas_api/cls/model.py +355 -0
  972. paddlex/repo_apis/PaddleClas_api/cls/register.py +907 -0
  973. paddlex/repo_apis/PaddleClas_api/cls/runner.py +218 -0
  974. paddlex/repo_apis/PaddleClas_api/shitu_rec/__init__.py +18 -0
  975. paddlex/repo_apis/PaddleClas_api/shitu_rec/config.py +141 -0
  976. paddlex/repo_apis/PaddleClas_api/shitu_rec/model.py +20 -0
  977. paddlex/repo_apis/PaddleClas_api/shitu_rec/register.py +68 -0
  978. paddlex/repo_apis/PaddleClas_api/shitu_rec/runner.py +50 -0
  979. paddlex/repo_apis/PaddleDetection_api/__init__.py +17 -0
  980. paddlex/repo_apis/PaddleDetection_api/config_helper.py +280 -0
  981. paddlex/repo_apis/PaddleDetection_api/instance_seg/__init__.py +18 -0
  982. paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +457 -0
  983. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +403 -0
  984. paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py +262 -0
  985. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +225 -0
  986. paddlex/repo_apis/PaddleDetection_api/object_det/__init__.py +19 -0
  987. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +540 -0
  988. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +429 -0
  989. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +245 -0
  990. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +1135 -0
  991. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +225 -0
  992. paddlex/repo_apis/PaddleNLP_api/__init__.py +13 -0
  993. paddlex/repo_apis/PaddleOCR_api/__init__.py +22 -0
  994. paddlex/repo_apis/PaddleOCR_api/config_utils.py +53 -0
  995. paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +16 -0
  996. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +571 -0
  997. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +398 -0
  998. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +99 -0
  999. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +239 -0
  1000. paddlex/repo_apis/PaddleOCR_api/table_rec/__init__.py +16 -0
  1001. paddlex/repo_apis/PaddleOCR_api/table_rec/config.py +64 -0
  1002. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +126 -0
  1003. paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +70 -0
  1004. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +51 -0
  1005. paddlex/repo_apis/PaddleOCR_api/text_det/__init__.py +16 -0
  1006. paddlex/repo_apis/PaddleOCR_api/text_det/config.py +62 -0
  1007. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +72 -0
  1008. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +107 -0
  1009. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +53 -0
  1010. paddlex/repo_apis/PaddleOCR_api/text_rec/__init__.py +16 -0
  1011. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +564 -0
  1012. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +398 -0
  1013. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +216 -0
  1014. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +239 -0
  1015. paddlex/repo_apis/PaddleSeg_api/__init__.py +16 -0
  1016. paddlex/repo_apis/PaddleSeg_api/base_seg_config.py +134 -0
  1017. paddlex/repo_apis/PaddleSeg_api/seg/__init__.py +16 -0
  1018. paddlex/repo_apis/PaddleSeg_api/seg/config.py +183 -0
  1019. paddlex/repo_apis/PaddleSeg_api/seg/model.py +491 -0
  1020. paddlex/repo_apis/PaddleSeg_api/seg/register.py +272 -0
  1021. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +261 -0
  1022. paddlex/repo_apis/PaddleTS_api/__init__.py +20 -0
  1023. paddlex/repo_apis/PaddleTS_api/ts_ad/__init__.py +16 -0
  1024. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +88 -0
  1025. paddlex/repo_apis/PaddleTS_api/ts_ad/register.py +146 -0
  1026. paddlex/repo_apis/PaddleTS_api/ts_ad/runner.py +158 -0
  1027. paddlex/repo_apis/PaddleTS_api/ts_base/__init__.py +13 -0
  1028. paddlex/repo_apis/PaddleTS_api/ts_base/config.py +244 -0
  1029. paddlex/repo_apis/PaddleTS_api/ts_base/model.py +276 -0
  1030. paddlex/repo_apis/PaddleTS_api/ts_base/runner.py +158 -0
  1031. paddlex/repo_apis/PaddleTS_api/ts_cls/__init__.py +16 -0
  1032. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +72 -0
  1033. paddlex/repo_apis/PaddleTS_api/ts_cls/register.py +59 -0
  1034. paddlex/repo_apis/PaddleTS_api/ts_cls/runner.py +158 -0
  1035. paddlex/repo_apis/PaddleTS_api/ts_fc/__init__.py +16 -0
  1036. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +136 -0
  1037. paddlex/repo_apis/PaddleTS_api/ts_fc/register.py +186 -0
  1038. paddlex/repo_apis/PaddleVideo_api/__init__.py +17 -0
  1039. paddlex/repo_apis/PaddleVideo_api/config_utils.py +51 -0
  1040. paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +19 -0
  1041. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +548 -0
  1042. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +346 -0
  1043. paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +70 -0
  1044. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +204 -0
  1045. paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +19 -0
  1046. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +549 -0
  1047. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +298 -0
  1048. paddlex/repo_apis/PaddleVideo_api/video_det/register.py +44 -0
  1049. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +199 -0
  1050. paddlex/repo_apis/__init__.py +13 -0
  1051. paddlex/repo_apis/base/__init__.py +22 -0
  1052. paddlex/repo_apis/base/config.py +237 -0
  1053. paddlex/repo_apis/base/model.py +563 -0
  1054. paddlex/repo_apis/base/register.py +135 -0
  1055. paddlex/repo_apis/base/runner.py +390 -0
  1056. paddlex/repo_apis/base/utils/__init__.py +13 -0
  1057. paddlex/repo_apis/base/utils/arg.py +64 -0
  1058. paddlex/repo_apis/base/utils/subprocess.py +107 -0
  1059. paddlex/repo_manager/__init__.py +17 -0
  1060. paddlex/repo_manager/core.py +253 -0
  1061. paddlex/repo_manager/meta.py +180 -0
  1062. paddlex/repo_manager/repo.py +425 -0
  1063. paddlex/repo_manager/utils.py +148 -0
  1064. paddlex/utils/__init__.py +1 -12
  1065. paddlex/utils/cache.py +146 -0
  1066. paddlex/utils/config.py +216 -0
  1067. paddlex/utils/custom_device_list.py +311 -0
  1068. paddlex/utils/deps.py +249 -0
  1069. paddlex/utils/device.py +195 -0
  1070. paddlex/utils/download.py +168 -182
  1071. paddlex/utils/env.py +32 -45
  1072. paddlex/utils/errors/__init__.py +17 -0
  1073. paddlex/utils/errors/dataset_checker.py +78 -0
  1074. paddlex/utils/errors/others.py +138 -0
  1075. paddlex/utils/file_interface.py +211 -0
  1076. paddlex/utils/flags.py +70 -0
  1077. paddlex/utils/fonts/__init__.py +97 -0
  1078. paddlex/utils/func_register.py +41 -0
  1079. paddlex/utils/install.py +87 -0
  1080. paddlex/utils/interactive_get_pipeline.py +55 -0
  1081. paddlex/utils/lazy_loader.py +68 -0
  1082. paddlex/utils/logging.py +140 -33
  1083. paddlex/utils/misc.py +201 -0
  1084. paddlex/utils/pipeline_arguments.py +719 -0
  1085. paddlex/utils/result_saver.py +58 -0
  1086. paddlex/utils/subclass_register.py +99 -0
  1087. paddlex/version.py +55 -0
  1088. paddlex-3.0.0.dist-info/METADATA +1168 -0
  1089. paddlex-3.0.0.dist-info/RECORD +1093 -0
  1090. paddlex-3.0.0.dist-info/WHEEL +5 -0
  1091. paddlex-3.0.0.dist-info/entry_points.txt +2 -0
  1092. paddlex-3.0.0.dist-info/licenses/LICENSE +169 -0
  1093. paddlex-3.0.0.dist-info/top_level.txt +1 -0
  1094. PaddleClas/__init__.py +0 -16
  1095. PaddleClas/deploy/__init__.py +0 -1
  1096. PaddleClas/deploy/paddleserving/__init__.py +0 -0
  1097. PaddleClas/deploy/paddleserving/classification_web_service.py +0 -74
  1098. PaddleClas/deploy/paddleserving/cpu_utilization.py +0 -4
  1099. PaddleClas/deploy/paddleserving/pipeline_http_client.py +0 -20
  1100. PaddleClas/deploy/paddleserving/pipeline_rpc_client.py +0 -33
  1101. PaddleClas/deploy/paddleserving/recognition/__init__.py +0 -0
  1102. PaddleClas/deploy/paddleserving/recognition/pipeline_http_client.py +0 -21
  1103. PaddleClas/deploy/paddleserving/recognition/pipeline_rpc_client.py +0 -34
  1104. PaddleClas/deploy/paddleserving/recognition/recognition_web_service.py +0 -209
  1105. PaddleClas/deploy/python/__init__.py +0 -0
  1106. PaddleClas/deploy/python/build_gallery.py +0 -214
  1107. PaddleClas/deploy/python/det_preprocess.py +0 -205
  1108. PaddleClas/deploy/python/postprocess.py +0 -161
  1109. PaddleClas/deploy/python/predict_cls.py +0 -142
  1110. PaddleClas/deploy/python/predict_det.py +0 -158
  1111. PaddleClas/deploy/python/predict_rec.py +0 -138
  1112. PaddleClas/deploy/python/predict_system.py +0 -144
  1113. PaddleClas/deploy/python/preprocess.py +0 -337
  1114. PaddleClas/deploy/utils/__init__.py +0 -5
  1115. PaddleClas/deploy/utils/config.py +0 -197
  1116. PaddleClas/deploy/utils/draw_bbox.py +0 -61
  1117. PaddleClas/deploy/utils/encode_decode.py +0 -31
  1118. PaddleClas/deploy/utils/get_image_list.py +0 -49
  1119. PaddleClas/deploy/utils/logger.py +0 -120
  1120. PaddleClas/deploy/utils/predictor.py +0 -71
  1121. PaddleClas/deploy/vector_search/__init__.py +0 -1
  1122. PaddleClas/deploy/vector_search/interface.py +0 -272
  1123. PaddleClas/deploy/vector_search/test.py +0 -34
  1124. PaddleClas/hubconf.py +0 -788
  1125. PaddleClas/paddleclas.py +0 -552
  1126. PaddleClas/ppcls/__init__.py +0 -20
  1127. PaddleClas/ppcls/arch/__init__.py +0 -127
  1128. PaddleClas/ppcls/arch/backbone/__init__.py +0 -80
  1129. PaddleClas/ppcls/arch/backbone/base/__init__.py +0 -0
  1130. PaddleClas/ppcls/arch/backbone/base/theseus_layer.py +0 -126
  1131. PaddleClas/ppcls/arch/backbone/legendary_models/__init__.py +0 -6
  1132. PaddleClas/ppcls/arch/backbone/legendary_models/esnet.py +0 -355
  1133. PaddleClas/ppcls/arch/backbone/legendary_models/hrnet.py +0 -744
  1134. PaddleClas/ppcls/arch/backbone/legendary_models/inception_v3.py +0 -539
  1135. PaddleClas/ppcls/arch/backbone/legendary_models/mobilenet_v1.py +0 -234
  1136. PaddleClas/ppcls/arch/backbone/legendary_models/mobilenet_v3.py +0 -561
  1137. PaddleClas/ppcls/arch/backbone/legendary_models/pp_lcnet.py +0 -399
  1138. PaddleClas/ppcls/arch/backbone/legendary_models/resnet.py +0 -534
  1139. PaddleClas/ppcls/arch/backbone/legendary_models/vgg.py +0 -231
  1140. PaddleClas/ppcls/arch/backbone/model_zoo/__init__.py +0 -0
  1141. PaddleClas/ppcls/arch/backbone/model_zoo/alexnet.py +0 -168
  1142. PaddleClas/ppcls/arch/backbone/model_zoo/cspnet.py +0 -376
  1143. PaddleClas/ppcls/arch/backbone/model_zoo/darknet.py +0 -197
  1144. PaddleClas/ppcls/arch/backbone/model_zoo/densenet.py +0 -344
  1145. PaddleClas/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py +0 -272
  1146. PaddleClas/ppcls/arch/backbone/model_zoo/dla.py +0 -528
  1147. PaddleClas/ppcls/arch/backbone/model_zoo/dpn.py +0 -451
  1148. PaddleClas/ppcls/arch/backbone/model_zoo/efficientnet.py +0 -976
  1149. PaddleClas/ppcls/arch/backbone/model_zoo/ghostnet.py +0 -363
  1150. PaddleClas/ppcls/arch/backbone/model_zoo/googlenet.py +0 -229
  1151. PaddleClas/ppcls/arch/backbone/model_zoo/gvt.py +0 -693
  1152. PaddleClas/ppcls/arch/backbone/model_zoo/hardnet.py +0 -293
  1153. PaddleClas/ppcls/arch/backbone/model_zoo/inception_v4.py +0 -477
  1154. PaddleClas/ppcls/arch/backbone/model_zoo/levit.py +0 -589
  1155. PaddleClas/ppcls/arch/backbone/model_zoo/mixnet.py +0 -815
  1156. PaddleClas/ppcls/arch/backbone/model_zoo/mobilenet_v2.py +0 -287
  1157. PaddleClas/ppcls/arch/backbone/model_zoo/rednet.py +0 -203
  1158. PaddleClas/ppcls/arch/backbone/model_zoo/regnet.py +0 -431
  1159. PaddleClas/ppcls/arch/backbone/model_zoo/repvgg.py +0 -422
  1160. PaddleClas/ppcls/arch/backbone/model_zoo/res2net.py +0 -264
  1161. PaddleClas/ppcls/arch/backbone/model_zoo/res2net_vd.py +0 -305
  1162. PaddleClas/ppcls/arch/backbone/model_zoo/resnest.py +0 -740
  1163. PaddleClas/ppcls/arch/backbone/model_zoo/resnet_vc.py +0 -309
  1164. PaddleClas/ppcls/arch/backbone/model_zoo/resnext.py +0 -298
  1165. PaddleClas/ppcls/arch/backbone/model_zoo/resnext101_wsl.py +0 -490
  1166. PaddleClas/ppcls/arch/backbone/model_zoo/resnext_vd.py +0 -317
  1167. PaddleClas/ppcls/arch/backbone/model_zoo/rexnet.py +0 -281
  1168. PaddleClas/ppcls/arch/backbone/model_zoo/se_resnet_vd.py +0 -390
  1169. PaddleClas/ppcls/arch/backbone/model_zoo/se_resnext.py +0 -364
  1170. PaddleClas/ppcls/arch/backbone/model_zoo/se_resnext_vd.py +0 -309
  1171. PaddleClas/ppcls/arch/backbone/model_zoo/shufflenet_v2.py +0 -362
  1172. PaddleClas/ppcls/arch/backbone/model_zoo/squeezenet.py +0 -194
  1173. PaddleClas/ppcls/arch/backbone/model_zoo/swin_transformer.py +0 -857
  1174. PaddleClas/ppcls/arch/backbone/model_zoo/tnt.py +0 -385
  1175. PaddleClas/ppcls/arch/backbone/model_zoo/vision_transformer.py +0 -495
  1176. PaddleClas/ppcls/arch/backbone/model_zoo/xception.py +0 -377
  1177. PaddleClas/ppcls/arch/backbone/model_zoo/xception_deeplab.py +0 -421
  1178. PaddleClas/ppcls/arch/backbone/variant_models/__init__.py +0 -3
  1179. PaddleClas/ppcls/arch/backbone/variant_models/pp_lcnet_variant.py +0 -29
  1180. PaddleClas/ppcls/arch/backbone/variant_models/resnet_variant.py +0 -23
  1181. PaddleClas/ppcls/arch/backbone/variant_models/vgg_variant.py +0 -28
  1182. PaddleClas/ppcls/arch/gears/__init__.py +0 -32
  1183. PaddleClas/ppcls/arch/gears/arcmargin.py +0 -72
  1184. PaddleClas/ppcls/arch/gears/circlemargin.py +0 -59
  1185. PaddleClas/ppcls/arch/gears/cosmargin.py +0 -55
  1186. PaddleClas/ppcls/arch/gears/fc.py +0 -35
  1187. PaddleClas/ppcls/arch/gears/identity_head.py +0 -9
  1188. PaddleClas/ppcls/arch/gears/vehicle_neck.py +0 -52
  1189. PaddleClas/ppcls/arch/utils.py +0 -53
  1190. PaddleClas/ppcls/data/__init__.py +0 -144
  1191. PaddleClas/ppcls/data/dataloader/DistributedRandomIdentitySampler.py +0 -90
  1192. PaddleClas/ppcls/data/dataloader/__init__.py +0 -9
  1193. PaddleClas/ppcls/data/dataloader/common_dataset.py +0 -84
  1194. PaddleClas/ppcls/data/dataloader/dali.py +0 -319
  1195. PaddleClas/ppcls/data/dataloader/icartoon_dataset.py +0 -36
  1196. PaddleClas/ppcls/data/dataloader/imagenet_dataset.py +0 -38
  1197. PaddleClas/ppcls/data/dataloader/logo_dataset.py +0 -46
  1198. PaddleClas/ppcls/data/dataloader/mix_dataset.py +0 -49
  1199. PaddleClas/ppcls/data/dataloader/mix_sampler.py +0 -79
  1200. PaddleClas/ppcls/data/dataloader/multilabel_dataset.py +0 -59
  1201. PaddleClas/ppcls/data/dataloader/pk_sampler.py +0 -105
  1202. PaddleClas/ppcls/data/dataloader/vehicle_dataset.py +0 -138
  1203. PaddleClas/ppcls/data/postprocess/__init__.py +0 -41
  1204. PaddleClas/ppcls/data/postprocess/topk.py +0 -85
  1205. PaddleClas/ppcls/data/preprocess/__init__.py +0 -100
  1206. PaddleClas/ppcls/data/preprocess/batch_ops/__init__.py +0 -1
  1207. PaddleClas/ppcls/data/preprocess/batch_ops/batch_operators.py +0 -231
  1208. PaddleClas/ppcls/data/preprocess/ops/__init__.py +0 -1
  1209. PaddleClas/ppcls/data/preprocess/ops/autoaugment.py +0 -264
  1210. PaddleClas/ppcls/data/preprocess/ops/cutout.py +0 -41
  1211. PaddleClas/ppcls/data/preprocess/ops/fmix.py +0 -217
  1212. PaddleClas/ppcls/data/preprocess/ops/functional.py +0 -138
  1213. PaddleClas/ppcls/data/preprocess/ops/grid.py +0 -89
  1214. PaddleClas/ppcls/data/preprocess/ops/hide_and_seek.py +0 -44
  1215. PaddleClas/ppcls/data/preprocess/ops/operators.py +0 -384
  1216. PaddleClas/ppcls/data/preprocess/ops/randaugment.py +0 -106
  1217. PaddleClas/ppcls/data/preprocess/ops/random_erasing.py +0 -90
  1218. PaddleClas/ppcls/data/preprocess/ops/timm_autoaugment.py +0 -877
  1219. PaddleClas/ppcls/data/utils/__init__.py +0 -13
  1220. PaddleClas/ppcls/data/utils/get_image_list.py +0 -49
  1221. PaddleClas/ppcls/engine/__init__.py +0 -0
  1222. PaddleClas/ppcls/engine/engine.py +0 -436
  1223. PaddleClas/ppcls/engine/evaluation/__init__.py +0 -16
  1224. PaddleClas/ppcls/engine/evaluation/classification.py +0 -143
  1225. PaddleClas/ppcls/engine/evaluation/retrieval.py +0 -169
  1226. PaddleClas/ppcls/engine/slim/__init__.py +0 -16
  1227. PaddleClas/ppcls/engine/slim/prune.py +0 -66
  1228. PaddleClas/ppcls/engine/slim/quant.py +0 -55
  1229. PaddleClas/ppcls/engine/train/__init__.py +0 -14
  1230. PaddleClas/ppcls/engine/train/train.py +0 -79
  1231. PaddleClas/ppcls/engine/train/utils.py +0 -72
  1232. PaddleClas/ppcls/loss/__init__.py +0 -65
  1233. PaddleClas/ppcls/loss/celoss.py +0 -67
  1234. PaddleClas/ppcls/loss/centerloss.py +0 -54
  1235. PaddleClas/ppcls/loss/comfunc.py +0 -45
  1236. PaddleClas/ppcls/loss/deephashloss.py +0 -92
  1237. PaddleClas/ppcls/loss/distanceloss.py +0 -43
  1238. PaddleClas/ppcls/loss/distillationloss.py +0 -141
  1239. PaddleClas/ppcls/loss/dmlloss.py +0 -46
  1240. PaddleClas/ppcls/loss/emlloss.py +0 -97
  1241. PaddleClas/ppcls/loss/googlenetloss.py +0 -41
  1242. PaddleClas/ppcls/loss/msmloss.py +0 -78
  1243. PaddleClas/ppcls/loss/multilabelloss.py +0 -43
  1244. PaddleClas/ppcls/loss/npairsloss.py +0 -38
  1245. PaddleClas/ppcls/loss/pairwisecosface.py +0 -55
  1246. PaddleClas/ppcls/loss/supconloss.py +0 -108
  1247. PaddleClas/ppcls/loss/trihardloss.py +0 -82
  1248. PaddleClas/ppcls/loss/triplet.py +0 -137
  1249. PaddleClas/ppcls/metric/__init__.py +0 -51
  1250. PaddleClas/ppcls/metric/metrics.py +0 -308
  1251. PaddleClas/ppcls/optimizer/__init__.py +0 -72
  1252. PaddleClas/ppcls/optimizer/learning_rate.py +0 -326
  1253. PaddleClas/ppcls/optimizer/optimizer.py +0 -207
  1254. PaddleClas/ppcls/utils/__init__.py +0 -27
  1255. PaddleClas/ppcls/utils/check.py +0 -151
  1256. PaddleClas/ppcls/utils/config.py +0 -210
  1257. PaddleClas/ppcls/utils/download.py +0 -319
  1258. PaddleClas/ppcls/utils/ema.py +0 -63
  1259. PaddleClas/ppcls/utils/logger.py +0 -137
  1260. PaddleClas/ppcls/utils/metrics.py +0 -107
  1261. PaddleClas/ppcls/utils/misc.py +0 -63
  1262. PaddleClas/ppcls/utils/model_zoo.py +0 -213
  1263. PaddleClas/ppcls/utils/profiler.py +0 -111
  1264. PaddleClas/ppcls/utils/save_load.py +0 -136
  1265. PaddleClas/setup.py +0 -58
  1266. PaddleClas/tools/__init__.py +0 -15
  1267. PaddleClas/tools/eval.py +0 -31
  1268. PaddleClas/tools/export_model.py +0 -34
  1269. PaddleClas/tools/infer.py +0 -31
  1270. PaddleClas/tools/train.py +0 -32
  1271. paddlex/cls.py +0 -82
  1272. paddlex/command.py +0 -215
  1273. paddlex/cv/__init__.py +0 -17
  1274. paddlex/cv/datasets/__init__.py +0 -18
  1275. paddlex/cv/datasets/coco.py +0 -208
  1276. paddlex/cv/datasets/imagenet.py +0 -88
  1277. paddlex/cv/datasets/seg_dataset.py +0 -91
  1278. paddlex/cv/datasets/voc.py +0 -445
  1279. paddlex/cv/models/__init__.py +0 -18
  1280. paddlex/cv/models/base.py +0 -631
  1281. paddlex/cv/models/classifier.py +0 -989
  1282. paddlex/cv/models/detector.py +0 -2292
  1283. paddlex/cv/models/load_model.py +0 -148
  1284. paddlex/cv/models/segmenter.py +0 -768
  1285. paddlex/cv/models/slim/__init__.py +0 -13
  1286. paddlex/cv/models/slim/prune.py +0 -55
  1287. paddlex/cv/models/utils/__init__.py +0 -13
  1288. paddlex/cv/models/utils/det_metrics/__init__.py +0 -15
  1289. paddlex/cv/models/utils/det_metrics/coco_utils.py +0 -476
  1290. paddlex/cv/models/utils/det_metrics/metrics.py +0 -220
  1291. paddlex/cv/models/utils/infer_nets.py +0 -45
  1292. paddlex/cv/models/utils/seg_metrics.py +0 -62
  1293. paddlex/cv/models/utils/visualize.py +0 -399
  1294. paddlex/cv/transforms/__init__.py +0 -46
  1295. paddlex/cv/transforms/batch_operators.py +0 -286
  1296. paddlex/cv/transforms/box_utils.py +0 -41
  1297. paddlex/cv/transforms/functions.py +0 -193
  1298. paddlex/cv/transforms/operators.py +0 -1402
  1299. paddlex/deploy.py +0 -268
  1300. paddlex/det.py +0 -49
  1301. paddlex/paddleseg/__init__.py +0 -17
  1302. paddlex/paddleseg/core/__init__.py +0 -20
  1303. paddlex/paddleseg/core/infer.py +0 -289
  1304. paddlex/paddleseg/core/predict.py +0 -145
  1305. paddlex/paddleseg/core/train.py +0 -258
  1306. paddlex/paddleseg/core/val.py +0 -172
  1307. paddlex/paddleseg/cvlibs/__init__.py +0 -17
  1308. paddlex/paddleseg/cvlibs/callbacks.py +0 -279
  1309. paddlex/paddleseg/cvlibs/config.py +0 -359
  1310. paddlex/paddleseg/cvlibs/manager.py +0 -142
  1311. paddlex/paddleseg/cvlibs/param_init.py +0 -91
  1312. paddlex/paddleseg/datasets/__init__.py +0 -21
  1313. paddlex/paddleseg/datasets/ade.py +0 -112
  1314. paddlex/paddleseg/datasets/cityscapes.py +0 -86
  1315. paddlex/paddleseg/datasets/cocostuff.py +0 -79
  1316. paddlex/paddleseg/datasets/dataset.py +0 -164
  1317. paddlex/paddleseg/datasets/mini_deep_globe_road_extraction.py +0 -95
  1318. paddlex/paddleseg/datasets/optic_disc_seg.py +0 -97
  1319. paddlex/paddleseg/datasets/pascal_context.py +0 -80
  1320. paddlex/paddleseg/datasets/voc.py +0 -113
  1321. paddlex/paddleseg/models/__init__.py +0 -39
  1322. paddlex/paddleseg/models/ann.py +0 -436
  1323. paddlex/paddleseg/models/attention_unet.py +0 -189
  1324. paddlex/paddleseg/models/backbones/__init__.py +0 -18
  1325. paddlex/paddleseg/models/backbones/hrnet.py +0 -815
  1326. paddlex/paddleseg/models/backbones/mobilenetv3.py +0 -365
  1327. paddlex/paddleseg/models/backbones/resnet_vd.py +0 -364
  1328. paddlex/paddleseg/models/backbones/xception_deeplab.py +0 -415
  1329. paddlex/paddleseg/models/bisenet.py +0 -311
  1330. paddlex/paddleseg/models/danet.py +0 -220
  1331. paddlex/paddleseg/models/decoupled_segnet.py +0 -233
  1332. paddlex/paddleseg/models/deeplab.py +0 -258
  1333. paddlex/paddleseg/models/dnlnet.py +0 -231
  1334. paddlex/paddleseg/models/emanet.py +0 -219
  1335. paddlex/paddleseg/models/fast_scnn.py +0 -318
  1336. paddlex/paddleseg/models/fcn.py +0 -135
  1337. paddlex/paddleseg/models/gcnet.py +0 -223
  1338. paddlex/paddleseg/models/gscnn.py +0 -357
  1339. paddlex/paddleseg/models/hardnet.py +0 -309
  1340. paddlex/paddleseg/models/isanet.py +0 -202
  1341. paddlex/paddleseg/models/layers/__init__.py +0 -19
  1342. paddlex/paddleseg/models/layers/activation.py +0 -73
  1343. paddlex/paddleseg/models/layers/attention.py +0 -146
  1344. paddlex/paddleseg/models/layers/layer_libs.py +0 -168
  1345. paddlex/paddleseg/models/layers/nonlocal2d.py +0 -155
  1346. paddlex/paddleseg/models/layers/pyramid_pool.py +0 -182
  1347. paddlex/paddleseg/models/losses/__init__.py +0 -27
  1348. paddlex/paddleseg/models/losses/binary_cross_entropy_loss.py +0 -174
  1349. paddlex/paddleseg/models/losses/bootstrapped_cross_entropy.py +0 -73
  1350. paddlex/paddleseg/models/losses/cross_entropy_loss.py +0 -94
  1351. paddlex/paddleseg/models/losses/decoupledsegnet_relax_boundary_loss.py +0 -129
  1352. paddlex/paddleseg/models/losses/dice_loss.py +0 -61
  1353. paddlex/paddleseg/models/losses/edge_attention_loss.py +0 -78
  1354. paddlex/paddleseg/models/losses/gscnn_dual_task_loss.py +0 -141
  1355. paddlex/paddleseg/models/losses/l1_loss.py +0 -76
  1356. paddlex/paddleseg/models/losses/lovasz_loss.py +0 -222
  1357. paddlex/paddleseg/models/losses/mean_square_error_loss.py +0 -65
  1358. paddlex/paddleseg/models/losses/mixed_loss.py +0 -58
  1359. paddlex/paddleseg/models/losses/ohem_cross_entropy_loss.py +0 -99
  1360. paddlex/paddleseg/models/losses/ohem_edge_attention_loss.py +0 -114
  1361. paddlex/paddleseg/models/ocrnet.py +0 -248
  1362. paddlex/paddleseg/models/pspnet.py +0 -147
  1363. paddlex/paddleseg/models/sfnet.py +0 -236
  1364. paddlex/paddleseg/models/shufflenet_slim.py +0 -268
  1365. paddlex/paddleseg/models/u2net.py +0 -574
  1366. paddlex/paddleseg/models/unet.py +0 -155
  1367. paddlex/paddleseg/models/unet_3plus.py +0 -316
  1368. paddlex/paddleseg/models/unet_plusplus.py +0 -237
  1369. paddlex/paddleseg/transforms/__init__.py +0 -16
  1370. paddlex/paddleseg/transforms/functional.py +0 -161
  1371. paddlex/paddleseg/transforms/transforms.py +0 -937
  1372. paddlex/paddleseg/utils/__init__.py +0 -22
  1373. paddlex/paddleseg/utils/config_check.py +0 -60
  1374. paddlex/paddleseg/utils/download.py +0 -163
  1375. paddlex/paddleseg/utils/env/__init__.py +0 -16
  1376. paddlex/paddleseg/utils/env/seg_env.py +0 -56
  1377. paddlex/paddleseg/utils/env/sys_env.py +0 -122
  1378. paddlex/paddleseg/utils/logger.py +0 -48
  1379. paddlex/paddleseg/utils/metrics.py +0 -146
  1380. paddlex/paddleseg/utils/progbar.py +0 -212
  1381. paddlex/paddleseg/utils/timer.py +0 -53
  1382. paddlex/paddleseg/utils/utils.py +0 -120
  1383. paddlex/paddleseg/utils/visualize.py +0 -90
  1384. paddlex/ppcls/__init__.py +0 -20
  1385. paddlex/ppcls/arch/__init__.py +0 -127
  1386. paddlex/ppcls/arch/backbone/__init__.py +0 -80
  1387. paddlex/ppcls/arch/backbone/base/__init__.py +0 -0
  1388. paddlex/ppcls/arch/backbone/base/theseus_layer.py +0 -130
  1389. paddlex/ppcls/arch/backbone/legendary_models/__init__.py +0 -6
  1390. paddlex/ppcls/arch/backbone/legendary_models/esnet.py +0 -355
  1391. paddlex/ppcls/arch/backbone/legendary_models/hrnet.py +0 -748
  1392. paddlex/ppcls/arch/backbone/legendary_models/inception_v3.py +0 -539
  1393. paddlex/ppcls/arch/backbone/legendary_models/mobilenet_v1.py +0 -234
  1394. paddlex/ppcls/arch/backbone/legendary_models/mobilenet_v3.py +0 -561
  1395. paddlex/ppcls/arch/backbone/legendary_models/pp_lcnet.py +0 -399
  1396. paddlex/ppcls/arch/backbone/legendary_models/resnet.py +0 -534
  1397. paddlex/ppcls/arch/backbone/legendary_models/vgg.py +0 -235
  1398. paddlex/ppcls/arch/backbone/model_zoo/__init__.py +0 -0
  1399. paddlex/ppcls/arch/backbone/model_zoo/alexnet.py +0 -168
  1400. paddlex/ppcls/arch/backbone/model_zoo/cspnet.py +0 -376
  1401. paddlex/ppcls/arch/backbone/model_zoo/darknet.py +0 -197
  1402. paddlex/ppcls/arch/backbone/model_zoo/densenet.py +0 -344
  1403. paddlex/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py +0 -272
  1404. paddlex/ppcls/arch/backbone/model_zoo/dla.py +0 -528
  1405. paddlex/ppcls/arch/backbone/model_zoo/dpn.py +0 -451
  1406. paddlex/ppcls/arch/backbone/model_zoo/efficientnet.py +0 -976
  1407. paddlex/ppcls/arch/backbone/model_zoo/ghostnet.py +0 -363
  1408. paddlex/ppcls/arch/backbone/model_zoo/googlenet.py +0 -229
  1409. paddlex/ppcls/arch/backbone/model_zoo/gvt.py +0 -693
  1410. paddlex/ppcls/arch/backbone/model_zoo/hardnet.py +0 -293
  1411. paddlex/ppcls/arch/backbone/model_zoo/inception_v4.py +0 -477
  1412. paddlex/ppcls/arch/backbone/model_zoo/levit.py +0 -589
  1413. paddlex/ppcls/arch/backbone/model_zoo/mixnet.py +0 -815
  1414. paddlex/ppcls/arch/backbone/model_zoo/mobilenet_v2.py +0 -287
  1415. paddlex/ppcls/arch/backbone/model_zoo/rednet.py +0 -203
  1416. paddlex/ppcls/arch/backbone/model_zoo/regnet.py +0 -431
  1417. paddlex/ppcls/arch/backbone/model_zoo/repvgg.py +0 -422
  1418. paddlex/ppcls/arch/backbone/model_zoo/res2net.py +0 -264
  1419. paddlex/ppcls/arch/backbone/model_zoo/res2net_vd.py +0 -305
  1420. paddlex/ppcls/arch/backbone/model_zoo/resnest.py +0 -740
  1421. paddlex/ppcls/arch/backbone/model_zoo/resnet_vc.py +0 -309
  1422. paddlex/ppcls/arch/backbone/model_zoo/resnext.py +0 -298
  1423. paddlex/ppcls/arch/backbone/model_zoo/resnext101_wsl.py +0 -490
  1424. paddlex/ppcls/arch/backbone/model_zoo/resnext_vd.py +0 -317
  1425. paddlex/ppcls/arch/backbone/model_zoo/rexnet.py +0 -281
  1426. paddlex/ppcls/arch/backbone/model_zoo/se_resnet_vd.py +0 -390
  1427. paddlex/ppcls/arch/backbone/model_zoo/se_resnext.py +0 -364
  1428. paddlex/ppcls/arch/backbone/model_zoo/se_resnext_vd.py +0 -309
  1429. paddlex/ppcls/arch/backbone/model_zoo/shufflenet_v2.py +0 -362
  1430. paddlex/ppcls/arch/backbone/model_zoo/squeezenet.py +0 -194
  1431. paddlex/ppcls/arch/backbone/model_zoo/swin_transformer.py +0 -857
  1432. paddlex/ppcls/arch/backbone/model_zoo/tnt.py +0 -385
  1433. paddlex/ppcls/arch/backbone/model_zoo/vision_transformer.py +0 -495
  1434. paddlex/ppcls/arch/backbone/model_zoo/xception.py +0 -377
  1435. paddlex/ppcls/arch/backbone/model_zoo/xception_deeplab.py +0 -421
  1436. paddlex/ppcls/arch/backbone/variant_models/__init__.py +0 -3
  1437. paddlex/ppcls/arch/backbone/variant_models/pp_lcnet_variant.py +0 -29
  1438. paddlex/ppcls/arch/backbone/variant_models/resnet_variant.py +0 -23
  1439. paddlex/ppcls/arch/backbone/variant_models/vgg_variant.py +0 -28
  1440. paddlex/ppcls/arch/gears/__init__.py +0 -32
  1441. paddlex/ppcls/arch/gears/arcmargin.py +0 -72
  1442. paddlex/ppcls/arch/gears/circlemargin.py +0 -59
  1443. paddlex/ppcls/arch/gears/cosmargin.py +0 -55
  1444. paddlex/ppcls/arch/gears/fc.py +0 -35
  1445. paddlex/ppcls/arch/gears/identity_head.py +0 -9
  1446. paddlex/ppcls/arch/gears/vehicle_neck.py +0 -52
  1447. paddlex/ppcls/arch/utils.py +0 -53
  1448. paddlex/ppcls/data/__init__.py +0 -144
  1449. paddlex/ppcls/data/dataloader/DistributedRandomIdentitySampler.py +0 -90
  1450. paddlex/ppcls/data/dataloader/__init__.py +0 -9
  1451. paddlex/ppcls/data/dataloader/common_dataset.py +0 -84
  1452. paddlex/ppcls/data/dataloader/dali.py +0 -319
  1453. paddlex/ppcls/data/dataloader/icartoon_dataset.py +0 -36
  1454. paddlex/ppcls/data/dataloader/imagenet_dataset.py +0 -38
  1455. paddlex/ppcls/data/dataloader/logo_dataset.py +0 -46
  1456. paddlex/ppcls/data/dataloader/mix_dataset.py +0 -49
  1457. paddlex/ppcls/data/dataloader/mix_sampler.py +0 -79
  1458. paddlex/ppcls/data/dataloader/multilabel_dataset.py +0 -59
  1459. paddlex/ppcls/data/dataloader/pk_sampler.py +0 -105
  1460. paddlex/ppcls/data/dataloader/vehicle_dataset.py +0 -138
  1461. paddlex/ppcls/data/postprocess/__init__.py +0 -41
  1462. paddlex/ppcls/data/postprocess/topk.py +0 -85
  1463. paddlex/ppcls/data/preprocess/__init__.py +0 -100
  1464. paddlex/ppcls/data/preprocess/batch_ops/__init__.py +0 -0
  1465. paddlex/ppcls/data/preprocess/batch_ops/batch_operators.py +0 -231
  1466. paddlex/ppcls/data/preprocess/ops/__init__.py +0 -0
  1467. paddlex/ppcls/data/preprocess/ops/autoaugment.py +0 -264
  1468. paddlex/ppcls/data/preprocess/ops/cutout.py +0 -41
  1469. paddlex/ppcls/data/preprocess/ops/fmix.py +0 -217
  1470. paddlex/ppcls/data/preprocess/ops/functional.py +0 -141
  1471. paddlex/ppcls/data/preprocess/ops/grid.py +0 -89
  1472. paddlex/ppcls/data/preprocess/ops/hide_and_seek.py +0 -44
  1473. paddlex/ppcls/data/preprocess/ops/operators.py +0 -384
  1474. paddlex/ppcls/data/preprocess/ops/randaugment.py +0 -106
  1475. paddlex/ppcls/data/preprocess/ops/random_erasing.py +0 -90
  1476. paddlex/ppcls/data/preprocess/ops/timm_autoaugment.py +0 -877
  1477. paddlex/ppcls/data/utils/__init__.py +0 -13
  1478. paddlex/ppcls/data/utils/get_image_list.py +0 -49
  1479. paddlex/ppcls/engine/__init__.py +0 -0
  1480. paddlex/ppcls/engine/engine.py +0 -436
  1481. paddlex/ppcls/engine/evaluation/__init__.py +0 -16
  1482. paddlex/ppcls/engine/evaluation/classification.py +0 -143
  1483. paddlex/ppcls/engine/evaluation/retrieval.py +0 -169
  1484. paddlex/ppcls/engine/slim/__init__.py +0 -16
  1485. paddlex/ppcls/engine/slim/prune.py +0 -66
  1486. paddlex/ppcls/engine/slim/quant.py +0 -55
  1487. paddlex/ppcls/engine/train/__init__.py +0 -14
  1488. paddlex/ppcls/engine/train/train.py +0 -79
  1489. paddlex/ppcls/engine/train/utils.py +0 -72
  1490. paddlex/ppcls/loss/__init__.py +0 -65
  1491. paddlex/ppcls/loss/celoss.py +0 -67
  1492. paddlex/ppcls/loss/centerloss.py +0 -54
  1493. paddlex/ppcls/loss/comfunc.py +0 -45
  1494. paddlex/ppcls/loss/deephashloss.py +0 -96
  1495. paddlex/ppcls/loss/distanceloss.py +0 -43
  1496. paddlex/ppcls/loss/distillationloss.py +0 -141
  1497. paddlex/ppcls/loss/dmlloss.py +0 -46
  1498. paddlex/ppcls/loss/emlloss.py +0 -97
  1499. paddlex/ppcls/loss/googlenetloss.py +0 -42
  1500. paddlex/ppcls/loss/msmloss.py +0 -78
  1501. paddlex/ppcls/loss/multilabelloss.py +0 -43
  1502. paddlex/ppcls/loss/npairsloss.py +0 -38
  1503. paddlex/ppcls/loss/pairwisecosface.py +0 -59
  1504. paddlex/ppcls/loss/supconloss.py +0 -108
  1505. paddlex/ppcls/loss/trihardloss.py +0 -82
  1506. paddlex/ppcls/loss/triplet.py +0 -137
  1507. paddlex/ppcls/metric/__init__.py +0 -51
  1508. paddlex/ppcls/metric/metrics.py +0 -308
  1509. paddlex/ppcls/optimizer/__init__.py +0 -72
  1510. paddlex/ppcls/optimizer/learning_rate.py +0 -326
  1511. paddlex/ppcls/optimizer/optimizer.py +0 -208
  1512. paddlex/ppcls/utils/__init__.py +0 -27
  1513. paddlex/ppcls/utils/check.py +0 -151
  1514. paddlex/ppcls/utils/config.py +0 -210
  1515. paddlex/ppcls/utils/download.py +0 -319
  1516. paddlex/ppcls/utils/ema.py +0 -63
  1517. paddlex/ppcls/utils/logger.py +0 -137
  1518. paddlex/ppcls/utils/metrics.py +0 -112
  1519. paddlex/ppcls/utils/misc.py +0 -63
  1520. paddlex/ppcls/utils/model_zoo.py +0 -213
  1521. paddlex/ppcls/utils/profiler.py +0 -111
  1522. paddlex/ppcls/utils/save_load.py +0 -136
  1523. paddlex/ppdet/__init__.py +0 -16
  1524. paddlex/ppdet/core/__init__.py +0 -15
  1525. paddlex/ppdet/core/config/__init__.py +0 -13
  1526. paddlex/ppdet/core/config/schema.py +0 -248
  1527. paddlex/ppdet/core/config/yaml_helpers.py +0 -118
  1528. paddlex/ppdet/core/workspace.py +0 -278
  1529. paddlex/ppdet/data/__init__.py +0 -21
  1530. paddlex/ppdet/data/crop_utils/__init__.py +0 -13
  1531. paddlex/ppdet/data/crop_utils/annotation_cropper.py +0 -585
  1532. paddlex/ppdet/data/crop_utils/chip_box_utils.py +0 -170
  1533. paddlex/ppdet/data/reader.py +0 -302
  1534. paddlex/ppdet/data/shm_utils.py +0 -67
  1535. paddlex/ppdet/data/source/__init__.py +0 -29
  1536. paddlex/ppdet/data/source/category.py +0 -904
  1537. paddlex/ppdet/data/source/coco.py +0 -251
  1538. paddlex/ppdet/data/source/dataset.py +0 -197
  1539. paddlex/ppdet/data/source/keypoint_coco.py +0 -669
  1540. paddlex/ppdet/data/source/mot.py +0 -636
  1541. paddlex/ppdet/data/source/sniper_coco.py +0 -191
  1542. paddlex/ppdet/data/source/voc.py +0 -231
  1543. paddlex/ppdet/data/source/widerface.py +0 -180
  1544. paddlex/ppdet/data/transform/__init__.py +0 -28
  1545. paddlex/ppdet/data/transform/atss_assigner.py +0 -270
  1546. paddlex/ppdet/data/transform/autoaugment_utils.py +0 -1591
  1547. paddlex/ppdet/data/transform/batch_operators.py +0 -1080
  1548. paddlex/ppdet/data/transform/gridmask_utils.py +0 -86
  1549. paddlex/ppdet/data/transform/keypoint_operators.py +0 -868
  1550. paddlex/ppdet/data/transform/mot_operators.py +0 -628
  1551. paddlex/ppdet/data/transform/op_helper.py +0 -498
  1552. paddlex/ppdet/data/transform/operators.py +0 -3025
  1553. paddlex/ppdet/engine/__init__.py +0 -30
  1554. paddlex/ppdet/engine/callbacks.py +0 -340
  1555. paddlex/ppdet/engine/env.py +0 -50
  1556. paddlex/ppdet/engine/export_utils.py +0 -177
  1557. paddlex/ppdet/engine/tracker.py +0 -538
  1558. paddlex/ppdet/engine/trainer.py +0 -723
  1559. paddlex/ppdet/metrics/__init__.py +0 -29
  1560. paddlex/ppdet/metrics/coco_utils.py +0 -184
  1561. paddlex/ppdet/metrics/json_results.py +0 -149
  1562. paddlex/ppdet/metrics/keypoint_metrics.py +0 -401
  1563. paddlex/ppdet/metrics/map_utils.py +0 -444
  1564. paddlex/ppdet/metrics/mcmot_metrics.py +0 -470
  1565. paddlex/ppdet/metrics/metrics.py +0 -434
  1566. paddlex/ppdet/metrics/mot_metrics.py +0 -1236
  1567. paddlex/ppdet/metrics/munkres.py +0 -428
  1568. paddlex/ppdet/metrics/widerface_utils.py +0 -393
  1569. paddlex/ppdet/model_zoo/__init__.py +0 -18
  1570. paddlex/ppdet/model_zoo/model_zoo.py +0 -84
  1571. paddlex/ppdet/modeling/__init__.py +0 -45
  1572. paddlex/ppdet/modeling/architectures/__init__.py +0 -51
  1573. paddlex/ppdet/modeling/architectures/blazeface.py +0 -91
  1574. paddlex/ppdet/modeling/architectures/cascade_rcnn.py +0 -144
  1575. paddlex/ppdet/modeling/architectures/centernet.py +0 -108
  1576. paddlex/ppdet/modeling/architectures/deepsort.py +0 -69
  1577. paddlex/ppdet/modeling/architectures/detr.py +0 -93
  1578. paddlex/ppdet/modeling/architectures/fairmot.py +0 -100
  1579. paddlex/ppdet/modeling/architectures/faster_rcnn.py +0 -106
  1580. paddlex/ppdet/modeling/architectures/fcos.py +0 -105
  1581. paddlex/ppdet/modeling/architectures/gfl.py +0 -87
  1582. paddlex/ppdet/modeling/architectures/jde.py +0 -111
  1583. paddlex/ppdet/modeling/architectures/keypoint_hrhrnet.py +0 -287
  1584. paddlex/ppdet/modeling/architectures/keypoint_hrnet.py +0 -267
  1585. paddlex/ppdet/modeling/architectures/mask_rcnn.py +0 -135
  1586. paddlex/ppdet/modeling/architectures/meta_arch.py +0 -128
  1587. paddlex/ppdet/modeling/architectures/picodet.py +0 -91
  1588. paddlex/ppdet/modeling/architectures/s2anet.py +0 -102
  1589. paddlex/ppdet/modeling/architectures/solov2.py +0 -110
  1590. paddlex/ppdet/modeling/architectures/sparse_rcnn.py +0 -99
  1591. paddlex/ppdet/modeling/architectures/ssd.py +0 -93
  1592. paddlex/ppdet/modeling/architectures/tood.py +0 -78
  1593. paddlex/ppdet/modeling/architectures/ttfnet.py +0 -98
  1594. paddlex/ppdet/modeling/architectures/yolo.py +0 -124
  1595. paddlex/ppdet/modeling/assigners/__init__.py +0 -23
  1596. paddlex/ppdet/modeling/assigners/atss_assigner.py +0 -211
  1597. paddlex/ppdet/modeling/assigners/simota_assigner.py +0 -262
  1598. paddlex/ppdet/modeling/assigners/task_aligned_assigner.py +0 -158
  1599. paddlex/ppdet/modeling/assigners/utils.py +0 -195
  1600. paddlex/ppdet/modeling/backbones/__init__.py +0 -49
  1601. paddlex/ppdet/modeling/backbones/blazenet.py +0 -323
  1602. paddlex/ppdet/modeling/backbones/darknet.py +0 -340
  1603. paddlex/ppdet/modeling/backbones/dla.py +0 -244
  1604. paddlex/ppdet/modeling/backbones/esnet.py +0 -290
  1605. paddlex/ppdet/modeling/backbones/ghostnet.py +0 -470
  1606. paddlex/ppdet/modeling/backbones/hardnet.py +0 -224
  1607. paddlex/ppdet/modeling/backbones/hrnet.py +0 -727
  1608. paddlex/ppdet/modeling/backbones/lcnet.py +0 -259
  1609. paddlex/ppdet/modeling/backbones/lite_hrnet.py +0 -886
  1610. paddlex/ppdet/modeling/backbones/mobilenet_v1.py +0 -418
  1611. paddlex/ppdet/modeling/backbones/mobilenet_v3.py +0 -483
  1612. paddlex/ppdet/modeling/backbones/name_adapter.py +0 -69
  1613. paddlex/ppdet/modeling/backbones/res2net.py +0 -358
  1614. paddlex/ppdet/modeling/backbones/resnet.py +0 -613
  1615. paddlex/ppdet/modeling/backbones/senet.py +0 -139
  1616. paddlex/ppdet/modeling/backbones/shufflenet_v2.py +0 -246
  1617. paddlex/ppdet/modeling/backbones/swin_transformer.py +0 -743
  1618. paddlex/ppdet/modeling/backbones/vgg.py +0 -210
  1619. paddlex/ppdet/modeling/bbox_utils.py +0 -778
  1620. paddlex/ppdet/modeling/heads/__init__.py +0 -53
  1621. paddlex/ppdet/modeling/heads/bbox_head.py +0 -377
  1622. paddlex/ppdet/modeling/heads/cascade_head.py +0 -284
  1623. paddlex/ppdet/modeling/heads/centernet_head.py +0 -292
  1624. paddlex/ppdet/modeling/heads/detr_head.py +0 -368
  1625. paddlex/ppdet/modeling/heads/face_head.py +0 -110
  1626. paddlex/ppdet/modeling/heads/fcos_head.py +0 -259
  1627. paddlex/ppdet/modeling/heads/gfl_head.py +0 -487
  1628. paddlex/ppdet/modeling/heads/keypoint_hrhrnet_head.py +0 -108
  1629. paddlex/ppdet/modeling/heads/mask_head.py +0 -250
  1630. paddlex/ppdet/modeling/heads/pico_head.py +0 -278
  1631. paddlex/ppdet/modeling/heads/roi_extractor.py +0 -111
  1632. paddlex/ppdet/modeling/heads/s2anet_head.py +0 -1056
  1633. paddlex/ppdet/modeling/heads/simota_head.py +0 -506
  1634. paddlex/ppdet/modeling/heads/solov2_head.py +0 -560
  1635. paddlex/ppdet/modeling/heads/sparsercnn_head.py +0 -375
  1636. paddlex/ppdet/modeling/heads/ssd_head.py +0 -215
  1637. paddlex/ppdet/modeling/heads/tood_head.py +0 -366
  1638. paddlex/ppdet/modeling/heads/ttf_head.py +0 -316
  1639. paddlex/ppdet/modeling/heads/yolo_head.py +0 -124
  1640. paddlex/ppdet/modeling/initializer.py +0 -317
  1641. paddlex/ppdet/modeling/keypoint_utils.py +0 -342
  1642. paddlex/ppdet/modeling/layers.py +0 -1430
  1643. paddlex/ppdet/modeling/losses/__init__.py +0 -43
  1644. paddlex/ppdet/modeling/losses/ctfocal_loss.py +0 -68
  1645. paddlex/ppdet/modeling/losses/detr_loss.py +0 -233
  1646. paddlex/ppdet/modeling/losses/fairmot_loss.py +0 -41
  1647. paddlex/ppdet/modeling/losses/fcos_loss.py +0 -225
  1648. paddlex/ppdet/modeling/losses/gfocal_loss.py +0 -217
  1649. paddlex/ppdet/modeling/losses/iou_aware_loss.py +0 -47
  1650. paddlex/ppdet/modeling/losses/iou_loss.py +0 -210
  1651. paddlex/ppdet/modeling/losses/jde_loss.py +0 -193
  1652. paddlex/ppdet/modeling/losses/keypoint_loss.py +0 -229
  1653. paddlex/ppdet/modeling/losses/solov2_loss.py +0 -101
  1654. paddlex/ppdet/modeling/losses/sparsercnn_loss.py +0 -425
  1655. paddlex/ppdet/modeling/losses/ssd_loss.py +0 -170
  1656. paddlex/ppdet/modeling/losses/varifocal_loss.py +0 -152
  1657. paddlex/ppdet/modeling/losses/yolo_loss.py +0 -212
  1658. paddlex/ppdet/modeling/mot/__init__.py +0 -25
  1659. paddlex/ppdet/modeling/mot/matching/__init__.py +0 -19
  1660. paddlex/ppdet/modeling/mot/matching/deepsort_matching.py +0 -382
  1661. paddlex/ppdet/modeling/mot/matching/jde_matching.py +0 -144
  1662. paddlex/ppdet/modeling/mot/motion/__init__.py +0 -17
  1663. paddlex/ppdet/modeling/mot/motion/kalman_filter.py +0 -270
  1664. paddlex/ppdet/modeling/mot/tracker/__init__.py +0 -23
  1665. paddlex/ppdet/modeling/mot/tracker/base_jde_tracker.py +0 -297
  1666. paddlex/ppdet/modeling/mot/tracker/base_sde_tracker.py +0 -156
  1667. paddlex/ppdet/modeling/mot/tracker/deepsort_tracker.py +0 -188
  1668. paddlex/ppdet/modeling/mot/tracker/jde_tracker.py +0 -277
  1669. paddlex/ppdet/modeling/mot/utils.py +0 -263
  1670. paddlex/ppdet/modeling/mot/visualization.py +0 -150
  1671. paddlex/ppdet/modeling/necks/__init__.py +0 -30
  1672. paddlex/ppdet/modeling/necks/bifpn.py +0 -302
  1673. paddlex/ppdet/modeling/necks/blazeface_fpn.py +0 -216
  1674. paddlex/ppdet/modeling/necks/centernet_fpn.py +0 -426
  1675. paddlex/ppdet/modeling/necks/csp_pan.py +0 -364
  1676. paddlex/ppdet/modeling/necks/fpn.py +0 -231
  1677. paddlex/ppdet/modeling/necks/hrfpn.py +0 -126
  1678. paddlex/ppdet/modeling/necks/ttf_fpn.py +0 -242
  1679. paddlex/ppdet/modeling/necks/yolo_fpn.py +0 -988
  1680. paddlex/ppdet/modeling/ops.py +0 -1611
  1681. paddlex/ppdet/modeling/post_process.py +0 -731
  1682. paddlex/ppdet/modeling/proposal_generator/__init__.py +0 -2
  1683. paddlex/ppdet/modeling/proposal_generator/anchor_generator.py +0 -135
  1684. paddlex/ppdet/modeling/proposal_generator/proposal_generator.py +0 -77
  1685. paddlex/ppdet/modeling/proposal_generator/rpn_head.py +0 -260
  1686. paddlex/ppdet/modeling/proposal_generator/target.py +0 -681
  1687. paddlex/ppdet/modeling/proposal_generator/target_layer.py +0 -491
  1688. paddlex/ppdet/modeling/reid/__init__.py +0 -25
  1689. paddlex/ppdet/modeling/reid/fairmot_embedding_head.py +0 -225
  1690. paddlex/ppdet/modeling/reid/jde_embedding_head.py +0 -214
  1691. paddlex/ppdet/modeling/reid/pplcnet_embedding.py +0 -282
  1692. paddlex/ppdet/modeling/reid/pyramidal_embedding.py +0 -144
  1693. paddlex/ppdet/modeling/reid/resnet.py +0 -310
  1694. paddlex/ppdet/modeling/shape_spec.py +0 -25
  1695. paddlex/ppdet/modeling/transformers/__init__.py +0 -25
  1696. paddlex/ppdet/modeling/transformers/deformable_transformer.py +0 -517
  1697. paddlex/ppdet/modeling/transformers/detr_transformer.py +0 -353
  1698. paddlex/ppdet/modeling/transformers/matchers.py +0 -127
  1699. paddlex/ppdet/modeling/transformers/position_encoding.py +0 -108
  1700. paddlex/ppdet/modeling/transformers/utils.py +0 -110
  1701. paddlex/ppdet/optimizer.py +0 -335
  1702. paddlex/ppdet/slim/__init__.py +0 -82
  1703. paddlex/ppdet/slim/distill.py +0 -110
  1704. paddlex/ppdet/slim/prune.py +0 -85
  1705. paddlex/ppdet/slim/quant.py +0 -84
  1706. paddlex/ppdet/slim/unstructured_prune.py +0 -66
  1707. paddlex/ppdet/utils/__init__.py +0 -13
  1708. paddlex/ppdet/utils/check.py +0 -112
  1709. paddlex/ppdet/utils/checkpoint.py +0 -226
  1710. paddlex/ppdet/utils/cli.py +0 -151
  1711. paddlex/ppdet/utils/colormap.py +0 -58
  1712. paddlex/ppdet/utils/download.py +0 -558
  1713. paddlex/ppdet/utils/logger.py +0 -70
  1714. paddlex/ppdet/utils/profiler.py +0 -111
  1715. paddlex/ppdet/utils/stats.py +0 -94
  1716. paddlex/ppdet/utils/visualizer.py +0 -321
  1717. paddlex/ppdet/utils/voc_utils.py +0 -86
  1718. paddlex/seg.py +0 -41
  1719. paddlex/tools/__init__.py +0 -17
  1720. paddlex/tools/anchor_clustering/__init__.py +0 -15
  1721. paddlex/tools/anchor_clustering/yolo_cluster.py +0 -178
  1722. paddlex/tools/convert.py +0 -52
  1723. paddlex/tools/dataset_conversion/__init__.py +0 -24
  1724. paddlex/tools/dataset_conversion/x2coco.py +0 -379
  1725. paddlex/tools/dataset_conversion/x2imagenet.py +0 -82
  1726. paddlex/tools/dataset_conversion/x2seg.py +0 -343
  1727. paddlex/tools/dataset_conversion/x2voc.py +0 -230
  1728. paddlex/tools/dataset_split/__init__.py +0 -23
  1729. paddlex/tools/dataset_split/coco_split.py +0 -69
  1730. paddlex/tools/dataset_split/imagenet_split.py +0 -75
  1731. paddlex/tools/dataset_split/seg_split.py +0 -96
  1732. paddlex/tools/dataset_split/utils.py +0 -75
  1733. paddlex/tools/dataset_split/voc_split.py +0 -91
  1734. paddlex/tools/split.py +0 -41
  1735. paddlex/utils/checkpoint.py +0 -492
  1736. paddlex/utils/shm.py +0 -67
  1737. paddlex/utils/stats.py +0 -68
  1738. paddlex/utils/utils.py +0 -229
  1739. paddlex-2.1.0.data/data/paddlex_restful/restful/templates/paddlex_restful_demo.html +0 -5205
  1740. paddlex-2.1.0.dist-info/LICENSE +0 -201
  1741. paddlex-2.1.0.dist-info/METADATA +0 -32
  1742. paddlex-2.1.0.dist-info/RECORD +0 -698
  1743. paddlex-2.1.0.dist-info/WHEEL +0 -5
  1744. paddlex-2.1.0.dist-info/entry_points.txt +0 -4
  1745. paddlex-2.1.0.dist-info/top_level.txt +0 -3
  1746. paddlex_restful/__init__.py +0 -15
  1747. paddlex_restful/command.py +0 -63
  1748. paddlex_restful/restful/__init__.py +0 -15
  1749. paddlex_restful/restful/app.py +0 -969
  1750. paddlex_restful/restful/dataset/__init__.py +0 -13
  1751. paddlex_restful/restful/dataset/cls_dataset.py +0 -159
  1752. paddlex_restful/restful/dataset/dataset.py +0 -266
  1753. paddlex_restful/restful/dataset/datasetbase.py +0 -86
  1754. paddlex_restful/restful/dataset/det_dataset.py +0 -190
  1755. paddlex_restful/restful/dataset/ins_seg_dataset.py +0 -312
  1756. paddlex_restful/restful/dataset/operate.py +0 -155
  1757. paddlex_restful/restful/dataset/seg_dataset.py +0 -222
  1758. paddlex_restful/restful/dataset/utils.py +0 -267
  1759. paddlex_restful/restful/demo.py +0 -202
  1760. paddlex_restful/restful/dir.py +0 -45
  1761. paddlex_restful/restful/model.py +0 -312
  1762. paddlex_restful/restful/project/__init__.py +0 -13
  1763. paddlex_restful/restful/project/evaluate/__init__.py +0 -13
  1764. paddlex_restful/restful/project/evaluate/classification.py +0 -126
  1765. paddlex_restful/restful/project/evaluate/detection.py +0 -789
  1766. paddlex_restful/restful/project/evaluate/draw_pred_result.py +0 -181
  1767. paddlex_restful/restful/project/evaluate/segmentation.py +0 -122
  1768. paddlex_restful/restful/project/operate.py +0 -931
  1769. paddlex_restful/restful/project/project.py +0 -143
  1770. paddlex_restful/restful/project/prune/__init__.py +0 -13
  1771. paddlex_restful/restful/project/prune/classification.py +0 -32
  1772. paddlex_restful/restful/project/prune/detection.py +0 -48
  1773. paddlex_restful/restful/project/prune/segmentation.py +0 -34
  1774. paddlex_restful/restful/project/task.py +0 -884
  1775. paddlex_restful/restful/project/train/__init__.py +0 -13
  1776. paddlex_restful/restful/project/train/classification.py +0 -141
  1777. paddlex_restful/restful/project/train/detection.py +0 -263
  1778. paddlex_restful/restful/project/train/params.py +0 -432
  1779. paddlex_restful/restful/project/train/params_v2.py +0 -326
  1780. paddlex_restful/restful/project/train/segmentation.py +0 -191
  1781. paddlex_restful/restful/project/visualize.py +0 -244
  1782. paddlex_restful/restful/system.py +0 -102
  1783. paddlex_restful/restful/templates/paddlex_restful_demo.html +0 -5205
  1784. paddlex_restful/restful/utils.py +0 -841
  1785. paddlex_restful/restful/workspace.py +0 -343
  1786. paddlex_restful/restful/workspace_pb2.py +0 -1411
@@ -0,0 +1,2495 @@
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import math
16
+ import os
17
+ from dataclasses import dataclass
18
+ from typing import Any, Dict, List, Optional, Tuple, Union
19
+
20
+ import paddle
21
+ import paddle.distributed.fleet.meta_parallel as mpu
22
+ import paddle.nn as nn
23
+ import paddle.nn.functional as F
24
+ from paddle import Tensor
25
+ from paddle.distributed import fleet
26
+ from paddle.distributed.fleet.meta_parallel import get_rng_state_tracker
27
+ from paddle.distributed.fleet.utils import recompute
28
+
29
+ from .....utils import logging
30
+ from ....utils.benchmark import (
31
+ benchmark,
32
+ get_inference_operations,
33
+ set_inference_operations,
34
+ )
35
+ from ...common.vlm.activations import ACT2FN
36
+ from ...common.vlm.bert_padding import index_first_axis, pad_input, unpad_input
37
+ from ...common.vlm.flash_attn_utils import has_flash_attn_func
38
+ from ...common.vlm.transformers import PretrainedConfig, PretrainedModel
39
+ from ...common.vlm.transformers.model_outputs import (
40
+ BaseModelOutputWithPast,
41
+ ModelOutput,
42
+ )
43
+
44
+ flash_attn_func, flash_attn_varlen_func = has_flash_attn_func()
45
+ _IS_NPU = "npu" in paddle.get_device()
46
+
47
+ Linear = nn.Linear
48
+ ColumnParallelLinear = mpu.ColumnParallelLinear
49
+ RowParallelLinear = mpu.RowParallelLinear
50
+
51
+
52
+ class Qwen2VLVisionConfig(PretrainedConfig):
53
+ model_type = "qwen2_vl"
54
+
55
+ def __init__(
56
+ self,
57
+ depth=32,
58
+ embed_dim=1280,
59
+ hidden_size=3584,
60
+ hidden_act="quick_gelu",
61
+ mlp_ratio=4,
62
+ num_heads=16,
63
+ in_channels=3,
64
+ patch_size=14,
65
+ spatial_merge_size=2,
66
+ temporal_patch_size=2,
67
+         attn_implementation="eager",  # newly added
68
+ **kwargs,
69
+ ):
70
+ super().__init__(**kwargs)
71
+
72
+ self.depth = depth
73
+ self.embed_dim = embed_dim
74
+ self.hidden_size = hidden_size
75
+ self.hidden_act = hidden_act
76
+ self.mlp_ratio = mlp_ratio
77
+ self.num_heads = num_heads
78
+ self.in_channels = in_channels
79
+ self.patch_size = patch_size
80
+ self.spatial_merge_size = spatial_merge_size
81
+ self.temporal_patch_size = temporal_patch_size
82
+ self.attn_implementation = attn_implementation
83
+
84
+ @classmethod
85
+ def from_pretrained(
86
+ cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs
87
+ ) -> "PretrainedConfig":
88
+
89
+ config_dict, kwargs = cls.get_config_dict(
90
+ pretrained_model_name_or_path, **kwargs
91
+ )
92
+
93
+ if config_dict.get("model_type") == "qwen2_vl":
94
+ config_dict = config_dict["vision_config"]
95
+
96
+ if (
97
+ "model_type" in config_dict
98
+ and hasattr(cls, "model_type")
99
+ and config_dict["model_type"] != cls.model_type
100
+ ):
101
+ logging.warning(
102
+ f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
103
+ f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
104
+ )
105
+
106
+ return cls.from_dict(config_dict, **kwargs)
107
+
108
+
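For orientation, a minimal usage sketch of the vision config above (illustrative field values, assuming the definitions in this file are in scope). Note that `from_pretrained` additionally unwraps the nested `vision_config` entry when the loaded dict describes a full `qwen2_vl` model.

# Illustrative only: construct the vision tower config directly, overriding a
# couple of fields and keeping the remaining defaults declared above.
vision_cfg = Qwen2VLVisionConfig(depth=24, embed_dim=1024)
print(vision_cfg.patch_size, vision_cfg.spatial_merge_size)  # 14 2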
109
+ class Qwen2VLConfig(PretrainedConfig):
110
+ r"""
111
+ This is the configuration class to store the configuration of a [`Qwen2VLModel`]. It is used to instantiate a
112
+ Qwen2-VL model according to the specified arguments, defining the model architecture. Instantiating a configuration
113
+ with the defaults will yield a similar configuration to that of
114
+ Qwen2-VL-7B-Instruct [Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct).
115
+
116
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
117
+ documentation from [`PretrainedConfig`] for more information.
118
+
119
+ Args:
120
+ vocab_size (`int`, *optional*, defaults to 152064):
121
+ Vocabulary size of the Qwen2VL model. Defines the number of different tokens that can be represented by the
122
+ `inputs_ids` passed when calling [`Qwen2VLModel`]
123
+ hidden_size (`int`, *optional*, defaults to 8192):
124
+ Dimension of the hidden representations.
125
+ intermediate_size (`int`, *optional*, defaults to 29568):
126
+ Dimension of the MLP representations.
127
+ num_hidden_layers (`int`, *optional*, defaults to 80):
128
+ Number of hidden layers in the Transformer encoder.
129
+ num_attention_heads (`int`, *optional*, defaults to 64):
130
+ Number of attention heads for each attention layer in the Transformer encoder.
131
+ num_key_value_heads (`int`, *optional*, defaults to 8):
132
+ This is the number of key_value heads that should be used to implement Grouped Query Attention. If
133
+ `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
134
+ `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
135
+ converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
136
+ by meanpooling all the original heads within that group. For more details checkout [this
137
+             paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to `8`.
138
+ hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
139
+ The non-linear activation function (function or string) in the decoder.
140
+ max_position_embeddings (`int`, *optional*, defaults to 32768):
141
+ The maximum sequence length that this model might ever be used with.
142
+ initializer_range (`float`, *optional*, defaults to 0.02):
143
+ The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
144
+ rms_norm_eps (`float`, *optional*, defaults to 1e-05):
145
+ The epsilon used by the rms normalization layers.
146
+ use_cache (`bool`, *optional*, defaults to `True`):
147
+ Whether or not the model should return the last key/values attentions (not used by all models). Only
148
+ relevant if `config.is_decoder=True`.
149
+ tie_word_embeddings (`bool`, *optional*, defaults to `False`):
150
+ Whether the model's input and output word embeddings should be tied.
151
+ rope_theta (`float`, *optional*, defaults to 1000000.0):
152
+ The base period of the RoPE embeddings.
153
+ use_sliding_window (`bool`, *optional*, defaults to `False`):
154
+ Whether to use sliding window attention.
155
+ sliding_window (`int`, *optional*, defaults to 4096):
156
+ Sliding window attention (SWA) window size. If not specified, will default to `4096`.
157
+ max_window_layers (`int`, *optional*, defaults to 80):
158
+ The number of layers that use SWA (Sliding Window Attention). The bottom layers use SWA while the top use full attention.
159
+ attention_dropout (`float`, *optional*, defaults to 0.0):
160
+ The dropout ratio for the attention probabilities.
161
+ vision_config (`Dict`, *optional*):
162
+ The config for the visual encoder initialization.
163
+ rope_scaling (`Dict`, *optional*):
164
+ Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling
165
+ strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format is
166
+ `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update
167
+ `max_position_embeddings` to the expected new maximum. See the following thread for more information on how
168
+ these scaling strategies behave:
169
+ https://www.reddit.com/r/LocalLLaMA/comments/14mrgpr/dynamically_scaled_rope_further_increases/. This is an
170
+ experimental feature, subject to breaking API changes in future versions.
171
+ """
172
+
173
+ model_type = "qwen2_vl"
174
+ keys_to_ignore_at_inference = ["past_key_values"]
175
+
176
+ def __init__(
177
+ self,
178
+ vocab_size=152064,
179
+ hidden_size=8192,
180
+ intermediate_size=29568,
181
+ num_hidden_layers=80,
182
+ num_attention_heads=64,
183
+ num_key_value_heads=8,
184
+ hidden_act="silu",
185
+ max_position_embeddings=32768,
186
+ initializer_range=0.02,
187
+ rms_norm_eps=1e-05,
188
+ use_cache=True,
189
+ tie_word_embeddings=False,
190
+ rope_theta=1000000.0,
191
+ use_sliding_window=False,
192
+ sliding_window=4096,
193
+ max_window_layers=80,
194
+ attention_dropout=0.0,
195
+ vision_config=None,
196
+ rope_scaling=None,
197
+ **kwargs,
198
+ ):
199
+ if isinstance(vision_config, dict):
200
+ self.vision_config = Qwen2VLVisionConfig(**vision_config)
201
+ elif vision_config is None:
202
+ self.vision_config = Qwen2VLVisionConfig()
203
+
204
+ self.vocab_size = vocab_size
205
+ self.max_position_embeddings = max_position_embeddings
206
+ self.hidden_size = hidden_size
207
+ self.intermediate_size = intermediate_size
208
+ self.num_hidden_layers = num_hidden_layers
209
+ self.num_attention_heads = num_attention_heads
210
+ self.use_sliding_window = use_sliding_window
211
+ self.sliding_window = sliding_window
212
+ self.max_window_layers = max_window_layers
213
+
214
+ if num_key_value_heads is None:
215
+ num_key_value_heads = num_attention_heads
216
+
217
+ self.num_key_value_heads = num_key_value_heads
218
+ self.hidden_act = hidden_act
219
+ self.initializer_range = initializer_range
220
+ self.rms_norm_eps = rms_norm_eps
221
+ self.use_cache = use_cache
222
+ self.rope_theta = rope_theta
223
+ self.attention_dropout = attention_dropout
224
+ self.rope_scaling = rope_scaling
225
+
226
+ super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
227
+
228
+
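A hedged sketch of how the text-side config above accepts its `vision_config` argument (the field values are illustrative, not taken from a released checkpoint; the classes are assumed to be in scope):

# `vision_config` may be passed as a plain dict and is wrapped into a
# Qwen2VLVisionConfig; leaving it out falls back to the default vision config.
cfg = Qwen2VLConfig(
    hidden_size=1536,
    num_hidden_layers=28,
    num_attention_heads=12,
    num_key_value_heads=2,
    vision_config={"depth": 32, "embed_dim": 1280, "num_heads": 16},
)
assert isinstance(cfg.vision_config, Qwen2VLVisionConfig)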
229
+ def get_triangle_upper_mask(x, mask=None):
230
+ if mask is not None:
231
+ return mask
232
+ shape = x.shape
233
+ shape[1] = 1
234
+ mask = paddle.full(shape, paddle.finfo(x.dtype).min, dtype=x.dtype)
235
+ mask = paddle.triu(mask, diagonal=1)
236
+ mask.stop_gradient = True
237
+ return mask
238
+
239
+
240
+ def parallel_matmul(
241
+ x: Tensor, y: Tensor, transpose_y=True, tensor_parallel_output=True
242
+ ):
243
+ is_fleet_init = True
244
+ tensor_parallel_degree = 1
245
+ try:
246
+ hcg = fleet.get_hybrid_communicate_group()
247
+ model_parallel_group = hcg.get_model_parallel_group()
248
+ tensor_parallel_degree = hcg.get_model_parallel_world_size()
249
+ except:
250
+ is_fleet_init = False
251
+
252
+ if paddle.in_dynamic_mode():
253
+ y_is_distributed = y.is_distributed
254
+ else:
255
+ y_is_distributed = tensor_parallel_degree > 1
256
+
257
+ if is_fleet_init and tensor_parallel_degree > 1 and y_is_distributed:
258
+
259
+ input_parallel = paddle.distributed.collective._c_identity(
260
+ x, group=model_parallel_group
261
+ )
262
+ logits = paddle.matmul(input_parallel, y, transpose_y=transpose_y)
263
+
264
+ if tensor_parallel_output:
265
+ return logits
266
+ return paddle.distributed.collective._c_concat(
267
+ logits, group=model_parallel_group
268
+ )
269
+
270
+ else:
271
+ logits = paddle.matmul(x, y, transpose_y=transpose_y)
272
+ return logits
273
+
274
+
275
+ def _compute_default_rope_parameters(
276
+ config: Optional[PretrainedConfig] = None,
277
+ device: Optional["paddle.device"] = None,
278
+ seq_len: Optional[int] = None,
279
+ **rope_kwargs,
280
+ ) -> Tuple["paddle.Tensor", float]:
281
+ """
282
+ Computes the inverse frequencies according to the original RoPE implementation
283
+ Args:
284
+ config ([`~transformers.PretrainedConfig`]):
285
+ The model configuration.
286
+ device (`paddle.device`):
287
+ The device to use for initialization of the inverse frequencies.
288
+ seq_len (`int`, *optional*):
289
+ The current sequence length. Unused for this type of RoPE.
290
+ rope_kwargs (`Dict`, *optional*):
291
+ BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
292
+ Returns:
293
+ Tuple of (`paddle.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
294
+ post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
295
+ """
296
+ if config is not None and len(rope_kwargs) > 0:
297
+ raise ValueError(
298
+ "Unexpected arguments: `**rope_kwargs` and `config` are mutually exclusive in "
299
+ f"`_compute_default_rope_parameters`, got `rope_kwargs`={rope_kwargs} and `config`={config}"
300
+ )
301
+ if len(rope_kwargs) > 0:
302
+ base = rope_kwargs["base"]
303
+ dim = rope_kwargs["dim"]
304
+ elif config is not None:
305
+ base = config.rope_theta
306
+ partial_rotary_factor = (
307
+ config.partial_rotary_factor
308
+ if hasattr(config, "partial_rotary_factor")
309
+ else 1.0
310
+ )
311
+ head_dim = getattr(
312
+ config, "head_dim", config.hidden_size // config.num_attention_heads
313
+ )
314
+ dim = int(head_dim * partial_rotary_factor)
315
+
316
+ attention_factor = 1.0
317
+
318
+ inv_freq = 1.0 / (
319
+ base ** (paddle.arange(0, dim, 2, dtype="int64").astype("float32") / dim)
320
+ )
321
+ return inv_freq, attention_factor
322
+
323
+
324
+ ROPE_INIT_FUNCTIONS = {
325
+ "default": _compute_default_rope_parameters,
326
+ }
327
+
328
+
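A quick worked example of the default inverse-frequency formula registered above, with a toy `dim` and `base` (not tied to any checkpoint):

import paddle

# Reproduces the computation in _compute_default_rope_parameters:
# inv_freq[i] = 1 / base**(2i / dim).
base, dim = 10000.0, 8
inv_freq = 1.0 / (base ** (paddle.arange(0, dim, 2, dtype="int64").astype("float32") / dim))
print(inv_freq.tolist())  # approx. [1.0, 0.1, 0.01, 0.001]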
329
+ def _get_unpad_data(attention_mask):
330
+ seqlens_in_batch = attention_mask.sum(axis=-1, dtype="int32")
331
+ indices = paddle.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
332
+ max_seqlen_in_batch = seqlens_in_batch.max().item() # [2, 1, 1323]
333
+ cu_seqlens = F.pad(
334
+ paddle.cumsum(seqlens_in_batch, axis=0), (1, 0), data_format="NCL"
335
+ )
336
+ return (
337
+ indices,
338
+ cu_seqlens,
339
+ max_seqlen_in_batch,
340
+ )
341
+
342
+
343
+ def is_casual_mask(attention_mask):
344
+ """
345
+     The mask is causal if its upper triangular part equals the mask itself.
346
+ """
347
+ return (paddle.triu(attention_mask) == attention_mask).all().item()
348
+
349
+
350
+ def _make_causal_mask(input_ids_shape, past_key_values_length):
351
+ """
352
+ Make causal mask used for self-attention
353
+ """
354
+ batch_size, target_length = input_ids_shape
355
+
356
+ mask = paddle.tril(paddle.ones((target_length, target_length), dtype="bool"))
357
+
358
+ if past_key_values_length > 0:
359
+ mask = paddle.concat(
360
+ [paddle.ones([target_length, past_key_values_length], dtype="bool"), mask],
361
+ axis=-1,
362
+ )
363
+
364
+ return mask[None, None, :, :].expand(
365
+ [batch_size, 1, target_length, target_length + past_key_values_length]
366
+ )
367
+
368
+
369
+ def _expand_2d_mask(mask, dtype, tgt_length):
370
+ """
371
+ Expands attention_mask from `[batch_size, src_length]` to `[batch_size, 1, tgt_length, src_length]`.
372
+ """
373
+ batch_size, src_length = mask.shape[0], mask.shape[-1]
374
+ tgt_length = tgt_length if tgt_length is not None else src_length
375
+
376
+ mask = mask[:, None, None, :].astype("bool")
377
+ mask.stop_gradient = True
378
+ expanded_mask = mask.expand([batch_size, 1, tgt_length, src_length])
379
+
380
+ return expanded_mask
381
+
382
+
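A shape-only sketch of the two mask helpers above, with toy batch and length values (assuming the definitions above are in scope):

import paddle

# _make_causal_mask yields a [batch, 1, tgt, tgt + past] boolean mask and
# _expand_2d_mask broadcasts a [batch, src] padding mask to 4D.
causal = _make_causal_mask(input_ids_shape=(2, 4), past_key_values_length=3)
print(causal.shape)  # [2, 1, 4, 7]

padding = paddle.to_tensor([[1, 1, 0], [1, 0, 0]])
expanded = _expand_2d_mask(padding, dtype="bool", tgt_length=4)
print(expanded.shape)  # [2, 1, 4, 3]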
383
+ @dataclass
384
+ class Qwen2VLCausalLMOutputWithPast(ModelOutput):
385
+ """
386
+ Base class for Qwen2VL causal language model (or autoregressive) outputs.
387
+
388
+ Args:
389
+ loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
390
+ Language modeling loss (for next-token prediction).
391
+ logits (`paddle.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
392
+ Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
393
+ past_key_values (`tuple(tuple(paddle.Tensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
394
+ Tuple of `tuple(paddle.Tensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
395
+ `(batch_size, num_heads, sequence_length, embed_size_per_head)`)
396
+
397
+ Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
398
+ `past_key_values` input) to speed up sequential decoding.
399
+ hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
400
+ Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
401
+ one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.
402
+
403
+ Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
404
+ attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
405
+ Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
406
+ sequence_length)`.
407
+
408
+ Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
409
+ heads.
410
+ rope_deltas (`paddle.Tensor` of shape `(batch_size, )`, *optional*):
411
+ The rope index difference between sequence length and multimodal rope.
412
+ """
413
+
414
+ loss: Optional[paddle.Tensor] = None
415
+ logits: paddle.Tensor = None
416
+ past_key_values: Optional[List[paddle.Tensor]] = None
417
+ hidden_states: Optional[Tuple[paddle.Tensor]] = None
418
+ attentions: Optional[Tuple[paddle.Tensor]] = None
419
+ rope_deltas: Optional[paddle.Tensor] = None
420
+
421
+
422
+ class Qwen2VLRotaryEmbedding(nn.Layer):
423
+ def __init__(
424
+ self,
425
+ dim=None,
426
+ max_position_embeddings=2048,
427
+ base=10000,
428
+ device=None,
429
+ scaling_factor=1.0,
430
+ rope_type="default",
431
+ config: Optional[Qwen2VLConfig] = None,
432
+ ):
433
+ super().__init__()
434
+ self.rope_kwargs = {}
435
+ if config is None:
436
+ self.rope_kwargs = {
437
+ "rope_type": rope_type,
438
+ "factor": scaling_factor,
439
+ "dim": dim,
440
+ "base": base,
441
+ "max_position_embeddings": max_position_embeddings,
442
+ }
443
+ self.rope_type = rope_type
444
+ self.max_seq_len_cached = max_position_embeddings
445
+ self.original_max_seq_len = max_position_embeddings
446
+ else:
447
+ # BC: "rope_type" was originally "type"
448
+ if config.rope_scaling is not None:
449
+ self.rope_type = config.rope_scaling.get(
450
+ "rope_type", config.rope_scaling.get("type")
451
+ )
452
+ else:
453
+ self.rope_type = "default"
454
+ self.max_seq_len_cached = config.max_position_embeddings
455
+ self.original_max_seq_len = config.max_position_embeddings
456
+
457
+ self.config = config
458
+ self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type]
459
+
460
+ self.inv_freq, self.attention_scaling = self.rope_init_fn(
461
+ self.config, device, **self.rope_kwargs
462
+ )
463
+ self.original_inv_freq = self.inv_freq
464
+
465
+ self._set_cos_sin_cache(seq_len=max_position_embeddings)
466
+
467
+ def _set_cos_sin_cache(self, seq_len):
468
+ self.max_seq_len_cached = seq_len
469
+ t = paddle.arange(seq_len, dtype="float32")
470
+ freqs = paddle.einsum("i,j->ij", t, self.inv_freq)
471
+ emb = paddle.concat([freqs, freqs], axis=-1)
472
+ self.cos_cached = emb.cos()
473
+ self.sin_cached = emb.sin()
474
+
475
+ def _dynamic_frequency_update(self, position_ids, device):
476
+ """
477
+ dynamic RoPE layers should recompute `inv_freq` in the following situations:
478
+ 1 - growing beyond the cached sequence length (allow scaling)
479
+ 2 - the current sequence length is in the original scale (avoid losing precision with small sequences)
480
+ """
481
+ seq_len = paddle.max(position_ids) + 1
482
+ if seq_len > self.max_seq_len_cached: # growth
483
+ inv_freq, self.attention_scaling = self.rope_init_fn(
484
+ self.config, device, seq_len=seq_len, **self.rope_kwargs
485
+ )
486
+ self.inv_freq = inv_freq
487
+ self.max_seq_len_cached = seq_len
488
+
489
+ if (
490
+ seq_len < self.original_max_seq_len
491
+ and self.max_seq_len_cached > self.original_max_seq_len
492
+ ): # reset
493
+ self.inv_freq = self.original_inv_freq
494
+ self.max_seq_len_cached = self.original_max_seq_len
495
+
496
+ @paddle.no_grad()
497
+ def forward(self, x, position_ids):
498
+ if "dynamic" in self.rope_type:
499
+ self._dynamic_frequency_update(position_ids, device=x.device)
500
+
501
+ inv_freq_expanded = (
502
+ self.inv_freq[None, None, :, None]
503
+ .astype("float32")
504
+ .expand([3, position_ids.shape[1], -1, 1])
505
+ )
506
+ position_ids_expanded = position_ids[:, :, None, :].astype("float32")
507
+ device_type = paddle.get_device()
508
+ device_type = (
509
+ device_type
510
+ if isinstance(device_type, str) and device_type != "mps"
511
+ else "cpu"
512
+ )
513
+ with paddle.amp.auto_cast():
514
+ freqs = paddle.matmul(inv_freq_expanded, position_ids_expanded)
515
+ freqs = freqs.transpose([0, 1, 3, 2])
516
+ emb = paddle.concat((freqs, freqs), axis=-1)
517
+ cos = emb.cos()
518
+ sin = emb.sin()
519
+
520
+ cos = cos * self.attention_scaling
521
+ sin = sin * self.attention_scaling
522
+
523
+ return cos.astype(x.dtype), sin.astype(x.dtype)
524
+
525
+
526
+ # Copied from transformers.models.llama.modeling_llama.rotate_half
527
+ def rotate_half(x):
528
+ """Rotates half the hidden dims of the input."""
529
+ x1 = x[..., : x.shape[-1] // 2]
530
+ x2 = x[..., x.shape[-1] // 2 :]
531
+ return paddle.concat([-x2, x1], axis=-1)
532
+
533
+
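A tiny numerical check of `rotate_half` on an illustrative input:

import paddle

# The two halves of the last axis are swapped and the second half is negated,
# i.e. [x1, x2] -> [-x2, x1].
x = paddle.to_tensor([[1.0, 2.0, 3.0, 4.0]])
print(rotate_half(x).tolist())  # [[-3.0, -4.0, 1.0, 2.0]]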
534
+ def apply_multimodal_rotary_pos_emb(q, k, cos, sin, mrope_section, unsqueeze_dim=1):
535
+ """Applies Rotary Position Embedding with Multimodal Sections to the query and key tensors (https://qwenlm.github.io/blog/qwen2-vl/).
536
+
537
+ Explanation:
538
+ Multimodal 3D rotary position embedding is an extension to 1D rotary position embedding. The input embedding
539
+ sequence contains vision (images / videos) embedding and text embedding or just contains text embedding. For
540
+ vision embedding part, we apply rotary position embedding on temporal, height and width dimension separately.
541
+ Here we split the channel dimension to 3 chunks for the temporal, height and width rotary position embedding.
542
+         For the text embedding part, we just apply 1D rotary position embedding. The three rotary position indices (temporal,
544
+         height and width) of a text token are always the same, so the rotary position embedding for text is no different
545
+         from that of modern LLMs.
545
+
546
+ Args:
547
+ q (`paddle.Tensor`): The query tensor.
548
+ k (`paddle.Tensor`): The key tensor.
549
+ cos (`paddle.Tensor`): The cosine part of the rotary embedding.
550
+ sin (`paddle.Tensor`): The sine part of the rotary embedding.
551
+ position_ids (`paddle.Tensor`):
552
+ The position indices of the tokens corresponding to the query and key tensors. For example, this can be
553
+ used to pass offsetted position ids when working with a KV-cache.
554
+ mrope_section(`List(int)`):
555
+ Multimodal rope section is for channel dimension of temporal, height and width in rope calculation.
556
+ unsqueeze_dim (`int`, *optional*, defaults to 1):
557
+ The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
558
+ sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
559
+ that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
560
+ k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
561
+ cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
562
+ the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
563
+ Returns:
564
+ `tuple(paddle.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
565
+ """
566
+
567
+ mrope_section = mrope_section * 2
568
+ cos = paddle.concat(
569
+ x=[m[i % 3] for i, m in enumerate(cos.split(mrope_section, axis=-1))], axis=-1
570
+ ).unsqueeze(axis=unsqueeze_dim)
571
+ sin = paddle.concat(
572
+ x=[m[i % 3] for i, m in enumerate(sin.split(mrope_section, axis=-1))], axis=-1
573
+ ).unsqueeze(axis=unsqueeze_dim)
574
+
575
+ q_embed = (q * cos) + (rotate_half(q) * sin)
576
+ k_embed = (k * cos) + (rotate_half(k) * sin)
577
+ return q_embed, k_embed
578
+
579
+
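A shape sketch of the multimodal rotary application above, using random toy tensors. The leading axis of 3 on `cos`/`sin` corresponds to the temporal, height and width position grids, and `mrope_section` must sum to half of `head_dim`:

import paddle

# Toy sizes for illustration only.
bsz, heads, seq, head_dim = 1, 2, 5, 8
q = paddle.randn([bsz, heads, seq, head_dim])
k = paddle.randn([bsz, heads, seq, head_dim])
cos = paddle.randn([3, bsz, seq, head_dim])
sin = paddle.randn([3, bsz, seq, head_dim])
q_rot, k_rot = apply_multimodal_rotary_pos_emb(q, k, cos, sin, mrope_section=[1, 1, 2])
print(q_rot.shape, k_rot.shape)  # [1, 2, 5, 8] [1, 2, 5, 8]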
580
+ def apply_rotary_pos_emb_vision(
581
+ tensor: paddle.Tensor, freqs: paddle.Tensor
582
+ ) -> paddle.Tensor:
583
+ orig_dtype = tensor.dtype
584
+
585
+ with paddle.amp.auto_cast(False):
586
+ tensor = tensor.astype(dtype="float32")
587
+ cos = freqs.cos()
588
+ sin = freqs.sin()
589
+ cos = (
590
+ cos.unsqueeze(1)
591
+ .tile(repeat_times=[1, 1, 2])
592
+ .unsqueeze(0)
593
+ .astype(dtype="float32")
594
+ )
595
+ sin = (
596
+ sin.unsqueeze(1)
597
+ .tile(repeat_times=[1, 1, 2])
598
+ .unsqueeze(0)
599
+ .astype(dtype="float32")
600
+ )
601
+ output = tensor * cos + rotate_half(tensor) * sin
602
+ output = paddle.cast(output, orig_dtype)
603
+ return output
604
+
605
+
606
+ class VisionRotaryEmbedding(nn.Layer):
607
+ def __init__(self, dim: int, theta: float = 10000.0) -> None:
608
+ super().__init__()
609
+ self.inv_freq = 1.0 / theta ** (
610
+ paddle.arange(start=0, end=dim, step=2, dtype="float32") / dim
611
+ )
612
+
613
+ def forward(self, seqlen: int) -> paddle.Tensor:
614
+ seq = paddle.arange(seqlen).cast(self.inv_freq.dtype)
615
+ freqs = paddle.outer(x=seq, y=self.inv_freq)
616
+ return freqs
617
+
618
+
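A shape sketch for the vision rotary table above (toy sizes):

# The table is the outer product of token positions and inverse frequencies,
# giving a [seqlen, dim // 2] tensor of rotation angles.
rot = VisionRotaryEmbedding(dim=16)
print(rot(seqlen=10).shape)  # [10, 8]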
619
+ class PatchEmbed(nn.Layer):
620
+ def __init__(
621
+ self,
622
+ patch_size: int = 14,
623
+ temporal_patch_size: int = 2,
624
+ in_channels: int = 3,
625
+ embed_dim: int = 1152,
626
+ ) -> None:
627
+ super().__init__()
628
+ self.patch_size = patch_size
629
+ self.temporal_patch_size = temporal_patch_size
630
+ self.in_channels = in_channels
631
+ self.embed_dim = embed_dim
632
+
633
+ kernel_size = [temporal_patch_size, patch_size, patch_size]
634
+ self.proj = nn.Conv3D(
635
+ in_channels,
636
+ embed_dim,
637
+ kernel_size=kernel_size,
638
+ stride=kernel_size,
639
+ bias_attr=False,
640
+ )
641
+
642
+ def forward(self, hidden_states: paddle.Tensor) -> paddle.Tensor:
643
+
644
+ target_dtype = self.proj.weight.dtype
645
+ hidden_states = hidden_states.reshape(
646
+ [
647
+ -1,
648
+ self.in_channels,
649
+ self.temporal_patch_size,
650
+ self.patch_size,
651
+ self.patch_size,
652
+ ]
653
+ )
654
+ # NOTE(changwenbin): AttributeError: 'Variable' object has no attribute 'to'.
655
+ # hidden_states = self.proj(hidden_states.to(dtype=target_dtype)).reshape([-1, self.embed_dim])
656
+ # hidden_states = paddle.cast(hidden_states, dtype=target_dtype)
657
+ hidden_states = self.proj(
658
+ paddle.cast(hidden_states, dtype=target_dtype)
659
+ ).reshape([-1, self.embed_dim])
660
+ return hidden_states
661
+
662
+
663
+ class PatchMerger(nn.Layer):
664
+ def __init__(self, dim: int, context_dim: int, spatial_merge_size: int = 2) -> None:
665
+ super().__init__()
666
+ self.hidden_size = context_dim * (spatial_merge_size**2)
667
+ self.ln_q = nn.LayerNorm(context_dim, epsilon=1e-6)
668
+ self.mlp = nn.Sequential(
669
+ nn.Linear(self.hidden_size, self.hidden_size),
670
+ nn.GELU(),
671
+ nn.Linear(self.hidden_size, dim),
672
+ )
673
+
674
+ def forward(self, x: paddle.Tensor) -> paddle.Tensor:
675
+ x = self.mlp(self.ln_q(x).reshape([-1, self.hidden_size]))
676
+ return x
677
+
678
+
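A shape sketch of the patch merger above, using the default vision sizes declared earlier in this file (`embed_dim=1280`, `hidden_size=3584`):

import paddle

# Merging 2 x 2 neighbouring patch tokens turns 64 tokens of width 1280 into
# 16 tokens of width 3584.
merger = PatchMerger(dim=3584, context_dim=1280, spatial_merge_size=2)
print(merger(paddle.randn([64, 1280])).shape)  # [16, 3584]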
679
+ class VisionMlp(nn.Layer):
680
+ def __init__(self, dim: int, hidden_dim: int, hidden_act: str) -> None:
681
+ super().__init__()
682
+ self.fc1 = nn.Linear(dim, hidden_dim)
683
+ self.act = ACT2FN[hidden_act]
684
+ self.fc2 = nn.Linear(hidden_dim, dim)
685
+
686
+ def forward(self, x) -> paddle.Tensor:
687
+ return self.fc2(self.act(self.fc1(x)))
688
+
689
+
690
+ class VisionAttention(nn.Layer):
691
+ def __init__(self, dim: int, num_heads: int = 16) -> None:
692
+ super().__init__()
693
+ self.num_heads = num_heads
694
+ self.qkv = nn.Linear(dim, dim * 3, bias_attr=True)
695
+ self.proj = nn.Linear(dim, dim)
696
+         self.head_dim = dim // num_heads  # must be added
697
+
698
+ def forward(
699
+ self,
700
+ hidden_states: paddle.Tensor,
701
+ cu_seqlens: paddle.Tensor,
702
+ rotary_pos_emb: paddle.Tensor = None,
703
+ ) -> paddle.Tensor:
704
+ seq_length = hidden_states.shape[0]
705
+ q, k, v = (
706
+ self.qkv(hidden_states)
707
+ .reshape([seq_length, 3, self.num_heads, -1])
708
+ .transpose([1, 0, 2, 3])
709
+ .unbind(0)
710
+ )
711
+ q = apply_rotary_pos_emb_vision(q.unsqueeze(0), rotary_pos_emb).squeeze(0)
712
+ k = apply_rotary_pos_emb_vision(k.unsqueeze(0), rotary_pos_emb).squeeze(0)
713
+
714
+ attention_mask = paddle.zeros([1, seq_length, seq_length], dtype="bool")
715
+ for i in range(1, len(cu_seqlens)):
716
+ attention_mask[
717
+ ...,
718
+ cu_seqlens[i - 1] : cu_seqlens[i],
719
+ cu_seqlens[i - 1] : cu_seqlens[i],
720
+ ] = True
721
+
722
+ zero = paddle.zeros(attention_mask.shape, dtype=hidden_states.dtype)
723
+ neg_inf = paddle.full_like(
724
+ attention_mask,
725
+ paddle.finfo(hidden_states.dtype).min,
726
+ dtype=hidden_states.dtype,
727
+ )
728
+ attention_mask = paddle.where(attention_mask, zero, neg_inf)
729
+
730
+ q = q.transpose([1, 0, 2])
731
+ k = k.transpose([1, 0, 2])
732
+ v = v.transpose([1, 0, 2])
733
+ attn_weights = paddle.matmul(q, k.transpose([0, 2, 1])) / math.sqrt(
734
+ self.head_dim
735
+ )
736
+ attn_weights = attn_weights + attention_mask
737
+ attn_weights = nn.functional.softmax(attn_weights, axis=-1, dtype="float32")
738
+ attn_output = paddle.matmul(attn_weights, v)
739
+ attn_output = attn_output.transpose([1, 0, 2])
740
+ attn_output = attn_output.reshape([seq_length, -1])
741
+ attn_output = self.proj(attn_output)
742
+ return attn_output
743
+
744
+
745
+ class VisionFlashAttention2(nn.Layer):
746
+ def __init__(self, dim: int, num_heads: int = 16) -> None:
747
+ super().__init__()
748
+ self.num_heads = num_heads
749
+ self.qkv = nn.Linear(dim, dim * 3, bias_attr=True)
750
+ self.proj = nn.Linear(dim, dim)
751
+         self.head_dim = dim // num_heads  # must be added
752
+
753
+ def forward(
754
+ self,
755
+ hidden_states: paddle.Tensor,
756
+ cu_seqlens: paddle.Tensor,
757
+ rotary_pos_emb: paddle.Tensor = None,
758
+ ) -> paddle.Tensor:
759
+ seq_length = tuple(hidden_states.shape)[0]
760
+ qkv = (
761
+ self.qkv(hidden_states)
762
+ .reshape([seq_length, 3, self.num_heads, -1])
763
+ .transpose(perm=[1, 0, 2, 3])
764
+ )
765
+ q, k, v = qkv.unbind(axis=0)
766
+ q = apply_rotary_pos_emb_vision(q.unsqueeze(axis=0), rotary_pos_emb).squeeze(
767
+ axis=0
768
+ )
769
+ k = apply_rotary_pos_emb_vision(k.unsqueeze(axis=0), rotary_pos_emb).squeeze(
770
+ axis=0
771
+ )
772
+
773
+ if _IS_NPU:
774
+ attn_output = paddle.nn.functional.flash_attention_npu(
775
+ q.astype("bfloat16"),
776
+ k.astype("bfloat16"),
777
+ v.astype("bfloat16"),
778
+ is_varlen=True,
779
+ batch_size=1,
780
+ seq_length=seq_length,
781
+ ).reshape([seq_length, -1])
782
+ else:
783
+ max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max().item()
784
+
785
+ softmax_scale = self.head_dim**-0.5
786
+ attn_output = (
787
+ flash_attn_varlen_func(
788
+ q.astype("bfloat16"),
789
+ k.astype("bfloat16"),
790
+ v.astype("bfloat16"),
791
+ cu_seqlens,
792
+ cu_seqlens,
793
+ max_seqlen,
794
+ max_seqlen,
795
+ scale=softmax_scale,
796
+ )[0]
797
+ .squeeze(0)
798
+ .reshape([seq_length, -1])
799
+ )
800
+ if self.proj.weight.dtype == paddle.bfloat16:
801
+ attn_output = attn_output.astype(paddle.bfloat16)
802
+ elif self.proj.weight.dtype == paddle.float16:
803
+ attn_output = attn_output.astype(paddle.float16)
804
+ elif self.proj.weight.dtype == paddle.float32:
805
+ attn_output = attn_output.astype(paddle.float32)
806
+ attn_output = self.proj(attn_output)
807
+ return attn_output
808
+
809
+
810
+ def create_attention_module(config, module_type, layer_idx=None):
811
+ if flash_attn_func is not None:
812
+ if module_type == "qwen2vl":
813
+ return Qwen2VLFlashAttention2(config, layer_idx)
814
+ elif module_type == "vision":
815
+ return VisionFlashAttention2(config.embed_dim, num_heads=config.num_heads)
816
+ else:
817
+ logging.warning_once(
818
+             f"Warning: Flash Attention2 is not available for {module_type}, falling back to normal attention."
819
+ )
820
+
821
+ if module_type == "qwen2vl":
822
+ return Qwen2VLAttention(config, layer_idx)
823
+ elif module_type == "vision":
824
+ return VisionAttention(config.embed_dim, num_heads=config.num_heads)
825
+
826
+
827
+ class Qwen2VLVisionBlock(nn.Layer):
828
+ def __init__(self, config, attn_implementation: str = "flash_attention_2") -> None:
829
+ super().__init__()
830
+ self.norm1 = nn.LayerNorm(config.embed_dim, epsilon=1e-6)
831
+ self.norm2 = nn.LayerNorm(config.embed_dim, epsilon=1e-6)
832
+ mlp_hidden_dim = int(config.embed_dim * config.mlp_ratio)
833
+
834
+ self.attn = create_attention_module(config, "vision")
835
+ self.mlp = VisionMlp(
836
+ dim=config.embed_dim,
837
+ hidden_dim=mlp_hidden_dim,
838
+ hidden_act=config.hidden_act,
839
+ )
840
+
841
+ def forward(self, hidden_states, cu_seqlens, rotary_pos_emb) -> paddle.Tensor:
842
+ hidden_states = hidden_states + self.attn(
843
+ self.norm1(hidden_states),
844
+ cu_seqlens=cu_seqlens,
845
+ rotary_pos_emb=rotary_pos_emb,
846
+ )
847
+ hidden_states = hidden_states + self.mlp(self.norm2(hidden_states))
848
+ return hidden_states
849
+
850
+
851
+ def _prepare_4d_causal_attention_mask_with_cache_position(
852
+ attention_mask: paddle.Tensor,
853
+ sequence_length: int,
854
+ target_length: int,
855
+ dtype: paddle.dtype,
856
+ min_dtype: float,
857
+ cache_position: paddle.Tensor,
858
+ batch_size: int,
859
+ ):
860
+ """
861
+ Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` from a 2D mask of shape
862
+ `(batch_size, key_value_length)`, or if the input `attention_mask` is already 4D, do nothing.
863
+
864
+ Args:
865
+ attention_mask (`paddle.Tensor`):
866
+ A 2D attention mask of shape `(batch_size, key_value_length)` or a 4D attention mask of shape `(batch_size, 1, query_length, key_value_length)`.
867
+ sequence_length (`int`):
868
+ The sequence length being processed.
869
+ target_length (`int`):
870
+             The target length: when generating with a static cache, the mask should be as long as the static cache to account for the zero padding, i.e. the part of the cache that is not yet filled.
871
+ dtype (`paddle.dtype`):
872
+ The dtype to use for the 4D attention mask.
873
+ min_dtype (`float`):
874
+ The minimum value representable with the dtype `dtype`.
875
+ cache_position (`paddle.Tensor`):
876
+ Indices depicting the position of the input sequence tokens in the sequence.
877
+         batch_size (`int`):
878
+ Batch size.
879
+ """
880
+ if attention_mask is not None and attention_mask.dim() == 4:
881
+ causal_mask = attention_mask
882
+ else:
883
+ causal_mask = paddle.full(
884
+ [sequence_length, target_length], fill_value=min_dtype, dtype=dtype
885
+ )
886
+ if sequence_length != 1:
887
+ causal_mask = paddle.triu(x=causal_mask, diagonal=1)
888
+ causal_mask *= paddle.arange(target_length) > cache_position.reshape([-1, 1])
889
+ causal_mask = causal_mask[None, None, :, :].expand(
890
+ shape=[batch_size, 1, -1, -1]
891
+ )
892
+ if attention_mask is not None:
893
+ causal_mask = causal_mask.clone()
894
+ mask_length = tuple(attention_mask.shape)[-1]
895
+ padding_mask = (
896
+ causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :]
897
+ )
898
+ padding_mask = padding_mask == 0
899
+ causal_mask[:, :, :, :mask_length] = causal_mask[
900
+ :, :, :, :mask_length
901
+ ].masked_fill(mask=padding_mask, value=min_dtype)
902
+
903
+ return causal_mask
904
+
905
+
906
+ class Qwen2RMSNorm(nn.Layer):
907
+ def __init__(self, config: Qwen2VLConfig, hidden_size, eps=1e-6):
908
+ """
909
+ Qwen2RMSNorm is equivalent to T5LayerNorm
910
+ """
911
+ super().__init__()
912
+ self.weight = paddle.create_parameter(
913
+ shape=[hidden_size],
914
+ dtype=paddle.get_default_dtype(),
915
+ default_initializer=nn.initializer.Constant(1.0),
916
+ )
917
+ self.variance_epsilon = eps
918
+
919
+ def forward(self, hidden_states):
920
+ if paddle.in_dynamic_mode():
921
+ with paddle.amp.auto_cast(False):
922
+ variance = hidden_states.astype("float32").pow(2).mean(-1, keepdim=True)
923
+ hidden_states = (
924
+ paddle.rsqrt(variance + self.variance_epsilon) * hidden_states
925
+ )
926
+ else:
927
+ variance = hidden_states.astype("float32").pow(2).mean(-1, keepdim=True)
928
+ hidden_states = (
929
+ paddle.rsqrt(variance + self.variance_epsilon) * hidden_states
930
+ )
931
+
932
+ if self.weight.dtype in [paddle.float16, paddle.bfloat16]:
933
+ hidden_states = paddle.cast(hidden_states, self.weight.dtype)
934
+ return hidden_states * self.weight
935
+
936
+
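A small numerical sketch of the RMS normalization above, before the learned per-channel weight is applied (toy values, assuming paddle is available):

import paddle

# Each vector is divided by the root-mean-square of its elements.
x = paddle.to_tensor([[3.0, 4.0]])
variance = x.pow(2).mean(-1, keepdim=True)           # mean(9, 16) = 12.5
print((paddle.rsqrt(variance + 1e-6) * x).tolist())  # approx. [[0.8485, 1.1314]]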
937
+ class Qwen2MLP(nn.Layer):
938
+ def __init__(self, config):
939
+ super().__init__()
940
+ self.hidden_size = config.hidden_size
941
+ self.intermediate_size = config.intermediate_size
942
+ self.fuse_attention_ffn = config.fuse_attention_ffn
943
+ self.tensor_parallel_degree = config.tensor_parallel_degree
944
+
945
+ if config.tensor_parallel_degree > 1:
946
+
947
+ self.gate_proj = ColumnParallelLinear(
948
+ self.hidden_size,
949
+ self.intermediate_size,
950
+ gather_output=False,
951
+ has_bias=False,
952
+ )
953
+ self.up_proj = ColumnParallelLinear(
954
+ self.hidden_size,
955
+ self.intermediate_size,
956
+ gather_output=False,
957
+ has_bias=False,
958
+ )
959
+ self.down_proj = RowParallelLinear(
960
+ self.intermediate_size,
961
+ self.hidden_size,
962
+ input_is_parallel=True,
963
+ has_bias=False,
964
+ )
965
+ else:
966
+ self.gate_proj = Linear(
967
+ self.hidden_size, self.intermediate_size, bias_attr=False
968
+ ) # w1
969
+ self.up_proj = Linear(
970
+ self.hidden_size, self.intermediate_size, bias_attr=False
971
+ ) # w3
972
+ self.down_proj = Linear(
973
+ self.intermediate_size, self.hidden_size, bias_attr=False
974
+ ) # w2
975
+
976
+ self.act_fn = ACT2FN[config.hidden_act]
977
+ self.fuse_swiglu = False
978
+
979
+ def forward(self, x):
980
+ x, y = self.gate_proj(x), self.up_proj(x)
981
+ if self.fuse_swiglu:
982
+ x = self.act_fn(x, y)
983
+ else:
984
+ x = self.act_fn(x) * y
985
+
986
+ return self.down_proj(x)
987
+
988
+
989
+ # Copied from transformers.models.llama.modeling_llama.repeat_kv
990
+ def repeat_kv(hidden_states: paddle.Tensor, n_rep: int) -> paddle.Tensor:
991
+ """
992
+ This is the equivalent of paddle.repeat_interleave(x, axis=1, repeats=n_rep). The hidden states go from (batch,
993
+ num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
994
+ """
995
+ batch, num_key_value_heads, slen, head_dim = hidden_states.shape
996
+ if n_rep == 1:
997
+ return hidden_states
998
+ hidden_states = hidden_states[:, :, None, :, :].expand(
999
+ [batch, num_key_value_heads, n_rep, slen, head_dim]
1000
+ )
1001
+ return hidden_states.reshape([batch, num_key_value_heads * n_rep, slen, head_dim])
1002
+
1003
+
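A shape sketch of the grouped-query expansion above (toy sizes):

import paddle

# 2 key/value heads are each repeated 4 times so they line up with 8 query heads.
kv = paddle.randn([1, 2, 5, 64])     # (batch, num_key_value_heads, seq, head_dim)
print(repeat_kv(kv, n_rep=4).shape)  # [1, 8, 5, 64]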
1004
+ class Qwen2VLAttention(nn.Layer):
1005
+ """
1006
+ Multi-headed attention from 'Attention Is All You Need' paper. Modified to use sliding window attention: Longformer
1007
+ and "Generating Long Sequences with Sparse Transformers".
1008
+ """
1009
+
1010
+ def __init__(self, config: Qwen2VLConfig, layer_idx: Optional[int] = None):
1011
+ super().__init__()
1012
+ self.config = config
1013
+ self.layer_idx = layer_idx
1014
+ if layer_idx is None:
1015
+ logging.warning_once(
1016
+                 f"Instantiating {self.__class__.__name__} without passing `layer_idx` is not recommended and will lead "
1017
+ "to errors during the forward call, if caching is used. Please make sure to provide a `layer_idx` "
1018
+ "when creating this class."
1019
+ )
1020
+
1021
+ self.hidden_size = config.hidden_size
1022
+ self.num_heads = config.num_attention_heads
1023
+ self.head_dim = self.hidden_size // self.num_heads
1024
+ self.num_key_value_heads = config.num_key_value_heads
1025
+ self.num_key_value_groups = self.num_heads // self.num_key_value_heads
1026
+ self.max_position_embeddings = config.max_position_embeddings
1027
+ self.rope_theta = config.rope_theta
1028
+ self.is_causal = True
1029
+ self.attention_dropout = config.attention_dropout
1030
+ self.rope_scaling = config.rope_scaling
1031
+ # self.sequence_parallel = config.sequence_parallel
1032
+
1033
+ if config.tensor_parallel_degree > 1:
1034
+ assert (
1035
+ self.num_heads % config.tensor_parallel_degree == 0
1036
+ ), f"num_heads: {self.num_heads}, tensor_parallel_degree: {config.tensor_parallel_degree}"
1037
+ self.num_heads = self.num_heads // config.tensor_parallel_degree
1038
+
1039
+ assert (
1040
+ self.num_key_value_heads % config.tensor_parallel_degree == 0
1041
+ ), f"num_key_value_heads: {self.num_key_value_heads}, tensor_parallel_degree: {config.tensor_parallel_degree}"
1042
+ self.num_key_value_heads = (
1043
+ self.num_key_value_heads // config.tensor_parallel_degree
1044
+ )
1045
+
1046
+ if config.tensor_parallel_degree > 1:
1047
+ self.q_proj = ColumnParallelLinear(
1048
+ self.hidden_size, self.hidden_size, has_bias=True, gather_output=False
1049
+ )
1050
+ self.k_proj = ColumnParallelLinear(self.hidden_size, self.config.num_key_value_heads * self.head_dim, has_bias=True, gather_output=False) # fmt:skip
1051
+ self.v_proj = ColumnParallelLinear(self.hidden_size, self.config.num_key_value_heads * self.head_dim, has_bias=True, gather_output=False) # fmt:skip
1052
+ self.o_proj = RowParallelLinear(
1053
+ self.hidden_size,
1054
+ self.hidden_size,
1055
+ has_bias=False,
1056
+ input_is_parallel=True,
1057
+ )
1058
+ else:
1059
+ self.q_proj = Linear(self.hidden_size, self.hidden_size, bias_attr=True)
1060
+ self.k_proj = Linear(
1061
+ self.hidden_size,
1062
+ self.config.num_key_value_heads * self.head_dim,
1063
+ bias_attr=True,
1064
+ )
1065
+ self.v_proj = Linear(
1066
+ self.hidden_size,
1067
+ self.config.num_key_value_heads * self.head_dim,
1068
+ bias_attr=True,
1069
+ )
1070
+ self.o_proj = Linear(self.hidden_size, self.hidden_size, bias_attr=False)
1071
+
1072
+ self.rotary_emb = Qwen2VLRotaryEmbedding(
1073
+ self.head_dim,
1074
+ max_position_embeddings=self.max_position_embeddings,
1075
+ base=self.rope_theta,
1076
+ )
1077
+
1078
+ def forward(
1079
+ self,
1080
+ hidden_states: paddle.Tensor,
1081
+ attention_mask: Optional[paddle.Tensor] = None,
1082
+ position_ids: Optional[paddle.Tensor] = None,
1083
+ past_key_value: Optional[Tuple[paddle.Tensor]] = None,
1084
+ output_attentions: bool = False,
1085
+ use_cache: bool = False, # default true
1086
+ cache_position: Optional[paddle.Tensor] = None,
1087
+ ) -> Tuple[paddle.Tensor, Optional[paddle.Tensor], Optional[Tuple[paddle.Tensor]]]:
1088
+ bsz, q_len, _ = hidden_states.shape
1089
+
1090
+ try:
1091
+ query_states = self.q_proj(hidden_states)
1092
+ key_states = self.k_proj(hidden_states)
1093
+ value_states = self.v_proj(hidden_states)
1094
+ except:
1095
+ hidden_states = hidden_states.astype(self.config.dtype)
1096
+ query_states = self.q_proj(hidden_states)
1097
+ key_states = self.k_proj(hidden_states)
1098
+ value_states = self.v_proj(hidden_states)
1099
+
1100
+ target_query_shape = [0, 0, self.num_heads, self.head_dim]
1101
+ target_key_value_shape = [0, 0, self.num_key_value_heads, self.head_dim]
1102
+ query_states = query_states.reshape(shape=target_query_shape)
1103
+ key_states = key_states.reshape(shape=target_key_value_shape)
1104
+ value_states = value_states.reshape(shape=target_key_value_shape)
1105
+
1106
+ new_perm = [0, 2, 1, 3]
1107
+ query_states = query_states.transpose(new_perm)
1108
+ key_states = key_states.transpose(new_perm)
1109
+ value_states = value_states.transpose(new_perm)
1110
+
1111
+ kv_seq_len = key_states.shape[-2]
1112
+ if past_key_value is not None:
1113
+ kv_seq_len += cache_position[0] + 1
1114
+
1115
+ cos, sin = self.rotary_emb(value_states, position_ids)
1116
+ query_states, key_states = apply_multimodal_rotary_pos_emb(
1117
+ query_states, key_states, cos, sin, self.rope_scaling["mrope_section"]
1118
+ )
1119
+
1120
+ if past_key_value is not None:
1121
+ key_states = paddle.concat([past_key_value[0], key_states], axis=2)
1122
+ value_states = paddle.concat([past_key_value[1], value_states], axis=2)
1123
+ past_key_value = (key_states, value_states) if use_cache else None
1124
+
1125
+ # repeat k/v heads if n_kv_heads < n_heads
1126
+ key_states = repeat_kv(key_states, self.num_key_value_groups)
1127
+ value_states = repeat_kv(value_states, self.num_key_value_groups)
1128
+
1129
+ query_states = query_states.astype("float32")
1130
+ key_states = key_states.astype("float32")
1131
+ value_states = value_states.astype("float32")
1132
+
1133
+ attn_weights = paddle.matmul(
1134
+ query_states, key_states.transpose([0, 1, 3, 2])
1135
+ ) / math.sqrt(self.head_dim)
1136
+
1137
+ if attention_mask is not None:
1138
+ attn_weights = attn_weights + attention_mask
1139
+ attn_weights = nn.functional.softmax(attn_weights, axis=-1, dtype="float32")
1140
+ attn_weights = nn.functional.dropout(
1141
+ x=attn_weights, p=self.attention_dropout, training=self.training
1142
+ )
1143
+ attn_output = paddle.matmul(
1144
+ attn_weights.cast(self.config.dtype), value_states.cast(self.config.dtype)
1145
+ )
1146
+
1147
+ if attn_output.shape != [bsz, self.num_heads, q_len, self.head_dim]:
1148
+ raise ValueError(
1149
+                 f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
1150
+ f" {attn_output.shape}"
1151
+ )
1152
+
1153
+ attn_output = attn_output.transpose([0, 2, 1, 3])
1154
+ attn_output = attn_output.reshape([bsz, q_len, -1])
1155
+
1156
+ if self.o_proj.weight.dtype == paddle.bfloat16:
1157
+ attn_output = attn_output.astype(paddle.bfloat16)
1158
+ elif self.o_proj.weight.dtype == paddle.float16:
1159
+ attn_output = attn_output.astype(paddle.float16)
1160
+ elif self.o_proj.weight.dtype == paddle.float32:
1161
+ attn_output = attn_output.astype(paddle.float32)
1162
+
1163
+ attn_output = self.o_proj(attn_output)
1164
+ if not output_attentions:
1165
+ attn_weights = None
1166
+ return attn_output, attn_weights, past_key_value
1167
+
1168
+
1169
+ class Qwen2VLFlashAttention2(Qwen2VLAttention):
1170
+ """
1171
+ Qwen2VL flash attention module, following Qwen2VL attention module. This module inherits from `Qwen2VLAttention`
1172
+     as the weights of the module stay untouched. The only required change would be on the forward pass
1173
+ where it needs to correctly call the public API of flash attention and deal with padding tokens
1174
+ in case the input contains any of them. Additionally, for sliding window attention, we apply SWA only to the bottom
1175
+ config.max_window_layers layers.
1176
+ """
1177
+
1178
+ def __init__(self, *args, **kwargs):
1179
+ super().__init__(*args, **kwargs)
1180
+
1181
+ def forward(
1182
+ self,
1183
+ hidden_states: paddle.Tensor,
1184
+ attention_mask: Optional[paddle.Tensor] = None,
1185
+ position_ids: Optional[paddle.Tensor] = None,
1186
+ past_key_value: Optional[Tuple[paddle.Tensor]] = None,
1187
+ output_attentions: bool = False,
1188
+ use_cache: bool = False, # default true
1189
+ cache_position: Optional[paddle.Tensor] = None,
1190
+ ) -> Tuple[paddle.Tensor, Optional[paddle.Tensor], Optional[Tuple[paddle.Tensor]]]:
1191
+ bsz, q_len, _ = tuple(hidden_states.shape)
1192
+
1193
+ try:
1194
+ query_states = self.q_proj(hidden_states)
1195
+ key_states = self.k_proj(hidden_states)
1196
+ value_states = self.v_proj(hidden_states)
1197
+ except:
1198
+ hidden_states = hidden_states.astype("bfloat16")
1199
+ query_states = self.q_proj(hidden_states)
1200
+ key_states = self.k_proj(hidden_states)
1201
+ value_states = self.v_proj(hidden_states)
1202
+
1203
+ target_query_shape = [0, 0, self.num_heads, self.head_dim]
1204
+ target_key_value_shape = [0, 0, self.num_key_value_heads, self.head_dim]
1205
+ query_states = query_states.reshape(shape=target_query_shape)
1206
+ key_states = key_states.reshape(shape=target_key_value_shape)
1207
+ value_states = value_states.reshape(shape=target_key_value_shape)
1208
+
1209
+ new_perm = [0, 2, 1, 3]
1210
+ query_states = query_states.transpose(new_perm)
1211
+ key_states = key_states.transpose(new_perm)
1212
+ value_states = value_states.transpose(new_perm)
1213
+
1214
+ kv_seq_len = key_states.shape[-2]
1215
+ if past_key_value is not None:
1216
+ kv_seq_len += cache_position[0] + 1
1217
+
1218
+ # Because the input can be padded, the absolute sequence length depends on the max position id.
1219
+ cos, sin = self.rotary_emb(value_states, position_ids)
1220
+ query_states, key_states = apply_multimodal_rotary_pos_emb(
1221
+ query_states, key_states, cos, sin, self.rope_scaling["mrope_section"]
1222
+ )
1223
+
1224
+ if past_key_value is not None:
1225
+ key_states = paddle.concat([past_key_value[0], key_states], axis=2)
1226
+ value_states = paddle.concat([past_key_value[1], value_states], axis=2)
1227
+ past_key_value = (key_states, value_states) if use_cache else None
1228
+
1229
+ # repeat k/v heads if n_kv_heads < n_heads
1230
+ key_states = repeat_kv(key_states, self.num_key_value_groups)
1231
+ value_states = repeat_kv(value_states, self.num_key_value_groups)
1232
+
1233
+         # Reshape to the expected shape for Flash Attention
1234
+ # [1, 3599, 12, 128]
1235
+ query_states = query_states.transpose(perm=[0, 2, 1, 3])
1236
+ key_states = key_states.transpose(perm=[0, 2, 1, 3])
1237
+ value_states = value_states.transpose(perm=[0, 2, 1, 3])
1238
+
1239
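+ # Run flash attention; when a padding mask is given the helper unpads the inputs, calls the varlen kernel, and re-pads the output.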
+ attn_output = self._flash_attention_forward(
1240
+ query_states, key_states, value_states, attention_mask, q_len
1241
+ )
1242
+
1243
+ attn_output = attn_output.reshape([bsz, q_len, -1])
1244
+ attn_output = self.o_proj(attn_output)
1245
+ if not output_attentions:
1246
+ attn_weights = None
1247
+ return attn_output, attn_weights, past_key_value
1248
+
1249
+ def _flash_attention_forward(
1250
+ self,
1251
+ query_states,
1252
+ key_states,
1253
+ value_states,
1254
+ attention_mask,
1255
+ query_length,
1256
+ dropout=0.0,
1257
+ softmax_scale=None,
1258
+ ):
1259
+ """
1260
+ Calls the forward method of Flash Attention - if the input hidden states contain at least one padding token,
1261
+ first unpad the input, then compute the attention scores and pad the final attention scores.
1262
+
1263
+ Args:
1264
+ query_states (`paddle.Tensor`):
1265
+ Input query states to be passed to Flash Attention API
1266
+ key_states (`paddle.Tensor`):
1267
+ Input key states to be passed to Flash Attention API
1268
+ value_states (`paddle.Tensor`):
1269
+ Input value states to be passed to Flash Attention API
1270
+ attention_mask (`paddle.Tensor`):
1271
+ The padding mask - corresponds to a tensor of size `(batch_size, seq_len)` where 0 stands for the
1272
+ position of padding tokens and 1 for the position of non-padding tokens.
1273
+ dropout (`float`, *optional*):
1274
+ Attention dropout
1275
+ softmax_scale (`float`, *optional*):
1276
+ The scaling of QK^T before applying softmax. Defaults to 1 / sqrt(head_dim).
1277
+ """
1278
+ # A single-token decode step does not need a causal mask.
1279
+ causal = self.is_causal and query_length != 1
1280
+
1281
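+ # On NPU devices use the dedicated flash_attention_npu kernel; otherwise fall back to flash_attn_varlen_func / flash_attn_func below.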
+ if _IS_NPU:
1282
+ if attention_mask is not None:
1283
+ attn_output = paddle.nn.functional.flash_attention_npu( # TODO: flash_attn_unpadded
1284
+ query_states,
1285
+ key_states,
1286
+ value_states,
1287
+ attn_mask=attention_mask,
1288
+ dropout=dropout,
1289
+ causal=causal,
1290
+ is_varlen=True,
1291
+ )
1292
+ else:
1293
+ dtype = query_states.dtype
1294
+ attn_output = paddle.nn.functional.flash_attention_npu( # TODO: flash_attn_unpadded
1295
+ query_states.astype("bfloat16"),
1296
+ key_states.astype("bfloat16"),
1297
+ value_states.astype("bfloat16"),
1298
+ attn_mask=attention_mask,
1299
+ dropout=dropout,
1300
+ causal=causal,
1301
+ )
1302
+ attn_output = attn_output.astype(dtype)
1303
+ else:
1304
+ head_dim = query_states.shape[-1]
1305
+ softmax_scale = head_dim**-0.5  # TODO: needs to be set manually here
1306
+
1307
+ if attention_mask is not None:
1308
+ batch_size = query_states.shape[0]
1309
+ (
1310
+ query_states,
1311
+ key_states,
1312
+ value_states,
1313
+ indices_q,
1314
+ cu_seq_lens,
1315
+ max_seq_lens,
1316
+ ) = self._unpad_input(
1317
+ query_states, key_states, value_states, attention_mask, query_length
1318
+ )
1319
+ cu_seqlens_q, cu_seqlens_k = cu_seq_lens
1320
+ max_seqlen_in_batch_q, max_seqlen_in_batch_k = max_seq_lens
1321
+
1322
+ attn_output_unpad = flash_attn_varlen_func(
1323
+ query_states,
1324
+ key_states,
1325
+ value_states,
1326
+ cu_seqlens_q=cu_seqlens_q,
1327
+ cu_seqlens_k=cu_seqlens_k,
1328
+ max_seqlen_q=max_seqlen_in_batch_q,
1329
+ max_seqlen_k=max_seqlen_in_batch_k,
1330
+ scale=softmax_scale,  # this kernel's argument is named `scale`, not `softmax_scale`
1331
+ dropout=dropout,
1332
+ causal=causal,
1333
+ )[0]
1334
+
1335
+ attn_output = pad_input(
1336
+ attn_output_unpad, indices_q, batch_size, query_length
1337
+ )
1338
+ else:
1339
+ attn_output = flash_attn_func(
1340
+ query_states,
1341
+ key_states,
1342
+ value_states,
1343
+ dropout,
1344
+ causal=causal,
1345
+ )[0]
1346
+
1347
+ return attn_output
1348
+
1349
+ def _unpad_input(
1350
+ self, query_layer, key_layer, value_layer, attention_mask, query_length
1351
+ ):
1352
+ indices_k, cu_seqlens_k, max_seqlen_in_batch_k = _get_unpad_data(attention_mask)
1353
+ batch_size, kv_seq_len, num_key_value_heads, head_dim = key_layer.shape
1354
+
1355
+ # TODO:cuda error
1356
+ key_layer = index_first_axis(
1357
+ key_layer.reshape([batch_size * kv_seq_len, num_key_value_heads, head_dim]),
1358
+ indices_k,
1359
+ )
1360
+ value_layer = index_first_axis(
1361
+ value_layer.reshape(
1362
+ [batch_size * kv_seq_len, num_key_value_heads, head_dim]
1363
+ ),
1364
+ indices_k,
1365
+ )
1366
+
1367
+ if query_length == kv_seq_len:
1368
+ query_layer = index_first_axis(
1369
+ query_layer.reshape(
1370
+ [batch_size * kv_seq_len, self.num_heads, head_dim]
1371
+ ),
1372
+ indices_k,
1373
+ )
1374
+ cu_seqlens_q = cu_seqlens_k
1375
+ max_seqlen_in_batch_q = max_seqlen_in_batch_k
1376
+ indices_q = indices_k
1377
+ elif query_length == 1:
1378
+ max_seqlen_in_batch_q = 1
1379
+ cu_seqlens_q = paddle.arange(
1380
+ batch_size + 1, dtype=paddle.int32
1381
+ ) # There is a memcpy here, that is very bad.
1382
+ indices_q = cu_seqlens_q[:-1]
1383
+ query_layer = query_layer.squeeze(1)
1384
+ else:
1385
+ # The -q_len: slice assumes left padding.
1386
+ attention_mask = attention_mask[:, -query_length:]
1387
+ query_layer, indices_q, cu_seqlens_q, max_seqlen_in_batch_q = unpad_input(
1388
+ query_layer, attention_mask
1389
+ )
1390
+
1391
+ return (
1392
+ query_layer,
1393
+ key_layer,
1394
+ value_layer,
1395
+ indices_q.to(paddle.int64),
1396
+ (cu_seqlens_q, cu_seqlens_k),
1397
+ (max_seqlen_in_batch_q, max_seqlen_in_batch_k),
1398
+ )
1399
+
1400
+
1401
+ class Qwen2VLDecoderLayer(nn.Layer):
1402
+ def __init__(self, config: Qwen2VLConfig, layer_idx: int):
1403
+ super().__init__()
1404
+ self.hidden_size = config.hidden_size
1405
+
1406
+ # use_sliding_window false
1407
+ if (
1408
+ config.use_sliding_window
1409
+ and config.attn_implementation != "flash_attention_2"
1410
+ ):
1411
+ logging.warning_once(
1412
+ f"Sliding Window Attention is enabled but not implemented for `{config.attn_implementation}`; "
1413
+ "unexpected results may be encountered."
1414
+ )
1415
+
1416
+ self.self_attn = create_attention_module(config, "qwen2vl", layer_idx=layer_idx)
1417
+ # self.self_attn = Qwen2VLAttention(config, layer_idx)
1418
+ self.mlp = Qwen2MLP(config)
1419
+ self.input_layernorm = Qwen2RMSNorm(
1420
+ config, config.hidden_size, eps=config.rms_norm_eps
1421
+ )
1422
+ self.post_attention_layernorm = Qwen2RMSNorm(
1423
+ config, config.hidden_size, eps=config.rms_norm_eps
1424
+ )
1425
+
1426
+ def forward(
1427
+ self,
1428
+ hidden_states: paddle.Tensor,
1429
+ attention_mask: Optional[paddle.Tensor] = None,
1430
+ position_ids: Optional[paddle.Tensor] = None,
1431
+ past_key_value: Optional[Tuple[paddle.Tensor]] = None,
1432
+ output_attentions: Optional[bool] = False,
1433
+ use_cache: Optional[bool] = False,
1434
+ cache_position: Optional[paddle.Tensor] = None,
1435
+ **kwargs,
1436
+ ):
1437
+ """
1438
+ Args:
1439
+ hidden_states (`paddle.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
1440
+ attention_mask (`paddle.Tensor`, *optional*): attention mask of size
1441
+ `(batch, sequence_length)` where padding elements are indicated by 0.
1442
+ output_attentions (`bool`, *optional*):
1443
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under
1444
+ returned tensors for more detail.
1445
+ use_cache (`bool`, *optional*):
1446
+ If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
1447
+ (see `past_key_values`).
1448
+ past_key_value (`Tuple(paddle.Tensor)`, *optional*): cached past key and value projection states
1449
+ cache_position (`paddle.Tensor` of shape `(sequence_length)`, *optional*):
1450
+ Indices depicting the position of the input sequence tokens in the sequence.
1451
+ kwargs (`dict`, *optional*):
1452
+ Arbitrary kwargs to be ignored, used for FSDP and other methods that injects code
1453
+ into the model
1454
+ """
1455
+
1456
+ residual = hidden_states
1457
+
1458
+ hidden_states = self.input_layernorm(hidden_states)
1459
+
1460
+ # Self Attention
1461
+ hidden_states, self_attn_weights, present_key_value = self.self_attn(
1462
+ hidden_states=hidden_states,
1463
+ attention_mask=attention_mask,
1464
+ position_ids=position_ids,
1465
+ past_key_value=past_key_value,
1466
+ output_attentions=output_attentions,
1467
+ use_cache=use_cache,
1468
+ cache_position=cache_position,
1469
+ )
1470
+ hidden_states = residual + hidden_states
1471
+
1472
+ # Fully Connected
1473
+ residual = hidden_states
1474
+ hidden_states = self.post_attention_layernorm(hidden_states)
1475
+ hidden_states = self.mlp(hidden_states)
1476
+ hidden_states = residual + hidden_states
1477
+
1478
+ outputs = (hidden_states,)
1479
+
1480
+ if output_attentions:
1481
+ outputs += (self_attn_weights,)
1482
+
1483
+ if use_cache:
1484
+ outputs += (present_key_value,)
1485
+
1486
+ return outputs
1487
+
1488
+
1489
+ class Qwen2VLPreTrainedModel(PretrainedModel):
1490
+ config_class = Qwen2VLConfig
1491
+ base_model_prefix = "model"
1492
+ _no_split_modules = ["Qwen2VLDecoderLayer", "Qwen2VLVisionBlock"]
1493
+ _skip_keys_device_placement = "past_key_values"
1494
+
1495
+ def _init_weights(self, layer):
1496
+ std = 0.2
1497
+ if isinstance(layer, (nn.Linear, nn.Conv3D)):
1498
+ nn.initializer.Normal(mean=0.0, std=std)(layer.weight)
1499
+ if layer.bias is not None:
1500
+ nn.initializer.Constant(0.0)(layer.bias)
1501
+ elif isinstance(layer, nn.Embedding):
1502
+ nn.initializer.Normal(mean=0.0, std=std)(layer.weight)
1503
+ if layer._padding_idx is not None:
1504
+ with paddle.no_grad():
1505
+ layer.weight[layer._padding_idx] = 0.0
1506
+
1507
+
1508
+ class Qwen2VisionTransformerPretrainedModel(Qwen2VLPreTrainedModel):
1509
+ config_class = Qwen2VLVisionConfig
1510
+ _no_split_modules = ["Qwen2VLVisionBlock"]
1511
+
1512
+ def __init__(self, config) -> None:
1513
+ super().__init__(config)
1514
+ self.spatial_merge_size = config.spatial_merge_size
1515
+
1516
+ self.patch_embed = PatchEmbed(
1517
+ patch_size=config.patch_size,
1518
+ temporal_patch_size=config.temporal_patch_size,
1519
+ in_channels=config.in_channels,
1520
+ embed_dim=config.embed_dim,
1521
+ )
1522
+
1523
+ head_dim = config.embed_dim // config.num_heads
1524
+ self.rotary_pos_emb = VisionRotaryEmbedding(head_dim // 2)
1525
+
1526
+ self.blocks = nn.LayerList(
1527
+ [Qwen2VLVisionBlock(config) for _ in range(config.depth)]
1528
+ )
1529
+ self.merger = PatchMerger(dim=config.hidden_size, context_dim=config.embed_dim)
1530
+ self.enable_recompute = False
1531
+
1532
+ def get_dtype(self) -> paddle.dtype:
1533
+ return self.blocks[0].mlp.fc2.weight.dtype
1534
+
1535
+ def rot_pos_emb(self, grid_thw):
1536
+ pos_ids = []
1537
+ for t, h, w in grid_thw:
1538
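+ # Build (h, w) position ids for every patch, regrouped into spatial_merge_size x spatial_merge_size windows and tiled over the t temporal frames.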
+ hpos_ids = paddle.arange(h).unsqueeze(1).expand([-1, w])
1539
+ hpos_ids = hpos_ids.reshape(
1540
+ [
1541
+ h // self.spatial_merge_size,
1542
+ self.spatial_merge_size,
1543
+ w // self.spatial_merge_size,
1544
+ self.spatial_merge_size,
1545
+ ]
1546
+ )
1547
+ hpos_ids = hpos_ids.transpose(perm=[0, 2, 1, 3])
1548
+ hpos_ids = hpos_ids.flatten()
1549
+
1550
+ wpos_ids = paddle.arange(w).unsqueeze(0).expand([h, -1])
1551
+ wpos_ids = wpos_ids.reshape(
1552
+ [
1553
+ h // self.spatial_merge_size,
1554
+ self.spatial_merge_size,
1555
+ w // self.spatial_merge_size,
1556
+ self.spatial_merge_size,
1557
+ ]
1558
+ )
1559
+ wpos_ids = wpos_ids.transpose([0, 2, 1, 3])
1560
+ wpos_ids = wpos_ids.flatten()
1561
+ pos_ids.append(
1562
+ paddle.stack(x=[hpos_ids, wpos_ids], axis=-1).tile(repeat_times=[t, 1])
1563
+ )
1564
+ pos_ids = paddle.concat(x=pos_ids, axis=0)
1565
+ max_grid_size = grid_thw[:, 1:].max()
1566
+ rotary_pos_emb_full = self.rotary_pos_emb(max_grid_size)
1567
+ rotary_pos_emb = rotary_pos_emb_full[pos_ids].flatten(start_axis=1)
1568
+ return rotary_pos_emb
1569
+
1570
+ @paddle.jit.not_to_static
1571
+ def recompute_training_full(
1572
+ self,
1573
+ layer_module: nn.Layer,
1574
+ hidden_states: paddle.Tensor,
1575
+ cu_seqlens_now: paddle.Tensor,
1576
+ rotary_pos_emb: paddle.Tensor,
1577
+ ):
1578
+ def create_custom_forward(module):
1579
+ def custom_forward(*inputs):
1580
+ return module(*inputs)
1581
+
1582
+ return custom_forward
1583
+
1584
+ hidden_states = recompute(
1585
+ create_custom_forward(layer_module),
1586
+ hidden_states,
1587
+ cu_seqlens_now,
1588
+ rotary_pos_emb,
1589
+ # use_reentrant=self.config.recompute_use_reentrant,
1590
+ )
1591
+ return hidden_states
1592
+
1593
+ def forward(
1594
+ self, hidden_states: paddle.Tensor, grid_thw: paddle.Tensor
1595
+ ) -> paddle.Tensor:
1596
+ hidden_states = self.patch_embed(hidden_states)
1598
+ rotary_pos_emb = self.rot_pos_emb(grid_thw)
1599
+
1600
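+ # cu_seqlens holds cumulative per-frame patch counts (h*w repeated t times per image), so attention stays within each image/frame.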
+ cu_seqlens = paddle.repeat_interleave(
1601
+ grid_thw[:, 1] * grid_thw[:, 2], grid_thw[:, 0]
1602
+ ).cumsum(axis=0, dtype="int32")
1603
+ cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0)
1604
+
1605
+ for idx, blk in enumerate(self.blocks):
1606
+ if self.enable_recompute and self.training:
1607
+ hidden_states = self.recompute_training_full(
1608
+ blk, hidden_states, cu_seqlens, rotary_pos_emb
1609
+ )
1610
+ else:
1611
+ hidden_states = blk(
1612
+ hidden_states, cu_seqlens=cu_seqlens, rotary_pos_emb=rotary_pos_emb
1613
+ )
1614
+
1615
+ return self.merger(hidden_states)
1616
+
1617
+
1618
+ class Qwen2VLModel(Qwen2VLPreTrainedModel):
1619
+ def __init__(self, config: Qwen2VLConfig):
1620
+ super().__init__(config)
1621
+ self.padding_idx = config.pad_token_id
1622
+ self.vocab_size = config.vocab_size
1623
+ self.hidden_size = config.hidden_size
1624
+ # Recompute defaults to False and is controlled by Trainer
1625
+
1626
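+ # Use a vocab-parallel embedding when tensor parallelism is enabled and the vocab size divides evenly across ranks.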
+ if (
1627
+ config.tensor_parallel_degree > 1
1628
+ and config.vocab_size % config.tensor_parallel_degree == 0
1629
+ ):
1630
+ self.embed_tokens = mpu.VocabParallelEmbedding(
1631
+ self.vocab_size,
1632
+ self.hidden_size,
1633
+ weight_attr=paddle.ParamAttr(initializer=nn.initializer.XavierNormal()),
1634
+ )
1635
+ else:
1636
+ self.embed_tokens = nn.Embedding(
1637
+ self.vocab_size,
1638
+ self.hidden_size,
1639
+ )
1640
+
1641
+ # self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
1642
+ self.layers = nn.LayerList(
1643
+ [
1644
+ Qwen2VLDecoderLayer(config, layer_idx)
1645
+ for layer_idx in range(config.num_hidden_layers)
1646
+ ]
1647
+ )
1648
+ self.norm = Qwen2RMSNorm(config, config.hidden_size, eps=config.rms_norm_eps)
1649
+
1650
+ self.enable_recompute = False
1651
+
1652
+ def get_input_embeddings(self):
1653
+ return self.embed_tokens
1654
+
1655
+ def set_input_embeddings(self, value):
1656
+ self.embed_tokens = value
1657
+
1658
+ @staticmethod
1659
+ def _prepare_decoder_attention_mask(
1660
+ attention_mask, input_shape, past_key_values_length, dtype
1661
+ ):
1662
+ if attention_mask is not None:
1663
+ # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
1664
+ if len(attention_mask.shape) == 2:
1665
+ expanded_attn_mask = _expand_2d_mask(
1666
+ attention_mask, dtype, tgt_length=input_shape[-1]
1667
+ )
1668
+ # For decoding phase in generation, seq_length = 1, we don't need to add causal mask
1669
+ if input_shape[-1] > 1:
1670
+ combined_attention_mask = _make_causal_mask(
1671
+ input_shape,
1672
+ past_key_values_length=past_key_values_length,
1673
+ )
1674
+ expanded_attn_mask = expanded_attn_mask & combined_attention_mask
1675
+ # [bsz, seq_len, seq_len] -> [bsz, 1, seq_len, seq_len]
1676
+ elif len(attention_mask.shape) == 3:
1677
+ expanded_attn_mask = attention_mask.unsqueeze(1).astype("bool")
1678
+ # if attention_mask is already 4-D, do nothing
1679
+ else:
1680
+ expanded_attn_mask = attention_mask
1681
+ else:
1682
+ expanded_attn_mask = _make_causal_mask(
1683
+ input_shape,
1684
+ past_key_values_length=past_key_values_length,
1685
+ )
1686
+ # Convert bool attention_mask to float attention mask, which will be added to attention_scores later
1687
+ expanded_attn_mask = paddle.where(
1688
+ expanded_attn_mask, 0.0, paddle.finfo(dtype).min
1689
+ ).astype(dtype)
1690
+ return expanded_attn_mask
1691
+
1692
+ @paddle.jit.not_to_static
1693
+ def recompute_training_full(
1694
+ self,
1695
+ layer_module: nn.Layer,
1696
+ hidden_states: paddle.Tensor,
1697
+ attention_mask: paddle.Tensor,
1698
+ position_ids: Optional[paddle.Tensor],
1699
+ past_key_value: paddle.Tensor,
1700
+ output_attentions: bool,
1701
+ use_cache: bool,
1702
+ cache_position: Optional[paddle.Tensor] = None,
1703
+ ):
1704
+ def create_custom_forward(module):
1705
+ def custom_forward(*inputs):
1706
+ return module(*inputs)
1707
+
1708
+ return custom_forward
1709
+
1710
+ hidden_states = recompute(
1711
+ create_custom_forward(layer_module),
1712
+ hidden_states,
1713
+ attention_mask,
1714
+ position_ids,
1715
+ past_key_value,
1716
+ output_attentions,
1717
+ use_cache,
1718
+ cache_position,
1719
+ use_reentrant=self.config.recompute_use_reentrant,
1720
+ )
1721
+ return hidden_states
1722
+
1723
+ def forward(
1724
+ self,
1725
+ input_ids: paddle.Tensor = None,
1726
+ attention_mask: Optional[paddle.Tensor] = None,
1727
+ position_ids: Optional[paddle.Tensor] = None,
1728
+ past_key_values: Optional[List[paddle.Tensor]] = None,
1729
+ inputs_embeds: Optional[paddle.Tensor] = None,
1730
+ use_cache: Optional[bool] = None,
1731
+ output_attentions: Optional[bool] = None,
1732
+ output_hidden_states: Optional[bool] = None,
1733
+ return_dict: Optional[bool] = None,
1734
+ cache_position: Optional[paddle.Tensor] = None,
1735
+ ) -> Union[Tuple, BaseModelOutputWithPast]:
1736
+ output_attentions = (
1737
+ output_attentions
1738
+ if output_attentions is not None
1739
+ else self.config.output_attentions
1740
+ )
1741
+ output_hidden_states = (
1742
+ output_hidden_states
1743
+ if output_hidden_states is not None
1744
+ else self.config.output_hidden_states
1745
+ )
1746
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
1747
+
1748
+ return_dict = (
1749
+ return_dict if return_dict is not None else self.config.use_return_dict
1750
+ )
1751
+
1752
+ if (input_ids is None) ^ (inputs_embeds is not None):
1753
+ raise ValueError(
1754
+ "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
1755
+ )
1756
+ elif input_ids is not None:
1757
+ batch_size, seq_length = input_ids.shape
1758
+ elif inputs_embeds is not None:
1759
+ batch_size, seq_length, _ = inputs_embeds.shape
1760
+ else:
1761
+ raise ValueError(
1762
+ "You have to specify either decoder_input_ids or decoder_inputs_embeds"
1763
+ )
1764
+
1765
+ if past_key_values is None:
1766
+ past_key_values = tuple([None] * len(self.layers))
1767
+ # NOTE: convert to a list so each layer's cache can be cleared promptly after use
1768
+ past_key_values = list(past_key_values)
1769
+
1770
+ seq_length_with_past = seq_length
1771
+ cache_length = 0
1772
+ if past_key_values[0] is not None:
1773
+ cache_length = past_key_values[0][0].shape[2] # shape[1] in qwen2
1774
+ seq_length_with_past += cache_length
1775
+
1776
+ if inputs_embeds is None:
1777
+ inputs_embeds = self.embed_tokens(input_ids)
1778
+
1779
+ # embed positions
1780
+ if attention_mask is None:
1781
+ # [bs, seq_len]
1782
+ attention_mask = paddle.ones(
1783
+ (batch_size, seq_length_with_past), dtype=paddle.bool
1784
+ )
1785
+
1786
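+ # With varlen flash attention available the 2D padding mask is passed through as-is; otherwise build a 4D causal mask.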
+ if flash_attn_varlen_func:
1787
+ causal_mask = attention_mask
1788
+ else:
1789
+ causal_mask = self._prepare_decoder_attention_mask(
1790
+ attention_mask,
1791
+ (batch_size, seq_length),
1792
+ cache_length,
1793
+ inputs_embeds.dtype,
1794
+ ) # [bs, 1, seq_len, seq_len]
1795
+
1796
+ if cache_position is None:
1797
+ past_seen_tokens = (
1798
+ past_key_values[0][0].shape[2] if past_key_values[0] is not None else 0
1799
+ )
1800
+ cache_position = paddle.arange(
1801
+ past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1]
1802
+ )
1803
+
1804
+ if position_ids is None:
1805
+ # the hard coded `3` is for temporal, height and width.
1806
+ position_ids = cache_position.reshape([1, 1, -1]).expand(
1807
+ [3, inputs_embeds.shape[0], -1]
1808
+ )
1809
+
1810
+ hidden_states = inputs_embeds
1811
+
1812
+ # decoder layers
1813
+ all_hidden_states = () if output_hidden_states else None
1814
+ all_self_attns = () if output_attentions else None
1815
+ next_decoder_cache = ()
1816
+
1817
+ for idx, (decoder_layer) in enumerate(self.layers):
1818
+ if output_hidden_states:
1819
+ all_hidden_states += (hidden_states,)
1820
+
1821
+ past_key_value = (
1822
+ past_key_values[idx] if past_key_values is not None else None
1823
+ )
1824
+
1825
+ if self.enable_recompute and self.training:
1826
+ layer_outputs = self.recompute_training_full(
1827
+ decoder_layer,
1828
+ hidden_states,
1829
+ causal_mask,
1830
+ position_ids,
1831
+ past_key_value,
1832
+ output_attentions,
1833
+ use_cache,
1834
+ cache_position,
1835
+ )
1836
+ else:
1837
+ layer_outputs = decoder_layer(
1838
+ hidden_states,
1839
+ attention_mask=causal_mask,
1840
+ position_ids=position_ids,
1841
+ past_key_value=past_key_value,
1842
+ output_attentions=output_attentions, # False
1843
+ use_cache=use_cache, # True
1844
+ cache_position=cache_position,
1845
+ )
1846
+
1847
+ # NOTE: clear the outdated cache entry after use to save memory
1848
+ past_key_value = past_key_values[idx] = None
1849
+
1850
+ hidden_states = layer_outputs[0]
1851
+
1852
+ next_decoder_cache = (
1853
+ next_decoder_cache + (layer_outputs[-1],) if use_cache else None
1854
+ )
1855
+
1856
+ if output_attentions:
1857
+ all_self_attns += (layer_outputs[1],)
1858
+
1859
+ hidden_states = self.norm(hidden_states)
1860
+
1861
+ # add hidden states from the last decoder layer
1862
+ if output_hidden_states:
1863
+ all_hidden_states += (hidden_states,)
1864
+
1865
+ next_cache = next_decoder_cache if use_cache else None
1866
+
1867
+ if not return_dict:
1868
+ return tuple(
1869
+ v
1870
+ for v in [hidden_states, next_cache, all_hidden_states, all_self_attns]
1871
+ if v is not None
1872
+ )
1873
+ return BaseModelOutputWithPast(
1874
+ last_hidden_state=hidden_states,
1875
+ past_key_values=next_cache,
1876
+ hidden_states=all_hidden_states,
1877
+ attentions=all_self_attns,
1878
+ )
1879
+
1880
+
1881
+ class Qwen2LMHead(nn.Layer):
1882
+ def __init__(self, config, embedding_weights=None, transpose_y=False):
1883
+ super(Qwen2LMHead, self).__init__()
1884
+ self.config = config
1885
+ if (
1886
+ config.tensor_parallel_degree > 1
1887
+ and config.vocab_size % config.tensor_parallel_degree == 0
1888
+ ):
1889
+ vocab_size = config.vocab_size // config.tensor_parallel_degree
1890
+ else:
1891
+ vocab_size = config.vocab_size
1892
+
1893
+ self.transpose_y = transpose_y
1894
+ if transpose_y:
1895
+ # only for weight from embedding_weights
1896
+ if embedding_weights is not None:
1897
+ self.weight = embedding_weights
1898
+ else:
1899
+ self.weight = self.create_parameter(
1900
+ shape=[vocab_size, config.hidden_size],
1901
+ dtype=paddle.get_default_dtype(),
1902
+ )
1903
+ else:
1904
+
1905
+ if vocab_size != config.vocab_size:
1906
+ with get_rng_state_tracker().rng_state():
1907
+ self.weight = self.create_parameter(
1908
+ shape=[config.hidden_size, vocab_size],
1909
+ dtype=paddle.get_default_dtype(),
1910
+ )
1911
+ else:
1912
+ self.weight = self.create_parameter(
1913
+ shape=[config.hidden_size, vocab_size],
1914
+ dtype=paddle.get_default_dtype(),
1915
+ )
1916
+
1917
+ # Must set distributed attr for Tensor Parallel !
1918
+ self.weight.is_distributed = (
1919
+ True if (vocab_size != config.vocab_size) else False
1920
+ )
1921
+ if self.weight.is_distributed:
1922
+ # for tie_word_embeddings
1923
+ self.weight.split_axis = 0 if self.transpose_y else 1
1924
+
1925
+ def forward(self, hidden_states, tensor_parallel_output=None):
1926
+ if tensor_parallel_output is None:
1927
+ tensor_parallel_output = self.config.tensor_parallel_output
1928
+
1929
+ # Ensure the dtypes match
1930
+ if self.weight.dtype != hidden_states.dtype:
1931
+ hidden_states = paddle.cast(hidden_states, self.weight.dtype)
1932
+
1933
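+ # parallel_matmul performs the (optionally tensor-parallel) vocab projection; tensor_parallel_output controls whether sharded logits are kept per-rank or gathered.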
+ logits = parallel_matmul(
1934
+ hidden_states,
1935
+ self.weight,
1936
+ transpose_y=self.transpose_y,
1937
+ tensor_parallel_output=tensor_parallel_output,
1938
+ )
1939
+ return logits
1940
+
1941
+
1942
+ class Qwen2VLForConditionalGeneration(Qwen2VLPreTrainedModel):
1943
+ _tied_weights_keys = ["lm_head.weight"]
1944
+
1945
+ def __init__(self, config, attn_implementation="flash_attention_2"):
1946
+ super().__init__(config)
1947
+ config._attn_implementation = attn_implementation
1948
+ config.vision_config._attn_implementation = attn_implementation
1949
+
1950
+ self.visual = Qwen2VisionTransformerPretrainedModel._from_config(
1951
+ config.vision_config
1952
+ )
1953
+ self.model = Qwen2VLModel(config)
1954
+ self.vocab_size = config.vocab_size
1955
+
1956
+ if config.tie_word_embeddings:
1957
+ self.lm_head = Qwen2LMHead(
1958
+ config,
1959
+ embedding_weights=self.model.embed_tokens.weight,
1960
+ transpose_y=True,
1961
+ )
1962
+ self.tie_weights()
1963
+ else:
1964
+ self.lm_head = Qwen2LMHead(config)
1965
+ self.padding_side = "left"  # left by default; use the setter to change padding_side
1966
+
1967
+ def get_input_embeddings(self):
1968
+ return self.model.embed_tokens
1969
+
1970
+ def set_input_embeddings(self, value):
1971
+ self.model.embed_tokens = value
1972
+
1973
+ def get_output_embeddings(self):
1974
+ return self.lm_head
1975
+
1976
+ def set_output_embeddings(self, new_embeddings):
1977
+ self.lm_head = new_embeddings
1978
+
1979
+ def set_decoder(self, decoder):
1980
+ self.model = decoder
1981
+
1982
+ def get_decoder(self):
1983
+ return self.model
1984
+
1985
+ @staticmethod
1986
+ def get_rope_index(
1987
+ spatial_merge_size,
1988
+ image_token_id,
1989
+ video_token_id,
1990
+ vision_start_token_id,
1991
+ input_ids: paddle.Tensor,
1992
+ image_grid_thw: Optional[paddle.Tensor] = None,
1993
+ video_grid_thw: Optional[paddle.Tensor] = None,
1994
+ attention_mask: Optional[paddle.Tensor] = None,
1995
+ ) -> Tuple[paddle.Tensor, paddle.Tensor]:
1996
+ """
1997
+ Calculate the 3D rope index based on image and video's temporal, height and width in LLM.
1998
+
1999
+ Explanation:
2000
+ Each embedding sequence contains vision embedding and text embedding or just contains text embedding.
2001
+
2002
+ For pure text embedding sequence, the rotary position embedding has no difference with modern LLMs.
2003
+ Examples:
2004
+ input_ids: [T T T T T], here T is for text.
2005
+ temporal position_ids: [0, 1, 2, 3, 4]
2006
+ height position_ids: [0, 1, 2, 3, 4]
2007
+ width position_ids: [0, 1, 2, 3, 4]
2008
+
2009
+ For vision and text embedding sequence, we calculate 3D rotary position embedding for vision part
2010
+ and 1D rotary position embedding for text part.
2011
+ Examples:
2012
+ Assume we have a video input with 3 temporal patches, 2 height patches and 2 width patches.
2013
+ input_ids: [V V V V V V V V V V V V T T T T T], here V is for vision.
2014
+ vision temporal position_ids: [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
2015
+ vision height position_ids: [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]
2016
+ vision width position_ids: [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
2017
+ text temporal position_ids: [3, 4, 5, 6, 7]
2018
+ text height position_ids: [3, 4, 5, 6, 7]
2019
+ text width position_ids: [3, 4, 5, 6, 7]
2020
+ Here we calculate the text start position_ids as the max vision position_ids plus 1.
2021
+
2022
+ Args:
2023
+ input_ids (`paddle.Tensor` of shape `(batch_size, sequence_length)`):
2024
+ Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
2025
+ it.
2026
+ image_grid_thw (`paddle.Tensor` of shape `(num_images, 3)`, *optional*):
2027
+ The temporal, height and width of feature shape of each image in LLM.
2028
+ video_grid_thw (`paddle.Tensor` of shape `(num_videos, 3)`, *optional*):
2029
+ The temporal, height and width of feature shape of each video in LLM.
2030
+ attention_mask (`paddle.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
2031
+ Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
2032
+
2033
+ - 1 for tokens that are **not masked**,
2034
+ - 0 for tokens that are **masked**.
2035
+
2036
+ Returns:
2037
+ position_ids (`paddle.Tensor` of shape `(3, batch_size, sequence_length)`)
2038
+ mrope_position_deltas (`paddle.Tensor` of shape `(batch_size)`)
2039
+ """
2040
+ mrope_position_deltas = []
2041
+ if image_grid_thw is not None or video_grid_thw is not None:
2042
+ total_input_ids = input_ids
2043
+ position_ids = paddle.ones(
2044
+ [3, input_ids.shape[0], input_ids.shape[1]], dtype=input_ids.dtype
2045
+ )
2046
+ image_index, video_index = 0, 0
2047
+ for i, input_ids in enumerate(total_input_ids):
2048
+ # TODO: CUDA error in some paddle version
2049
+ if attention_mask is not None:
2050
+ input_ids = paddle.to_tensor(
2051
+ input_ids.cpu()[attention_mask[i].cpu() == 1]
2052
+ )  # NOTE: original implementation
2053
+
2054
+ image_nums, video_nums = 0, 0
2055
+ vision_start_indices = paddle.nonzero(
2056
+ input_ids == vision_start_token_id
2057
+ ).squeeze(
2058
+ 1
2059
+ )  # NOTE: original implementation
2060
+
2061
+ vision_tokens = input_ids[vision_start_indices + 1]
2062
+ image_nums = (
2063
+ (vision_tokens == image_token_id).sum()
2064
+ if vision_tokens.numel() > 0
2065
+ else 0
2066
+ )
2067
+ video_nums = (
2068
+ (vision_tokens == video_token_id).sum()
2069
+ if vision_tokens.numel() > 0
2070
+ else 0
2071
+ )
2072
+ input_tokens = input_ids.tolist()
2073
+ llm_pos_ids_list: list = []
2074
+ st = 0
2075
+ remain_images, remain_videos = image_nums, video_nums
2076
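+ # Walk the sequence segment by segment: text spans get 1D positions, each image/video block gets 3D (t, h, w) positions.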
+ for _ in range(image_nums + video_nums):
2077
+ if image_token_id in input_tokens and remain_images > 0:
2078
+ ed_image = input_tokens.index(image_token_id, st)
2079
+ else:
2080
+ ed_image = len(input_tokens) + 1
2081
+ if video_token_id in input_tokens and remain_videos > 0:
2082
+ ed_video = input_tokens.index(video_token_id, st)
2083
+ else:
2084
+ ed_video = len(input_tokens) + 1
2085
+ if ed_image < ed_video:
2086
+ t, h, w = (
2087
+ image_grid_thw[image_index][0],
2088
+ image_grid_thw[image_index][1],
2089
+ image_grid_thw[image_index][2],
2090
+ )
2091
+ image_index += 1
2092
+ remain_images -= 1
2093
+ ed = ed_image
2094
+ else:
2095
+ t, h, w = (
2096
+ video_grid_thw[video_index][0],
2097
+ video_grid_thw[video_index][1],
2098
+ video_grid_thw[video_index][2],
2099
+ )
2100
+ video_index += 1
2101
+ remain_videos -= 1
2102
+ ed = ed_video
2103
+ llm_grid_t, llm_grid_h, llm_grid_w = (
2104
+ t.item(),
2105
+ h.item() // spatial_merge_size,
2106
+ w.item() // spatial_merge_size,
2107
+ )
2108
+ text_len = ed - st
2109
+
2110
+ st_idx = (
2111
+ llm_pos_ids_list[-1].max() + 1
2112
+ if len(llm_pos_ids_list) > 0
2113
+ else 0
2114
+ )
2115
+ llm_pos_ids_list.append(
2116
+ paddle.arange(text_len).reshape([1, -1]).expand([3, -1])
2117
+ + st_idx
2118
+ )
2119
+
2120
+ t_index = (
2121
+ paddle.arange(llm_grid_t)
2122
+ .reshape([-1, 1])
2123
+ .expand([-1, llm_grid_h * llm_grid_w])
2124
+ .flatten()
2125
+ )
2126
+ h_index = (
2127
+ paddle.arange(llm_grid_h)
2128
+ .reshape([1, -1, 1])
2129
+ .expand([llm_grid_t, -1, llm_grid_w])
2130
+ .flatten()
2131
+ )
2132
+ w_index = (
2133
+ paddle.arange(llm_grid_w)
2134
+ .reshape([1, 1, -1])
2135
+ .expand([llm_grid_t, llm_grid_h, -1])
2136
+ .flatten()
2137
+ )
2138
+ llm_pos_ids_list.append(
2139
+ paddle.stack([t_index, h_index, w_index]) + text_len + st_idx
2140
+ )
2141
+ st = ed + llm_grid_t * llm_grid_h * llm_grid_w
2142
+
2143
+ if st < len(input_tokens):
2144
+ st_idx = (
2145
+ llm_pos_ids_list[-1].max() + 1
2146
+ if len(llm_pos_ids_list) > 0
2147
+ else 0
2148
+ )
2149
+ text_len = len(input_tokens) - st
2150
+ llm_pos_ids_list.append(
2151
+ paddle.arange(text_len).reshape([1, -1]).expand([3, -1])
2152
+ + st_idx
2153
+ )
2154
+
2155
+ llm_positions = paddle.concat(llm_pos_ids_list, axis=1).reshape([3, -1])
2156
+ if _IS_NPU:
2157
+ bool_indices = (
2158
+ (attention_mask[i] == 1)
2159
+ .unsqueeze(0)
2160
+ .tile([position_ids.shape[0], 1])
2161
+ )
2162
+ position_ids[:, i] = paddle.index_put(
2163
+ position_ids[:, i], [bool_indices], llm_positions.reshape([-1])
2164
+ )
2165
+ else:
2166
+ position_ids[..., i, attention_mask[i] == 1] = llm_positions
2167
+ mrope_position_deltas.append(
2168
+ llm_positions.max() + 1 - len(total_input_ids[i])
2169
+ )
2170
+ mrope_position_deltas = paddle.to_tensor(mrope_position_deltas).unsqueeze(1)
2171
+ else:
2172
+ if attention_mask is not None:
2173
+ position_ids = paddle.cast(attention_mask, dtype="int64").cumsum(-1) - 1
2174
+ position_ids.masked_fill_(mask=attention_mask == 0, value=1)
2175
+ position_ids = position_ids.unsqueeze(0).expand([3, -1, -1])
2176
+ max_position_ids = position_ids.max(0, keepdim=False)[0].max(
2177
+ -1, keepdim=True
2178
+ )[0]
2179
+ mrope_position_deltas = max_position_ids + 1 - attention_mask.shape[-1]
2180
+ else:
2181
+ position_ids = (
2182
+ paddle.arange(input_ids.shape[1])
2183
+ .reshape([1, 1, -1])
2184
+ .expand(shape=[3, input_ids.shape[0], -1])
2185
+ )
2186
+ mrope_position_deltas = paddle.zeros(
2187
+ [input_ids.shape[0], 1], dtype=input_ids.dtype
2188
+ )
2189
+
2190
+ return position_ids, mrope_position_deltas
2191
+
2192
+ def update_model_kwargs_for_generation(
2193
+ self,
2194
+ outputs: ModelOutput,
2195
+ model_kwargs: Dict[str, Any],
2196
+ is_encoder_decoder: bool = False,
2197
+ # num_new_tokens: int = 1,
2198
+ ) -> Dict[str, Any]:
2199
+ model_kwargs = super().update_model_kwargs_for_generation(
2200
+ outputs=outputs,
2201
+ model_kwargs=model_kwargs,
2202
+ is_encoder_decoder=is_encoder_decoder,
2203
+ )
2204
+
2205
+ if getattr(outputs, "rope_deltas", None) is not None:
2206
+ model_kwargs["rope_deltas"] = outputs.rope_deltas
2207
+
2208
+ return model_kwargs
2209
+
2210
+ def forward(
2211
+ self,
2212
+ input_ids: paddle.Tensor = None,
2213
+ attention_mask: Optional[paddle.Tensor] = None,
2214
+ position_ids: Optional[paddle.Tensor] = None,
2215
+ past_key_values: Optional[List[paddle.Tensor]] = None,
2216
+ inputs_embeds: Optional[paddle.Tensor] = None,
2217
+ labels: Optional[paddle.Tensor] = None,
2218
+ use_cache: Optional[bool] = None,
2219
+ output_attentions: Optional[bool] = None,
2220
+ output_hidden_states: Optional[bool] = None,
2221
+ return_dict: Optional[bool] = None,
2222
+ pixel_values: Optional[paddle.Tensor] = None,
2223
+ pixel_values_videos: Optional[paddle.Tensor] = None,
2224
+ image_grid_thw: Optional[paddle.Tensor] = None,
2225
+ video_grid_thw: Optional[paddle.Tensor] = None,
2226
+ rope_deltas: Optional[paddle.Tensor] = None,
2227
+ ):
2228
+ """
2229
+ Args:
2230
+ labels (`paddle.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
2231
+ Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
2232
+ config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
2233
+ (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
2234
+ """
2235
+ output_attentions = (
2236
+ output_attentions
2237
+ if output_attentions is not None
2238
+ else self.config.output_attentions
2239
+ )
2240
+ output_hidden_states = output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states # fmt:skip
2241
+
2242
+ return_dict = True # return_dict if return_dict is not None else self.config.use_return_dict
2243
+
2244
+ if inputs_embeds is None:
2245
+ inputs_embeds = self.model.embed_tokens(input_ids)
2246
+
2247
+ if pixel_values is not None:
2248
+ pixel_values = paddle.cast(pixel_values, inputs_embeds.dtype)
2249
+
2250
+ image_embeds = self.visual(pixel_values, grid_thw=image_grid_thw)
2251
+ image_embeds = paddle.cast(image_embeds, inputs_embeds.dtype)
2252
+
2253
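+ # Scatter the visual features into the text embeddings at the image placeholder token positions.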
+ image_mask = input_ids == self.config.image_token_id
2254
+ if self.training:
2255
+ inputs_embeds = inputs_embeds.clone()
2256
+ inputs_embeds[image_mask] = image_embeds
2257
+ if pixel_values_videos is not None:
2258
+ pixel_values_videos = paddle.cast(
2259
+ pixel_values_videos, inputs_embeds.dtype
2260
+ )
2261
+ video_embeds = self.visual(pixel_values_videos, grid_thw=video_grid_thw)
2262
+ video_embeds = paddle.cast(video_embeds, inputs_embeds.dtype)
2263
+ video_mask = input_ids == self.config.video_token_id
2264
+ inputs_embeds[video_mask] = video_embeds
2265
+ if attention_mask is not None:
2266
+ attention_mask = attention_mask
2267
+
2268
+ outputs = self.model(
2269
+ input_ids=None,
2270
+ position_ids=position_ids,
2271
+ attention_mask=attention_mask,
2272
+ past_key_values=past_key_values,
2273
+ inputs_embeds=inputs_embeds,
2274
+ use_cache=use_cache,
2275
+ output_attentions=output_attentions,
2276
+ output_hidden_states=output_hidden_states,
2277
+ return_dict=return_dict,
2278
+ )
2279
+
2280
+ hidden_states = outputs[0]
2281
+
2282
+ tensor_parallel_output = (
2283
+ self.config.tensor_parallel_output
2284
+ and self.config.tensor_parallel_degree > 1
2285
+ )
2286
+
2287
+ logits = self.lm_head(
2288
+ hidden_states, tensor_parallel_output=tensor_parallel_output
2289
+ )
2290
+
2291
+ logits = paddle.cast(logits, "float32")
2292
+
2293
+ loss = None
2294
+ if labels is not None:
2295
+ # Shift so that tokens < n predict n
2296
+ shift_logits = logits[..., :-1, :]
2297
+ shift_labels = labels[..., 1:]
2298
+ # Flatten the tokens
2299
+ shift_logits = shift_logits.reshape([-1, self.config.vocab_size])
2300
+ shift_labels = shift_labels.reshape([-1])
2301
+ if _IS_NPU:
2302
+ tmp = F.log_softmax(shift_logits, axis=1)
2303
+ loss = F.nll_loss(tmp, shift_labels, reduction="sum")
2304
+ else:
2305
+ loss_fct = nn.CrossEntropyLoss(reduction="sum")
2306
+ loss = loss_fct(shift_logits, shift_labels)
2307
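+ # Normalize the summed loss by the number of non-ignored (label != -100) tokens.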
+ label_sum = paddle.sum(shift_labels != -100).cast("float32")
2308
+ loss = loss / label_sum
2309
+
2310
+ if not return_dict:
2311
+ output = (logits,) + tuple(outputs[1:])
2312
+ return (loss,) + output if loss is not None else output
2313
+
2314
+ return Qwen2VLCausalLMOutputWithPast(
2315
+ loss=loss,
2316
+ logits=logits,
2317
+ past_key_values=outputs.past_key_values,
2318
+ hidden_states=outputs.hidden_states,
2319
+ attentions=outputs.attentions,
2320
+ rope_deltas=rope_deltas,
2321
+ )
2322
+
2323
+ def prepare_inputs_for_generation(
2324
+ self,
2325
+ input_ids,
2326
+ past_key_values=None,
2327
+ attention_mask=None,
2328
+ inputs_embeds=None,
2329
+ cache_position=None,
2330
+ position_ids=None,
2331
+ use_cache=True,
2332
+ pixel_values=None,
2333
+ pixel_values_videos=None,
2334
+ image_grid_thw=None,
2335
+ video_grid_thw=None,
2336
+ **kwargs,
2337
+ ):
2338
+
2339
+ batch_size, seq_length = input_ids.shape
2340
+ if past_key_values is None:
2341
+ cache_position = paddle.arange(input_ids.shape[1])
2342
+ else:
2343
+ cache_position = paddle.to_tensor([seq_length - 1])
2344
+
2345
+ if past_key_values is not None:
2346
+ input_ids = input_ids[:, -1].unsqueeze(-1)
2347
+
2348
+ rope_deltas = kwargs.get("rope_deltas", None)
2349
+
2350
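+ # First step: compute 3D mRoPE position ids; later steps shift a plain arange by cache_position[0] + rope_deltas.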
+ if attention_mask is not None and position_ids is None:
2351
+ if cache_position is None or (
2352
+ cache_position is not None and cache_position[0] == 0
2353
+ ):
2354
+ position_ids, rope_deltas = self.get_rope_index(
2355
+ self.config.vision_config.spatial_merge_size,
2356
+ self.config.image_token_id,
2357
+ self.config.video_token_id,
2358
+ self.config.vision_start_token_id,
2359
+ input_ids,
2360
+ image_grid_thw,
2361
+ video_grid_thw,
2362
+ attention_mask,
2363
+ )
2364
+ else:
2365
+ batch_size, seq_length = input_ids.shape
2366
+ delta = (
2367
+ cache_position[0] + rope_deltas
2368
+ if cache_position is not None and rope_deltas is not None
2369
+ else 0
2370
+ )
2371
+ position_ids = paddle.arange(seq_length)
2372
+ position_ids = position_ids.reshape([1, -1]).expand([batch_size, -1])
2373
+ position_ids = position_ids + delta
2374
+ position_ids = position_ids.unsqueeze(axis=0).expand([3, -1, -1])
2375
+
2376
+ if cache_position[0] != 0:
2377
+ pixel_values = None
2378
+ pixel_values_videos = None
2379
+
2380
+ # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
2381
+ if inputs_embeds is not None and cache_position[0] == 0:
2382
+ model_inputs = {"inputs_embeds": inputs_embeds}
2383
+ else:
2384
+ model_inputs = {"input_ids": input_ids}
2385
+
2386
+ model_inputs.update(
2387
+ {
2388
+ "position_ids": position_ids, # [3, 1, 3602]
2389
+ "past_key_values": past_key_values, # DynamicCache()
2390
+ "use_cache": use_cache, # 1
2391
+ "attention_mask": attention_mask, # [1, 3602]
2392
+ "pixel_values": pixel_values, # [14308, 1176]
2393
+ "pixel_values_videos": pixel_values_videos,
2394
+ "image_grid_thw": image_grid_thw, # [[ 1, 98, 146]]
2395
+ "video_grid_thw": video_grid_thw,
2396
+ "rope_deltas": rope_deltas, # [[-3504]]
2397
+ }
2398
+ )
2399
+ return model_inputs
2400
+
2401
+ def gme_qwen2_vl_forward(
2402
+ self,
2403
+ input_ids: paddle.Tensor = None,
2404
+ attention_mask: Optional[paddle.Tensor] = None,
2405
+ position_ids: Optional[paddle.Tensor] = None,
2406
+ past_key_values: Optional[List[paddle.Tensor]] = None,
2407
+ inputs_embeds: Optional[paddle.Tensor] = None,
2408
+ labels: Optional[paddle.Tensor] = None,
2409
+ use_cache: Optional[bool] = None,
2410
+ output_attentions: Optional[bool] = None,
2411
+ output_hidden_states: Optional[bool] = None,
2412
+ return_dict: Optional[bool] = None,
2413
+ pixel_values: Optional[paddle.Tensor] = None,
2414
+ pixel_values_videos: Optional[paddle.Tensor] = None,
2415
+ image_grid_thw: Optional[paddle.Tensor] = None,
2416
+ video_grid_thw: Optional[paddle.Tensor] = None,
2417
+ rope_deltas: Optional[paddle.Tensor] = None,
2418
+ ):
2419
+
2420
+ output_attentions = (
2421
+ output_attentions
2422
+ if output_attentions is not None
2423
+ else self.config.output_attentions
2424
+ )
2425
+ output_hidden_states = (
2426
+ output_hidden_states
2427
+ if output_hidden_states is not None
2428
+ else self.config.output_hidden_states
2429
+ )
2430
+ return_dict = True # return_dict if return_dict is not None else self.config.use_return_dict
2431
+
2432
+ if inputs_embeds is None:
2433
+ inputs_embeds = self.model.embed_tokens(input_ids)
2434
+ if pixel_values is not None:
2435
+ # Ensure pixel_values and inputs_embeds use the same dtype
2436
+ pixel_values = paddle.cast(pixel_values, inputs_embeds.dtype)
2437
+ image_embeds = self.visual(pixel_values, grid_thw=image_grid_thw)
2438
+ # Ensure image_embeds and inputs_embeds use the same dtype
2439
+ image_embeds = paddle.cast(image_embeds, inputs_embeds.dtype)
2440
+ image_mask = input_ids == self.config.image_token_id
2441
+ if self.training:
2442
+ inputs_embeds = inputs_embeds.clone()
2443
+
2444
+ inputs_embeds[image_mask] = image_embeds
2445
+
2446
+ if pixel_values_videos is not None:
2447
+ # Ensure pixel_values_videos and inputs_embeds use the same dtype
2448
+ pixel_values_videos = paddle.cast(
2449
+ pixel_values_videos, inputs_embeds.dtype
2450
+ )
2451
+ video_embeds = self.visual(pixel_values_videos, grid_thw=video_grid_thw)
2452
+ # Ensure video_embeds and inputs_embeds use the same dtype
2453
+ video_embeds = paddle.cast(video_embeds, inputs_embeds.dtype)
2454
+ video_mask = input_ids == self.config.video_token_id
2455
+ inputs_embeds[video_mask] = video_embeds
2456
+ if attention_mask is not None:
2457
+ attention_mask = attention_mask
2458
+
2459
+ outputs = self.model(
2460
+ input_ids=None,
2461
+ position_ids=position_ids,
2462
+ attention_mask=attention_mask,
2463
+ past_key_values=past_key_values,
2464
+ inputs_embeds=inputs_embeds,
2465
+ use_cache=use_cache,
2466
+ output_attentions=output_attentions,
2467
+ output_hidden_states=output_hidden_states,
2468
+ return_dict=return_dict,
2469
+ )
2470
+
2471
+ hidden_states = outputs[0]
2472
+ # get last hidden state
2473
+ last_hidden_state = hidden_states[:, -1, :]
2474
+ return last_hidden_state
2475
+
2476
+
2477
+ class PPDocBeeInference(Qwen2VLForConditionalGeneration):
2478
+ set_inference_operations(get_inference_operations() + ["docbee_generate"])
2479
+
2480
+ @benchmark.timeit_with_options(name="docbee_generate")
2481
+ def generate(self, inputs, **kwargs):
2482
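+ # Near-greedy decoding defaults (top_k=1, low temperature/top_p); all can be overridden via kwargs.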
+ max_new_tokens = kwargs.get("max_new_tokens", 2048)
2483
+ temperature = kwargs.get("temperature", 0.1)
2484
+ top_p = kwargs.get("top_p", 0.001)
2485
+ top_k = kwargs.get("top_k", 1)
2486
+ with paddle.no_grad():
2487
+ generated_ids = super().generate(
2488
+ **inputs,
2489
+ max_new_tokens=max_new_tokens,
2490
+ temperature=temperature,
2491
+ top_p=top_p,
2492
+ top_k=top_k,
2493
+ )
2494
+
2495
+ return generated_ids