paddlex 2.1.0__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1708) hide show
  1. paddlex/.version +1 -0
  2. paddlex/__init__.py +52 -19
  3. paddlex/__main__.py +39 -0
  4. paddlex/configs/modules/3d_bev_detection/BEVFusion.yaml +38 -0
  5. paddlex/configs/modules/doc_text_orientation/PP-LCNet_x1_0_doc_ori.yaml +41 -0
  6. paddlex/configs/modules/face_detection/BlazeFace-FPN-SSH.yaml +40 -0
  7. paddlex/configs/modules/face_detection/BlazeFace.yaml +40 -0
  8. paddlex/configs/modules/face_detection/PP-YOLOE_plus-S_face.yaml +40 -0
  9. paddlex/configs/modules/face_detection/PicoDet_LCNet_x2_5_face.yaml +40 -0
  10. paddlex/configs/modules/face_feature/MobileFaceNet.yaml +41 -0
  11. paddlex/configs/modules/face_feature/ResNet50_face.yaml +41 -0
  12. paddlex/configs/modules/formula_recognition/LaTeX_OCR_rec.yaml +40 -0
  13. paddlex/configs/modules/formula_recognition/PP-FormulaNet-L.yaml +40 -0
  14. paddlex/configs/modules/formula_recognition/PP-FormulaNet-S.yaml +40 -0
  15. paddlex/configs/modules/formula_recognition/UniMERNet.yaml +40 -0
  16. paddlex/configs/modules/human_detection/PP-YOLOE-L_human.yaml +42 -0
  17. paddlex/configs/modules/human_detection/PP-YOLOE-S_human.yaml +42 -0
  18. paddlex/configs/modules/image_anomaly_detection/STFPM.yaml +41 -0
  19. paddlex/configs/modules/image_classification/CLIP_vit_base_patch16_224.yaml +41 -0
  20. paddlex/configs/modules/image_classification/CLIP_vit_large_patch14_224.yaml +41 -0
  21. paddlex/configs/modules/image_classification/ConvNeXt_base_224.yaml +41 -0
  22. paddlex/configs/modules/image_classification/ConvNeXt_base_384.yaml +41 -0
  23. paddlex/configs/modules/image_classification/ConvNeXt_large_224.yaml +41 -0
  24. paddlex/configs/modules/image_classification/ConvNeXt_large_384.yaml +41 -0
  25. paddlex/configs/modules/image_classification/ConvNeXt_small.yaml +41 -0
  26. paddlex/configs/modules/image_classification/ConvNeXt_tiny.yaml +41 -0
  27. paddlex/configs/modules/image_classification/FasterNet-L.yaml +40 -0
  28. paddlex/configs/modules/image_classification/FasterNet-M.yaml +40 -0
  29. paddlex/configs/modules/image_classification/FasterNet-S.yaml +40 -0
  30. paddlex/configs/modules/image_classification/FasterNet-T0.yaml +40 -0
  31. paddlex/configs/modules/image_classification/FasterNet-T1.yaml +40 -0
  32. paddlex/configs/modules/image_classification/FasterNet-T2.yaml +40 -0
  33. paddlex/configs/modules/image_classification/MobileNetV1_x0_25.yaml +41 -0
  34. paddlex/configs/modules/image_classification/MobileNetV1_x0_5.yaml +41 -0
  35. paddlex/configs/modules/image_classification/MobileNetV1_x0_75.yaml +41 -0
  36. paddlex/configs/modules/image_classification/MobileNetV1_x1_0.yaml +41 -0
  37. paddlex/configs/modules/image_classification/MobileNetV2_x0_25.yaml +41 -0
  38. paddlex/configs/modules/image_classification/MobileNetV2_x0_5.yaml +41 -0
  39. paddlex/configs/modules/image_classification/MobileNetV2_x1_0.yaml +41 -0
  40. paddlex/configs/modules/image_classification/MobileNetV2_x1_5.yaml +41 -0
  41. paddlex/configs/modules/image_classification/MobileNetV2_x2_0.yaml +41 -0
  42. paddlex/configs/modules/image_classification/MobileNetV3_large_x0_35.yaml +41 -0
  43. paddlex/configs/modules/image_classification/MobileNetV3_large_x0_5.yaml +41 -0
  44. paddlex/configs/modules/image_classification/MobileNetV3_large_x0_75.yaml +41 -0
  45. paddlex/configs/modules/image_classification/MobileNetV3_large_x1_0.yaml +41 -0
  46. paddlex/configs/modules/image_classification/MobileNetV3_large_x1_25.yaml +41 -0
  47. paddlex/configs/modules/image_classification/MobileNetV3_small_x0_35.yaml +41 -0
  48. paddlex/configs/modules/image_classification/MobileNetV3_small_x0_5.yaml +41 -0
  49. paddlex/configs/modules/image_classification/MobileNetV3_small_x0_75.yaml +41 -0
  50. paddlex/configs/modules/image_classification/MobileNetV3_small_x1_0.yaml +41 -0
  51. paddlex/configs/modules/image_classification/MobileNetV3_small_x1_25.yaml +41 -0
  52. paddlex/configs/modules/image_classification/MobileNetV4_conv_large.yaml +41 -0
  53. paddlex/configs/modules/image_classification/MobileNetV4_conv_medium.yaml +41 -0
  54. paddlex/configs/modules/image_classification/MobileNetV4_conv_small.yaml +41 -0
  55. paddlex/configs/modules/image_classification/MobileNetV4_hybrid_large.yaml +41 -0
  56. paddlex/configs/modules/image_classification/MobileNetV4_hybrid_medium.yaml +41 -0
  57. paddlex/configs/modules/image_classification/PP-HGNetV2-B0.yaml +41 -0
  58. paddlex/configs/modules/image_classification/PP-HGNetV2-B1.yaml +41 -0
  59. paddlex/configs/modules/image_classification/PP-HGNetV2-B2.yaml +41 -0
  60. paddlex/configs/modules/image_classification/PP-HGNetV2-B3.yaml +41 -0
  61. paddlex/configs/modules/image_classification/PP-HGNetV2-B4.yaml +41 -0
  62. paddlex/configs/modules/image_classification/PP-HGNetV2-B5.yaml +41 -0
  63. paddlex/configs/modules/image_classification/PP-HGNetV2-B6.yaml +41 -0
  64. paddlex/configs/modules/image_classification/PP-HGNet_base.yaml +41 -0
  65. paddlex/configs/modules/image_classification/PP-HGNet_small.yaml +41 -0
  66. paddlex/configs/modules/image_classification/PP-HGNet_tiny.yaml +41 -0
  67. paddlex/configs/modules/image_classification/PP-LCNetV2_base.yaml +41 -0
  68. paddlex/configs/modules/image_classification/PP-LCNetV2_large.yaml +41 -0
  69. paddlex/configs/modules/image_classification/PP-LCNetV2_small.yaml +41 -0
  70. paddlex/configs/modules/image_classification/PP-LCNet_x0_25.yaml +41 -0
  71. paddlex/configs/modules/image_classification/PP-LCNet_x0_35.yaml +41 -0
  72. paddlex/configs/modules/image_classification/PP-LCNet_x0_5.yaml +41 -0
  73. paddlex/configs/modules/image_classification/PP-LCNet_x0_75.yaml +41 -0
  74. paddlex/configs/modules/image_classification/PP-LCNet_x1_0.yaml +41 -0
  75. paddlex/configs/modules/image_classification/PP-LCNet_x1_5.yaml +41 -0
  76. paddlex/configs/modules/image_classification/PP-LCNet_x2_0.yaml +41 -0
  77. paddlex/configs/modules/image_classification/PP-LCNet_x2_5.yaml +41 -0
  78. paddlex/configs/modules/image_classification/ResNet101.yaml +41 -0
  79. paddlex/configs/modules/image_classification/ResNet101_vd.yaml +41 -0
  80. paddlex/configs/modules/image_classification/ResNet152.yaml +41 -0
  81. paddlex/configs/modules/image_classification/ResNet152_vd.yaml +41 -0
  82. paddlex/configs/modules/image_classification/ResNet18.yaml +41 -0
  83. paddlex/configs/modules/image_classification/ResNet18_vd.yaml +41 -0
  84. paddlex/configs/modules/image_classification/ResNet200_vd.yaml +41 -0
  85. paddlex/configs/modules/image_classification/ResNet34.yaml +41 -0
  86. paddlex/configs/modules/image_classification/ResNet34_vd.yaml +41 -0
  87. paddlex/configs/modules/image_classification/ResNet50.yaml +41 -0
  88. paddlex/configs/modules/image_classification/ResNet50_vd.yaml +41 -0
  89. paddlex/configs/modules/image_classification/StarNet-S1.yaml +41 -0
  90. paddlex/configs/modules/image_classification/StarNet-S2.yaml +41 -0
  91. paddlex/configs/modules/image_classification/StarNet-S3.yaml +41 -0
  92. paddlex/configs/modules/image_classification/StarNet-S4.yaml +41 -0
  93. paddlex/configs/modules/image_classification/SwinTransformer_base_patch4_window12_384.yaml +41 -0
  94. paddlex/configs/modules/image_classification/SwinTransformer_base_patch4_window7_224.yaml +41 -0
  95. paddlex/configs/modules/image_classification/SwinTransformer_large_patch4_window12_384.yaml +41 -0
  96. paddlex/configs/modules/image_classification/SwinTransformer_large_patch4_window7_224.yaml +41 -0
  97. paddlex/configs/modules/image_classification/SwinTransformer_small_patch4_window7_224.yaml +41 -0
  98. paddlex/configs/modules/image_classification/SwinTransformer_tiny_patch4_window7_224.yaml +41 -0
  99. paddlex/configs/modules/image_feature/PP-ShiTuV2_rec.yaml +42 -0
  100. paddlex/configs/modules/image_feature/PP-ShiTuV2_rec_CLIP_vit_base.yaml +42 -0
  101. paddlex/configs/modules/image_feature/PP-ShiTuV2_rec_CLIP_vit_large.yaml +41 -0
  102. paddlex/configs/modules/image_multilabel_classification/CLIP_vit_base_patch16_448_ML.yaml +41 -0
  103. paddlex/configs/modules/image_multilabel_classification/PP-HGNetV2-B0_ML.yaml +41 -0
  104. paddlex/configs/modules/image_multilabel_classification/PP-HGNetV2-B4_ML.yaml +41 -0
  105. paddlex/configs/modules/image_multilabel_classification/PP-HGNetV2-B6_ML.yaml +41 -0
  106. paddlex/configs/modules/image_multilabel_classification/PP-LCNet_x1_0_ML.yaml +41 -0
  107. paddlex/configs/modules/image_multilabel_classification/ResNet50_ML.yaml +41 -0
  108. paddlex/configs/modules/image_unwarping/UVDoc.yaml +12 -0
  109. paddlex/configs/modules/instance_segmentation/Cascade-MaskRCNN-ResNet50-FPN.yaml +40 -0
  110. paddlex/configs/modules/instance_segmentation/Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN.yaml +40 -0
  111. paddlex/configs/modules/instance_segmentation/Mask-RT-DETR-H.yaml +40 -0
  112. paddlex/configs/modules/instance_segmentation/Mask-RT-DETR-L.yaml +40 -0
  113. paddlex/configs/modules/instance_segmentation/Mask-RT-DETR-M.yaml +40 -0
  114. paddlex/configs/modules/instance_segmentation/Mask-RT-DETR-S.yaml +40 -0
  115. paddlex/configs/modules/instance_segmentation/Mask-RT-DETR-X.yaml +40 -0
  116. paddlex/configs/modules/instance_segmentation/MaskRCNN-ResNeXt101-vd-FPN.yaml +39 -0
  117. paddlex/configs/modules/instance_segmentation/MaskRCNN-ResNet101-FPN.yaml +40 -0
  118. paddlex/configs/modules/instance_segmentation/MaskRCNN-ResNet101-vd-FPN.yaml +40 -0
  119. paddlex/configs/modules/instance_segmentation/MaskRCNN-ResNet50-FPN.yaml +40 -0
  120. paddlex/configs/modules/instance_segmentation/MaskRCNN-ResNet50-vd-FPN.yaml +40 -0
  121. paddlex/configs/modules/instance_segmentation/MaskRCNN-ResNet50.yaml +40 -0
  122. paddlex/configs/modules/instance_segmentation/PP-YOLOE_seg-S.yaml +40 -0
  123. paddlex/configs/modules/instance_segmentation/SOLOv2.yaml +40 -0
  124. paddlex/configs/modules/keypoint_detection/PP-TinyPose_128x96.yaml +40 -0
  125. paddlex/configs/modules/keypoint_detection/PP-TinyPose_256x192.yaml +40 -0
  126. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +40 -0
  127. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +40 -0
  128. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +40 -0
  129. paddlex/configs/modules/layout_detection/PicoDet-L_layout_17cls.yaml +40 -0
  130. paddlex/configs/modules/layout_detection/PicoDet-L_layout_3cls.yaml +40 -0
  131. paddlex/configs/modules/layout_detection/PicoDet-S_layout_17cls.yaml +40 -0
  132. paddlex/configs/modules/layout_detection/PicoDet-S_layout_3cls.yaml +40 -0
  133. paddlex/configs/modules/layout_detection/PicoDet_layout_1x.yaml +40 -0
  134. paddlex/configs/modules/layout_detection/PicoDet_layout_1x_table.yaml +40 -0
  135. paddlex/configs/modules/layout_detection/RT-DETR-H_layout_17cls.yaml +40 -0
  136. paddlex/configs/modules/layout_detection/RT-DETR-H_layout_3cls.yaml +40 -0
  137. paddlex/configs/modules/mainbody_detection/PP-ShiTuV2_det.yaml +41 -0
  138. paddlex/configs/modules/multilingual_speech_recognition/whisper_base.yaml +12 -0
  139. paddlex/configs/modules/multilingual_speech_recognition/whisper_large.yaml +12 -0
  140. paddlex/configs/modules/multilingual_speech_recognition/whisper_medium.yaml +12 -0
  141. paddlex/configs/modules/multilingual_speech_recognition/whisper_small.yaml +12 -0
  142. paddlex/configs/modules/multilingual_speech_recognition/whisper_tiny.yaml +12 -0
  143. paddlex/configs/modules/object_detection/Cascade-FasterRCNN-ResNet50-FPN.yaml +41 -0
  144. paddlex/configs/modules/object_detection/Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN.yaml +42 -0
  145. paddlex/configs/modules/object_detection/CenterNet-DLA-34.yaml +41 -0
  146. paddlex/configs/modules/object_detection/CenterNet-ResNet50.yaml +41 -0
  147. paddlex/configs/modules/object_detection/Co-DINO-R50.yaml +40 -0
  148. paddlex/configs/modules/object_detection/Co-DINO-Swin-L.yaml +40 -0
  149. paddlex/configs/modules/object_detection/Co-Deformable-DETR-R50.yaml +40 -0
  150. paddlex/configs/modules/object_detection/Co-Deformable-DETR-Swin-T.yaml +40 -0
  151. paddlex/configs/modules/object_detection/DETR-R50.yaml +42 -0
  152. paddlex/configs/modules/object_detection/FCOS-ResNet50.yaml +41 -0
  153. paddlex/configs/modules/object_detection/FasterRCNN-ResNeXt101-vd-FPN.yaml +42 -0
  154. paddlex/configs/modules/object_detection/FasterRCNN-ResNet101-FPN.yaml +42 -0
  155. paddlex/configs/modules/object_detection/FasterRCNN-ResNet101.yaml +42 -0
  156. paddlex/configs/modules/object_detection/FasterRCNN-ResNet34-FPN.yaml +42 -0
  157. paddlex/configs/modules/object_detection/FasterRCNN-ResNet50-FPN.yaml +42 -0
  158. paddlex/configs/modules/object_detection/FasterRCNN-ResNet50-vd-FPN.yaml +42 -0
  159. paddlex/configs/modules/object_detection/FasterRCNN-ResNet50-vd-SSLDv2-FPN.yaml +42 -0
  160. paddlex/configs/modules/object_detection/FasterRCNN-ResNet50.yaml +42 -0
  161. paddlex/configs/modules/object_detection/FasterRCNN-Swin-Tiny-FPN.yaml +42 -0
  162. paddlex/configs/modules/object_detection/PP-YOLOE_plus-L.yaml +40 -0
  163. paddlex/configs/modules/object_detection/PP-YOLOE_plus-M.yaml +40 -0
  164. paddlex/configs/modules/object_detection/PP-YOLOE_plus-S.yaml +40 -0
  165. paddlex/configs/modules/object_detection/PP-YOLOE_plus-X.yaml +40 -0
  166. paddlex/configs/modules/object_detection/PicoDet-L.yaml +40 -0
  167. paddlex/configs/modules/object_detection/PicoDet-M.yaml +42 -0
  168. paddlex/configs/modules/object_detection/PicoDet-S.yaml +40 -0
  169. paddlex/configs/modules/object_detection/PicoDet-XS.yaml +42 -0
  170. paddlex/configs/modules/object_detection/RT-DETR-H.yaml +40 -0
  171. paddlex/configs/modules/object_detection/RT-DETR-L.yaml +40 -0
  172. paddlex/configs/modules/object_detection/RT-DETR-R18.yaml +40 -0
  173. paddlex/configs/modules/object_detection/RT-DETR-R50.yaml +40 -0
  174. paddlex/configs/modules/object_detection/RT-DETR-X.yaml +40 -0
  175. paddlex/configs/modules/object_detection/YOLOX-L.yaml +40 -0
  176. paddlex/configs/modules/object_detection/YOLOX-M.yaml +40 -0
  177. paddlex/configs/modules/object_detection/YOLOX-N.yaml +40 -0
  178. paddlex/configs/modules/object_detection/YOLOX-S.yaml +40 -0
  179. paddlex/configs/modules/object_detection/YOLOX-T.yaml +40 -0
  180. paddlex/configs/modules/object_detection/YOLOX-X.yaml +40 -0
  181. paddlex/configs/modules/object_detection/YOLOv3-DarkNet53.yaml +40 -0
  182. paddlex/configs/modules/object_detection/YOLOv3-MobileNetV3.yaml +40 -0
  183. paddlex/configs/modules/object_detection/YOLOv3-ResNet50_vd_DCN.yaml +40 -0
  184. paddlex/configs/modules/open_vocabulary_detection/GroundingDINO-T.yaml +13 -0
  185. paddlex/configs/modules/open_vocabulary_segmentation/SAM-H_box.yaml +17 -0
  186. paddlex/configs/modules/open_vocabulary_segmentation/SAM-H_point.yaml +15 -0
  187. paddlex/configs/modules/pedestrian_attribute_recognition/PP-LCNet_x1_0_pedestrian_attribute.yaml +41 -0
  188. paddlex/configs/modules/rotated_object_detection/PP-YOLOE-R-L.yaml +40 -0
  189. paddlex/configs/modules/seal_text_detection/PP-OCRv4_mobile_seal_det.yaml +40 -0
  190. paddlex/configs/modules/seal_text_detection/PP-OCRv4_server_seal_det.yaml +40 -0
  191. paddlex/configs/modules/semantic_segmentation/Deeplabv3-R101.yaml +40 -0
  192. paddlex/configs/modules/semantic_segmentation/Deeplabv3-R50.yaml +40 -0
  193. paddlex/configs/modules/semantic_segmentation/Deeplabv3_Plus-R101.yaml +40 -0
  194. paddlex/configs/modules/semantic_segmentation/Deeplabv3_Plus-R50.yaml +40 -0
  195. paddlex/configs/modules/semantic_segmentation/MaskFormer_small.yaml +42 -0
  196. paddlex/configs/modules/semantic_segmentation/MaskFormer_tiny.yaml +42 -0
  197. paddlex/configs/modules/semantic_segmentation/OCRNet_HRNet-W18.yaml +40 -0
  198. paddlex/configs/modules/semantic_segmentation/OCRNet_HRNet-W48.yaml +40 -0
  199. paddlex/configs/modules/semantic_segmentation/PP-LiteSeg-B.yaml +41 -0
  200. paddlex/configs/modules/semantic_segmentation/PP-LiteSeg-T.yaml +40 -0
  201. paddlex/configs/modules/semantic_segmentation/SeaFormer_base.yaml +40 -0
  202. paddlex/configs/modules/semantic_segmentation/SeaFormer_large.yaml +40 -0
  203. paddlex/configs/modules/semantic_segmentation/SeaFormer_small.yaml +40 -0
  204. paddlex/configs/modules/semantic_segmentation/SeaFormer_tiny.yaml +40 -0
  205. paddlex/configs/modules/semantic_segmentation/SegFormer-B0.yaml +40 -0
  206. paddlex/configs/modules/semantic_segmentation/SegFormer-B1.yaml +40 -0
  207. paddlex/configs/modules/semantic_segmentation/SegFormer-B2.yaml +40 -0
  208. paddlex/configs/modules/semantic_segmentation/SegFormer-B3.yaml +40 -0
  209. paddlex/configs/modules/semantic_segmentation/SegFormer-B4.yaml +40 -0
  210. paddlex/configs/modules/semantic_segmentation/SegFormer-B5.yaml +40 -0
  211. paddlex/configs/modules/small_object_detection/PP-YOLOE_plus_SOD-L.yaml +42 -0
  212. paddlex/configs/modules/small_object_detection/PP-YOLOE_plus_SOD-S.yaml +42 -0
  213. paddlex/configs/modules/small_object_detection/PP-YOLOE_plus_SOD-largesize-L.yaml +42 -0
  214. paddlex/configs/modules/table_cells_detection/RT-DETR-L_wired_table_cell_det.yaml +40 -0
  215. paddlex/configs/modules/table_cells_detection/RT-DETR-L_wireless_table_cell_det.yaml +40 -0
  216. paddlex/configs/modules/table_classification/PP-LCNet_x1_0_table_cls.yaml +41 -0
  217. paddlex/configs/modules/table_structure_recognition/SLANeXt_wired.yaml +39 -0
  218. paddlex/configs/modules/table_structure_recognition/SLANeXt_wireless.yaml +39 -0
  219. paddlex/configs/modules/table_structure_recognition/SLANet.yaml +39 -0
  220. paddlex/configs/modules/table_structure_recognition/SLANet_plus.yaml +39 -0
  221. paddlex/configs/modules/text_detection/PP-OCRv3_mobile_det.yaml +40 -0
  222. paddlex/configs/modules/text_detection/PP-OCRv3_server_det.yaml +40 -0
  223. paddlex/configs/modules/text_detection/PP-OCRv4_mobile_det.yaml +40 -0
  224. paddlex/configs/modules/text_detection/PP-OCRv4_server_det.yaml +40 -0
  225. paddlex/configs/modules/text_recognition/PP-OCRv3_mobile_rec.yaml +39 -0
  226. paddlex/configs/modules/text_recognition/PP-OCRv4_mobile_rec.yaml +39 -0
  227. paddlex/configs/modules/text_recognition/PP-OCRv4_server_rec.yaml +39 -0
  228. paddlex/configs/modules/text_recognition/PP-OCRv4_server_rec_doc.yaml +39 -0
  229. paddlex/configs/modules/text_recognition/arabic_PP-OCRv3_mobile_rec.yaml +39 -0
  230. paddlex/configs/modules/text_recognition/ch_RepSVTR_rec.yaml +39 -0
  231. paddlex/configs/modules/text_recognition/ch_SVTRv2_rec.yaml +39 -0
  232. paddlex/configs/modules/text_recognition/chinese_cht_PP-OCRv3_mobile_rec.yaml +39 -0
  233. paddlex/configs/modules/text_recognition/cyrillic_PP-OCRv3_mobile_rec.yaml +39 -0
  234. paddlex/configs/modules/text_recognition/devanagari_PP-OCRv3_mobile_rec.yaml +39 -0
  235. paddlex/configs/modules/text_recognition/en_PP-OCRv3_mobile_rec.yaml +39 -0
  236. paddlex/configs/modules/text_recognition/en_PP-OCRv4_mobile_rec.yaml +39 -0
  237. paddlex/configs/modules/text_recognition/japan_PP-OCRv3_mobile_rec.yaml +39 -0
  238. paddlex/configs/modules/text_recognition/ka_PP-OCRv3_mobile_rec.yaml +39 -0
  239. paddlex/configs/modules/text_recognition/korean_PP-OCRv3_mobile_rec.yaml +39 -0
  240. paddlex/configs/modules/text_recognition/latin_PP-OCRv3_mobile_rec.yaml +39 -0
  241. paddlex/configs/modules/text_recognition/ta_PP-OCRv3_mobile_rec.yaml +39 -0
  242. paddlex/configs/modules/text_recognition/te_PP-OCRv3_mobile_rec.yaml +39 -0
  243. paddlex/configs/modules/textline_orientation/PP-LCNet_x0_25_textline_ori.yaml +41 -0
  244. paddlex/configs/modules/ts_anomaly_detection/AutoEncoder_ad.yaml +37 -0
  245. paddlex/configs/modules/ts_anomaly_detection/DLinear_ad.yaml +37 -0
  246. paddlex/configs/modules/ts_anomaly_detection/Nonstationary_ad.yaml +37 -0
  247. paddlex/configs/modules/ts_anomaly_detection/PatchTST_ad.yaml +37 -0
  248. paddlex/configs/modules/ts_anomaly_detection/TimesNet_ad.yaml +37 -0
  249. paddlex/configs/modules/ts_classification/TimesNet_cls.yaml +37 -0
  250. paddlex/configs/modules/ts_forecast/DLinear.yaml +38 -0
  251. paddlex/configs/modules/ts_forecast/NLinear.yaml +38 -0
  252. paddlex/configs/modules/ts_forecast/Nonstationary.yaml +38 -0
  253. paddlex/configs/modules/ts_forecast/PatchTST.yaml +38 -0
  254. paddlex/configs/modules/ts_forecast/RLinear.yaml +38 -0
  255. paddlex/configs/modules/ts_forecast/TiDE.yaml +38 -0
  256. paddlex/configs/modules/ts_forecast/TimesNet.yaml +38 -0
  257. paddlex/configs/modules/vehicle_attribute_recognition/PP-LCNet_x1_0_vehicle_attribute.yaml +41 -0
  258. paddlex/configs/modules/vehicle_detection/PP-YOLOE-L_vehicle.yaml +41 -0
  259. paddlex/configs/modules/vehicle_detection/PP-YOLOE-S_vehicle.yaml +42 -0
  260. paddlex/configs/modules/video_classification/PP-TSM-R50_8frames_uniform.yaml +42 -0
  261. paddlex/configs/modules/video_classification/PP-TSMv2-LCNetV2_16frames_uniform.yaml +42 -0
  262. paddlex/configs/modules/video_classification/PP-TSMv2-LCNetV2_8frames_uniform.yaml +42 -0
  263. paddlex/configs/modules/video_detection/YOWO.yaml +40 -0
  264. paddlex/configs/pipelines/3d_bev_detection.yaml +9 -0
  265. paddlex/configs/pipelines/OCR.yaml +44 -0
  266. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +149 -0
  267. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +184 -0
  268. paddlex/configs/pipelines/PP-ShiTuV2.yaml +18 -0
  269. paddlex/configs/pipelines/PP-StructureV3.yaml +226 -0
  270. paddlex/configs/pipelines/anomaly_detection.yaml +8 -0
  271. paddlex/configs/pipelines/doc_preprocessor.yaml +15 -0
  272. paddlex/configs/pipelines/face_recognition.yaml +18 -0
  273. paddlex/configs/pipelines/formula_recognition.yaml +39 -0
  274. paddlex/configs/pipelines/human_keypoint_detection.yaml +17 -0
  275. paddlex/configs/pipelines/image_classification.yaml +10 -0
  276. paddlex/configs/pipelines/image_multilabel_classification.yaml +9 -0
  277. paddlex/configs/pipelines/instance_segmentation.yaml +10 -0
  278. paddlex/configs/pipelines/layout_parsing.yaml +101 -0
  279. paddlex/configs/pipelines/multilingual_speech_recognition.yaml +9 -0
  280. paddlex/configs/pipelines/object_detection.yaml +10 -0
  281. paddlex/configs/pipelines/open_vocabulary_detection.yaml +12 -0
  282. paddlex/configs/pipelines/open_vocabulary_segmentation.yaml +13 -0
  283. paddlex/configs/pipelines/pedestrian_attribute_recognition.yaml +15 -0
  284. paddlex/configs/pipelines/rotated_object_detection.yaml +10 -0
  285. paddlex/configs/pipelines/seal_recognition.yaml +51 -0
  286. paddlex/configs/pipelines/semantic_segmentation.yaml +10 -0
  287. paddlex/configs/pipelines/small_object_detection.yaml +10 -0
  288. paddlex/configs/pipelines/table_recognition.yaml +56 -0
  289. paddlex/configs/pipelines/table_recognition_v2.yaml +76 -0
  290. paddlex/configs/pipelines/ts_anomaly_detection.yaml +8 -0
  291. paddlex/configs/pipelines/ts_classification.yaml +8 -0
  292. paddlex/configs/pipelines/ts_forecast.yaml +8 -0
  293. paddlex/configs/pipelines/vehicle_attribute_recognition.yaml +15 -0
  294. paddlex/configs/pipelines/video_classification.yaml +9 -0
  295. paddlex/configs/pipelines/video_detection.yaml +10 -0
  296. paddlex/engine.py +54 -0
  297. paddlex/hpip_links.html +19 -0
  298. paddlex/inference/__init__.py +19 -0
  299. paddlex/inference/common/__init__.py +13 -0
  300. paddlex/inference/common/batch_sampler/__init__.py +20 -0
  301. paddlex/inference/common/batch_sampler/audio_batch_sampler.py +84 -0
  302. paddlex/inference/common/batch_sampler/base_batch_sampler.py +90 -0
  303. paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +147 -0
  304. paddlex/inference/common/batch_sampler/image_batch_sampler.py +136 -0
  305. paddlex/inference/common/batch_sampler/ts_batch_sampler.py +110 -0
  306. paddlex/inference/common/batch_sampler/video_batch_sampler.py +94 -0
  307. paddlex/inference/common/reader/__init__.py +19 -0
  308. paddlex/inference/common/reader/audio_reader.py +46 -0
  309. paddlex/inference/common/reader/det_3d_reader.py +239 -0
  310. paddlex/inference/common/reader/image_reader.py +69 -0
  311. paddlex/inference/common/reader/ts_reader.py +45 -0
  312. paddlex/inference/common/reader/video_reader.py +42 -0
  313. paddlex/inference/common/result/__init__.py +29 -0
  314. paddlex/inference/common/result/base_cv_result.py +31 -0
  315. paddlex/inference/common/result/base_result.py +70 -0
  316. paddlex/inference/common/result/base_ts_result.py +42 -0
  317. paddlex/inference/common/result/base_video_result.py +36 -0
  318. paddlex/inference/common/result/mixin.py +703 -0
  319. paddlex/inference/models/3d_bev_detection/__init__.py +15 -0
  320. paddlex/inference/models/3d_bev_detection/predictor.py +314 -0
  321. paddlex/inference/models/3d_bev_detection/processors.py +978 -0
  322. paddlex/inference/models/3d_bev_detection/result.py +65 -0
  323. paddlex/inference/models/3d_bev_detection/visualizer_3d.py +131 -0
  324. paddlex/inference/models/__init__.py +130 -0
  325. paddlex/inference/models/anomaly_detection/__init__.py +15 -0
  326. paddlex/inference/models/anomaly_detection/predictor.py +145 -0
  327. paddlex/inference/models/anomaly_detection/processors.py +46 -0
  328. paddlex/inference/models/anomaly_detection/result.py +70 -0
  329. paddlex/inference/models/base/__init__.py +15 -0
  330. paddlex/inference/models/base/predictor/__init__.py +16 -0
  331. paddlex/inference/models/base/predictor/base_predictor.py +175 -0
  332. paddlex/inference/models/base/predictor/basic_predictor.py +139 -0
  333. paddlex/inference/models/common/__init__.py +35 -0
  334. paddlex/inference/models/common/static_infer.py +329 -0
  335. paddlex/inference/models/common/tokenizer/__init__.py +17 -0
  336. paddlex/inference/models/common/tokenizer/bert_tokenizer.py +655 -0
  337. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +451 -0
  338. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +2141 -0
  339. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3504 -0
  340. paddlex/inference/models/common/tokenizer/utils.py +66 -0
  341. paddlex/inference/models/common/tokenizer/vocab.py +647 -0
  342. paddlex/inference/models/common/ts/__init__.py +15 -0
  343. paddlex/inference/models/common/ts/funcs.py +533 -0
  344. paddlex/inference/models/common/ts/processors.py +313 -0
  345. paddlex/inference/models/common/vision/__init__.py +23 -0
  346. paddlex/inference/models/common/vision/funcs.py +93 -0
  347. paddlex/inference/models/common/vision/processors.py +270 -0
  348. paddlex/inference/models/face_feature/__init__.py +15 -0
  349. paddlex/inference/models/face_feature/predictor.py +65 -0
  350. paddlex/inference/models/formula_recognition/__init__.py +15 -0
  351. paddlex/inference/models/formula_recognition/predictor.py +203 -0
  352. paddlex/inference/models/formula_recognition/processors.py +986 -0
  353. paddlex/inference/models/formula_recognition/result.py +403 -0
  354. paddlex/inference/models/image_classification/__init__.py +15 -0
  355. paddlex/inference/models/image_classification/predictor.py +182 -0
  356. paddlex/inference/models/image_classification/processors.py +87 -0
  357. paddlex/inference/models/image_classification/result.py +92 -0
  358. paddlex/inference/models/image_feature/__init__.py +15 -0
  359. paddlex/inference/models/image_feature/predictor.py +156 -0
  360. paddlex/inference/models/image_feature/processors.py +29 -0
  361. paddlex/inference/models/image_feature/result.py +33 -0
  362. paddlex/inference/models/image_multilabel_classification/__init__.py +15 -0
  363. paddlex/inference/models/image_multilabel_classification/predictor.py +94 -0
  364. paddlex/inference/models/image_multilabel_classification/processors.py +85 -0
  365. paddlex/inference/models/image_multilabel_classification/result.py +95 -0
  366. paddlex/inference/models/image_unwarping/__init__.py +15 -0
  367. paddlex/inference/models/image_unwarping/predictor.py +105 -0
  368. paddlex/inference/models/image_unwarping/processors.py +88 -0
  369. paddlex/inference/models/image_unwarping/result.py +45 -0
  370. paddlex/inference/models/instance_segmentation/__init__.py +15 -0
  371. paddlex/inference/models/instance_segmentation/predictor.py +210 -0
  372. paddlex/inference/models/instance_segmentation/processors.py +105 -0
  373. paddlex/inference/models/instance_segmentation/result.py +161 -0
  374. paddlex/inference/models/keypoint_detection/__init__.py +15 -0
  375. paddlex/inference/models/keypoint_detection/predictor.py +188 -0
  376. paddlex/inference/models/keypoint_detection/processors.py +359 -0
  377. paddlex/inference/models/keypoint_detection/result.py +192 -0
  378. paddlex/inference/models/multilingual_speech_recognition/__init__.py +15 -0
  379. paddlex/inference/models/multilingual_speech_recognition/predictor.py +141 -0
  380. paddlex/inference/models/multilingual_speech_recognition/processors.py +1941 -0
  381. paddlex/inference/models/multilingual_speech_recognition/result.py +21 -0
  382. paddlex/inference/models/object_detection/__init__.py +15 -0
  383. paddlex/inference/models/object_detection/predictor.py +348 -0
  384. paddlex/inference/models/object_detection/processors.py +855 -0
  385. paddlex/inference/models/object_detection/result.py +113 -0
  386. paddlex/inference/models/object_detection/utils.py +68 -0
  387. paddlex/inference/models/open_vocabulary_detection/__init__.py +15 -0
  388. paddlex/inference/models/open_vocabulary_detection/predictor.py +155 -0
  389. paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +15 -0
  390. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +485 -0
  391. paddlex/inference/models/open_vocabulary_segmentation/__init__.py +15 -0
  392. paddlex/inference/models/open_vocabulary_segmentation/predictor.py +120 -0
  393. paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +15 -0
  394. paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +249 -0
  395. paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +15 -0
  396. paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +147 -0
  397. paddlex/inference/models/semantic_segmentation/__init__.py +15 -0
  398. paddlex/inference/models/semantic_segmentation/predictor.py +167 -0
  399. paddlex/inference/models/semantic_segmentation/processors.py +114 -0
  400. paddlex/inference/models/semantic_segmentation/result.py +72 -0
  401. paddlex/inference/models/table_structure_recognition/__init__.py +15 -0
  402. paddlex/inference/models/table_structure_recognition/predictor.py +171 -0
  403. paddlex/inference/models/table_structure_recognition/processors.py +235 -0
  404. paddlex/inference/models/table_structure_recognition/result.py +70 -0
  405. paddlex/inference/models/text_detection/__init__.py +15 -0
  406. paddlex/inference/models/text_detection/predictor.py +191 -0
  407. paddlex/inference/models/text_detection/processors.py +466 -0
  408. paddlex/inference/models/text_detection/result.py +51 -0
  409. paddlex/inference/models/text_recognition/__init__.py +15 -0
  410. paddlex/inference/models/text_recognition/predictor.py +106 -0
  411. paddlex/inference/models/text_recognition/processors.py +231 -0
  412. paddlex/inference/models/text_recognition/result.py +75 -0
  413. paddlex/inference/models/ts_anomaly_detection/__init__.py +15 -0
  414. paddlex/inference/models/ts_anomaly_detection/predictor.py +146 -0
  415. paddlex/inference/models/ts_anomaly_detection/processors.py +94 -0
  416. paddlex/inference/models/ts_anomaly_detection/result.py +72 -0
  417. paddlex/inference/models/ts_classification/__init__.py +15 -0
  418. paddlex/inference/models/ts_classification/predictor.py +135 -0
  419. paddlex/inference/models/ts_classification/processors.py +117 -0
  420. paddlex/inference/models/ts_classification/result.py +78 -0
  421. paddlex/inference/models/ts_forecasting/__init__.py +15 -0
  422. paddlex/inference/models/ts_forecasting/predictor.py +159 -0
  423. paddlex/inference/models/ts_forecasting/processors.py +149 -0
  424. paddlex/inference/models/ts_forecasting/result.py +83 -0
  425. paddlex/inference/models/video_classification/__init__.py +15 -0
  426. paddlex/inference/models/video_classification/predictor.py +147 -0
  427. paddlex/inference/models/video_classification/processors.py +409 -0
  428. paddlex/inference/models/video_classification/result.py +92 -0
  429. paddlex/inference/models/video_detection/__init__.py +15 -0
  430. paddlex/inference/models/video_detection/predictor.py +136 -0
  431. paddlex/inference/models/video_detection/processors.py +450 -0
  432. paddlex/inference/models/video_detection/result.py +104 -0
  433. paddlex/inference/pipelines/3d_bev_detection/__init__.py +15 -0
  434. paddlex/inference/pipelines/3d_bev_detection/pipeline.py +67 -0
  435. paddlex/inference/pipelines/__init__.py +228 -0
  436. paddlex/inference/pipelines/anomaly_detection/__init__.py +15 -0
  437. paddlex/inference/pipelines/anomaly_detection/pipeline.py +62 -0
  438. paddlex/inference/pipelines/attribute_recognition/__init__.py +15 -0
  439. paddlex/inference/pipelines/attribute_recognition/pipeline.py +105 -0
  440. paddlex/inference/pipelines/attribute_recognition/result.py +100 -0
  441. paddlex/inference/pipelines/base.py +132 -0
  442. paddlex/inference/pipelines/components/__init__.py +23 -0
  443. paddlex/inference/pipelines/components/chat_server/__init__.py +16 -0
  444. paddlex/inference/pipelines/components/chat_server/base.py +39 -0
  445. paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +236 -0
  446. paddlex/inference/pipelines/components/common/__init__.py +18 -0
  447. paddlex/inference/pipelines/components/common/base_operator.py +36 -0
  448. paddlex/inference/pipelines/components/common/base_result.py +65 -0
  449. paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +46 -0
  450. paddlex/inference/pipelines/components/common/crop_image_regions.py +550 -0
  451. paddlex/inference/pipelines/components/common/seal_det_warp.py +941 -0
  452. paddlex/inference/pipelines/components/common/sort_boxes.py +83 -0
  453. paddlex/inference/pipelines/components/faisser.py +352 -0
  454. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +16 -0
  455. paddlex/inference/pipelines/components/prompt_engineering/base.py +35 -0
  456. paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +127 -0
  457. paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +148 -0
  458. paddlex/inference/pipelines/components/retriever/__init__.py +16 -0
  459. paddlex/inference/pipelines/components/retriever/base.py +226 -0
  460. paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +70 -0
  461. paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +163 -0
  462. paddlex/inference/pipelines/components/utils/__init__.py +13 -0
  463. paddlex/inference/pipelines/components/utils/mixin.py +206 -0
  464. paddlex/inference/pipelines/doc_preprocessor/__init__.py +15 -0
  465. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +190 -0
  466. paddlex/inference/pipelines/doc_preprocessor/result.py +103 -0
  467. paddlex/inference/pipelines/face_recognition/__init__.py +15 -0
  468. paddlex/inference/pipelines/face_recognition/pipeline.py +61 -0
  469. paddlex/inference/pipelines/face_recognition/result.py +43 -0
  470. paddlex/inference/pipelines/formula_recognition/__init__.py +15 -0
  471. paddlex/inference/pipelines/formula_recognition/pipeline.py +303 -0
  472. paddlex/inference/pipelines/formula_recognition/result.py +291 -0
  473. paddlex/inference/pipelines/image_classification/__init__.py +15 -0
  474. paddlex/inference/pipelines/image_classification/pipeline.py +71 -0
  475. paddlex/inference/pipelines/image_multilabel_classification/__init__.py +15 -0
  476. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +78 -0
  477. paddlex/inference/pipelines/instance_segmentation/__init__.py +15 -0
  478. paddlex/inference/pipelines/instance_segmentation/pipeline.py +70 -0
  479. paddlex/inference/pipelines/keypoint_detection/__init__.py +15 -0
  480. paddlex/inference/pipelines/keypoint_detection/pipeline.py +137 -0
  481. paddlex/inference/pipelines/layout_parsing/__init__.py +16 -0
  482. paddlex/inference/pipelines/layout_parsing/pipeline.py +570 -0
  483. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +739 -0
  484. paddlex/inference/pipelines/layout_parsing/result.py +203 -0
  485. paddlex/inference/pipelines/layout_parsing/result_v2.py +470 -0
  486. paddlex/inference/pipelines/layout_parsing/utils.py +2385 -0
  487. paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +15 -0
  488. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +67 -0
  489. paddlex/inference/pipelines/object_detection/__init__.py +15 -0
  490. paddlex/inference/pipelines/object_detection/pipeline.py +95 -0
  491. paddlex/inference/pipelines/ocr/__init__.py +15 -0
  492. paddlex/inference/pipelines/ocr/pipeline.py +389 -0
  493. paddlex/inference/pipelines/ocr/result.py +248 -0
  494. paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +15 -0
  495. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +75 -0
  496. paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +15 -0
  497. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +89 -0
  498. paddlex/inference/pipelines/pp_chatocr/__init__.py +16 -0
  499. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +102 -0
  500. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +773 -0
  501. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +977 -0
  502. paddlex/inference/pipelines/pp_shitu_v2/__init__.py +15 -0
  503. paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +152 -0
  504. paddlex/inference/pipelines/pp_shitu_v2/result.py +126 -0
  505. paddlex/inference/pipelines/rotated_object_detection/__init__.py +15 -0
  506. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +74 -0
  507. paddlex/inference/pipelines/seal_recognition/__init__.py +15 -0
  508. paddlex/inference/pipelines/seal_recognition/pipeline.py +271 -0
  509. paddlex/inference/pipelines/seal_recognition/result.py +87 -0
  510. paddlex/inference/pipelines/semantic_segmentation/__init__.py +15 -0
  511. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +74 -0
  512. paddlex/inference/pipelines/small_object_detection/__init__.py +15 -0
  513. paddlex/inference/pipelines/small_object_detection/pipeline.py +74 -0
  514. paddlex/inference/pipelines/table_recognition/__init__.py +16 -0
  515. paddlex/inference/pipelines/table_recognition/pipeline.py +462 -0
  516. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +792 -0
  517. paddlex/inference/pipelines/table_recognition/result.py +216 -0
  518. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +362 -0
  519. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +470 -0
  520. paddlex/inference/pipelines/table_recognition/utils.py +44 -0
  521. paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +15 -0
  522. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +62 -0
  523. paddlex/inference/pipelines/ts_classification/__init__.py +15 -0
  524. paddlex/inference/pipelines/ts_classification/pipeline.py +62 -0
  525. paddlex/inference/pipelines/ts_forecasting/__init__.py +15 -0
  526. paddlex/inference/pipelines/ts_forecasting/pipeline.py +62 -0
  527. paddlex/inference/pipelines/video_classification/__init__.py +15 -0
  528. paddlex/inference/pipelines/video_classification/pipeline.py +68 -0
  529. paddlex/inference/pipelines/video_detection/__init__.py +15 -0
  530. paddlex/inference/pipelines/video_detection/pipeline.py +73 -0
  531. paddlex/inference/serving/__init__.py +13 -0
  532. paddlex/inference/serving/basic_serving/__init__.py +18 -0
  533. paddlex/inference/serving/basic_serving/_app.py +209 -0
  534. paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +41 -0
  535. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +13 -0
  536. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +96 -0
  537. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +36 -0
  538. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +90 -0
  539. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +64 -0
  540. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +97 -0
  541. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +223 -0
  542. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +97 -0
  543. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +78 -0
  544. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +66 -0
  545. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +70 -0
  546. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +81 -0
  547. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +115 -0
  548. paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +76 -0
  549. paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +89 -0
  550. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +74 -0
  551. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +99 -0
  552. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +78 -0
  553. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +85 -0
  554. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +81 -0
  555. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +191 -0
  556. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +221 -0
  557. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +218 -0
  558. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +136 -0
  559. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +78 -0
  560. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +103 -0
  561. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +64 -0
  562. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +69 -0
  563. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +105 -0
  564. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +107 -0
  565. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +62 -0
  566. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +61 -0
  567. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +62 -0
  568. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +81 -0
  569. paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +73 -0
  570. paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +89 -0
  571. paddlex/inference/serving/basic_serving/_server.py +35 -0
  572. paddlex/inference/serving/infra/__init__.py +13 -0
  573. paddlex/inference/serving/infra/config.py +36 -0
  574. paddlex/inference/serving/infra/models.py +72 -0
  575. paddlex/inference/serving/infra/storage.py +175 -0
  576. paddlex/inference/serving/infra/utils.py +259 -0
  577. paddlex/inference/serving/schemas/__init__.py +13 -0
  578. paddlex/inference/serving/schemas/anomaly_detection.py +39 -0
  579. paddlex/inference/serving/schemas/doc_preprocessor.py +54 -0
  580. paddlex/inference/serving/schemas/face_recognition.py +124 -0
  581. paddlex/inference/serving/schemas/formula_recognition.py +56 -0
  582. paddlex/inference/serving/schemas/human_keypoint_detection.py +55 -0
  583. paddlex/inference/serving/schemas/image_classification.py +45 -0
  584. paddlex/inference/serving/schemas/image_multilabel_classification.py +47 -0
  585. paddlex/inference/serving/schemas/instance_segmentation.py +53 -0
  586. paddlex/inference/serving/schemas/layout_parsing.py +72 -0
  587. paddlex/inference/serving/schemas/m_3d_bev_detection.py +48 -0
  588. paddlex/inference/serving/schemas/multilingual_speech_recognition.py +57 -0
  589. paddlex/inference/serving/schemas/object_detection.py +52 -0
  590. paddlex/inference/serving/schemas/ocr.py +60 -0
  591. paddlex/inference/serving/schemas/open_vocabulary_detection.py +52 -0
  592. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +52 -0
  593. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +61 -0
  594. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +134 -0
  595. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +151 -0
  596. paddlex/inference/serving/schemas/pp_shituv2.py +124 -0
  597. paddlex/inference/serving/schemas/pp_structurev3.py +84 -0
  598. paddlex/inference/serving/schemas/rotated_object_detection.py +52 -0
  599. paddlex/inference/serving/schemas/seal_recognition.py +62 -0
  600. paddlex/inference/serving/schemas/semantic_segmentation.py +45 -0
  601. paddlex/inference/serving/schemas/shared/__init__.py +13 -0
  602. paddlex/inference/serving/schemas/shared/classification.py +23 -0
  603. paddlex/inference/serving/schemas/shared/image_segmentation.py +28 -0
  604. paddlex/inference/serving/schemas/shared/object_detection.py +24 -0
  605. paddlex/inference/serving/schemas/shared/ocr.py +25 -0
  606. paddlex/inference/serving/schemas/small_object_detection.py +52 -0
  607. paddlex/inference/serving/schemas/table_recognition.py +64 -0
  608. paddlex/inference/serving/schemas/table_recognition_v2.py +66 -0
  609. paddlex/inference/serving/schemas/ts_anomaly_detection.py +37 -0
  610. paddlex/inference/serving/schemas/ts_classification.py +38 -0
  611. paddlex/inference/serving/schemas/ts_forecast.py +37 -0
  612. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +61 -0
  613. paddlex/inference/serving/schemas/video_classification.py +44 -0
  614. paddlex/inference/serving/schemas/video_detection.py +56 -0
  615. paddlex/inference/utils/__init__.py +13 -0
  616. paddlex/inference/utils/benchmark.py +226 -0
  617. paddlex/inference/utils/color_map.py +123 -0
  618. paddlex/inference/utils/get_pipeline_path.py +27 -0
  619. paddlex/inference/utils/io/__init__.py +36 -0
  620. paddlex/inference/utils/io/readers.py +500 -0
  621. paddlex/inference/utils/io/style.py +374 -0
  622. paddlex/inference/utils/io/tablepyxl.py +149 -0
  623. paddlex/inference/utils/io/writers.py +459 -0
  624. paddlex/inference/utils/new_ir_blacklist.py +28 -0
  625. paddlex/inference/utils/official_models.py +352 -0
  626. paddlex/inference/utils/pp_option.py +256 -0
  627. paddlex/model.py +113 -0
  628. paddlex/modules/3d_bev_detection/__init__.py +18 -0
  629. paddlex/modules/3d_bev_detection/dataset_checker/__init__.py +95 -0
  630. paddlex/modules/3d_bev_detection/dataset_checker/dataset_src/__init__.py +17 -0
  631. paddlex/modules/3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +106 -0
  632. paddlex/modules/3d_bev_detection/dataset_checker/dataset_src/check_dataset.py +102 -0
  633. paddlex/modules/3d_bev_detection/evaluator.py +46 -0
  634. paddlex/modules/3d_bev_detection/exportor.py +22 -0
  635. paddlex/modules/3d_bev_detection/model_list.py +18 -0
  636. paddlex/modules/3d_bev_detection/trainer.py +70 -0
  637. paddlex/modules/__init__.py +138 -0
  638. paddlex/modules/anomaly_detection/__init__.py +18 -0
  639. paddlex/modules/anomaly_detection/dataset_checker/__init__.py +95 -0
  640. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/__init__.py +19 -0
  641. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +79 -0
  642. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/check_dataset.py +87 -0
  643. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +230 -0
  644. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/split_dataset.py +87 -0
  645. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/__init__.py +13 -0
  646. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/visualizer.py +71 -0
  647. paddlex/modules/anomaly_detection/evaluator.py +58 -0
  648. paddlex/modules/anomaly_detection/exportor.py +22 -0
  649. paddlex/modules/anomaly_detection/model_list.py +16 -0
  650. paddlex/modules/anomaly_detection/trainer.py +71 -0
  651. paddlex/modules/base/__init__.py +18 -0
  652. paddlex/modules/base/build_model.py +34 -0
  653. paddlex/modules/base/dataset_checker/__init__.py +16 -0
  654. paddlex/modules/base/dataset_checker/dataset_checker.py +169 -0
  655. paddlex/modules/base/dataset_checker/utils.py +110 -0
  656. paddlex/modules/base/evaluator.py +170 -0
  657. paddlex/modules/base/exportor.py +146 -0
  658. paddlex/modules/base/trainer.py +134 -0
  659. paddlex/modules/face_recognition/__init__.py +18 -0
  660. paddlex/modules/face_recognition/dataset_checker/__init__.py +71 -0
  661. paddlex/modules/face_recognition/dataset_checker/dataset_src/__init__.py +16 -0
  662. paddlex/modules/face_recognition/dataset_checker/dataset_src/check_dataset.py +174 -0
  663. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/__init__.py +13 -0
  664. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +156 -0
  665. paddlex/modules/face_recognition/evaluator.py +52 -0
  666. paddlex/modules/face_recognition/exportor.py +22 -0
  667. paddlex/modules/face_recognition/model_list.py +15 -0
  668. paddlex/modules/face_recognition/trainer.py +75 -0
  669. paddlex/modules/formula_recognition/__init__.py +18 -0
  670. paddlex/modules/formula_recognition/dataset_checker/__init__.py +113 -0
  671. paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +19 -0
  672. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +157 -0
  673. paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +80 -0
  674. paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +94 -0
  675. paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +81 -0
  676. paddlex/modules/formula_recognition/evaluator.py +77 -0
  677. paddlex/modules/formula_recognition/exportor.py +22 -0
  678. paddlex/modules/formula_recognition/model_list.py +20 -0
  679. paddlex/modules/formula_recognition/trainer.py +121 -0
  680. paddlex/modules/general_recognition/__init__.py +18 -0
  681. paddlex/modules/general_recognition/dataset_checker/__init__.py +107 -0
  682. paddlex/modules/general_recognition/dataset_checker/dataset_src/__init__.py +19 -0
  683. paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +98 -0
  684. paddlex/modules/general_recognition/dataset_checker/dataset_src/check_dataset.py +100 -0
  685. paddlex/modules/general_recognition/dataset_checker/dataset_src/convert_dataset.py +99 -0
  686. paddlex/modules/general_recognition/dataset_checker/dataset_src/split_dataset.py +82 -0
  687. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/__init__.py +13 -0
  688. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +150 -0
  689. paddlex/modules/general_recognition/evaluator.py +31 -0
  690. paddlex/modules/general_recognition/exportor.py +22 -0
  691. paddlex/modules/general_recognition/model_list.py +19 -0
  692. paddlex/modules/general_recognition/trainer.py +52 -0
  693. paddlex/modules/image_classification/__init__.py +18 -0
  694. paddlex/modules/image_classification/dataset_checker/__init__.py +104 -0
  695. paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py +19 -0
  696. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +93 -0
  697. paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py +131 -0
  698. paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py +51 -0
  699. paddlex/modules/image_classification/dataset_checker/dataset_src/split_dataset.py +81 -0
  700. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/__init__.py +13 -0
  701. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +156 -0
  702. paddlex/modules/image_classification/evaluator.py +43 -0
  703. paddlex/modules/image_classification/exportor.py +22 -0
  704. paddlex/modules/image_classification/model_list.py +99 -0
  705. paddlex/modules/image_classification/trainer.py +82 -0
  706. paddlex/modules/image_unwarping/__init__.py +13 -0
  707. paddlex/modules/image_unwarping/model_list.py +17 -0
  708. paddlex/modules/instance_segmentation/__init__.py +18 -0
  709. paddlex/modules/instance_segmentation/dataset_checker/__init__.py +108 -0
  710. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/__init__.py +19 -0
  711. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +78 -0
  712. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/check_dataset.py +92 -0
  713. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/convert_dataset.py +241 -0
  714. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/split_dataset.py +119 -0
  715. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/__init__.py +13 -0
  716. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +221 -0
  717. paddlex/modules/instance_segmentation/evaluator.py +32 -0
  718. paddlex/modules/instance_segmentation/exportor.py +22 -0
  719. paddlex/modules/instance_segmentation/model_list.py +33 -0
  720. paddlex/modules/instance_segmentation/trainer.py +31 -0
  721. paddlex/modules/keypoint_detection/__init__.py +18 -0
  722. paddlex/modules/keypoint_detection/dataset_checker/__init__.py +56 -0
  723. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +15 -0
  724. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +86 -0
  725. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +13 -0
  726. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +119 -0
  727. paddlex/modules/keypoint_detection/evaluator.py +41 -0
  728. paddlex/modules/keypoint_detection/exportor.py +22 -0
  729. paddlex/modules/keypoint_detection/model_list.py +16 -0
  730. paddlex/modules/keypoint_detection/trainer.py +39 -0
  731. paddlex/modules/multilabel_classification/__init__.py +18 -0
  732. paddlex/modules/multilabel_classification/dataset_checker/__init__.py +106 -0
  733. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/__init__.py +19 -0
  734. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +95 -0
  735. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/check_dataset.py +131 -0
  736. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/convert_dataset.py +117 -0
  737. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/split_dataset.py +81 -0
  738. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/__init__.py +13 -0
  739. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +153 -0
  740. paddlex/modules/multilabel_classification/evaluator.py +43 -0
  741. paddlex/modules/multilabel_classification/exportor.py +22 -0
  742. paddlex/modules/multilabel_classification/model_list.py +24 -0
  743. paddlex/modules/multilabel_classification/trainer.py +85 -0
  744. paddlex/modules/multilingual_speech_recognition/__init__.py +18 -0
  745. paddlex/modules/multilingual_speech_recognition/dataset_checker.py +27 -0
  746. paddlex/modules/multilingual_speech_recognition/evaluator.py +27 -0
  747. paddlex/modules/multilingual_speech_recognition/exportor.py +27 -0
  748. paddlex/modules/multilingual_speech_recognition/model_list.py +22 -0
  749. paddlex/modules/multilingual_speech_recognition/trainer.py +40 -0
  750. paddlex/modules/object_detection/__init__.py +18 -0
  751. paddlex/modules/object_detection/dataset_checker/__init__.py +115 -0
  752. paddlex/modules/object_detection/dataset_checker/dataset_src/__init__.py +19 -0
  753. paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +80 -0
  754. paddlex/modules/object_detection/dataset_checker/dataset_src/check_dataset.py +86 -0
  755. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +433 -0
  756. paddlex/modules/object_detection/dataset_checker/dataset_src/split_dataset.py +119 -0
  757. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/__init__.py +13 -0
  758. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +192 -0
  759. paddlex/modules/object_detection/evaluator.py +52 -0
  760. paddlex/modules/object_detection/exportor.py +22 -0
  761. paddlex/modules/object_detection/model_list.py +84 -0
  762. paddlex/modules/object_detection/trainer.py +99 -0
  763. paddlex/modules/open_vocabulary_detection/__init__.py +18 -0
  764. paddlex/modules/open_vocabulary_detection/dataset_checker.py +29 -0
  765. paddlex/modules/open_vocabulary_detection/evaluator.py +29 -0
  766. paddlex/modules/open_vocabulary_detection/exportor.py +29 -0
  767. paddlex/modules/open_vocabulary_detection/model_list.py +18 -0
  768. paddlex/modules/open_vocabulary_detection/trainer.py +42 -0
  769. paddlex/modules/open_vocabulary_segmentation/__init__.py +18 -0
  770. paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +29 -0
  771. paddlex/modules/open_vocabulary_segmentation/evaluator.py +29 -0
  772. paddlex/modules/open_vocabulary_segmentation/exportor.py +29 -0
  773. paddlex/modules/open_vocabulary_segmentation/model_list.py +19 -0
  774. paddlex/modules/open_vocabulary_segmentation/trainer.py +42 -0
  775. paddlex/modules/semantic_segmentation/__init__.py +18 -0
  776. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +110 -0
  777. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/__init__.py +19 -0
  778. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/analyse_dataset.py +73 -0
  779. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/check_dataset.py +80 -0
  780. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/convert_dataset.py +162 -0
  781. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/split_dataset.py +87 -0
  782. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/__init__.py +13 -0
  783. paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/visualizer.py +71 -0
  784. paddlex/modules/semantic_segmentation/evaluator.py +58 -0
  785. paddlex/modules/semantic_segmentation/exportor.py +31 -0
  786. paddlex/modules/semantic_segmentation/model_list.py +37 -0
  787. paddlex/modules/semantic_segmentation/trainer.py +73 -0
  788. paddlex/modules/table_recognition/__init__.py +18 -0
  789. paddlex/modules/table_recognition/dataset_checker/__init__.py +98 -0
  790. paddlex/modules/table_recognition/dataset_checker/dataset_src/__init__.py +18 -0
  791. paddlex/modules/table_recognition/dataset_checker/dataset_src/analyse_dataset.py +58 -0
  792. paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +86 -0
  793. paddlex/modules/table_recognition/dataset_checker/dataset_src/split_dataset.py +79 -0
  794. paddlex/modules/table_recognition/evaluator.py +43 -0
  795. paddlex/modules/table_recognition/exportor.py +22 -0
  796. paddlex/modules/table_recognition/model_list.py +21 -0
  797. paddlex/modules/table_recognition/trainer.py +70 -0
  798. paddlex/modules/text_detection/__init__.py +18 -0
  799. paddlex/modules/text_detection/dataset_checker/__init__.py +109 -0
  800. paddlex/modules/text_detection/dataset_checker/dataset_src/__init__.py +18 -0
  801. paddlex/modules/text_detection/dataset_checker/dataset_src/analyse_dataset.py +217 -0
  802. paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +106 -0
  803. paddlex/modules/text_detection/dataset_checker/dataset_src/split_dataset.py +140 -0
  804. paddlex/modules/text_detection/evaluator.py +41 -0
  805. paddlex/modules/text_detection/exportor.py +22 -0
  806. paddlex/modules/text_detection/model_list.py +24 -0
  807. paddlex/modules/text_detection/trainer.py +68 -0
  808. paddlex/modules/text_recognition/__init__.py +18 -0
  809. paddlex/modules/text_recognition/dataset_checker/__init__.py +126 -0
  810. paddlex/modules/text_recognition/dataset_checker/dataset_src/__init__.py +19 -0
  811. paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +161 -0
  812. paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +107 -0
  813. paddlex/modules/text_recognition/dataset_checker/dataset_src/convert_dataset.py +94 -0
  814. paddlex/modules/text_recognition/dataset_checker/dataset_src/split_dataset.py +81 -0
  815. paddlex/modules/text_recognition/evaluator.py +64 -0
  816. paddlex/modules/text_recognition/exportor.py +22 -0
  817. paddlex/modules/text_recognition/model_list.py +34 -0
  818. paddlex/modules/text_recognition/trainer.py +106 -0
  819. paddlex/modules/ts_anomaly_detection/__init__.py +19 -0
  820. paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +112 -0
  821. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/__init__.py +19 -0
  822. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +27 -0
  823. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/check_dataset.py +64 -0
  824. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +78 -0
  825. paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/split_dataset.py +63 -0
  826. paddlex/modules/ts_anomaly_detection/evaluator.py +67 -0
  827. paddlex/modules/ts_anomaly_detection/exportor.py +45 -0
  828. paddlex/modules/ts_anomaly_detection/model_list.py +22 -0
  829. paddlex/modules/ts_anomaly_detection/trainer.py +113 -0
  830. paddlex/modules/ts_classification/__init__.py +19 -0
  831. paddlex/modules/ts_classification/dataset_checker/__init__.py +112 -0
  832. paddlex/modules/ts_classification/dataset_checker/dataset_src/__init__.py +19 -0
  833. paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +74 -0
  834. paddlex/modules/ts_classification/dataset_checker/dataset_src/check_dataset.py +64 -0
  835. paddlex/modules/ts_classification/dataset_checker/dataset_src/convert_dataset.py +78 -0
  836. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +88 -0
  837. paddlex/modules/ts_classification/evaluator.py +66 -0
  838. paddlex/modules/ts_classification/exportor.py +45 -0
  839. paddlex/modules/ts_classification/model_list.py +18 -0
  840. paddlex/modules/ts_classification/trainer.py +108 -0
  841. paddlex/modules/ts_forecast/__init__.py +19 -0
  842. paddlex/modules/ts_forecast/dataset_checker/__init__.py +112 -0
  843. paddlex/modules/ts_forecast/dataset_checker/dataset_src/__init__.py +19 -0
  844. paddlex/modules/ts_forecast/dataset_checker/dataset_src/analyse_dataset.py +27 -0
  845. paddlex/modules/ts_forecast/dataset_checker/dataset_src/check_dataset.py +64 -0
  846. paddlex/modules/ts_forecast/dataset_checker/dataset_src/convert_dataset.py +77 -0
  847. paddlex/modules/ts_forecast/dataset_checker/dataset_src/split_dataset.py +63 -0
  848. paddlex/modules/ts_forecast/evaluator.py +66 -0
  849. paddlex/modules/ts_forecast/exportor.py +45 -0
  850. paddlex/modules/ts_forecast/model_list.py +24 -0
  851. paddlex/modules/ts_forecast/trainer.py +108 -0
  852. paddlex/modules/video_classification/__init__.py +18 -0
  853. paddlex/modules/video_classification/dataset_checker/__init__.py +93 -0
  854. paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +18 -0
  855. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +93 -0
  856. paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +121 -0
  857. paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +82 -0
  858. paddlex/modules/video_classification/evaluator.py +44 -0
  859. paddlex/modules/video_classification/exportor.py +22 -0
  860. paddlex/modules/video_classification/model_list.py +19 -0
  861. paddlex/modules/video_classification/trainer.py +88 -0
  862. paddlex/modules/video_detection/__init__.py +18 -0
  863. paddlex/modules/video_detection/dataset_checker/__init__.py +86 -0
  864. paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +17 -0
  865. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +101 -0
  866. paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +134 -0
  867. paddlex/modules/video_detection/evaluator.py +42 -0
  868. paddlex/modules/video_detection/exportor.py +22 -0
  869. paddlex/modules/video_detection/model_list.py +15 -0
  870. paddlex/modules/video_detection/trainer.py +82 -0
  871. paddlex/ops/__init__.py +149 -0
  872. paddlex/ops/iou3d_nms/iou3d_cpu.cpp +264 -0
  873. paddlex/ops/iou3d_nms/iou3d_cpu.h +27 -0
  874. paddlex/ops/iou3d_nms/iou3d_nms.cpp +204 -0
  875. paddlex/ops/iou3d_nms/iou3d_nms.h +33 -0
  876. paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +108 -0
  877. paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +482 -0
  878. paddlex/ops/setup.py +37 -0
  879. paddlex/ops/voxel/voxelize_op.cc +191 -0
  880. paddlex/ops/voxel/voxelize_op.cu +346 -0
  881. paddlex/paddle2onnx_requirements.txt +1 -0
  882. paddlex/paddlex_cli.py +464 -0
  883. paddlex/repo_apis/Paddle3D_api/__init__.py +17 -0
  884. paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +18 -0
  885. paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +118 -0
  886. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +238 -0
  887. paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +55 -0
  888. paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +104 -0
  889. paddlex/repo_apis/Paddle3D_api/pp3d_config.py +144 -0
  890. paddlex/repo_apis/PaddleClas_api/__init__.py +17 -0
  891. paddlex/repo_apis/PaddleClas_api/cls/__init__.py +19 -0
  892. paddlex/repo_apis/PaddleClas_api/cls/config.py +594 -0
  893. paddlex/repo_apis/PaddleClas_api/cls/model.py +355 -0
  894. paddlex/repo_apis/PaddleClas_api/cls/register.py +908 -0
  895. paddlex/repo_apis/PaddleClas_api/cls/runner.py +219 -0
  896. paddlex/repo_apis/PaddleClas_api/shitu_rec/__init__.py +18 -0
  897. paddlex/repo_apis/PaddleClas_api/shitu_rec/config.py +141 -0
  898. paddlex/repo_apis/PaddleClas_api/shitu_rec/model.py +23 -0
  899. paddlex/repo_apis/PaddleClas_api/shitu_rec/register.py +68 -0
  900. paddlex/repo_apis/PaddleClas_api/shitu_rec/runner.py +55 -0
  901. paddlex/repo_apis/PaddleDetection_api/__init__.py +17 -0
  902. paddlex/repo_apis/PaddleDetection_api/config_helper.py +280 -0
  903. paddlex/repo_apis/PaddleDetection_api/instance_seg/__init__.py +18 -0
  904. paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +458 -0
  905. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +403 -0
  906. paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py +263 -0
  907. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +226 -0
  908. paddlex/repo_apis/PaddleDetection_api/object_det/__init__.py +19 -0
  909. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +539 -0
  910. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +430 -0
  911. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +220 -0
  912. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +1106 -0
  913. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +226 -0
  914. paddlex/repo_apis/PaddleNLP_api/__init__.py +13 -0
  915. paddlex/repo_apis/PaddleOCR_api/__init__.py +21 -0
  916. paddlex/repo_apis/PaddleOCR_api/config_utils.py +53 -0
  917. paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +16 -0
  918. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +570 -0
  919. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +402 -0
  920. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +73 -0
  921. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +240 -0
  922. paddlex/repo_apis/PaddleOCR_api/table_rec/__init__.py +16 -0
  923. paddlex/repo_apis/PaddleOCR_api/table_rec/config.py +64 -0
  924. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +126 -0
  925. paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +71 -0
  926. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +51 -0
  927. paddlex/repo_apis/PaddleOCR_api/text_det/__init__.py +16 -0
  928. paddlex/repo_apis/PaddleOCR_api/text_det/config.py +62 -0
  929. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +72 -0
  930. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +90 -0
  931. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +53 -0
  932. paddlex/repo_apis/PaddleOCR_api/text_rec/__init__.py +16 -0
  933. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +563 -0
  934. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +402 -0
  935. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +199 -0
  936. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +240 -0
  937. paddlex/repo_apis/PaddleSeg_api/__init__.py +16 -0
  938. paddlex/repo_apis/PaddleSeg_api/base_seg_config.py +134 -0
  939. paddlex/repo_apis/PaddleSeg_api/seg/__init__.py +16 -0
  940. paddlex/repo_apis/PaddleSeg_api/seg/config.py +186 -0
  941. paddlex/repo_apis/PaddleSeg_api/seg/model.py +491 -0
  942. paddlex/repo_apis/PaddleSeg_api/seg/register.py +273 -0
  943. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +262 -0
  944. paddlex/repo_apis/PaddleTS_api/__init__.py +19 -0
  945. paddlex/repo_apis/PaddleTS_api/ts_ad/__init__.py +16 -0
  946. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +89 -0
  947. paddlex/repo_apis/PaddleTS_api/ts_ad/register.py +146 -0
  948. paddlex/repo_apis/PaddleTS_api/ts_ad/runner.py +158 -0
  949. paddlex/repo_apis/PaddleTS_api/ts_base/__init__.py +13 -0
  950. paddlex/repo_apis/PaddleTS_api/ts_base/config.py +246 -0
  951. paddlex/repo_apis/PaddleTS_api/ts_base/model.py +276 -0
  952. paddlex/repo_apis/PaddleTS_api/ts_base/runner.py +158 -0
  953. paddlex/repo_apis/PaddleTS_api/ts_cls/__init__.py +16 -0
  954. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +73 -0
  955. paddlex/repo_apis/PaddleTS_api/ts_cls/register.py +59 -0
  956. paddlex/repo_apis/PaddleTS_api/ts_cls/runner.py +158 -0
  957. paddlex/repo_apis/PaddleTS_api/ts_fc/__init__.py +16 -0
  958. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +137 -0
  959. paddlex/repo_apis/PaddleTS_api/ts_fc/register.py +186 -0
  960. paddlex/repo_apis/PaddleVideo_api/__init__.py +17 -0
  961. paddlex/repo_apis/PaddleVideo_api/config_utils.py +51 -0
  962. paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +19 -0
  963. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +547 -0
  964. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +346 -0
  965. paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +71 -0
  966. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +205 -0
  967. paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +19 -0
  968. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +548 -0
  969. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +298 -0
  970. paddlex/repo_apis/PaddleVideo_api/video_det/register.py +45 -0
  971. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +200 -0
  972. paddlex/repo_apis/__init__.py +13 -0
  973. paddlex/repo_apis/base/__init__.py +23 -0
  974. paddlex/repo_apis/base/config.py +238 -0
  975. paddlex/repo_apis/base/model.py +571 -0
  976. paddlex/repo_apis/base/register.py +135 -0
  977. paddlex/repo_apis/base/runner.py +391 -0
  978. paddlex/repo_apis/base/utils/__init__.py +13 -0
  979. paddlex/repo_apis/base/utils/arg.py +64 -0
  980. paddlex/repo_apis/base/utils/subprocess.py +107 -0
  981. paddlex/repo_manager/__init__.py +24 -0
  982. paddlex/repo_manager/core.py +271 -0
  983. paddlex/repo_manager/meta.py +170 -0
  984. paddlex/repo_manager/repo.py +415 -0
  985. paddlex/repo_manager/requirements.txt +21 -0
  986. paddlex/repo_manager/utils.py +359 -0
  987. paddlex/serving_requirements.txt +9 -0
  988. paddlex/utils/__init__.py +1 -12
  989. paddlex/utils/cache.py +148 -0
  990. paddlex/utils/config.py +215 -0
  991. paddlex/utils/custom_device_whitelist.py +457 -0
  992. paddlex/utils/device.py +151 -0
  993. paddlex/utils/download.py +168 -182
  994. paddlex/utils/env.py +11 -50
  995. paddlex/utils/errors/__init__.py +17 -0
  996. paddlex/utils/errors/dataset_checker.py +78 -0
  997. paddlex/utils/errors/others.py +152 -0
  998. paddlex/utils/file_interface.py +212 -0
  999. paddlex/utils/flags.py +65 -0
  1000. paddlex/utils/fonts/__init__.py +67 -0
  1001. paddlex/utils/func_register.py +41 -0
  1002. paddlex/utils/interactive_get_pipeline.py +55 -0
  1003. paddlex/utils/lazy_loader.py +68 -0
  1004. paddlex/utils/logging.py +131 -33
  1005. paddlex/utils/misc.py +201 -0
  1006. paddlex/utils/pipeline_arguments.py +711 -0
  1007. paddlex/utils/result_saver.py +59 -0
  1008. paddlex/utils/subclass_register.py +101 -0
  1009. paddlex/version.py +54 -0
  1010. paddlex-3.0.0rc0.dist-info/LICENSE +169 -0
  1011. paddlex-3.0.0rc0.dist-info/METADATA +1035 -0
  1012. paddlex-3.0.0rc0.dist-info/RECORD +1015 -0
  1013. paddlex-3.0.0rc0.dist-info/WHEEL +5 -0
  1014. paddlex-3.0.0rc0.dist-info/entry_points.txt +2 -0
  1015. paddlex-3.0.0rc0.dist-info/top_level.txt +1 -0
  1016. PaddleClas/__init__.py +0 -16
  1017. PaddleClas/deploy/__init__.py +0 -1
  1018. PaddleClas/deploy/paddleserving/__init__.py +0 -0
  1019. PaddleClas/deploy/paddleserving/classification_web_service.py +0 -74
  1020. PaddleClas/deploy/paddleserving/cpu_utilization.py +0 -4
  1021. PaddleClas/deploy/paddleserving/pipeline_http_client.py +0 -20
  1022. PaddleClas/deploy/paddleserving/pipeline_rpc_client.py +0 -33
  1023. PaddleClas/deploy/paddleserving/recognition/__init__.py +0 -0
  1024. PaddleClas/deploy/paddleserving/recognition/pipeline_http_client.py +0 -21
  1025. PaddleClas/deploy/paddleserving/recognition/pipeline_rpc_client.py +0 -34
  1026. PaddleClas/deploy/paddleserving/recognition/recognition_web_service.py +0 -209
  1027. PaddleClas/deploy/python/__init__.py +0 -0
  1028. PaddleClas/deploy/python/build_gallery.py +0 -214
  1029. PaddleClas/deploy/python/det_preprocess.py +0 -205
  1030. PaddleClas/deploy/python/postprocess.py +0 -161
  1031. PaddleClas/deploy/python/predict_cls.py +0 -142
  1032. PaddleClas/deploy/python/predict_det.py +0 -158
  1033. PaddleClas/deploy/python/predict_rec.py +0 -138
  1034. PaddleClas/deploy/python/predict_system.py +0 -144
  1035. PaddleClas/deploy/python/preprocess.py +0 -337
  1036. PaddleClas/deploy/utils/__init__.py +0 -5
  1037. PaddleClas/deploy/utils/config.py +0 -197
  1038. PaddleClas/deploy/utils/draw_bbox.py +0 -61
  1039. PaddleClas/deploy/utils/encode_decode.py +0 -31
  1040. PaddleClas/deploy/utils/get_image_list.py +0 -49
  1041. PaddleClas/deploy/utils/logger.py +0 -120
  1042. PaddleClas/deploy/utils/predictor.py +0 -71
  1043. PaddleClas/deploy/vector_search/__init__.py +0 -1
  1044. PaddleClas/deploy/vector_search/interface.py +0 -272
  1045. PaddleClas/deploy/vector_search/test.py +0 -34
  1046. PaddleClas/hubconf.py +0 -788
  1047. PaddleClas/paddleclas.py +0 -552
  1048. PaddleClas/ppcls/__init__.py +0 -20
  1049. PaddleClas/ppcls/arch/__init__.py +0 -127
  1050. PaddleClas/ppcls/arch/backbone/__init__.py +0 -80
  1051. PaddleClas/ppcls/arch/backbone/base/__init__.py +0 -0
  1052. PaddleClas/ppcls/arch/backbone/base/theseus_layer.py +0 -126
  1053. PaddleClas/ppcls/arch/backbone/legendary_models/__init__.py +0 -6
  1054. PaddleClas/ppcls/arch/backbone/legendary_models/esnet.py +0 -355
  1055. PaddleClas/ppcls/arch/backbone/legendary_models/hrnet.py +0 -744
  1056. PaddleClas/ppcls/arch/backbone/legendary_models/inception_v3.py +0 -539
  1057. PaddleClas/ppcls/arch/backbone/legendary_models/mobilenet_v1.py +0 -234
  1058. PaddleClas/ppcls/arch/backbone/legendary_models/mobilenet_v3.py +0 -561
  1059. PaddleClas/ppcls/arch/backbone/legendary_models/pp_lcnet.py +0 -399
  1060. PaddleClas/ppcls/arch/backbone/legendary_models/resnet.py +0 -534
  1061. PaddleClas/ppcls/arch/backbone/legendary_models/vgg.py +0 -231
  1062. PaddleClas/ppcls/arch/backbone/model_zoo/__init__.py +0 -0
  1063. PaddleClas/ppcls/arch/backbone/model_zoo/alexnet.py +0 -168
  1064. PaddleClas/ppcls/arch/backbone/model_zoo/cspnet.py +0 -376
  1065. PaddleClas/ppcls/arch/backbone/model_zoo/darknet.py +0 -197
  1066. PaddleClas/ppcls/arch/backbone/model_zoo/densenet.py +0 -344
  1067. PaddleClas/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py +0 -272
  1068. PaddleClas/ppcls/arch/backbone/model_zoo/dla.py +0 -528
  1069. PaddleClas/ppcls/arch/backbone/model_zoo/dpn.py +0 -451
  1070. PaddleClas/ppcls/arch/backbone/model_zoo/efficientnet.py +0 -976
  1071. PaddleClas/ppcls/arch/backbone/model_zoo/ghostnet.py +0 -363
  1072. PaddleClas/ppcls/arch/backbone/model_zoo/googlenet.py +0 -229
  1073. PaddleClas/ppcls/arch/backbone/model_zoo/gvt.py +0 -693
  1074. PaddleClas/ppcls/arch/backbone/model_zoo/hardnet.py +0 -293
  1075. PaddleClas/ppcls/arch/backbone/model_zoo/inception_v4.py +0 -477
  1076. PaddleClas/ppcls/arch/backbone/model_zoo/levit.py +0 -589
  1077. PaddleClas/ppcls/arch/backbone/model_zoo/mixnet.py +0 -815
  1078. PaddleClas/ppcls/arch/backbone/model_zoo/mobilenet_v2.py +0 -287
  1079. PaddleClas/ppcls/arch/backbone/model_zoo/rednet.py +0 -203
  1080. PaddleClas/ppcls/arch/backbone/model_zoo/regnet.py +0 -431
  1081. PaddleClas/ppcls/arch/backbone/model_zoo/repvgg.py +0 -422
  1082. PaddleClas/ppcls/arch/backbone/model_zoo/res2net.py +0 -264
  1083. PaddleClas/ppcls/arch/backbone/model_zoo/res2net_vd.py +0 -305
  1084. PaddleClas/ppcls/arch/backbone/model_zoo/resnest.py +0 -740
  1085. PaddleClas/ppcls/arch/backbone/model_zoo/resnet_vc.py +0 -309
  1086. PaddleClas/ppcls/arch/backbone/model_zoo/resnext.py +0 -298
  1087. PaddleClas/ppcls/arch/backbone/model_zoo/resnext101_wsl.py +0 -490
  1088. PaddleClas/ppcls/arch/backbone/model_zoo/resnext_vd.py +0 -317
  1089. PaddleClas/ppcls/arch/backbone/model_zoo/rexnet.py +0 -281
  1090. PaddleClas/ppcls/arch/backbone/model_zoo/se_resnet_vd.py +0 -390
  1091. PaddleClas/ppcls/arch/backbone/model_zoo/se_resnext.py +0 -364
  1092. PaddleClas/ppcls/arch/backbone/model_zoo/se_resnext_vd.py +0 -309
  1093. PaddleClas/ppcls/arch/backbone/model_zoo/shufflenet_v2.py +0 -362
  1094. PaddleClas/ppcls/arch/backbone/model_zoo/squeezenet.py +0 -194
  1095. PaddleClas/ppcls/arch/backbone/model_zoo/swin_transformer.py +0 -857
  1096. PaddleClas/ppcls/arch/backbone/model_zoo/tnt.py +0 -385
  1097. PaddleClas/ppcls/arch/backbone/model_zoo/vision_transformer.py +0 -495
  1098. PaddleClas/ppcls/arch/backbone/model_zoo/xception.py +0 -377
  1099. PaddleClas/ppcls/arch/backbone/model_zoo/xception_deeplab.py +0 -421
  1100. PaddleClas/ppcls/arch/backbone/variant_models/__init__.py +0 -3
  1101. PaddleClas/ppcls/arch/backbone/variant_models/pp_lcnet_variant.py +0 -29
  1102. PaddleClas/ppcls/arch/backbone/variant_models/resnet_variant.py +0 -23
  1103. PaddleClas/ppcls/arch/backbone/variant_models/vgg_variant.py +0 -28
  1104. PaddleClas/ppcls/arch/gears/__init__.py +0 -32
  1105. PaddleClas/ppcls/arch/gears/arcmargin.py +0 -72
  1106. PaddleClas/ppcls/arch/gears/circlemargin.py +0 -59
  1107. PaddleClas/ppcls/arch/gears/cosmargin.py +0 -55
  1108. PaddleClas/ppcls/arch/gears/fc.py +0 -35
  1109. PaddleClas/ppcls/arch/gears/identity_head.py +0 -9
  1110. PaddleClas/ppcls/arch/gears/vehicle_neck.py +0 -52
  1111. PaddleClas/ppcls/arch/utils.py +0 -53
  1112. PaddleClas/ppcls/data/__init__.py +0 -144
  1113. PaddleClas/ppcls/data/dataloader/DistributedRandomIdentitySampler.py +0 -90
  1114. PaddleClas/ppcls/data/dataloader/__init__.py +0 -9
  1115. PaddleClas/ppcls/data/dataloader/common_dataset.py +0 -84
  1116. PaddleClas/ppcls/data/dataloader/dali.py +0 -319
  1117. PaddleClas/ppcls/data/dataloader/icartoon_dataset.py +0 -36
  1118. PaddleClas/ppcls/data/dataloader/imagenet_dataset.py +0 -38
  1119. PaddleClas/ppcls/data/dataloader/logo_dataset.py +0 -46
  1120. PaddleClas/ppcls/data/dataloader/mix_dataset.py +0 -49
  1121. PaddleClas/ppcls/data/dataloader/mix_sampler.py +0 -79
  1122. PaddleClas/ppcls/data/dataloader/multilabel_dataset.py +0 -59
  1123. PaddleClas/ppcls/data/dataloader/pk_sampler.py +0 -105
  1124. PaddleClas/ppcls/data/dataloader/vehicle_dataset.py +0 -138
  1125. PaddleClas/ppcls/data/postprocess/__init__.py +0 -41
  1126. PaddleClas/ppcls/data/postprocess/topk.py +0 -85
  1127. PaddleClas/ppcls/data/preprocess/__init__.py +0 -100
  1128. PaddleClas/ppcls/data/preprocess/batch_ops/__init__.py +0 -1
  1129. PaddleClas/ppcls/data/preprocess/batch_ops/batch_operators.py +0 -231
  1130. PaddleClas/ppcls/data/preprocess/ops/__init__.py +0 -1
  1131. PaddleClas/ppcls/data/preprocess/ops/autoaugment.py +0 -264
  1132. PaddleClas/ppcls/data/preprocess/ops/cutout.py +0 -41
  1133. PaddleClas/ppcls/data/preprocess/ops/fmix.py +0 -217
  1134. PaddleClas/ppcls/data/preprocess/ops/functional.py +0 -138
  1135. PaddleClas/ppcls/data/preprocess/ops/grid.py +0 -89
  1136. PaddleClas/ppcls/data/preprocess/ops/hide_and_seek.py +0 -44
  1137. PaddleClas/ppcls/data/preprocess/ops/operators.py +0 -384
  1138. PaddleClas/ppcls/data/preprocess/ops/randaugment.py +0 -106
  1139. PaddleClas/ppcls/data/preprocess/ops/random_erasing.py +0 -90
  1140. PaddleClas/ppcls/data/preprocess/ops/timm_autoaugment.py +0 -877
  1141. PaddleClas/ppcls/data/utils/__init__.py +0 -13
  1142. PaddleClas/ppcls/data/utils/get_image_list.py +0 -49
  1143. PaddleClas/ppcls/engine/__init__.py +0 -0
  1144. PaddleClas/ppcls/engine/engine.py +0 -436
  1145. PaddleClas/ppcls/engine/evaluation/__init__.py +0 -16
  1146. PaddleClas/ppcls/engine/evaluation/classification.py +0 -143
  1147. PaddleClas/ppcls/engine/evaluation/retrieval.py +0 -169
  1148. PaddleClas/ppcls/engine/slim/__init__.py +0 -16
  1149. PaddleClas/ppcls/engine/slim/prune.py +0 -66
  1150. PaddleClas/ppcls/engine/slim/quant.py +0 -55
  1151. PaddleClas/ppcls/engine/train/__init__.py +0 -14
  1152. PaddleClas/ppcls/engine/train/train.py +0 -79
  1153. PaddleClas/ppcls/engine/train/utils.py +0 -72
  1154. PaddleClas/ppcls/loss/__init__.py +0 -65
  1155. PaddleClas/ppcls/loss/celoss.py +0 -67
  1156. PaddleClas/ppcls/loss/centerloss.py +0 -54
  1157. PaddleClas/ppcls/loss/comfunc.py +0 -45
  1158. PaddleClas/ppcls/loss/deephashloss.py +0 -92
  1159. PaddleClas/ppcls/loss/distanceloss.py +0 -43
  1160. PaddleClas/ppcls/loss/distillationloss.py +0 -141
  1161. PaddleClas/ppcls/loss/dmlloss.py +0 -46
  1162. PaddleClas/ppcls/loss/emlloss.py +0 -97
  1163. PaddleClas/ppcls/loss/googlenetloss.py +0 -41
  1164. PaddleClas/ppcls/loss/msmloss.py +0 -78
  1165. PaddleClas/ppcls/loss/multilabelloss.py +0 -43
  1166. PaddleClas/ppcls/loss/npairsloss.py +0 -38
  1167. PaddleClas/ppcls/loss/pairwisecosface.py +0 -55
  1168. PaddleClas/ppcls/loss/supconloss.py +0 -108
  1169. PaddleClas/ppcls/loss/trihardloss.py +0 -82
  1170. PaddleClas/ppcls/loss/triplet.py +0 -137
  1171. PaddleClas/ppcls/metric/__init__.py +0 -51
  1172. PaddleClas/ppcls/metric/metrics.py +0 -308
  1173. PaddleClas/ppcls/optimizer/__init__.py +0 -72
  1174. PaddleClas/ppcls/optimizer/learning_rate.py +0 -326
  1175. PaddleClas/ppcls/optimizer/optimizer.py +0 -207
  1176. PaddleClas/ppcls/utils/__init__.py +0 -27
  1177. PaddleClas/ppcls/utils/check.py +0 -151
  1178. PaddleClas/ppcls/utils/config.py +0 -210
  1179. PaddleClas/ppcls/utils/download.py +0 -319
  1180. PaddleClas/ppcls/utils/ema.py +0 -63
  1181. PaddleClas/ppcls/utils/logger.py +0 -137
  1182. PaddleClas/ppcls/utils/metrics.py +0 -107
  1183. PaddleClas/ppcls/utils/misc.py +0 -63
  1184. PaddleClas/ppcls/utils/model_zoo.py +0 -213
  1185. PaddleClas/ppcls/utils/profiler.py +0 -111
  1186. PaddleClas/ppcls/utils/save_load.py +0 -136
  1187. PaddleClas/setup.py +0 -58
  1188. PaddleClas/tools/__init__.py +0 -15
  1189. PaddleClas/tools/eval.py +0 -31
  1190. PaddleClas/tools/export_model.py +0 -34
  1191. PaddleClas/tools/infer.py +0 -31
  1192. PaddleClas/tools/train.py +0 -32
  1193. paddlex/cls.py +0 -82
  1194. paddlex/command.py +0 -215
  1195. paddlex/cv/__init__.py +0 -17
  1196. paddlex/cv/datasets/__init__.py +0 -18
  1197. paddlex/cv/datasets/coco.py +0 -208
  1198. paddlex/cv/datasets/imagenet.py +0 -88
  1199. paddlex/cv/datasets/seg_dataset.py +0 -91
  1200. paddlex/cv/datasets/voc.py +0 -445
  1201. paddlex/cv/models/__init__.py +0 -18
  1202. paddlex/cv/models/base.py +0 -631
  1203. paddlex/cv/models/classifier.py +0 -989
  1204. paddlex/cv/models/detector.py +0 -2292
  1205. paddlex/cv/models/load_model.py +0 -148
  1206. paddlex/cv/models/segmenter.py +0 -768
  1207. paddlex/cv/models/slim/__init__.py +0 -13
  1208. paddlex/cv/models/slim/prune.py +0 -55
  1209. paddlex/cv/models/utils/__init__.py +0 -13
  1210. paddlex/cv/models/utils/det_metrics/__init__.py +0 -15
  1211. paddlex/cv/models/utils/det_metrics/coco_utils.py +0 -476
  1212. paddlex/cv/models/utils/det_metrics/metrics.py +0 -220
  1213. paddlex/cv/models/utils/infer_nets.py +0 -45
  1214. paddlex/cv/models/utils/seg_metrics.py +0 -62
  1215. paddlex/cv/models/utils/visualize.py +0 -399
  1216. paddlex/cv/transforms/__init__.py +0 -46
  1217. paddlex/cv/transforms/batch_operators.py +0 -286
  1218. paddlex/cv/transforms/box_utils.py +0 -41
  1219. paddlex/cv/transforms/functions.py +0 -193
  1220. paddlex/cv/transforms/operators.py +0 -1402
  1221. paddlex/deploy.py +0 -268
  1222. paddlex/det.py +0 -49
  1223. paddlex/paddleseg/__init__.py +0 -17
  1224. paddlex/paddleseg/core/__init__.py +0 -20
  1225. paddlex/paddleseg/core/infer.py +0 -289
  1226. paddlex/paddleseg/core/predict.py +0 -145
  1227. paddlex/paddleseg/core/train.py +0 -258
  1228. paddlex/paddleseg/core/val.py +0 -172
  1229. paddlex/paddleseg/cvlibs/__init__.py +0 -17
  1230. paddlex/paddleseg/cvlibs/callbacks.py +0 -279
  1231. paddlex/paddleseg/cvlibs/config.py +0 -359
  1232. paddlex/paddleseg/cvlibs/manager.py +0 -142
  1233. paddlex/paddleseg/cvlibs/param_init.py +0 -91
  1234. paddlex/paddleseg/datasets/__init__.py +0 -21
  1235. paddlex/paddleseg/datasets/ade.py +0 -112
  1236. paddlex/paddleseg/datasets/cityscapes.py +0 -86
  1237. paddlex/paddleseg/datasets/cocostuff.py +0 -79
  1238. paddlex/paddleseg/datasets/dataset.py +0 -164
  1239. paddlex/paddleseg/datasets/mini_deep_globe_road_extraction.py +0 -95
  1240. paddlex/paddleseg/datasets/optic_disc_seg.py +0 -97
  1241. paddlex/paddleseg/datasets/pascal_context.py +0 -80
  1242. paddlex/paddleseg/datasets/voc.py +0 -113
  1243. paddlex/paddleseg/models/__init__.py +0 -39
  1244. paddlex/paddleseg/models/ann.py +0 -436
  1245. paddlex/paddleseg/models/attention_unet.py +0 -189
  1246. paddlex/paddleseg/models/backbones/__init__.py +0 -18
  1247. paddlex/paddleseg/models/backbones/hrnet.py +0 -815
  1248. paddlex/paddleseg/models/backbones/mobilenetv3.py +0 -365
  1249. paddlex/paddleseg/models/backbones/resnet_vd.py +0 -364
  1250. paddlex/paddleseg/models/backbones/xception_deeplab.py +0 -415
  1251. paddlex/paddleseg/models/bisenet.py +0 -311
  1252. paddlex/paddleseg/models/danet.py +0 -220
  1253. paddlex/paddleseg/models/decoupled_segnet.py +0 -233
  1254. paddlex/paddleseg/models/deeplab.py +0 -258
  1255. paddlex/paddleseg/models/dnlnet.py +0 -231
  1256. paddlex/paddleseg/models/emanet.py +0 -219
  1257. paddlex/paddleseg/models/fast_scnn.py +0 -318
  1258. paddlex/paddleseg/models/fcn.py +0 -135
  1259. paddlex/paddleseg/models/gcnet.py +0 -223
  1260. paddlex/paddleseg/models/gscnn.py +0 -357
  1261. paddlex/paddleseg/models/hardnet.py +0 -309
  1262. paddlex/paddleseg/models/isanet.py +0 -202
  1263. paddlex/paddleseg/models/layers/__init__.py +0 -19
  1264. paddlex/paddleseg/models/layers/activation.py +0 -73
  1265. paddlex/paddleseg/models/layers/attention.py +0 -146
  1266. paddlex/paddleseg/models/layers/layer_libs.py +0 -168
  1267. paddlex/paddleseg/models/layers/nonlocal2d.py +0 -155
  1268. paddlex/paddleseg/models/layers/pyramid_pool.py +0 -182
  1269. paddlex/paddleseg/models/losses/__init__.py +0 -27
  1270. paddlex/paddleseg/models/losses/binary_cross_entropy_loss.py +0 -174
  1271. paddlex/paddleseg/models/losses/bootstrapped_cross_entropy.py +0 -73
  1272. paddlex/paddleseg/models/losses/cross_entropy_loss.py +0 -94
  1273. paddlex/paddleseg/models/losses/decoupledsegnet_relax_boundary_loss.py +0 -129
  1274. paddlex/paddleseg/models/losses/dice_loss.py +0 -61
  1275. paddlex/paddleseg/models/losses/edge_attention_loss.py +0 -78
  1276. paddlex/paddleseg/models/losses/gscnn_dual_task_loss.py +0 -141
  1277. paddlex/paddleseg/models/losses/l1_loss.py +0 -76
  1278. paddlex/paddleseg/models/losses/lovasz_loss.py +0 -222
  1279. paddlex/paddleseg/models/losses/mean_square_error_loss.py +0 -65
  1280. paddlex/paddleseg/models/losses/mixed_loss.py +0 -58
  1281. paddlex/paddleseg/models/losses/ohem_cross_entropy_loss.py +0 -99
  1282. paddlex/paddleseg/models/losses/ohem_edge_attention_loss.py +0 -114
  1283. paddlex/paddleseg/models/ocrnet.py +0 -248
  1284. paddlex/paddleseg/models/pspnet.py +0 -147
  1285. paddlex/paddleseg/models/sfnet.py +0 -236
  1286. paddlex/paddleseg/models/shufflenet_slim.py +0 -268
  1287. paddlex/paddleseg/models/u2net.py +0 -574
  1288. paddlex/paddleseg/models/unet.py +0 -155
  1289. paddlex/paddleseg/models/unet_3plus.py +0 -316
  1290. paddlex/paddleseg/models/unet_plusplus.py +0 -237
  1291. paddlex/paddleseg/transforms/__init__.py +0 -16
  1292. paddlex/paddleseg/transforms/functional.py +0 -161
  1293. paddlex/paddleseg/transforms/transforms.py +0 -937
  1294. paddlex/paddleseg/utils/__init__.py +0 -22
  1295. paddlex/paddleseg/utils/config_check.py +0 -60
  1296. paddlex/paddleseg/utils/download.py +0 -163
  1297. paddlex/paddleseg/utils/env/__init__.py +0 -16
  1298. paddlex/paddleseg/utils/env/seg_env.py +0 -56
  1299. paddlex/paddleseg/utils/env/sys_env.py +0 -122
  1300. paddlex/paddleseg/utils/logger.py +0 -48
  1301. paddlex/paddleseg/utils/metrics.py +0 -146
  1302. paddlex/paddleseg/utils/progbar.py +0 -212
  1303. paddlex/paddleseg/utils/timer.py +0 -53
  1304. paddlex/paddleseg/utils/utils.py +0 -120
  1305. paddlex/paddleseg/utils/visualize.py +0 -90
  1306. paddlex/ppcls/__init__.py +0 -20
  1307. paddlex/ppcls/arch/__init__.py +0 -127
  1308. paddlex/ppcls/arch/backbone/__init__.py +0 -80
  1309. paddlex/ppcls/arch/backbone/base/__init__.py +0 -0
  1310. paddlex/ppcls/arch/backbone/base/theseus_layer.py +0 -130
  1311. paddlex/ppcls/arch/backbone/legendary_models/__init__.py +0 -6
  1312. paddlex/ppcls/arch/backbone/legendary_models/esnet.py +0 -355
  1313. paddlex/ppcls/arch/backbone/legendary_models/hrnet.py +0 -748
  1314. paddlex/ppcls/arch/backbone/legendary_models/inception_v3.py +0 -539
  1315. paddlex/ppcls/arch/backbone/legendary_models/mobilenet_v1.py +0 -234
  1316. paddlex/ppcls/arch/backbone/legendary_models/mobilenet_v3.py +0 -561
  1317. paddlex/ppcls/arch/backbone/legendary_models/pp_lcnet.py +0 -399
  1318. paddlex/ppcls/arch/backbone/legendary_models/resnet.py +0 -534
  1319. paddlex/ppcls/arch/backbone/legendary_models/vgg.py +0 -235
  1320. paddlex/ppcls/arch/backbone/model_zoo/__init__.py +0 -0
  1321. paddlex/ppcls/arch/backbone/model_zoo/alexnet.py +0 -168
  1322. paddlex/ppcls/arch/backbone/model_zoo/cspnet.py +0 -376
  1323. paddlex/ppcls/arch/backbone/model_zoo/darknet.py +0 -197
  1324. paddlex/ppcls/arch/backbone/model_zoo/densenet.py +0 -344
  1325. paddlex/ppcls/arch/backbone/model_zoo/distilled_vision_transformer.py +0 -272
  1326. paddlex/ppcls/arch/backbone/model_zoo/dla.py +0 -528
  1327. paddlex/ppcls/arch/backbone/model_zoo/dpn.py +0 -451
  1328. paddlex/ppcls/arch/backbone/model_zoo/efficientnet.py +0 -976
  1329. paddlex/ppcls/arch/backbone/model_zoo/ghostnet.py +0 -363
  1330. paddlex/ppcls/arch/backbone/model_zoo/googlenet.py +0 -229
  1331. paddlex/ppcls/arch/backbone/model_zoo/gvt.py +0 -693
  1332. paddlex/ppcls/arch/backbone/model_zoo/hardnet.py +0 -293
  1333. paddlex/ppcls/arch/backbone/model_zoo/inception_v4.py +0 -477
  1334. paddlex/ppcls/arch/backbone/model_zoo/levit.py +0 -589
  1335. paddlex/ppcls/arch/backbone/model_zoo/mixnet.py +0 -815
  1336. paddlex/ppcls/arch/backbone/model_zoo/mobilenet_v2.py +0 -287
  1337. paddlex/ppcls/arch/backbone/model_zoo/rednet.py +0 -203
  1338. paddlex/ppcls/arch/backbone/model_zoo/regnet.py +0 -431
  1339. paddlex/ppcls/arch/backbone/model_zoo/repvgg.py +0 -422
  1340. paddlex/ppcls/arch/backbone/model_zoo/res2net.py +0 -264
  1341. paddlex/ppcls/arch/backbone/model_zoo/res2net_vd.py +0 -305
  1342. paddlex/ppcls/arch/backbone/model_zoo/resnest.py +0 -740
  1343. paddlex/ppcls/arch/backbone/model_zoo/resnet_vc.py +0 -309
  1344. paddlex/ppcls/arch/backbone/model_zoo/resnext.py +0 -298
  1345. paddlex/ppcls/arch/backbone/model_zoo/resnext101_wsl.py +0 -490
  1346. paddlex/ppcls/arch/backbone/model_zoo/resnext_vd.py +0 -317
  1347. paddlex/ppcls/arch/backbone/model_zoo/rexnet.py +0 -281
  1348. paddlex/ppcls/arch/backbone/model_zoo/se_resnet_vd.py +0 -390
  1349. paddlex/ppcls/arch/backbone/model_zoo/se_resnext.py +0 -364
  1350. paddlex/ppcls/arch/backbone/model_zoo/se_resnext_vd.py +0 -309
  1351. paddlex/ppcls/arch/backbone/model_zoo/shufflenet_v2.py +0 -362
  1352. paddlex/ppcls/arch/backbone/model_zoo/squeezenet.py +0 -194
  1353. paddlex/ppcls/arch/backbone/model_zoo/swin_transformer.py +0 -857
  1354. paddlex/ppcls/arch/backbone/model_zoo/tnt.py +0 -385
  1355. paddlex/ppcls/arch/backbone/model_zoo/vision_transformer.py +0 -495
  1356. paddlex/ppcls/arch/backbone/model_zoo/xception.py +0 -377
  1357. paddlex/ppcls/arch/backbone/model_zoo/xception_deeplab.py +0 -421
  1358. paddlex/ppcls/arch/backbone/variant_models/__init__.py +0 -3
  1359. paddlex/ppcls/arch/backbone/variant_models/pp_lcnet_variant.py +0 -29
  1360. paddlex/ppcls/arch/backbone/variant_models/resnet_variant.py +0 -23
  1361. paddlex/ppcls/arch/backbone/variant_models/vgg_variant.py +0 -28
  1362. paddlex/ppcls/arch/gears/__init__.py +0 -32
  1363. paddlex/ppcls/arch/gears/arcmargin.py +0 -72
  1364. paddlex/ppcls/arch/gears/circlemargin.py +0 -59
  1365. paddlex/ppcls/arch/gears/cosmargin.py +0 -55
  1366. paddlex/ppcls/arch/gears/fc.py +0 -35
  1367. paddlex/ppcls/arch/gears/identity_head.py +0 -9
  1368. paddlex/ppcls/arch/gears/vehicle_neck.py +0 -52
  1369. paddlex/ppcls/arch/utils.py +0 -53
  1370. paddlex/ppcls/data/__init__.py +0 -144
  1371. paddlex/ppcls/data/dataloader/DistributedRandomIdentitySampler.py +0 -90
  1372. paddlex/ppcls/data/dataloader/__init__.py +0 -9
  1373. paddlex/ppcls/data/dataloader/common_dataset.py +0 -84
  1374. paddlex/ppcls/data/dataloader/dali.py +0 -319
  1375. paddlex/ppcls/data/dataloader/icartoon_dataset.py +0 -36
  1376. paddlex/ppcls/data/dataloader/imagenet_dataset.py +0 -38
  1377. paddlex/ppcls/data/dataloader/logo_dataset.py +0 -46
  1378. paddlex/ppcls/data/dataloader/mix_dataset.py +0 -49
  1379. paddlex/ppcls/data/dataloader/mix_sampler.py +0 -79
  1380. paddlex/ppcls/data/dataloader/multilabel_dataset.py +0 -59
  1381. paddlex/ppcls/data/dataloader/pk_sampler.py +0 -105
  1382. paddlex/ppcls/data/dataloader/vehicle_dataset.py +0 -138
  1383. paddlex/ppcls/data/postprocess/__init__.py +0 -41
  1384. paddlex/ppcls/data/postprocess/topk.py +0 -85
  1385. paddlex/ppcls/data/preprocess/__init__.py +0 -100
  1386. paddlex/ppcls/data/preprocess/batch_ops/__init__.py +0 -0
  1387. paddlex/ppcls/data/preprocess/batch_ops/batch_operators.py +0 -231
  1388. paddlex/ppcls/data/preprocess/ops/__init__.py +0 -0
  1389. paddlex/ppcls/data/preprocess/ops/autoaugment.py +0 -264
  1390. paddlex/ppcls/data/preprocess/ops/cutout.py +0 -41
  1391. paddlex/ppcls/data/preprocess/ops/fmix.py +0 -217
  1392. paddlex/ppcls/data/preprocess/ops/functional.py +0 -141
  1393. paddlex/ppcls/data/preprocess/ops/grid.py +0 -89
  1394. paddlex/ppcls/data/preprocess/ops/hide_and_seek.py +0 -44
  1395. paddlex/ppcls/data/preprocess/ops/operators.py +0 -384
  1396. paddlex/ppcls/data/preprocess/ops/randaugment.py +0 -106
  1397. paddlex/ppcls/data/preprocess/ops/random_erasing.py +0 -90
  1398. paddlex/ppcls/data/preprocess/ops/timm_autoaugment.py +0 -877
  1399. paddlex/ppcls/data/utils/__init__.py +0 -13
  1400. paddlex/ppcls/data/utils/get_image_list.py +0 -49
  1401. paddlex/ppcls/engine/__init__.py +0 -0
  1402. paddlex/ppcls/engine/engine.py +0 -436
  1403. paddlex/ppcls/engine/evaluation/__init__.py +0 -16
  1404. paddlex/ppcls/engine/evaluation/classification.py +0 -143
  1405. paddlex/ppcls/engine/evaluation/retrieval.py +0 -169
  1406. paddlex/ppcls/engine/slim/__init__.py +0 -16
  1407. paddlex/ppcls/engine/slim/prune.py +0 -66
  1408. paddlex/ppcls/engine/slim/quant.py +0 -55
  1409. paddlex/ppcls/engine/train/__init__.py +0 -14
  1410. paddlex/ppcls/engine/train/train.py +0 -79
  1411. paddlex/ppcls/engine/train/utils.py +0 -72
  1412. paddlex/ppcls/loss/__init__.py +0 -65
  1413. paddlex/ppcls/loss/celoss.py +0 -67
  1414. paddlex/ppcls/loss/centerloss.py +0 -54
  1415. paddlex/ppcls/loss/comfunc.py +0 -45
  1416. paddlex/ppcls/loss/deephashloss.py +0 -96
  1417. paddlex/ppcls/loss/distanceloss.py +0 -43
  1418. paddlex/ppcls/loss/distillationloss.py +0 -141
  1419. paddlex/ppcls/loss/dmlloss.py +0 -46
  1420. paddlex/ppcls/loss/emlloss.py +0 -97
  1421. paddlex/ppcls/loss/googlenetloss.py +0 -42
  1422. paddlex/ppcls/loss/msmloss.py +0 -78
  1423. paddlex/ppcls/loss/multilabelloss.py +0 -43
  1424. paddlex/ppcls/loss/npairsloss.py +0 -38
  1425. paddlex/ppcls/loss/pairwisecosface.py +0 -59
  1426. paddlex/ppcls/loss/supconloss.py +0 -108
  1427. paddlex/ppcls/loss/trihardloss.py +0 -82
  1428. paddlex/ppcls/loss/triplet.py +0 -137
  1429. paddlex/ppcls/metric/__init__.py +0 -51
  1430. paddlex/ppcls/metric/metrics.py +0 -308
  1431. paddlex/ppcls/optimizer/__init__.py +0 -72
  1432. paddlex/ppcls/optimizer/learning_rate.py +0 -326
  1433. paddlex/ppcls/optimizer/optimizer.py +0 -208
  1434. paddlex/ppcls/utils/__init__.py +0 -27
  1435. paddlex/ppcls/utils/check.py +0 -151
  1436. paddlex/ppcls/utils/config.py +0 -210
  1437. paddlex/ppcls/utils/download.py +0 -319
  1438. paddlex/ppcls/utils/ema.py +0 -63
  1439. paddlex/ppcls/utils/logger.py +0 -137
  1440. paddlex/ppcls/utils/metrics.py +0 -112
  1441. paddlex/ppcls/utils/misc.py +0 -63
  1442. paddlex/ppcls/utils/model_zoo.py +0 -213
  1443. paddlex/ppcls/utils/profiler.py +0 -111
  1444. paddlex/ppcls/utils/save_load.py +0 -136
  1445. paddlex/ppdet/__init__.py +0 -16
  1446. paddlex/ppdet/core/__init__.py +0 -15
  1447. paddlex/ppdet/core/config/__init__.py +0 -13
  1448. paddlex/ppdet/core/config/schema.py +0 -248
  1449. paddlex/ppdet/core/config/yaml_helpers.py +0 -118
  1450. paddlex/ppdet/core/workspace.py +0 -278
  1451. paddlex/ppdet/data/__init__.py +0 -21
  1452. paddlex/ppdet/data/crop_utils/__init__.py +0 -13
  1453. paddlex/ppdet/data/crop_utils/annotation_cropper.py +0 -585
  1454. paddlex/ppdet/data/crop_utils/chip_box_utils.py +0 -170
  1455. paddlex/ppdet/data/reader.py +0 -302
  1456. paddlex/ppdet/data/shm_utils.py +0 -67
  1457. paddlex/ppdet/data/source/__init__.py +0 -29
  1458. paddlex/ppdet/data/source/category.py +0 -904
  1459. paddlex/ppdet/data/source/coco.py +0 -251
  1460. paddlex/ppdet/data/source/dataset.py +0 -197
  1461. paddlex/ppdet/data/source/keypoint_coco.py +0 -669
  1462. paddlex/ppdet/data/source/mot.py +0 -636
  1463. paddlex/ppdet/data/source/sniper_coco.py +0 -191
  1464. paddlex/ppdet/data/source/voc.py +0 -231
  1465. paddlex/ppdet/data/source/widerface.py +0 -180
  1466. paddlex/ppdet/data/transform/__init__.py +0 -28
  1467. paddlex/ppdet/data/transform/atss_assigner.py +0 -270
  1468. paddlex/ppdet/data/transform/autoaugment_utils.py +0 -1591
  1469. paddlex/ppdet/data/transform/batch_operators.py +0 -1080
  1470. paddlex/ppdet/data/transform/gridmask_utils.py +0 -86
  1471. paddlex/ppdet/data/transform/keypoint_operators.py +0 -868
  1472. paddlex/ppdet/data/transform/mot_operators.py +0 -628
  1473. paddlex/ppdet/data/transform/op_helper.py +0 -498
  1474. paddlex/ppdet/data/transform/operators.py +0 -3025
  1475. paddlex/ppdet/engine/__init__.py +0 -30
  1476. paddlex/ppdet/engine/callbacks.py +0 -340
  1477. paddlex/ppdet/engine/env.py +0 -50
  1478. paddlex/ppdet/engine/export_utils.py +0 -177
  1479. paddlex/ppdet/engine/tracker.py +0 -538
  1480. paddlex/ppdet/engine/trainer.py +0 -723
  1481. paddlex/ppdet/metrics/__init__.py +0 -29
  1482. paddlex/ppdet/metrics/coco_utils.py +0 -184
  1483. paddlex/ppdet/metrics/json_results.py +0 -149
  1484. paddlex/ppdet/metrics/keypoint_metrics.py +0 -401
  1485. paddlex/ppdet/metrics/map_utils.py +0 -444
  1486. paddlex/ppdet/metrics/mcmot_metrics.py +0 -470
  1487. paddlex/ppdet/metrics/metrics.py +0 -434
  1488. paddlex/ppdet/metrics/mot_metrics.py +0 -1236
  1489. paddlex/ppdet/metrics/munkres.py +0 -428
  1490. paddlex/ppdet/metrics/widerface_utils.py +0 -393
  1491. paddlex/ppdet/model_zoo/__init__.py +0 -18
  1492. paddlex/ppdet/model_zoo/model_zoo.py +0 -84
  1493. paddlex/ppdet/modeling/__init__.py +0 -45
  1494. paddlex/ppdet/modeling/architectures/__init__.py +0 -51
  1495. paddlex/ppdet/modeling/architectures/blazeface.py +0 -91
  1496. paddlex/ppdet/modeling/architectures/cascade_rcnn.py +0 -144
  1497. paddlex/ppdet/modeling/architectures/centernet.py +0 -108
  1498. paddlex/ppdet/modeling/architectures/deepsort.py +0 -69
  1499. paddlex/ppdet/modeling/architectures/detr.py +0 -93
  1500. paddlex/ppdet/modeling/architectures/fairmot.py +0 -100
  1501. paddlex/ppdet/modeling/architectures/faster_rcnn.py +0 -106
  1502. paddlex/ppdet/modeling/architectures/fcos.py +0 -105
  1503. paddlex/ppdet/modeling/architectures/gfl.py +0 -87
  1504. paddlex/ppdet/modeling/architectures/jde.py +0 -111
  1505. paddlex/ppdet/modeling/architectures/keypoint_hrhrnet.py +0 -287
  1506. paddlex/ppdet/modeling/architectures/keypoint_hrnet.py +0 -267
  1507. paddlex/ppdet/modeling/architectures/mask_rcnn.py +0 -135
  1508. paddlex/ppdet/modeling/architectures/meta_arch.py +0 -128
  1509. paddlex/ppdet/modeling/architectures/picodet.py +0 -91
  1510. paddlex/ppdet/modeling/architectures/s2anet.py +0 -102
  1511. paddlex/ppdet/modeling/architectures/solov2.py +0 -110
  1512. paddlex/ppdet/modeling/architectures/sparse_rcnn.py +0 -99
  1513. paddlex/ppdet/modeling/architectures/ssd.py +0 -93
  1514. paddlex/ppdet/modeling/architectures/tood.py +0 -78
  1515. paddlex/ppdet/modeling/architectures/ttfnet.py +0 -98
  1516. paddlex/ppdet/modeling/architectures/yolo.py +0 -124
  1517. paddlex/ppdet/modeling/assigners/__init__.py +0 -23
  1518. paddlex/ppdet/modeling/assigners/atss_assigner.py +0 -211
  1519. paddlex/ppdet/modeling/assigners/simota_assigner.py +0 -262
  1520. paddlex/ppdet/modeling/assigners/task_aligned_assigner.py +0 -158
  1521. paddlex/ppdet/modeling/assigners/utils.py +0 -195
  1522. paddlex/ppdet/modeling/backbones/__init__.py +0 -49
  1523. paddlex/ppdet/modeling/backbones/blazenet.py +0 -323
  1524. paddlex/ppdet/modeling/backbones/darknet.py +0 -340
  1525. paddlex/ppdet/modeling/backbones/dla.py +0 -244
  1526. paddlex/ppdet/modeling/backbones/esnet.py +0 -290
  1527. paddlex/ppdet/modeling/backbones/ghostnet.py +0 -470
  1528. paddlex/ppdet/modeling/backbones/hardnet.py +0 -224
  1529. paddlex/ppdet/modeling/backbones/hrnet.py +0 -727
  1530. paddlex/ppdet/modeling/backbones/lcnet.py +0 -259
  1531. paddlex/ppdet/modeling/backbones/lite_hrnet.py +0 -886
  1532. paddlex/ppdet/modeling/backbones/mobilenet_v1.py +0 -418
  1533. paddlex/ppdet/modeling/backbones/mobilenet_v3.py +0 -483
  1534. paddlex/ppdet/modeling/backbones/name_adapter.py +0 -69
  1535. paddlex/ppdet/modeling/backbones/res2net.py +0 -358
  1536. paddlex/ppdet/modeling/backbones/resnet.py +0 -613
  1537. paddlex/ppdet/modeling/backbones/senet.py +0 -139
  1538. paddlex/ppdet/modeling/backbones/shufflenet_v2.py +0 -246
  1539. paddlex/ppdet/modeling/backbones/swin_transformer.py +0 -743
  1540. paddlex/ppdet/modeling/backbones/vgg.py +0 -210
  1541. paddlex/ppdet/modeling/bbox_utils.py +0 -778
  1542. paddlex/ppdet/modeling/heads/__init__.py +0 -53
  1543. paddlex/ppdet/modeling/heads/bbox_head.py +0 -377
  1544. paddlex/ppdet/modeling/heads/cascade_head.py +0 -284
  1545. paddlex/ppdet/modeling/heads/centernet_head.py +0 -292
  1546. paddlex/ppdet/modeling/heads/detr_head.py +0 -368
  1547. paddlex/ppdet/modeling/heads/face_head.py +0 -110
  1548. paddlex/ppdet/modeling/heads/fcos_head.py +0 -259
  1549. paddlex/ppdet/modeling/heads/gfl_head.py +0 -487
  1550. paddlex/ppdet/modeling/heads/keypoint_hrhrnet_head.py +0 -108
  1551. paddlex/ppdet/modeling/heads/mask_head.py +0 -250
  1552. paddlex/ppdet/modeling/heads/pico_head.py +0 -278
  1553. paddlex/ppdet/modeling/heads/roi_extractor.py +0 -111
  1554. paddlex/ppdet/modeling/heads/s2anet_head.py +0 -1056
  1555. paddlex/ppdet/modeling/heads/simota_head.py +0 -506
  1556. paddlex/ppdet/modeling/heads/solov2_head.py +0 -560
  1557. paddlex/ppdet/modeling/heads/sparsercnn_head.py +0 -375
  1558. paddlex/ppdet/modeling/heads/ssd_head.py +0 -215
  1559. paddlex/ppdet/modeling/heads/tood_head.py +0 -366
  1560. paddlex/ppdet/modeling/heads/ttf_head.py +0 -316
  1561. paddlex/ppdet/modeling/heads/yolo_head.py +0 -124
  1562. paddlex/ppdet/modeling/initializer.py +0 -317
  1563. paddlex/ppdet/modeling/keypoint_utils.py +0 -342
  1564. paddlex/ppdet/modeling/layers.py +0 -1430
  1565. paddlex/ppdet/modeling/losses/__init__.py +0 -43
  1566. paddlex/ppdet/modeling/losses/ctfocal_loss.py +0 -68
  1567. paddlex/ppdet/modeling/losses/detr_loss.py +0 -233
  1568. paddlex/ppdet/modeling/losses/fairmot_loss.py +0 -41
  1569. paddlex/ppdet/modeling/losses/fcos_loss.py +0 -225
  1570. paddlex/ppdet/modeling/losses/gfocal_loss.py +0 -217
  1571. paddlex/ppdet/modeling/losses/iou_aware_loss.py +0 -47
  1572. paddlex/ppdet/modeling/losses/iou_loss.py +0 -210
  1573. paddlex/ppdet/modeling/losses/jde_loss.py +0 -193
  1574. paddlex/ppdet/modeling/losses/keypoint_loss.py +0 -229
  1575. paddlex/ppdet/modeling/losses/solov2_loss.py +0 -101
  1576. paddlex/ppdet/modeling/losses/sparsercnn_loss.py +0 -425
  1577. paddlex/ppdet/modeling/losses/ssd_loss.py +0 -170
  1578. paddlex/ppdet/modeling/losses/varifocal_loss.py +0 -152
  1579. paddlex/ppdet/modeling/losses/yolo_loss.py +0 -212
  1580. paddlex/ppdet/modeling/mot/__init__.py +0 -25
  1581. paddlex/ppdet/modeling/mot/matching/__init__.py +0 -19
  1582. paddlex/ppdet/modeling/mot/matching/deepsort_matching.py +0 -382
  1583. paddlex/ppdet/modeling/mot/matching/jde_matching.py +0 -144
  1584. paddlex/ppdet/modeling/mot/motion/__init__.py +0 -17
  1585. paddlex/ppdet/modeling/mot/motion/kalman_filter.py +0 -270
  1586. paddlex/ppdet/modeling/mot/tracker/__init__.py +0 -23
  1587. paddlex/ppdet/modeling/mot/tracker/base_jde_tracker.py +0 -297
  1588. paddlex/ppdet/modeling/mot/tracker/base_sde_tracker.py +0 -156
  1589. paddlex/ppdet/modeling/mot/tracker/deepsort_tracker.py +0 -188
  1590. paddlex/ppdet/modeling/mot/tracker/jde_tracker.py +0 -277
  1591. paddlex/ppdet/modeling/mot/utils.py +0 -263
  1592. paddlex/ppdet/modeling/mot/visualization.py +0 -150
  1593. paddlex/ppdet/modeling/necks/__init__.py +0 -30
  1594. paddlex/ppdet/modeling/necks/bifpn.py +0 -302
  1595. paddlex/ppdet/modeling/necks/blazeface_fpn.py +0 -216
  1596. paddlex/ppdet/modeling/necks/centernet_fpn.py +0 -426
  1597. paddlex/ppdet/modeling/necks/csp_pan.py +0 -364
  1598. paddlex/ppdet/modeling/necks/fpn.py +0 -231
  1599. paddlex/ppdet/modeling/necks/hrfpn.py +0 -126
  1600. paddlex/ppdet/modeling/necks/ttf_fpn.py +0 -242
  1601. paddlex/ppdet/modeling/necks/yolo_fpn.py +0 -988
  1602. paddlex/ppdet/modeling/ops.py +0 -1611
  1603. paddlex/ppdet/modeling/post_process.py +0 -731
  1604. paddlex/ppdet/modeling/proposal_generator/__init__.py +0 -2
  1605. paddlex/ppdet/modeling/proposal_generator/anchor_generator.py +0 -135
  1606. paddlex/ppdet/modeling/proposal_generator/proposal_generator.py +0 -77
  1607. paddlex/ppdet/modeling/proposal_generator/rpn_head.py +0 -260
  1608. paddlex/ppdet/modeling/proposal_generator/target.py +0 -681
  1609. paddlex/ppdet/modeling/proposal_generator/target_layer.py +0 -491
  1610. paddlex/ppdet/modeling/reid/__init__.py +0 -25
  1611. paddlex/ppdet/modeling/reid/fairmot_embedding_head.py +0 -225
  1612. paddlex/ppdet/modeling/reid/jde_embedding_head.py +0 -214
  1613. paddlex/ppdet/modeling/reid/pplcnet_embedding.py +0 -282
  1614. paddlex/ppdet/modeling/reid/pyramidal_embedding.py +0 -144
  1615. paddlex/ppdet/modeling/reid/resnet.py +0 -310
  1616. paddlex/ppdet/modeling/shape_spec.py +0 -25
  1617. paddlex/ppdet/modeling/transformers/__init__.py +0 -25
  1618. paddlex/ppdet/modeling/transformers/deformable_transformer.py +0 -517
  1619. paddlex/ppdet/modeling/transformers/detr_transformer.py +0 -353
  1620. paddlex/ppdet/modeling/transformers/matchers.py +0 -127
  1621. paddlex/ppdet/modeling/transformers/position_encoding.py +0 -108
  1622. paddlex/ppdet/modeling/transformers/utils.py +0 -110
  1623. paddlex/ppdet/optimizer.py +0 -335
  1624. paddlex/ppdet/slim/__init__.py +0 -82
  1625. paddlex/ppdet/slim/distill.py +0 -110
  1626. paddlex/ppdet/slim/prune.py +0 -85
  1627. paddlex/ppdet/slim/quant.py +0 -84
  1628. paddlex/ppdet/slim/unstructured_prune.py +0 -66
  1629. paddlex/ppdet/utils/__init__.py +0 -13
  1630. paddlex/ppdet/utils/check.py +0 -112
  1631. paddlex/ppdet/utils/checkpoint.py +0 -226
  1632. paddlex/ppdet/utils/cli.py +0 -151
  1633. paddlex/ppdet/utils/colormap.py +0 -58
  1634. paddlex/ppdet/utils/download.py +0 -558
  1635. paddlex/ppdet/utils/logger.py +0 -70
  1636. paddlex/ppdet/utils/profiler.py +0 -111
  1637. paddlex/ppdet/utils/stats.py +0 -94
  1638. paddlex/ppdet/utils/visualizer.py +0 -321
  1639. paddlex/ppdet/utils/voc_utils.py +0 -86
  1640. paddlex/seg.py +0 -41
  1641. paddlex/tools/__init__.py +0 -17
  1642. paddlex/tools/anchor_clustering/__init__.py +0 -15
  1643. paddlex/tools/anchor_clustering/yolo_cluster.py +0 -178
  1644. paddlex/tools/convert.py +0 -52
  1645. paddlex/tools/dataset_conversion/__init__.py +0 -24
  1646. paddlex/tools/dataset_conversion/x2coco.py +0 -379
  1647. paddlex/tools/dataset_conversion/x2imagenet.py +0 -82
  1648. paddlex/tools/dataset_conversion/x2seg.py +0 -343
  1649. paddlex/tools/dataset_conversion/x2voc.py +0 -230
  1650. paddlex/tools/dataset_split/__init__.py +0 -23
  1651. paddlex/tools/dataset_split/coco_split.py +0 -69
  1652. paddlex/tools/dataset_split/imagenet_split.py +0 -75
  1653. paddlex/tools/dataset_split/seg_split.py +0 -96
  1654. paddlex/tools/dataset_split/utils.py +0 -75
  1655. paddlex/tools/dataset_split/voc_split.py +0 -91
  1656. paddlex/tools/split.py +0 -41
  1657. paddlex/utils/checkpoint.py +0 -492
  1658. paddlex/utils/shm.py +0 -67
  1659. paddlex/utils/stats.py +0 -68
  1660. paddlex/utils/utils.py +0 -229
  1661. paddlex-2.1.0.data/data/paddlex_restful/restful/templates/paddlex_restful_demo.html +0 -5205
  1662. paddlex-2.1.0.dist-info/LICENSE +0 -201
  1663. paddlex-2.1.0.dist-info/METADATA +0 -32
  1664. paddlex-2.1.0.dist-info/RECORD +0 -698
  1665. paddlex-2.1.0.dist-info/WHEEL +0 -5
  1666. paddlex-2.1.0.dist-info/entry_points.txt +0 -4
  1667. paddlex-2.1.0.dist-info/top_level.txt +0 -3
  1668. paddlex_restful/__init__.py +0 -15
  1669. paddlex_restful/command.py +0 -63
  1670. paddlex_restful/restful/__init__.py +0 -15
  1671. paddlex_restful/restful/app.py +0 -969
  1672. paddlex_restful/restful/dataset/__init__.py +0 -13
  1673. paddlex_restful/restful/dataset/cls_dataset.py +0 -159
  1674. paddlex_restful/restful/dataset/dataset.py +0 -266
  1675. paddlex_restful/restful/dataset/datasetbase.py +0 -86
  1676. paddlex_restful/restful/dataset/det_dataset.py +0 -190
  1677. paddlex_restful/restful/dataset/ins_seg_dataset.py +0 -312
  1678. paddlex_restful/restful/dataset/operate.py +0 -155
  1679. paddlex_restful/restful/dataset/seg_dataset.py +0 -222
  1680. paddlex_restful/restful/dataset/utils.py +0 -267
  1681. paddlex_restful/restful/demo.py +0 -202
  1682. paddlex_restful/restful/dir.py +0 -45
  1683. paddlex_restful/restful/model.py +0 -312
  1684. paddlex_restful/restful/project/__init__.py +0 -13
  1685. paddlex_restful/restful/project/evaluate/__init__.py +0 -13
  1686. paddlex_restful/restful/project/evaluate/classification.py +0 -126
  1687. paddlex_restful/restful/project/evaluate/detection.py +0 -789
  1688. paddlex_restful/restful/project/evaluate/draw_pred_result.py +0 -181
  1689. paddlex_restful/restful/project/evaluate/segmentation.py +0 -122
  1690. paddlex_restful/restful/project/operate.py +0 -931
  1691. paddlex_restful/restful/project/project.py +0 -143
  1692. paddlex_restful/restful/project/prune/__init__.py +0 -13
  1693. paddlex_restful/restful/project/prune/classification.py +0 -32
  1694. paddlex_restful/restful/project/prune/detection.py +0 -48
  1695. paddlex_restful/restful/project/prune/segmentation.py +0 -34
  1696. paddlex_restful/restful/project/task.py +0 -884
  1697. paddlex_restful/restful/project/train/__init__.py +0 -13
  1698. paddlex_restful/restful/project/train/classification.py +0 -141
  1699. paddlex_restful/restful/project/train/detection.py +0 -263
  1700. paddlex_restful/restful/project/train/params.py +0 -432
  1701. paddlex_restful/restful/project/train/params_v2.py +0 -326
  1702. paddlex_restful/restful/project/train/segmentation.py +0 -191
  1703. paddlex_restful/restful/project/visualize.py +0 -244
  1704. paddlex_restful/restful/system.py +0 -102
  1705. paddlex_restful/restful/templates/paddlex_restful_demo.html +0 -5205
  1706. paddlex_restful/restful/utils.py +0 -841
  1707. paddlex_restful/restful/workspace.py +0 -343
  1708. paddlex_restful/restful/workspace_pb2.py +0 -1411
@@ -0,0 +1,2385 @@
1
+ # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ __all__ = [
16
+ "get_sub_regions_ocr_res",
17
+ "get_layout_ordering",
18
+ "get_single_block_parsing_res",
19
+ "get_show_color",
20
+ "sorted_layout_boxes",
21
+ ]
22
+
23
+ import numpy as np
24
+ from PIL import Image
25
+ import uuid
26
+ import re
27
+ from pathlib import Path
28
+ from copy import deepcopy
29
+ from typing import Optional, Union, List, Tuple, Dict, Any
30
+ from ..ocr.result import OCRResult
31
+ from ...models.object_detection.result import DetResult
32
+ from ..components import convert_points_to_boxes
33
+
34
+
35
def get_overlap_boxes_idx(
    src_boxes: np.ndarray, ref_boxes: np.ndarray, threshold: float = 3
) -> List:
    """
    Get the indices of source boxes that overlap with reference boxes.

    A source box matches a reference box when their intersection is strictly
    wider AND strictly taller than `threshold`.

    Args:
        src_boxes (np.ndarray): A 2D array of source bounding boxes; columns 0-3 are
            read as [x_min, y_min, x_max, y_max]. A nested list is accepted as well.
        ref_boxes (np.ndarray): Reference bounding boxes in the same layout.
        threshold (float): Exclusive lower bound for the intersection width and height.
            Defaults to 3, the value that used to be hard-coded.
    Returns:
        match_idx_list (list): Indices (plain Python ints) of source boxes that overlap
            a reference box. An index appears once per reference box it overlaps,
            so the list may contain duplicates.
    """
    match_idx_list = []
    if len(src_boxes) == 0 or len(ref_boxes) == 0:
        return match_idx_list

    # Column slicing below needs an ndarray; this is a no-op for array input.
    src_boxes = np.asarray(src_boxes)
    for ref_box in ref_boxes:
        # Intersection rectangle of this reference box with every source box at once.
        x1 = np.maximum(ref_box[0], src_boxes[:, 0])
        y1 = np.maximum(ref_box[1], src_boxes[:, 1])
        x2 = np.minimum(ref_box[2], src_boxes[:, 2])
        y2 = np.minimum(ref_box[3], src_boxes[:, 3])
        pub_w = x2 - x1
        pub_h = y2 - y1
        match_idx = np.where((pub_w > threshold) & (pub_h > threshold))[0]
        # tolist() yields built-in ints instead of numpy scalars.
        match_idx_list.extend(match_idx.tolist())
    return match_idx_list
59
+
60
+
61
def get_sub_regions_ocr_res(
    overall_ocr_res: OCRResult,
    object_boxes: List,
    flag_within: bool = True,
    return_match_idx: bool = False,
) -> Union[Dict[str, Any], Tuple[Dict[str, Any], List[int]]]:
    """
    Filters OCR results by whether each text box overlaps any of the given object boxes.

    Args:
        overall_ocr_res (OCRResult): The original OCR result; the keys "rec_boxes",
            "rec_polys", "rec_texts" and "rec_scores" are read, indexed by box number.
        object_boxes (list): A list of bounding boxes for the objects of interest.
        flag_within (bool): If True, keep the text boxes that overlap an object box.
            If False, keep the text boxes that do not.
        return_match_idx (bool): If True, also return the list of matching indices.

    Returns:
        dict: A plain dict with the keys "rec_polys", "rec_texts", "rec_scores" and
            "rec_boxes" holding the kept entries in their original order. All but
            "rec_texts" are converted to numpy arrays.
        When `return_match_idx` is True, a tuple `(dict, match_idx_list)` is returned
        instead, where `match_idx_list` holds the de-duplicated indices of the text
        boxes that overlap an object box (independent of `flag_within`).
    """
    overall_text_boxes = overall_ocr_res["rec_boxes"]
    # A set gives O(1) membership tests in the per-box filter below.
    matched = set(get_overlap_boxes_idx(overall_text_boxes, object_boxes))
    match_idx_list = list(matched)

    keep_matched = bool(flag_within)
    kept = [
        box_no
        for box_no in range(len(overall_text_boxes))
        if (box_no in matched) == keep_matched
    ]

    sub_regions_ocr_res = {
        key: [overall_ocr_res[key][box_no] for box_no in kept]
        for key in ("rec_polys", "rec_texts", "rec_scores", "rec_boxes")
    }
    for key in ["rec_polys", "rec_scores", "rec_boxes"]:
        sub_regions_ocr_res[key] = np.array(sub_regions_ocr_res[key])

    return (
        (sub_regions_ocr_res, match_idx_list)
        if return_match_idx
        else sub_regions_ocr_res
    )
119
+
120
+
121
def sorted_layout_boxes(res, w):
    """
    Order layout blocks top to bottom, reading a left column before a right column.

    Args:
        res: List of dictionaries, each with a "block_bbox" entry whose items 0, 1, 2
            are read as x_min, y_min, x_max.
        w: Width of image.

    Returns:
        List of dictionaries containing sorted layout information. A single-element
        input is returned as is.
    """
    if len(res) == 1:
        return res

    # Primary order: top edge, ties broken by left edge.
    ordered = sorted(
        res, key=lambda item: (item["block_bbox"][1], item["block_bbox"][0])
    )

    result = []
    left_col = []
    right_col = []

    for block in ordered:
        bbox = block["block_bbox"]
        if bbox[0] < w / 4 and bbox[2] < 3 * w / 5:
            # Starts near the left edge and ends before 3/5 of the width.
            left_col.append(block)
        elif bbox[0] > 2 * w / 5:
            # Starts beyond 2/5 of the width.
            right_col.append(block)
        else:
            # Neither column: flush what was collected so far, then emit this block.
            result.extend(left_col)
            result.extend(right_col)
            result.append(block)
            left_col = []
            right_col = []

    left_col.sort(key=lambda item: item["block_bbox"][1])
    right_col.sort(key=lambda item: item["block_bbox"][1])

    result.extend(left_col)
    result.extend(right_col)
    return result
174
+
175
+
176
+ def _calculate_overlap_area_div_minbox_area_ratio(
177
+ bbox1: Union[list, tuple],
178
+ bbox2: Union[list, tuple],
179
+ ) -> float:
180
+ """
181
+ Calculate the ratio of the overlap area between bbox1 and bbox2
182
+ to the area of the smaller bounding box.
183
+
184
+ Args:
185
+ bbox1 (list or tuple): Coordinates of the first bounding box [x_min, y_min, x_max, y_max].
186
+ bbox2 (list or tuple): Coordinates of the second bounding box [x_min, y_min, x_max, y_max].
187
+
188
+ Returns:
189
+ float: The ratio of the overlap area to the area of the smaller bounding box.
190
+ """
191
+ bbox1 = list(map(int, bbox1))
192
+ bbox2 = list(map(int, bbox2))
193
+
194
+ x_left = max(bbox1[0], bbox2[0])
195
+ y_top = max(bbox1[1], bbox2[1])
196
+ x_right = min(bbox1[2], bbox2[2])
197
+ y_bottom = min(bbox1[3], bbox2[3])
198
+
199
+ if x_right <= x_left or y_bottom <= y_top:
200
+ return 0.0
201
+
202
+ intersection_area = (x_right - x_left) * (y_bottom - y_top)
203
+ area_bbox1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
204
+ area_bbox2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
205
+ min_box_area = min(area_bbox1, area_bbox2)
206
+
207
+ if min_box_area <= 0:
208
+ return 0.0
209
+
210
+ return intersection_area / min_box_area
211
+
212
+
213
+ def _whether_y_overlap_exceeds_threshold(
214
+ bbox1: Union[list, tuple],
215
+ bbox2: Union[list, tuple],
216
+ overlap_ratio_threshold: float = 0.6,
217
+ ) -> bool:
218
+ """
219
+ Determines whether the vertical overlap between two bounding boxes exceeds a given threshold.
220
+
221
+ Args:
222
+ bbox1 (list or tuple): The first bounding box defined as (left, top, right, bottom).
223
+ bbox2 (list or tuple): The second bounding box defined as (left, top, right, bottom).
224
+ overlap_ratio_threshold (float): The threshold ratio to determine if the overlap is significant.
225
+ Defaults to 0.6.
226
+
227
+ Returns:
228
+ bool: True if the vertical overlap divided by the minimum height of the two bounding boxes
229
+ exceeds the overlap_ratio_threshold, otherwise False.
230
+ """
231
+ _, y1_0, _, y1_1 = bbox1
232
+ _, y2_0, _, y2_1 = bbox2
233
+
234
+ overlap = max(0, min(y1_1, y2_1) - max(y1_0, y2_0))
235
+ min_height = min(y1_1 - y1_0, y2_1 - y2_0)
236
+
237
+ return (overlap / min_height) > overlap_ratio_threshold
238
+
239
+
240
+ def _adjust_span_text(span: List[str], prepend: bool = False, append: bool = False):
241
+ """
242
+ Adjust the text of a span by prepending or appending a newline.
243
+
244
+ Args:
245
+ span (list): A list where the second element is the text of the span.
246
+ prepend (bool): If True, prepend a newline to the text.
247
+ append (bool): If True, append a newline to the text.
248
+
249
+ Returns:
250
+ None: The function modifies the span in place.
251
+ """
252
+ if prepend:
253
+ span[1] = "\n" + span[1]
254
+ if append:
255
+ span[1] = span[1] + "\n"
256
+ return span
257
+
258
+
259
def _format_line(
    line: List[List[Union[List[int], str]]],
    layout_min: int,
    layout_max: int,
    is_reference: bool = False,
) -> List[List[Union[List[int], str]]]:
    """
    Format a line of text spans based on layout constraints.

    Newlines are added to the first and/or last span of the line depending on how
    far the line sits from the horizontal edges of the layout box:

    - normal line: prepend a newline when the first span starts more than 10 past
      `layout_min`; append one when the last span ends more than 10 before `layout_max`.
    - reference line: prepend a newline when the first span starts less than 5 past
      `layout_min`; append one when the last span ends more than 20 before `layout_max`.

    Args:
        line (list): A list of spans, where each span is a list containing a bounding box and text.
        layout_min (int): The minimum x-coordinate of the layout bounding box.
        layout_max (int): The maximum x-coordinate of the layout bounding box.
        is_reference (bool): A flag indicating whether the line is a reference line, which affects formatting rules.

    Returns:
        list: The same `line` object, modified in place. An empty line is returned unchanged.
    """
    # Nothing to format; also avoids IndexError on line[0] / line[-1].
    if not line:
        return line

    first_span = line[0]
    end_span = line[-1]

    left_gap = first_span[0][0] - layout_min
    right_gap = layout_max - end_span[0][2]

    if is_reference:
        need_prepend = left_gap < 5
        need_append = right_gap > 20
    else:
        need_prepend = left_gap > 10
        need_append = right_gap > 10

    if need_prepend:
        first_span = _adjust_span_text(first_span, prepend=True)
    if need_append:
        end_span = _adjust_span_text(end_span, append=True)

    line[0] = first_span
    line[-1] = end_span

    return line
295
+
296
+
297
def split_boxes_if_x_contained(boxes, offset=1e-5):
    """
    Check if there is any complete containment in the x-direction
    between the bounding boxes and split the containing box accordingly.

    For every box that spans the x-range of another box, the part to the left of
    the contained box is emitted as a separate box and the part to the right
    replaces the containing box for the remaining comparisons. Each input box
    contributes exactly one final entry (its remainder, or itself when nothing
    was cut off), regardless of its position in `boxes`.

    Args:
        boxes (list of lists): Each element is a list containing an ndarray of length 4, a description, and a label.
        offset (float): A small offset value to ensure that the split boxes are not too close to the original boxes.
    Returns:
        A new list of boxes, including split boxes, with the same `rec_text` and `label` attributes.
    """

    def is_x_contained(box_a, box_b):
        """Check if box_a completely contains box_b in the x-direction."""
        return box_a[0][0] <= box_b[0][0] and box_a[0][2] >= box_b[0][2]

    new_boxes = []

    for i, box_a in enumerate(boxes):
        for j, box_b in enumerate(boxes):
            if i == j or not is_x_contained(box_a, box_b):
                continue

            # Part of box_a to the left of box_b: emitted right away as its own box.
            if box_a[0][0] < box_b[0][0]:
                left_end = box_b[0][0] - offset
                if left_end - box_a[0][0] > 1:
                    new_boxes.append(
                        [
                            np.array(
                                [
                                    box_a[0][0],
                                    box_a[0][1],
                                    left_end,
                                    box_a[0][3],
                                ]
                            ),
                            box_a[1],
                            box_a[2],
                        ]
                    )

            # Part of box_a to the right of box_b: becomes the new box_a, so it can
            # be split again by boxes that come later in the list.
            if box_a[0][2] > box_b[0][2]:
                w = box_a[0][2] - box_b[0][2] + offset
                if w > 1:
                    box_a = [
                        np.array(
                            [
                                box_b[0][2] + offset,
                                box_a[0][1],
                                box_a[0][2],
                                box_a[0][3],
                            ]
                        ),
                        box_a[1],
                        box_a[2],
                    ]

        # Emit once per input box, after all comparisons. Doing this inside the
        # inner loop on the last j skipped the last box (i == j), dropping its
        # remainder.
        new_boxes.append(box_a)

    return new_boxes
363
+
364
+
365
def _sort_line_by_x_projection(
    input_img: np.ndarray,
    general_ocr_pipeline: Any,
    line: List[List[Union[List[int], str]]],
) -> List[List[Union[List[int], str]]]:
    """
    Split spans that horizontally contain other spans, order the result by left
    edge, and re-recognize the text of the "text" spans.

    Args:
        input_img (ndarray): The input image used for OCR.
        general_ocr_pipeline (Any): The general OCR pipeline used for text recognition;
            its `text_rec_model` attribute is called on the cropped span images.
        line (list): A list of spans, where each span is a list containing a bounding box,
            its text and its label.

    Returns:
        list: `line` itself when splitting did not change the number of spans;
            otherwise the split spans sorted by their x_min, with the text of every
            span labelled "text" replaced by a fresh recognition result.
    """
    splited_boxes = split_boxes_if_x_contained(line)

    # Same span count is taken to mean nothing was split: keep the line untouched.
    if len(line) == len(splited_boxes):
        return line

    splited_boxes.sort(key=lambda span: span[0][0])
    text_rec_model = general_ocr_pipeline.text_rec_model
    for span in splited_boxes:
        if span[2] == "text":
            # The span geometry may have changed, so its text is recognized again
            # from the corresponding image crop (rows = y range, columns = x range).
            # NOTE(review): spans that were not split are the same objects as in
            # `line`, so this assignment also updates them there.
            crop_img = input_img[
                int(span[0][1]) : int(span[0][3]),
                int(span[0][0]) : int(span[0][2]),
            ]
            span[1] = next(text_rec_model([crop_img]))["rec_text"]

    return splited_boxes
398
+
399
+
400
def _sort_ocr_res_by_y_projection(
    input_img: np.ndarray,
    general_ocr_pipeline: Any,
    label: Any,
    block_bbox: Tuple[int, int, int, int],
    ocr_res: Dict[str, List[Any]],
    line_height_iou_threshold: float = 0.7,
) -> Tuple[Dict[str, List[Any]], int]:
    """
    Group the OCR spans of one layout block into text lines and order them for reading.

    Spans are sorted top to bottom and merged into a line for as long as
    `_whether_y_overlap_exceeds_threshold` accepts their vertical extent against the
    running line. Each line is then sorted left to right, optionally split and
    re-recognised when it contains a "formula" span, and formatted by `_format_line`
    depending on `label`.

    Args:
        input_img (ndarray): The input image used for OCR.
        general_ocr_pipeline (Any): The general OCR pipeline used for text recognition.
        label (Any): Layout label of the block. "reference" lines are formatted against
            the horizontal extent of the OCR boxes, "content" lines are not formatted
            and are joined into one string per line, and every other label is
            formatted against `block_bbox`.
        block_bbox (Tuple[int, int, int, int]): The layout bounding box as
            (left, top, right, bottom).
        ocr_res (Dict[str, List[Any]]): OCR results with the keys:
            - "boxes": bounding boxes, each [left, top, right, bottom].
            - "rec_texts": recognised text for each box.
            - "rec_labels": label for each box (for example "text" or "formula").
        line_height_iou_threshold (float): Threshold passed to the vertical-overlap
            check that decides whether a span joins the current line. Defaults to 0.7.

    Returns:
        Tuple[Dict[str, List[Any]], int]: `ocr_res` (modified in place) with "boxes"
        and "rec_texts" rewritten in reading order, and the number of detected lines.
        For the "content" label "rec_texts" holds one string per line; otherwise it
        holds one string per span, each with a trailing space.
    """
    assert (
        ocr_res["boxes"] and ocr_res["rec_texts"]
    ), "OCR results must contain 'boxes' and 'rec_texts'"

    boxes = ocr_res["boxes"]
    rec_texts = ocr_res["rec_texts"]
    rec_labels = ocr_res["rec_labels"]

    x_min, _, x_max, _ = block_bbox
    inline_x_min = min(box[0] for box in boxes)
    inline_x_max = max(box[2] for box in boxes)

    # Spans are mutable lists because the formula path rewrites their text in place.
    spans = [list(span) for span in zip(boxes, rec_texts, rec_labels)]
    spans.sort(key=lambda span: span[0][1])

    lines = []
    current_line = [spans[0]]
    current_y0, current_y1 = spans[0][0][1], spans[0][0][3]

    for span in spans[1:]:
        y0, y1 = span[0][1], span[0][3]
        if _whether_y_overlap_exceeds_threshold(
            (0, current_y0, 0, current_y1),
            (0, y0, 0, y1),
            line_height_iou_threshold,
        ):
            current_line.append(span)
            # Grow the running line so later spans are compared to its full height.
            current_y0 = min(current_y0, y0)
            current_y1 = max(current_y1, y1)
        else:
            lines.append(current_line)
            current_line = [span]
            current_y0, current_y1 = y0, y1

    if current_line:
        lines.append(current_line)

    new_lines = []
    for line in lines:
        line.sort(key=lambda span: span[0][0])

        ocr_labels = [span[2] for span in line]
        if "formula" in ocr_labels:
            line = _sort_line_by_x_projection(input_img, general_ocr_pipeline, line)
        if label == "reference":
            line = _format_line(line, inline_x_min, inline_x_max, is_reference=True)
        elif label != "content":
            line = _format_line(line, x_min, x_max)
        new_lines.append(line)

    ocr_res["boxes"] = [span[0] for line in new_lines for span in line]
    if label == "content":
        ocr_res["rec_texts"] = [
            "".join(f"{span[1]} " for span in line).rstrip() for line in new_lines
        ]
    else:
        ocr_res["rec_texts"] = [span[1] + " " for line in new_lines for span in line]
    return ocr_res, len(new_lines)
488
+
489
+
490
+ def _process_text(input_text: str) -> str:
491
+ """
492
+ Process the input text to handle spaces.
493
+
494
+ The function removes multiple consecutive spaces between Chinese characters and ensures that
495
+ only a single space is retained between Chinese and non-Chinese characters.
496
+
497
+ Args:
498
+ input_text (str): The text to be processed.
499
+
500
+ Returns:
501
+ str: The processed text with properly formatted spaces.
502
+ """
503
+
504
+ def handle_spaces_(text: str) -> str:
505
+ """
506
+ Handle spaces in the text by removing multiple consecutive spaces and inserting a single space
507
+ between Chinese and non-Chinese characters.
508
+
509
+ Args:
510
+ text (str): The text to handle spaces for.
511
+
512
+ Returns:
513
+ str: The text with properly formatted spaces.
514
+ """
515
+ spaces = re.finditer(r"\s+", text)
516
+ processed_text = list(text)
517
+
518
+ for space in reversed(list(spaces)):
519
+ start, end = space.span()
520
+ prev_char = processed_text[start - 1] if start > 0 else ""
521
+ next_char = processed_text[end] if end < len(processed_text) else ""
522
+
523
+ is_prev_chinese = (
524
+ re.match(r"[\u4e00-\u9fff]", prev_char) if prev_char else False
525
+ )
526
+ is_next_chinese = (
527
+ re.match(r"[\u4e00-\u9fff]", next_char) if next_char else False
528
+ )
529
+
530
+ if is_prev_chinese and is_next_chinese:
531
+ processed_text[start:end] = []
532
+ else:
533
+ processed_text[start:end] = [" "]
534
+
535
+ return "".join(processed_text)
536
+
537
+ text_without_spaces = handle_spaces_(input_text)
538
+
539
+ final_text = re.sub(r"\s+", " ", text_without_spaces).strip()
540
+ return final_text
541
+
542
+
543
def get_single_block_parsing_res(
    general_ocr_pipeline: Any,
    overall_ocr_res: OCRResult,
    layout_det_res: DetResult,
    table_res_list: list,
    seal_res_list: list,
) -> OCRResult:
    """
    Extract structured information from OCR and layout detection results.

    Args:
        general_ocr_pipeline (Any): The general OCR pipeline; forwarded to
            `_sort_ocr_res_by_y_projection` for re-recognition of split spans.
        overall_ocr_res (OCRResult): The overall OCR results. The keys read here are:
            - "doc_preprocessor_res"["output_img"]: The image on which OCR was performed.
            - "rec_boxes": A list of recognised text box coordinates.
            - "rec_texts": A list of recognised text corresponding to the boxes.
            - "rec_labels": A list of labels corresponding to the boxes.

        layout_det_res (DetResult): The layout detection results. The key read here is:
            - "boxes": A list of dictionaries with keys "coordinate" for box coordinates
              and "label" for the type of content.

        table_res_list (list): A list of table recognition results, where each item is a
            dictionary containing:
            - "cell_box_list": The cell boxes of the table; the first one is matched
              against the layout box.
            - "pred_html": The predicted HTML representation of the table.

        seal_res_list (List): A list of seal recognition results; each item provides
            "rec_texts". Items are consumed in order, one per "seal" layout box.

    Returns:
        The value returned by `get_layout_ordering` for the collected blocks. Each
        collected block is a dictionary containing:
            - "block_label": The label of the content (e.g., 'table', 'chart', 'image').
            - "block_content": Table HTML, seal text, or the recognised text.
            - "block_bbox": The coordinates of the layout box.
        Text-like blocks additionally carry "seg_start_coordinate",
        "seg_end_coordinate", "num_of_lines" and "block_area"; chart/image blocks carry
        "block_image".
        NOTE(review): the `OCRResult` return annotation is kept for compatibility but
        the collected value is a plain list - confirm against `get_layout_ordering`.
    """

    single_block_layout_parsing_res = []
    input_img = overall_ocr_res["doc_preprocessor_res"]["output_img"]
    seal_index = 0
    with_doc_title = False
    max_block_area = 0.0
    # Positions of paragraph_title entries inside single_block_layout_parsing_res.
    # These must not be layout-box indices: table and seal boxes without a matching
    # result append nothing, so the two index spaces drift apart.
    paragraph_title_indexs = []

    layout_det_res_list, _ = _remove_overlap_blocks(
        deepcopy(layout_det_res["boxes"]),
        threshold=0.5,
        smaller=True,
    )

    for box_info in layout_det_res_list:
        block_bbox = box_info["coordinate"]
        label = box_info["label"]
        rec_res = {"boxes": [], "rec_texts": [], "rec_labels": [], "flag": False}
        seg_start_coordinate = float("inf")
        seg_end_coordinate = float("-inf")
        num_of_lines = 1

        if label == "doc_title":
            with_doc_title = True

        block_area = (block_bbox[2] - block_bbox[0]) * (block_bbox[3] - block_bbox[1])
        max_block_area = max(max_block_area, block_area)

        if label == "table":
            for table_res in table_res_list:
                if len(table_res["cell_box_list"]) == 0:
                    continue
                if (
                    _calculate_overlap_area_div_minbox_area_ratio(
                        block_bbox, table_res["cell_box_list"][0]
                    )
                    > 0.5
                ):
                    single_block_layout_parsing_res.append(
                        {
                            "block_label": label,
                            "block_content": table_res["pred_html"],
                            "block_bbox": block_bbox,
                        },
                    )
                    break
        elif label == "seal":
            # Guard on the running index rather than on the list being non-empty, so
            # extra seal boxes beyond the available results are skipped, not a crash.
            if seal_index < len(seal_res_list):
                single_block_layout_parsing_res.append(
                    {
                        "block_label": label,
                        "block_content": _process_text(
                            ", ".join(seal_res_list[seal_index]["rec_texts"])
                        ),
                        "block_bbox": block_bbox,
                    },
                )
                seal_index += 1
        else:
            # Collect every OCR box that lies mostly inside this layout block.
            overall_text_boxes = overall_ocr_res["rec_boxes"]
            for box_no in range(len(overall_text_boxes)):
                if (
                    _calculate_overlap_area_div_minbox_area_ratio(
                        block_bbox, overall_text_boxes[box_no]
                    )
                    > 0.5
                ):
                    rec_res["boxes"].append(overall_text_boxes[box_no])
                    rec_res["rec_texts"].append(
                        overall_ocr_res["rec_texts"][box_no],
                    )
                    rec_res["rec_labels"].append(
                        overall_ocr_res["rec_labels"][box_no],
                    )
                    rec_res["flag"] = True

            if rec_res["flag"]:
                rec_res, num_of_lines = _sort_ocr_res_by_y_projection(
                    input_img, general_ocr_pipeline, label, block_bbox, rec_res, 0.7
                )
                seg_start_coordinate = rec_res["boxes"][0][0]
                seg_end_coordinate = rec_res["boxes"][-1][2]
                if label == "formula":
                    rec_res["rec_texts"] = [
                        rec_res_text.replace("$", "")
                        for rec_res_text in rec_res["rec_texts"]
                    ]

            if label in ["chart", "image"]:
                x_min, y_min, x_max, y_max = list(map(int, block_bbox))
                img_path = f"imgs/img_in_table_box_{x_min}_{y_min}_{x_max}_{y_max}.jpg"
                img = Image.fromarray(input_img[y_min:y_max, x_min:x_max, ::-1])
                single_block_layout_parsing_res.append(
                    {
                        "block_label": label,
                        "block_content": _process_text("".join(rec_res["rec_texts"])),
                        "block_image": {img_path: img},
                        "block_bbox": block_bbox,
                    },
                )
            else:
                if label in ["doc_title"]:
                    content = " ".join(rec_res["rec_texts"])
                elif label in ["content"]:
                    content = "\n".join(rec_res["rec_texts"])
                else:
                    content = "".join(rec_res["rec_texts"])
                    if label != "reference":
                        content = _process_text(content)
                if label == "paragraph_title":
                    # Remember where this entry lands in the result list.
                    paragraph_title_indexs.append(len(single_block_layout_parsing_res))
                single_block_layout_parsing_res.append(
                    {
                        "block_label": label,
                        "block_content": content,
                        "block_bbox": block_bbox,
                        "seg_start_coordinate": seg_start_coordinate,
                        "seg_end_coordinate": seg_end_coordinate,
                        "num_of_lines": num_of_lines,
                        "block_area": block_area,
                    },
                )

    # A lone, comparatively large paragraph title on a page without a document title
    # is promoted to document title.
    if (
        not with_doc_title
        and len(paragraph_title_indexs) == 1
        and single_block_layout_parsing_res[paragraph_title_indexs[0]].get(
            "block_area", 0
        )
        > max_block_area * 0.3
    ):
        single_block_layout_parsing_res[paragraph_title_indexs[0]][
            "block_label"
        ] = "doc_title"

    # Without any layout boxes, fall back to one "text" block per OCR box.
    if len(layout_det_res_list) == 0:
        for ocr_rec_box, ocr_rec_text in zip(
            overall_ocr_res["rec_boxes"], overall_ocr_res["rec_texts"]
        ):
            single_block_layout_parsing_res.append(
                {
                    "block_label": "text",
                    "block_content": ocr_rec_text,
                    "block_bbox": ocr_rec_box,
                    "seg_start_coordinate": ocr_rec_box[0],
                    "seg_end_coordinate": ocr_rec_box[2],
                },
            )

    single_block_layout_parsing_res = get_layout_ordering(
        single_block_layout_parsing_res,
        no_mask_labels=[
            "text",
            "formula",
            "algorithm",
            "reference",
            "content",
            "abstract",
        ],
    )

    return single_block_layout_parsing_res
736
+
737
+
738
+ def _projection_by_bboxes(boxes: np.ndarray, axis: int) -> np.ndarray:
739
+ """
740
+ Generate a 1D projection histogram from bounding boxes along a specified axis.
741
+
742
+ Args:
743
+ boxes: A (N, 4) array of bounding boxes defined by [x_min, y_min, x_max, y_max].
744
+ axis: Axis for projection; 0 for horizontal (x-axis), 1 for vertical (y-axis).
745
+
746
+ Returns:
747
+ A 1D numpy array representing the projection histogram based on bounding box intervals.
748
+ """
749
+ assert axis in [0, 1]
750
+ max_length = np.max(boxes[:, axis::2])
751
+ projection = np.zeros(max_length, dtype=int)
752
+
753
+ # Increment projection histogram over the interval defined by each bounding box
754
+ for start, end in boxes[:, axis::2]:
755
+ projection[start:end] += 1
756
+
757
+ return projection
758
+
759
+
760
+ def _split_projection_profile(arr_values: np.ndarray, min_value: float, min_gap: float):
761
+ """
762
+ Split the projection profile into segments based on specified thresholds.
763
+
764
+ Args:
765
+ arr_values: 1D array representing the projection profile.
766
+ min_value: Minimum value threshold to consider a profile segment significant.
767
+ min_gap: Minimum gap width to consider a separation between segments.
768
+
769
+ Returns:
770
+ A tuple of start and end indices for each segment that meets the criteria.
771
+ """
772
+ # Identify indices where the projection exceeds the minimum value
773
+ significant_indices = np.where(arr_values > min_value)[0]
774
+ if not len(significant_indices):
775
+ return
776
+
777
+ # Calculate gaps between significant indices
778
+ index_diffs = significant_indices[1:] - significant_indices[:-1]
779
+ gap_indices = np.where(index_diffs > min_gap)[0]
780
+
781
+ # Determine start and end indices of segments
782
+ segment_starts = np.insert(
783
+ significant_indices[gap_indices + 1],
784
+ 0,
785
+ significant_indices[0],
786
+ )
787
+ segment_ends = np.append(
788
+ significant_indices[gap_indices],
789
+ significant_indices[-1] + 1,
790
+ )
791
+
792
+ return segment_starts, segment_ends
793
+
794
+
795
+ def _recursive_yx_cut(
796
+ boxes: np.ndarray, indices: List[int], res: List[int], min_gap: int = 1
797
+ ):
798
+ """
799
+ Recursively project and segment bounding boxes, starting with Y-axis and followed by X-axis.
800
+
801
+ Args:
802
+ boxes: A (N, 4) array representing bounding boxes.
803
+ indices: List of indices indicating the original position of boxes.
804
+ res: List to store indices of the final segmented bounding boxes.
805
+ min_gap (int): Minimum gap width to consider a separation between segments on the X-axis. Defaults to 1.
806
+
807
+ Returns:
808
+ None: This function modifies the `res` list in place.
809
+ """
810
+ assert len(boxes) == len(
811
+ indices
812
+ ), "The length of boxes and indices must be the same."
813
+
814
+ # Sort by y_min for Y-axis projection
815
+ y_sorted_indices = boxes[:, 1].argsort()
816
+ y_sorted_boxes = boxes[y_sorted_indices]
817
+ y_sorted_indices = np.array(indices)[y_sorted_indices]
818
+
819
+ # Perform Y-axis projection
820
+ y_projection = _projection_by_bboxes(boxes=y_sorted_boxes, axis=1)
821
+ y_intervals = _split_projection_profile(y_projection, 0, 1)
822
+
823
+ if not y_intervals:
824
+ return
825
+
826
+ # Process each segment defined by Y-axis projection
827
+ for y_start, y_end in zip(*y_intervals):
828
+ # Select boxes within the current y interval
829
+ y_interval_indices = (y_start <= y_sorted_boxes[:, 1]) & (
830
+ y_sorted_boxes[:, 1] < y_end
831
+ )
832
+ y_boxes_chunk = y_sorted_boxes[y_interval_indices]
833
+ y_indices_chunk = y_sorted_indices[y_interval_indices]
834
+
835
+ # Sort by x_min for X-axis projection
836
+ x_sorted_indices = y_boxes_chunk[:, 0].argsort()
837
+ x_sorted_boxes_chunk = y_boxes_chunk[x_sorted_indices]
838
+ x_sorted_indices_chunk = y_indices_chunk[x_sorted_indices]
839
+
840
+ # Perform X-axis projection
841
+ x_projection = _projection_by_bboxes(boxes=x_sorted_boxes_chunk, axis=0)
842
+ x_intervals = _split_projection_profile(x_projection, 0, min_gap)
843
+
844
+ if not x_intervals:
845
+ continue
846
+
847
+ # If X-axis cannot be further segmented, add current indices to results
848
+ if len(x_intervals[0]) == 1:
849
+ res.extend(x_sorted_indices_chunk)
850
+ continue
851
+
852
+ # Recursively process each segment defined by X-axis projection
853
+ for x_start, x_end in zip(*x_intervals):
854
+ x_interval_indices = (x_start <= x_sorted_boxes_chunk[:, 0]) & (
855
+ x_sorted_boxes_chunk[:, 0] < x_end
856
+ )
857
+ _recursive_yx_cut(
858
+ x_sorted_boxes_chunk[x_interval_indices],
859
+ x_sorted_indices_chunk[x_interval_indices],
860
+ res,
861
+ )
862
+
863
+
864
+ def _recursive_xy_cut(
865
+ boxes: np.ndarray, indices: List[int], res: List[int], min_gap: int = 1
866
+ ):
867
+ """
868
+ Recursively performs X-axis projection followed by Y-axis projection to segment bounding boxes.
869
+
870
+ Args:
871
+ boxes: A (N, 4) array representing bounding boxes with [x_min, y_min, x_max, y_max].
872
+ indices: A list of indices representing the position of boxes in the original data.
873
+ res: A list to store indices of bounding boxes that meet the criteria.
874
+ min_gap (int): Minimum gap width to consider a separation between segments on the X-axis. Defaults to 1.
875
+
876
+ Returns:
877
+ None: This function modifies the `res` list in place.
878
+ """
879
+ # Ensure boxes and indices have the same length
880
+ assert len(boxes) == len(
881
+ indices
882
+ ), "The length of boxes and indices must be the same."
883
+
884
+ # Sort by x_min to prepare for X-axis projection
885
+ x_sorted_indices = boxes[:, 0].argsort()
886
+ x_sorted_boxes = boxes[x_sorted_indices]
887
+ x_sorted_indices = np.array(indices)[x_sorted_indices]
888
+
889
+ # Perform X-axis projection
890
+ x_projection = _projection_by_bboxes(boxes=x_sorted_boxes, axis=0)
891
+ x_intervals = _split_projection_profile(x_projection, 0, 1)
892
+
893
+ if not x_intervals:
894
+ return
895
+
896
+ # Process each segment defined by X-axis projection
897
+ for x_start, x_end in zip(*x_intervals):
898
+ # Select boxes within the current x interval
899
+ x_interval_indices = (x_start <= x_sorted_boxes[:, 0]) & (
900
+ x_sorted_boxes[:, 0] < x_end
901
+ )
902
+ x_boxes_chunk = x_sorted_boxes[x_interval_indices]
903
+ x_indices_chunk = x_sorted_indices[x_interval_indices]
904
+
905
+ # Sort selected boxes by y_min to prepare for Y-axis projection
906
+ y_sorted_indices = x_boxes_chunk[:, 1].argsort()
907
+ y_sorted_boxes_chunk = x_boxes_chunk[y_sorted_indices]
908
+ y_sorted_indices_chunk = x_indices_chunk[y_sorted_indices]
909
+
910
+ # Perform Y-axis projection
911
+ y_projection = _projection_by_bboxes(boxes=y_sorted_boxes_chunk, axis=1)
912
+ y_intervals = _split_projection_profile(y_projection, 0, min_gap)
913
+
914
+ if not y_intervals:
915
+ continue
916
+
917
+ # If Y-axis cannot be further segmented, add current indices to results
918
+ if len(y_intervals[0]) == 1:
919
+ res.extend(y_sorted_indices_chunk)
920
+ continue
921
+
922
+ # Recursively process each segment defined by Y-axis projection
923
+ for y_start, y_end in zip(*y_intervals):
924
+ y_interval_indices = (y_start <= y_sorted_boxes_chunk[:, 1]) & (
925
+ y_sorted_boxes_chunk[:, 1] < y_end
926
+ )
927
+ _recursive_xy_cut(
928
+ y_sorted_boxes_chunk[y_interval_indices],
929
+ y_sorted_indices_chunk[y_interval_indices],
930
+ res,
931
+ )
932
+
933
+
934
def sort_by_xycut(
    block_bboxes: Union[np.ndarray, List[List[int]]],
    direction: int = 0,
    min_gap: int = 1,
) -> List[int]:
    """
    Sort bounding boxes using recursive XY cut method based on the specified direction.

    Args:
        block_bboxes (Union[np.ndarray, List[List[int]]]): An array or list of bounding boxes,
                                                           where each box is represented as
                                                           [x_min, y_min, x_max, y_max].
                                                           Coordinates are truncated to
                                                           integers and negative values
                                                           are clamped to 0.
        direction (int): Direction for the initial cut. Use 1 for Y-axis first and 0 for X-axis first.
                         Defaults to 0.
        min_gap (int): Minimum gap width to consider a separation between segments. Defaults to 1.

    Returns:
        List[int]: A list of indices representing the order of sorted bounding boxes.
        Empty input yields an empty list.
    """
    block_bboxes = np.asarray(block_bboxes).astype(int)
    res = []
    if block_bboxes.size == 0:
        # An empty input becomes a 1-D array, which the cut routines cannot index
        # by column.
        return res

    # The cut routines select boxes by comparing their minimum coordinate with
    # segment starts that are never negative, so a box starting below zero would
    # drop out of the ordering. Clamp to the origin first.
    block_bboxes = np.maximum(block_bboxes, 0)

    recursive_cut = _recursive_yx_cut if direction == 1 else _recursive_xy_cut
    recursive_cut(
        block_bboxes,
        np.arange(len(block_bboxes)).tolist(),
        res,
        min_gap,
    )
    return res
970
+
971
+
972
def gather_imgs(original_img, layout_det_objs):
    """
    Crop every "image" and "chart" layout detection out of the page image.

    Args:
        original_img: Page image indexed as `[y, x, channel]`; the last axis is
            reversed before the crop is handed to `Image.fromarray`.
        layout_det_objs: Iterable of detections with "label", "coordinate"
            ([x_min, y_min, x_max, y_max]) and "score" entries.

    Returns:
        list: One dictionary per matching detection with the keys "path", "img",
        "coordinate" (integer tuple) and "score", in input order.
    """
    imgs_in_doc = []
    for det_obj in layout_det_objs:
        if det_obj["label"] not in ("image", "chart"):
            continue

        x_min, y_min, x_max, y_max = (int(value) for value in det_obj["coordinate"])
        img_path = f"imgs/img_in_table_box_{x_min}_{y_min}_{x_max}_{y_max}.jpg"
        crop = original_img[y_min:y_max, x_min:x_max, ::-1]
        img = Image.fromarray(crop)
        entry = {
            "path": img_path,
            "img": img,
            "coordinate": (x_min, y_min, x_max, y_max),
            "score": det_obj["score"],
        }
        imgs_in_doc.append(entry)
    return imgs_in_doc
988
+
989
+
990
def _get_minbox_if_overlap_by_ratio(
    bbox1: Union[List[int], Tuple[int, int, int, int]],
    bbox2: Union[List[int], Tuple[int, int, int, int]],
    ratio: float,
    smaller: bool = True,
) -> Optional[int]:
    """
    Tell which of two bounding boxes is selected when they overlap beyond a given ratio.

    The overlap ratio comes from `_calculate_overlap_area_div_minbox_area_ratio`. When
    it exceeds `ratio`, the function reports the position (1 or 2) of the smaller box,
    or of the larger box when `smaller` is False. It does not return the box itself.

    Args:
        bbox1 (Union[List[int], Tuple[int, int, int, int]]): Coordinates of the first bounding box [x_min, y_min, x_max, y_max].
        bbox2 (Union[List[int], Tuple[int, int, int, int]]): Coordinates of the second bounding box [x_min, y_min, x_max, y_max].
        ratio (float): The overlap ratio threshold.
        smaller (bool): If True, select the smaller bounding box; otherwise, select the larger one.

    Returns:
        Optional[int]: 1 if `bbox1` is selected, 2 if `bbox2` is selected, or None if
        the overlap ratio is not exceeded. Boxes of equal area select `bbox1`.
    """
    # Calculate the overlap ratio using a helper function
    overlap_ratio = _calculate_overlap_area_div_minbox_area_ratio(bbox1, bbox2)
    # Check if the overlap ratio exceeds the threshold
    if not overlap_ratio > ratio:
        return None

    # Calculate the areas of both bounding boxes
    area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
    area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
    if (area1 <= area2 and smaller) or (area1 >= area2 and not smaller):
        return 1
    return 2
1022
+
1023
+
1024
+ def _remove_overlap_blocks(
1025
+ blocks: List[Dict[str, List[int]]], threshold: float = 0.65, smaller: bool = True
1026
+ ) -> Tuple[List[Dict[str, List[int]]], List[Dict[str, List[int]]]]:
1027
+ """
1028
+ Remove overlapping blocks based on a specified overlap ratio threshold.
1029
+
1030
+ Args:
1031
+ blocks (List[Dict[str, List[int]]]): List of block dictionaries, each containing a 'block_bbox' key.
1032
+ threshold (float): Ratio threshold to determine significant overlap.
1033
+ smaller (bool): If True, the smaller block in overlap is removed.
1034
+
1035
+ Returns:
1036
+ Tuple[List[Dict[str, List[int]]], List[Dict[str, List[int]]]]:
1037
+ A tuple containing the updated list of blocks and a list of dropped blocks.
1038
+ """
1039
+ dropped_blocks = []
1040
+ dropped_indexes = set()
1041
+
1042
+ # Iterate over each pair of blocks to find overlaps
1043
+ for i, block1 in enumerate(blocks):
1044
+ for j in range(i + 1, len(blocks)):
1045
+ block2 = blocks[j]
1046
+ # Skip blocks that are already marked for removal
1047
+ if i in dropped_indexes or j in dropped_indexes:
1048
+ continue
1049
+ # Check for overlap and determine which block to remove
1050
+ overlap_box_index = _get_minbox_if_overlap_by_ratio(
1051
+ block1["coordinate"],
1052
+ block2["coordinate"],
1053
+ threshold,
1054
+ smaller=smaller,
1055
+ )
1056
+ if overlap_box_index is not None:
1057
+ # Determine which block to remove based on overlap_box_index
1058
+ if overlap_box_index == 1:
1059
+ drop_index = i
1060
+ else:
1061
+ drop_index = j
1062
+ dropped_indexes.add(drop_index)
1063
+
1064
+ # Remove marked blocks from the original list
1065
+ for index in sorted(dropped_indexes, reverse=True):
1066
+ dropped_blocks.append(blocks[index])
1067
+ del blocks[index]
1068
+
1069
+ return blocks, dropped_blocks
1070
+
1071
+
1072
+ def _get_text_median_width(blocks: List[Dict[str, any]]) -> float:
1073
+ """
1074
+ Calculate the median width of blocks labeled as "text".
1075
+
1076
+ Args:
1077
+ blocks (List[Dict[str, any]]): List of block dictionaries, each containing a 'block_bbox' and 'label'.
1078
+
1079
+ Returns:
1080
+ float: The median width of text blocks, or infinity if no text blocks are found.
1081
+ """
1082
+ widths = [
1083
+ block["block_bbox"][2] - block["block_bbox"][0]
1084
+ for block in blocks
1085
+ if block.get("block_label") == "text"
1086
+ ]
1087
+ return np.median(widths) if widths else float("inf")
1088
+
1089
+
1090
def _get_layout_property(
    blocks: List[Dict[str, Any]],
    median_width: float,
    no_mask_labels: List[str],
    threshold: float = 0.8,
) -> Tuple[List[Dict[str, Any]], bool]:
    """
    Determine the layout (single or double column) of text blocks.

    `blocks` is sorted in place by left edge and then width. Every block whose
    'block_label' is in `no_mask_labels` receives a "layout" entry of "single" or
    "double", based on how other such blocks overlap it horizontally and on its width
    relative to `median_width` and the page width.

    Args:
        blocks (List[Dict[str, Any]]): List of block dictionaries containing
            'block_label' and 'block_bbox' ([x_min, y_min, x_max, y_max]).
        median_width (float): Median width of text blocks.
        no_mask_labels (List[str]): Labels of blocks to be considered for layout analysis.
        threshold (float): Horizontal overlap fraction (relative to the other block's
            width) above which the other block is recorded as a strong match.

    Returns:
        Tuple[List[Dict[str, Any]], bool]: Updated list of blocks with layout information and a boolean
            indicating if the double layout area is greater than the single layout area.
    """
    # NOTE(review): the nesting below was reconstructed from a flattened diff view;
    # confirm against the upstream file before relying on the exact branch structure.
    blocks.sort(
        key=lambda x: (
            x["block_bbox"][0],
            (x["block_bbox"][2] - x["block_bbox"][0]),
        ),
    )
    # Maps the index of each "single" block to its strong matches [(index, ratio), ...].
    check_single_layout = {}
    page_min_x, page_max_x = float("inf"), 0
    double_label_area = 0
    single_label_area = 0

    # Horizontal extent covered by all blocks, regardless of label.
    for i, block in enumerate(blocks):
        page_min_x = min(page_min_x, block["block_bbox"][0])
        page_max_x = max(page_max_x, block["block_bbox"][2])
    page_width = page_max_x - page_min_x

    for i, block in enumerate(blocks):
        if block["block_label"] not in no_mask_labels:
            continue

        x_min_i, _, x_max_i, _ = block["block_bbox"]
        layout_length = x_max_i - x_min_i
        cover_count, cover_with_threshold_count = 0, 0
        match_block_with_threshold_indexes = []

        for j, other_block in enumerate(blocks):
            if i == j or other_block["block_label"] not in no_mask_labels:
                continue

            x_min_j, _, x_max_j, _ = other_block["block_bbox"]
            x_match_min, x_match_max = max(
                x_min_i,
                x_min_j,
            ), min(x_max_i, x_max_j)
            # Overlap width divided by the OTHER block's width; negative when the
            # blocks do not overlap. A zero-width other block divides by zero here.
            match_block_iou = (x_match_max - x_match_min) / (x_max_j - x_min_j)

            if match_block_iou > 0:
                cover_count += 1
                if match_block_iou > threshold:
                    cover_with_threshold_count += 1
                    match_block_with_threshold_indexes.append(
                        (j, match_block_iou),
                    )
                # Advance the left edge past the part already covered, so later
                # blocks are measured against the remainder of this block only.
                x_min_i = x_match_max
                if x_min_i >= x_max_i:
                    break

        if (
            layout_length > median_width * 1.3
            and (cover_with_threshold_count >= 2 or cover_count >= 2)
        ) or layout_length > 0.6 * page_width:
            # Wide relative to the median text width and overlapped by at least two
            # other blocks, or wider than 60% of the page: mark as "double".
            block["layout"] = "double"
            double_label_area += (block["block_bbox"][2] - block["block_bbox"][0]) * (
                block["block_bbox"][3] - block["block_bbox"][1]
            )
        else:
            block["layout"] = "single"
            check_single_layout[i] = match_block_with_threshold_indexes

    # Check single-layout block
    for i, single_layout in check_single_layout.items():
        if single_layout:
            # Only the last recorded strong match is consulted.
            index, match_iou = single_layout[-1]
            if match_iou > 0.9 and blocks[index]["layout"] == "double":
                blocks[i]["layout"] = "double"
                double_label_area += (
                    blocks[i]["block_bbox"][2] - blocks[i]["block_bbox"][0]
                ) * (blocks[i]["block_bbox"][3] - blocks[i]["block_bbox"][1])
            else:
                single_label_area += (
                    blocks[i]["block_bbox"][2] - blocks[i]["block_bbox"][0]
                ) * (blocks[i]["block_bbox"][3] - blocks[i]["block_bbox"][1])

    return blocks, (double_label_area > single_label_area)
1184
+
1185
+
1186
+ def _get_bbox_direction(input_bbox: List[float], ratio: float = 1.0) -> bool:
1187
+ """
1188
+ Determine if a bounding box is horizontal or vertical.
1189
+
1190
+ Args:
1191
+ input_bbox (List[float]): Bounding box [x_min, y_min, x_max, y_max].
1192
+ ratio (float): Ratio for determining orientation. Default is 1.0.
1193
+
1194
+ Returns:
1195
+ bool: True if the bounding box is considered horizontal, False if vertical.
1196
+ """
1197
+ width = input_bbox[2] - input_bbox[0]
1198
+ height = input_bbox[3] - input_bbox[1]
1199
+ return width * ratio >= height
1200
+
1201
+
1202
+ def _get_projection_iou(
1203
+ input_bbox: List[float], match_bbox: List[float], is_horizontal: bool = True
1204
+ ) -> float:
1205
+ """
1206
+ Calculate the IoU of lines between two bounding boxes.
1207
+
1208
+ Args:
1209
+ input_bbox (List[float]): First bounding box [x_min, y_min, x_max, y_max].
1210
+ match_bbox (List[float]): Second bounding box [x_min, y_min, x_max, y_max].
1211
+ is_horizontal (bool): Whether to compare horizontally or vertically.
1212
+
1213
+ Returns:
1214
+ float: Line IoU. Returns 0 if there is no overlap.
1215
+ """
1216
+ if is_horizontal:
1217
+ x_match_min = max(input_bbox[0], match_bbox[0])
1218
+ x_match_max = min(input_bbox[2], match_bbox[2])
1219
+ overlap = max(0, x_match_max - x_match_min)
1220
+ input_width = min(input_bbox[2] - input_bbox[0], match_bbox[2] - match_bbox[0])
1221
+ else:
1222
+ y_match_min = max(input_bbox[1], match_bbox[1])
1223
+ y_match_max = min(input_bbox[3], match_bbox[3])
1224
+ overlap = max(0, y_match_max - y_match_min)
1225
+ input_width = min(input_bbox[3] - input_bbox[1], match_bbox[3] - match_bbox[1])
1226
+
1227
+ return overlap / input_width if input_width > 0 else 0.0
1228
+
1229
+
1230
def _get_sub_category(
    blocks: List[Dict[str, Any]], title_labels: List[str]
) -> Tuple[List[Dict[str, Any]], Dict[str, List[float]]]:
    """
    Annotate blocks with sub-category information and collect pre-cut coordinates.

    Every block receives the default keys ``title_text``, ``sub_title`` and
    ``vision_footnote`` (empty lists) and ``sub_label`` (a copy of its
    ``block_label``) when they are missing. For each block whose label is one of
    ``title_labels``, ``"paragraph_title"``, the vision labels or the vision
    title labels, the function then:

    1. decides whether the block's leading edge should become a pre-cut
       coordinate (length, centering and "no text on both sides" conditions);
    2. searches the other blocks for the closest neighbour before and after it
       along the block's own direction and, depending on labels and sizes,
       re-tags that neighbour's ``sub_label`` as ``"title_text"`` or
       ``"vision_footnote"`` and/or records the neighbour's bbox on this block.

    Args:
        blocks (List[Dict[str, Any]]): Block dictionaries; each must provide
            ``block_label`` and ``block_bbox`` ([x_min, y_min, x_max, y_max]).
            The dictionaries are modified in place.
        title_labels (List[str]): Labels considered as titles.

    Returns:
        Tuple[List[Dict[str, Any]], Dict[str, List[float]]]:
            - The same ``blocks`` list with the additional keys filled in.
            - The pre-cuts: key ``"y"`` holds the ``y_min`` of qualifying
              horizontal blocks, key ``"x"`` the ``x_min`` of qualifying
              non-horizontal blocks. A key is only present when at least one
              cut was found; the dict is empty for empty input.
    """

    sub_title_labels = ["paragraph_title"]
    vision_labels = ["image", "table", "chart", "figure"]
    vision_title_labels = ["figure_title", "chart_title", "table_title"]
    all_labels = title_labels + sub_title_labels + vision_labels + vision_title_labels
    special_pre_cut_labels = sub_title_labels

    # A single doc title is irregular, so the relaxed pre-cut rules are not applied
    # to it; title_labels only become "special" once a second doc_title is seen.
    num_doc_title = 0
    for block in blocks:
        if block["block_label"] == "doc_title":
            num_doc_title += 1
            if num_doc_title == 2:
                special_pre_cut_labels = title_labels + sub_title_labels
                break
    if len(blocks) == 0:
        return blocks, {}

    # Bounding region that encloses every block; used as the layout reference.
    min_x = min(block["block_bbox"][0] for block in blocks)
    min_y = min(block["block_bbox"][1] for block in blocks)
    max_x = max(block["block_bbox"][2] for block in blocks)
    max_y = max(block["block_bbox"][3] for block in blocks)
    region_bbox = (min_x, min_y, max_x, max_y)
    region_x_center = (region_bbox[0] + region_bbox[2]) / 2
    region_y_center = (region_bbox[1] + region_bbox[3]) / 2
    region_width = region_bbox[2] - region_bbox[0]
    region_height = region_bbox[3] - region_bbox[1]

    pre_cuts = {}

    for i, block1 in enumerate(blocks):
        # Defaults are set on every block, including those skipped below.
        block1.setdefault("title_text", [])
        block1.setdefault("sub_title", [])
        block1.setdefault("vision_footnote", [])
        block1.setdefault("sub_label", block1["block_label"])

        if block1["block_label"] not in all_labels:
            continue

        bbox1 = block1["block_bbox"]
        x1, y1, x2, y2 = bbox1
        # Presumably True for a horizontally laid out box; the helper is defined
        # outside this function -- TODO confirm.
        is_horizontal_1 = _get_bbox_direction(block1["block_bbox"])
        # Best candidate found so far before ("left_up") and after ("right_down")
        # the block; index -1 means "no candidate".
        left_up_title_text_distance = float("inf")
        left_up_title_text_index = -1
        left_up_title_text_direction = None
        right_down_title_text_distance = float("inf")
        right_down_title_text_index = -1
        right_down_title_text_direction = None

        # pre-cuts
        # Condition 1: Length is greater than half of the layout region
        # (always satisfied for special pre-cut labels).
        if is_horizontal_1:
            block_length = x2 - x1
            required_length = region_width / 2
        else:
            block_length = y2 - y1
            required_length = region_height / 2
        if block1["block_label"] in special_pre_cut_labels:
            length_condition = True
        else:
            length_condition = block_length > required_length

        # Condition 2: Centered check. The block centre must lie within
        # tolerance_len of the region centre along the block's direction, where
        # tolerance_len is block_length // 5 (block_length // 10 for special
        # pre-cut labels).
        block_x_center = (x1 + x2) / 2
        block_y_center = (y1 + y2) / 2
        tolerance_len = block_length // 5
        if block1["block_label"] in special_pre_cut_labels:
            tolerance_len = block_length // 10
        if is_horizontal_1:
            is_centered = abs(block_x_center - region_x_center) <= tolerance_len
        else:
            is_centered = abs(block_y_center - region_y_center) <= tolerance_len

        # Condition 3: Check for surrounding text. Only blocks labelled "text"
        # that overlap block1 on the perpendicular axis are considered.
        has_left_text = False
        has_right_text = False
        has_above_text = False
        has_below_text = False
        for block2 in blocks:
            if block2["block_label"] != "text":
                continue
            bbox2 = block2["block_bbox"]
            x1_2, y1_2, x2_2, y2_2 = bbox2
            if is_horizontal_1:
                if x2_2 <= x1 and not (y2_2 <= y1 or y1_2 >= y2):
                    has_left_text = True
                if x1_2 >= x2 and not (y2_2 <= y1 or y1_2 >= y2):
                    has_right_text = True
            else:
                if y2_2 <= y1 and not (x2_2 <= x1 or x1_2 >= x2):
                    has_above_text = True
                if y1_2 >= y2 and not (x2_2 <= x1 or x1_2 >= x2):
                    has_below_text = True

            # Both sides already found: nothing more to learn from the scan.
            if (is_horizontal_1 and has_left_text and has_right_text) or (
                not is_horizontal_1 and has_above_text and has_below_text
            ):
                break

        no_text_on_sides = (
            not (has_left_text or has_right_text)
            if is_horizontal_1
            else not (has_above_text or has_below_text)
        )

        # Add coordinates if all conditions are met
        if is_centered and length_condition and no_text_on_sides:
            if is_horizontal_1:
                pre_cuts.setdefault("y", []).append(y1)
            else:
                pre_cuts.setdefault("x", []).append(x1)

        # Find the nearest neighbour before and after block1.
        for j, block2 in enumerate(blocks):
            if i == j:
                continue

            bbox2 = block2["block_bbox"]
            x1_prime, y1_prime, x2_prime, y2_prime = bbox2
            is_horizontal_2 = _get_bbox_direction(bbox2)
            match_block_iou = _get_projection_iou(
                bbox2,
                bbox1,
                is_horizontal_1,
            )

            # Gap between block1 and the current candidate, bucketed in steps
            # of 5 (after a +2 offset), plus a small term derived from the
            # candidate's x_min / y_min that breaks ties inside one bucket.
            def distance_(is_horizontal, is_left_up):
                if is_horizontal:
                    if is_left_up:
                        return (y1 - y2_prime + 2) // 5 + x1_prime / 5000
                    else:
                        return (y1_prime - y2 + 2) // 5 + x1_prime / 5000

                else:
                    if is_left_up:
                        return (x1 - x2_prime + 2) // 5 + y1_prime / 5000
                    else:
                        return (x1_prime - x2 + 2) // 5 + y1_prime / 5000

            # Paragraph titles require a larger projection overlap than other labels.
            block_iou_threshold = 0.1
            if block1["block_label"] in sub_title_labels:
                block_iou_threshold = 0.5

            if is_horizontal_1:
                if match_block_iou >= block_iou_threshold:
                    left_up_distance = distance_(True, True)
                    right_down_distance = distance_(True, False)
                    # Candidate above block1: ties replace the previous best (<=).
                    if (
                        y2_prime <= y1
                        and left_up_distance <= left_up_title_text_distance
                    ):
                        left_up_title_text_distance = left_up_distance
                        left_up_title_text_index = j
                        left_up_title_text_direction = is_horizontal_2
                    # Candidate below block1: ties keep the previous best (<).
                    elif (
                        y1_prime > y2
                        and right_down_distance < right_down_title_text_distance
                    ):
                        right_down_title_text_distance = right_down_distance
                        right_down_title_text_index = j
                        right_down_title_text_direction = is_horizontal_2
            else:
                if match_block_iou >= block_iou_threshold:
                    left_up_distance = distance_(False, True)
                    right_down_distance = distance_(False, False)
                    # Candidate to the left of block1.
                    if (
                        x2_prime <= x1
                        and left_up_distance <= left_up_title_text_distance
                    ):
                        left_up_title_text_distance = left_up_distance
                        left_up_title_text_index = j
                        left_up_title_text_direction = is_horizontal_2
                    # Candidate to the right of block1.
                    elif (
                        x1_prime > x2
                        and right_down_distance < right_down_title_text_distance
                    ):
                        right_down_title_text_distance = right_down_distance
                        right_down_title_text_index = j
                        right_down_title_text_direction = is_horizontal_2

        height = bbox1[3] - bbox1[1]
        width = bbox1[2] - bbox1[0]
        # Size ratios a neighbour must stay below to count as title text:
        # [height ratio for horizontal blocks, width ratio for the others].
        title_text_weight = [0.8, 0.8]

        title_text, sub_title, vision_footnote = [], [], []

        # Classify one neighbour of block1. Nothing happens unless the neighbour
        # exists (index != -1), has the same direction as block1 and is labelled
        # "text" or "paragraph_title". Results are appended to the three lists
        # above and, for text neighbours, written to the neighbour's sub_label.
        def get_sub_category_(
            title_text_direction,
            title_text_index,
            label,
            is_left_up=True,
        ):
            # Position codes stored with title text: 1/3 for a neighbour before
            # the block (horizontal / non-horizontal), 2/4 for one after it.
            direction_ = [1, 3] if is_left_up else [2, 4]
            if (
                title_text_direction == is_horizontal_1
                and title_text_index != -1
                and (label == "text" or label == "paragraph_title")
            ):
                bbox2 = blocks[title_text_index]["block_bbox"]
                if is_horizontal_1:
                    height1 = bbox2[3] - bbox2[1]
                    width1 = bbox2[2] - bbox2[0]
                    if label == "text":
                        # Small text hugging a vision block -> footnote.
                        if (
                            _nearest_edge_distance(bbox1, bbox2)[0] <= 15
                            and block1["block_label"] in vision_labels
                            and width1 < width
                            and height1 < 0.5 * height
                        ):
                            blocks[title_text_index]["sub_label"] = "vision_footnote"
                            vision_footnote.append(bbox2)
                        # Lower text next to a title, either narrower or much
                        # wider than the title -> title text.
                        elif (
                            height1 < height * title_text_weight[0]
                            and (width1 < width or width1 > 1.5 * width)
                            and block1["block_label"] in title_labels
                        ):
                            blocks[title_text_index]["sub_label"] = "title_text"
                            title_text.append((direction_[0], bbox2))
                    elif (
                        label == "paragraph_title"
                        and block1["block_label"] in sub_title_labels
                    ):
                        sub_title.append(bbox2)
                else:
                    height1 = bbox2[3] - bbox2[1]
                    width1 = bbox2[2] - bbox2[0]
                    if label == "text":
                        if (
                            _nearest_edge_distance(bbox1, bbox2)[0] <= 15
                            and block1["block_label"] in vision_labels
                            and height1 < height
                            and width1 < 0.5 * width
                        ):
                            blocks[title_text_index]["sub_label"] = "vision_footnote"
                            vision_footnote.append(bbox2)
                        elif (
                            width1 < width * title_text_weight[1]
                            and block1["block_label"] in title_labels
                        ):
                            blocks[title_text_index]["sub_label"] = "title_text"
                            title_text.append((direction_[1], bbox2))
                    elif (
                        label == "paragraph_title"
                        and block1["block_label"] in sub_title_labels
                    ):
                        sub_title.append(bbox2)

        # If one neighbour is clearly closer than the other (the bucketed
        # distances differ by more than the block's own thickness once scaled
        # by 5), only the closer one is classified; otherwise both are.
        # When an index is still -1 the label lookup below reads the last block,
        # but get_sub_category_ ignores the call because of its index check.
        if (
            is_horizontal_1
            and abs(left_up_title_text_distance - right_down_title_text_distance) * 5
            > height
        ) or (
            not is_horizontal_1
            and abs(left_up_title_text_distance - right_down_title_text_distance) * 5
            > width
        ):
            if left_up_title_text_distance < right_down_title_text_distance:
                get_sub_category_(
                    left_up_title_text_direction,
                    left_up_title_text_index,
                    blocks[left_up_title_text_index]["block_label"],
                    True,
                )
            else:
                get_sub_category_(
                    right_down_title_text_direction,
                    right_down_title_text_index,
                    blocks[right_down_title_text_index]["block_label"],
                    False,
                )
        else:
            get_sub_category_(
                left_up_title_text_direction,
                left_up_title_text_index,
                blocks[left_up_title_text_index]["block_label"],
                True,
            )
            get_sub_category_(
                right_down_title_text_direction,
                right_down_title_text_index,
                blocks[right_down_title_text_index]["block_label"],
                False,
            )

        # Store the collected neighbours, but never overwrite non-empty values
        # that were already present on the block.
        if block1["block_label"] in title_labels:
            if blocks[i].get("title_text") == []:
                blocks[i]["title_text"] = title_text

        if block1["block_label"] in sub_title_labels:
            if blocks[i].get("sub_title") == []:
                blocks[i]["sub_title"] = sub_title

        if block1["block_label"] in vision_labels:
            if blocks[i].get("vision_footnote") == []:
                blocks[i]["vision_footnote"] = vision_footnote

    return blocks, pre_cuts
1540
+
1541
+
1542
def get_layout_ordering(
    parsing_res_list: List[Dict[str, Any]],
    no_mask_labels: List[str] = [],
) -> List[Dict[str, Any]]:
    """
    Assign a reading order to layout blocks and return a normalized block list.

    The blocks are first annotated by ``_get_sub_category``. If that step
    yields pre-cut coordinates, the blocks are split into groups at those
    coordinates and every group is ordered on its own; indexes continue from
    one group to the next. Inside a group, the blocks that remain after
    bucketing are ordered with ``sort_by_xycut``; the bucketed blocks (double
    column text, titles, title text, vision blocks, vision titles, vision
    footnotes and everything else) are then re-inserted one bucket at a time
    next to their nearest already ordered block.

    Modifies:
        The dictionaries in 'parsing_res_list' in place: 'block_bbox' is
        converted to a list of ints and 'index' / 'sub_index' are written, in
        addition to the keys added by the helper functions.

    Args:
        parsing_res_list (List[Dict[str, Any]]): List of block dictionaries with 'block_bbox' and 'block_label'.
            'block_content' is also required when the result is assembled.
        no_mask_labels (List[str]): Labels for which overlapping removal is not performed.
            In this function, blocks carrying one of these labels are only
            bucketed when their 'layout' value is "double".

    Returns:
        List[Dict[str, Any]]: One new dictionary per ordered block with the keys
        'block_label', 'block_content', 'block_bbox', 'block_image',
        'sub_label', 'sub_index', 'index', 'seg_start_coordinate',
        'seg_end_coordinate' and 'num_of_lines'.
    """
    title_text_labels = ["doc_title"]
    title_labels = ["doc_title", "paragraph_title"]
    vision_labels = ["image", "table", "seal", "chart", "figure"]
    vision_title_labels = ["table_title", "chart_title", "figure_title"]

    parsing_res_list, pre_cuts = _get_sub_category(parsing_res_list, title_text_labels)

    # Split the blocks into groups delimited by the pre-cut coordinates. A block
    # belongs to the interval [start, end) that contains its min coordinate on
    # the cut axis ("y" -> y_min, anything else -> x_min).
    parsing_res_by_pre_cuts_list = []
    if len(pre_cuts) > 0:
        block_bboxes = [block["block_bbox"] for block in parsing_res_list]
        for axis, cuts in pre_cuts.items():
            axis_index = 1 if axis == "y" else 0

            max_val = max(bbox[axis_index + 2] for bbox in block_bboxes)

            intervals = []
            prev = 0
            for cut in sorted(cuts):
                intervals.append((prev, cut))
                prev = cut
            intervals.append((prev, max_val))

            for start, end in intervals:
                mask = [
                    (bbox[axis_index] >= start) and (bbox[axis_index] < end)
                    for bbox in block_bboxes
                ]
                parsing_res_by_pre_cuts_list.append(
                    [parsing_res_list[i] for i, m in enumerate(mask) if m]
                )
    else:
        parsing_res_by_pre_cuts_list = [parsing_res_list]

    final_parsing_res_list = []
    # Running offsets so that indexes keep increasing across groups.
    num_index = 0
    num_sub_index = 0
    for parsing_res_by_pre_cuts in parsing_res_by_pre_cuts_list:

        doc_flag = False
        median_width = _get_text_median_width(parsing_res_by_pre_cuts)
        # Helper defined elsewhere; the code below relies on it providing a
        # "layout" entry on blocks and a truthy/falsy projection direction --
        # TODO confirm against its implementation.
        parsing_res_by_pre_cuts, projection_direction = _get_layout_property(
            parsing_res_by_pre_cuts,
            median_width,
            no_mask_labels=no_mask_labels,
            threshold=0.3,
        )
        # Buckets for blocks that are taken out of the group now and re-inserted
        # later by nearest_match_.
        (
            double_text_blocks,
            title_text_blocks,
            title_blocks,
            vision_blocks,
            vision_title_blocks,
            vision_footnote_blocks,
            other_blocks,
        ) = ([], [], [], [], [], [], [])

        drop_indexes = []

        for index, block in enumerate(parsing_res_by_pre_cuts):
            label = block["sub_label"]
            # Bounding boxes are normalized to plain int lists so they can be
            # compared with the rows returned by the xy-cut sort below.
            block["block_bbox"] = list(map(int, block["block_bbox"]))

            if label == "doc_title":
                doc_flag = True

            # no_mask_labels blocks stay in the group unless their layout is "double".
            if label in no_mask_labels:
                if block["layout"] == "double":
                    double_text_blocks.append(block)
                    drop_indexes.append(index)
            elif label == "title_text":
                title_text_blocks.append(block)
                drop_indexes.append(index)
            elif label == "vision_footnote":
                vision_footnote_blocks.append(block)
                drop_indexes.append(index)
            elif label in vision_title_labels:
                vision_title_blocks.append(block)
                drop_indexes.append(index)
            elif label in title_labels:
                title_blocks.append(block)
                drop_indexes.append(index)
            elif label in vision_labels:
                vision_blocks.append(block)
                drop_indexes.append(index)
            else:
                other_blocks.append(block)
                drop_indexes.append(index)

        # Delete from the back so that the remaining indexes stay valid.
        for index in sorted(drop_indexes, reverse=True):
            del parsing_res_by_pre_cuts[index]

        if len(parsing_res_by_pre_cuts) > 0:
            # single text label
            if (
                len(double_text_blocks) > len(parsing_res_by_pre_cuts)
                or projection_direction
            ):
                # Titles and double-column text take part in the xy-cut directly.
                parsing_res_by_pre_cuts.extend(title_blocks + double_text_blocks)
                title_blocks = []
                double_text_blocks = []
                block_bboxes = [
                    block["block_bbox"] for block in parsing_res_by_pre_cuts
                ]
                block_bboxes.sort(
                    key=lambda x: (
                        x[0] // max(20, median_width),
                        x[1],
                    ),
                )
                block_bboxes = np.array(block_bboxes)
                sorted_indices = sort_by_xycut(block_bboxes, direction=1, min_gap=1)
            else:
                block_bboxes = [
                    block["block_bbox"] for block in parsing_res_by_pre_cuts
                ]
                block_bboxes.sort(key=lambda x: (x[0] // 20, x[1]))
                block_bboxes = np.array(block_bboxes)
                sorted_indices = sort_by_xycut(block_bboxes, direction=0, min_gap=20)

            sorted_boxes = block_bboxes[sorted_indices].tolist()

            # The order is looked up by bbox value, so blocks with identical
            # bounding boxes receive the same index here.
            for block in parsing_res_by_pre_cuts:
                block["index"] = num_index + sorted_boxes.index(block["block_bbox"]) + 1
                block["sub_index"] = (
                    num_sub_index + sorted_boxes.index(block["block_bbox"]) + 1
                )

        # Give every block of input_blocks the "index" (is_add_index=True) or
        # "sub_index" (is_add_index=False) of its nearest block in the current
        # group, then append it to the group. Because the append happens inside
        # the loop, later input blocks can match against earlier ones.
        # If no candidate has a finite distance, the assigned value is 0.
        def nearest_match_(input_blocks, distance_type="manhattan", is_add_index=True):
            for block in input_blocks:
                bbox = block["block_bbox"]
                min_distance = float("inf")
                min_distance_config = [
                    [float("inf"), float("inf")],
                    float("inf"),
                    float("inf"),
                ]  # for double text
                nearest_gt_index = 0
                for match_block in parsing_res_by_pre_cuts:
                    match_bbox = match_block["block_bbox"]
                    if distance_type == "nearest_iou_edge_distance":
                        distance, min_distance_config = _nearest_iou_edge_distance(
                            bbox,
                            match_bbox,
                            block["sub_label"],
                            vision_labels=vision_labels,
                            no_mask_labels=no_mask_labels,
                            median_width=median_width,
                            title_labels=title_labels,
                            title_text=block["title_text"],
                            sub_title=block["sub_title"],
                            min_distance_config=min_distance_config,
                            tolerance_len=10,
                        )
                    elif distance_type == "title_text":
                        # Only titles/abstracts that recorded title text are
                        # candidates; the distance shrinks with the overlap
                        # between this block and the recorded title-text boxes.
                        if (
                            match_block["block_label"] in title_labels + ["abstract"]
                            and match_block["title_text"] != []
                        ):
                            iou_left_up = _calculate_overlap_area_div_minbox_area_ratio(
                                bbox,
                                match_block["title_text"][0][1],
                            )
                            iou_right_down = (
                                _calculate_overlap_area_div_minbox_area_ratio(
                                    bbox,
                                    match_block["title_text"][-1][1],
                                )
                            )
                            iou = 1 - max(iou_left_up, iou_right_down)
                            distance = _manhattan_distance(bbox, match_bbox) * iou
                        else:
                            distance = float("inf")
                    elif distance_type == "manhattan":
                        # Uses only the first two coordinates of each bbox.
                        distance = _manhattan_distance(bbox, match_bbox)
                    elif distance_type == "vision_footnote":
                        # Same scheme as "title_text", using the footnote boxes
                        # recorded on vision blocks.
                        if (
                            match_block["block_label"] in vision_labels
                            and match_block["vision_footnote"] != []
                        ):
                            iou_left_up = _calculate_overlap_area_div_minbox_area_ratio(
                                bbox,
                                match_block["vision_footnote"][0],
                            )
                            iou_right_down = (
                                _calculate_overlap_area_div_minbox_area_ratio(
                                    bbox,
                                    match_block["vision_footnote"][-1],
                                )
                            )
                            iou = 1 - max(iou_left_up, iou_right_down)
                            distance = _manhattan_distance(bbox, match_bbox) * iou
                        else:
                            distance = float("inf")
                    elif distance_type == "vision_body":
                        # Not requested by any call in this function.
                        if (
                            match_block["block_label"] in vision_title_labels
                            and block["vision_footnote"] != []
                        ):
                            iou_left_up = _calculate_overlap_area_div_minbox_area_ratio(
                                match_bbox,
                                block["vision_footnote"][0],
                            )
                            iou_right_down = (
                                _calculate_overlap_area_div_minbox_area_ratio(
                                    match_bbox,
                                    block["vision_footnote"][-1],
                                )
                            )
                            iou = 1 - max(iou_left_up, iou_right_down)
                            distance = _manhattan_distance(bbox, match_bbox) * iou
                        else:
                            distance = float("inf")
                    # When a reference block crosses multiple columns, its order should be after the blocks above it.
                    # Among the blocks that end above it, the one with the largest
                    # x_max * 10 + y_max wins (most negative distance).
                    elif distance_type == "append":
                        if match_bbox[3] <= bbox[1]:
                            distance = -(match_bbox[2] * 10 + match_bbox[3])
                        else:
                            distance = float("inf")
                    else:
                        raise NotImplementedError

                    if distance < min_distance:
                        min_distance = distance
                        if is_add_index:
                            nearest_gt_index = match_block.get("index", 999)
                        else:
                            nearest_gt_index = match_block.get("sub_index", 999)

                if is_add_index:
                    block["index"] = nearest_gt_index
                else:
                    block["sub_index"] = nearest_gt_index

                parsing_res_by_pre_cuts.append(block)

        # double text label
        # NOTE(review): this key divides by median_width without the max(20, ...)
        # guard used for the xy-cut sort above; a zero median width would raise
        # ZeroDivisionError when the list is non-empty -- confirm it cannot be 0.
        double_text_blocks.sort(
            key=lambda x: (
                x["block_bbox"][1] // 10,
                x["block_bbox"][0] // median_width,
                x["block_bbox"][1] ** 2 + x["block_bbox"][0] ** 2,
            ),
        )
        # Filter the reference blocks from all blocks that cross multiple columns.
        # They should be ordered using "append".
        double_text_reference_blocks = []
        i = 0
        while i < len(double_text_blocks):
            if double_text_blocks[i]["block_label"] == "reference":
                double_text_reference_blocks.append(double_text_blocks.pop(i))
            else:
                i += 1
        nearest_match_(
            double_text_blocks,
            distance_type="nearest_iou_edge_distance",
        )
        nearest_match_(
            double_text_reference_blocks,
            distance_type="append",
        )
        parsing_res_by_pre_cuts.sort(
            key=lambda x: (x["index"], x["block_bbox"][1], x["block_bbox"][0]),
        )

        # Re-number after every insertion round so indexes are consecutive again.
        for idx, block in enumerate(parsing_res_by_pre_cuts):
            block["index"] = num_index + idx + 1
            block["sub_index"] = num_sub_index + idx + 1

        # title label
        title_blocks.sort(
            key=lambda x: (
                x["block_bbox"][1] // 10,
                x["block_bbox"][0] // median_width,
                x["block_bbox"][1] ** 2 + x["block_bbox"][0] ** 2,
            ),
        )
        nearest_match_(title_blocks, distance_type="nearest_iou_edge_distance")

        if doc_flag:
            # Force the top-most (then left-most) doc_title to index 1 and let
            # doc_title blocks win ties on index.
            text_sort_labels = ["doc_title"]
            text_label_priority = {
                label: priority for priority, label in enumerate(text_sort_labels)
            }
            doc_titles = []
            for i, block in enumerate(parsing_res_by_pre_cuts):
                if block["block_label"] == "doc_title":
                    doc_titles.append(
                        (i, block["block_bbox"][1], block["block_bbox"][0]),
                    )
            doc_titles.sort(key=lambda x: (x[1], x[2]))
            first_doc_title_index = doc_titles[0][0]
            parsing_res_by_pre_cuts[first_doc_title_index]["index"] = 1
            parsing_res_by_pre_cuts.sort(
                key=lambda x: (
                    x["index"],
                    text_label_priority.get(x["block_label"], 9999),
                    x["block_bbox"][1],
                    x["block_bbox"][0],
                ),
            )
        else:
            parsing_res_by_pre_cuts.sort(
                key=lambda x: (
                    x["index"],
                    x["block_bbox"][1],
                    x["block_bbox"][0],
                ),
            )

        for idx, block in enumerate(parsing_res_by_pre_cuts):
            block["index"] = num_index + idx + 1
            block["sub_index"] = num_sub_index + idx + 1

        # title-text label
        nearest_match_(title_text_blocks, distance_type="title_text")

        # Tie-break key: y_min when _get_bbox_direction reports the box as
        # horizontal, x_min otherwise.
        def hor_tb_and_ver_lr(x):
            input_bbox = x["block_bbox"]
            is_horizontal = _get_bbox_direction(input_bbox)
            if is_horizontal:
                return input_bbox[1]
            else:
                return input_bbox[0]

        parsing_res_by_pre_cuts.sort(
            key=lambda x: (x["index"], hor_tb_and_ver_lr(x)),
        )

        for idx, block in enumerate(parsing_res_by_pre_cuts):
            block["index"] = num_index + idx + 1
            block["sub_index"] = num_sub_index + idx + 1

        # From here on only "sub_index" is assigned: the blocks inserted below
        # get no "index" of their own.
        # image,figure,chart,seal label
        nearest_match_(
            vision_blocks,
            distance_type="nearest_iou_edge_distance",
            is_add_index=False,
        )
        parsing_res_by_pre_cuts.sort(
            key=lambda x: (
                x["sub_index"],
                x["block_bbox"][1],
                x["block_bbox"][0],
            ),
        )

        for idx, block in enumerate(parsing_res_by_pre_cuts):
            block["sub_index"] = num_sub_index + idx + 1

        # image,figure,chart,seal title label
        nearest_match_(
            vision_title_blocks,
            distance_type="nearest_iou_edge_distance",
            is_add_index=False,
        )
        parsing_res_by_pre_cuts.sort(
            key=lambda x: (
                x["sub_index"],
                x["block_bbox"][1],
                x["block_bbox"][0],
            ),
        )

        for idx, block in enumerate(parsing_res_by_pre_cuts):
            block["sub_index"] = num_sub_index + idx + 1

        # vision footnote label
        nearest_match_(
            vision_footnote_blocks,
            distance_type="vision_footnote",
            is_add_index=False,
        )
        # Footnotes sort after every other block that shares their sub_index.
        text_label_priority = {"vision_footnote": 9999}
        parsing_res_by_pre_cuts.sort(
            key=lambda x: (
                x["sub_index"],
                text_label_priority.get(x["sub_label"], 0),
                x["block_bbox"][1],
                x["block_bbox"][0],
            ),
        )

        for idx, block in enumerate(parsing_res_by_pre_cuts):
            block["sub_index"] = num_sub_index + idx + 1

        # header, footnote, header_image... label
        # No sort or re-numbering follows, so these blocks keep the sub_index of
        # their nearest block and stay at the end of the group.
        nearest_match_(other_blocks, distance_type="manhattan", is_add_index=False)

        # add all parsing result
        final_parsing_res_list.extend(parsing_res_by_pre_cuts)

        # update num index
        num_sub_index += len(parsing_res_by_pre_cuts)
        # Only blocks with a truthy "index" advance the main counter.
        for parsing_res in parsing_res_by_pre_cuts:
            if parsing_res.get("index"):
                num_index += 1

    # Build fresh dictionaries that expose only the documented keys; optional
    # keys fall back to defaults when a block does not carry them.
    parsing_res_list = [
        {
            "block_label": parsing_res["block_label"],
            "block_content": parsing_res["block_content"],
            "block_bbox": parsing_res["block_bbox"],
            "block_image": parsing_res.get("block_image", None),
            "sub_label": parsing_res["sub_label"],
            "sub_index": parsing_res["sub_index"],
            "index": parsing_res.get("index", None),
            "seg_start_coordinate": parsing_res.get(
                "seg_start_coordinate", float("inf")
            ),
            "seg_end_coordinate": parsing_res.get("seg_end_coordinate", float("-inf")),
            "num_of_lines": parsing_res.get("num_of_lines", 1),
        }
        for parsing_res in final_parsing_res_list
    ]

    return parsing_res_list
1974
+
1975
+
1976
+ def _manhattan_distance(
1977
+ point1: Tuple[float, float],
1978
+ point2: Tuple[float, float],
1979
+ weight_x: float = 1.0,
1980
+ weight_y: float = 1.0,
1981
+ ) -> float:
1982
+ """
1983
+ Calculate the weighted Manhattan distance between two points.
1984
+
1985
+ Args:
1986
+ point1 (Tuple[float, float]): The first point as (x, y).
1987
+ point2 (Tuple[float, float]): The second point as (x, y).
1988
+ weight_x (float): The weight for the x-axis distance. Default is 1.0.
1989
+ weight_y (float): The weight for the y-axis distance. Default is 1.0.
1990
+
1991
+ Returns:
1992
+ float: The weighted Manhattan distance between the two points.
1993
+ """
1994
+ return weight_x * abs(point1[0] - point2[0]) + weight_y * abs(point1[1] - point2[1])
1995
+
1996
+
1997
+ def _calculate_horizontal_distance(
1998
+ input_bbox: List[int],
1999
+ match_bbox: List[int],
2000
+ height: int,
2001
+ disperse: int,
2002
+ title_text: List[Tuple[int, List[int]]],
2003
+ ) -> float:
2004
+ """
2005
+ Calculate the horizontal distance between two bounding boxes, considering title text adjustments.
2006
+
2007
+ Args:
2008
+ input_bbox (List[int]): The bounding box coordinates [x1, y1, x2, y2] of the input object.
2009
+ match_bbox (List[int]): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
2010
+ height (int): The height of the input bounding box used for normalization.
2011
+ disperse (int): The dispersion factor used to normalize the horizontal distance.
2012
+ title_text (List[Tuple[int, List[int]]]): A list of tuples containing title text information and their bounding box coordinates.
2013
+ Format: [(position_indicator, [x1, y1, x2, y2]), ...].
2014
+
2015
+ Returns:
2016
+ float: The calculated horizontal distance taking into account the title text adjustments.
2017
+ """
2018
+ x1, y1, x2, y2 = input_bbox
2019
+ x1_prime, y1_prime, x2_prime, y2_prime = match_bbox
2020
+
2021
+ # Determine vertical distance adjustment based on title text
2022
+ if y2 < y1_prime:
2023
+ if title_text and title_text[-1][0] == 2:
2024
+ y2 += title_text[-1][1][3] - title_text[-1][1][1]
2025
+ vertical_adjustment = (y1_prime - y2) * 0.5
2026
+ else:
2027
+ if title_text and title_text[0][0] == 1:
2028
+ y1 -= title_text[0][1][3] - title_text[0][1][1]
2029
+ vertical_adjustment = y1 - y2_prime
2030
+
2031
+ # Calculate horizontal distance with adjustments
2032
+ horizontal_distance = (
2033
+ abs(x2_prime - x1) // disperse
2034
+ + vertical_adjustment // height
2035
+ + vertical_adjustment / 5000
2036
+ )
2037
+
2038
+ return horizontal_distance
2039
+
2040
+
2041
+ def _calculate_vertical_distance(
2042
+ input_bbox: List[int],
2043
+ match_bbox: List[int],
2044
+ width: int,
2045
+ disperse: int,
2046
+ title_text: List[Tuple[int, List[int]]],
2047
+ ) -> float:
2048
+ """
2049
+ Calculate the vertical distance between two bounding boxes, considering title text adjustments.
2050
+
2051
+ Args:
2052
+ input_bbox (List[int]): The bounding box coordinates [x1, y1, x2, y2] of the input object.
2053
+ match_bbox (List[int]): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
2054
+ width (int): The width of the input bounding box used for normalization.
2055
+ disperse (int): The dispersion factor used to normalize the vertical distance.
2056
+ title_text (List[Tuple[int, List[int]]]): A list of tuples containing title text information and their bounding box coordinates.
2057
+ Format: [(position_indicator, [x1, y1, x2, y2]), ...].
2058
+
2059
+ Returns:
2060
+ float: The calculated vertical distance taking into account the title text adjustments.
2061
+ """
2062
+ x1, y1, x2, y2 = input_bbox
2063
+ x1_prime, y1_prime, x2_prime, y2_prime = match_bbox
2064
+
2065
+ # Determine horizontal distance adjustment based on title text
2066
+ if x1 > x2_prime:
2067
+ if title_text and title_text[0][0] == 3:
2068
+ x1 -= title_text[0][1][2] - title_text[0][1][0]
2069
+ horizontal_adjustment = (x1 - x2_prime) * 0.5
2070
+ else:
2071
+ if title_text and title_text[-1][0] == 4:
2072
+ x2 += title_text[-1][1][2] - title_text[-1][1][0]
2073
+ horizontal_adjustment = x1_prime - x2
2074
+
2075
+ # Calculate vertical distance with adjustments
2076
+ vertical_distance = (
2077
+ abs(y2_prime - y1) // disperse
2078
+ + horizontal_adjustment // width
2079
+ + horizontal_adjustment / 5000
2080
+ )
2081
+
2082
+ return vertical_distance
2083
+
2084
+
2085
def _nearest_edge_distance(
    input_bbox: List[int],
    match_bbox: List[int],
    weight: List[float] = [1.0, 1.0, 1.0, 1.0],
    label: str = "text",
    no_mask_labels: List[str] = [],
    min_edge_distance_config: List[float] = [],
    tolerance_len: float = 10.0,
) -> Tuple[float, List[float]]:
    """
    Compute a direction-weighted edge distance between two bounding boxes.

    For each axis on which the boxes are strictly separated, the gap between the
    facing edges is measured. A gap that lies within ``tolerance_len`` of the
    reference minimum for that axis is replaced by the reference before the
    directional weight is applied. If the boxes are separated on both axes the
    two weighted gaps are added; otherwise the smaller of the two axis values
    is returned (an axis without separation counts as infinity unless the
    boxes overlap, in which case it counts as 0).

    Args:
        input_bbox (list): Input box as [x1, y1, x2, y2].
        match_bbox (list): Candidate box as [x1', y1', x2', y2'].
        weight (list, optional): Weights selected by the relative position of the
            input: index 0 when it is left of the match, 1 when right of it,
            2 when above it, 3 when below it. Defaults to [1, 1, 1, 1].
        label (str, optional): Label of the input block. Defaults to 'text'.
        no_mask_labels (list, optional): Labels that do not short-circuit to a
            zero distance on overlap; for these labels the weighted gap of an
            input lying above the match is additionally raised to at least 0.1
            and multiplied by 10. Defaults to an empty list.
        min_edge_distance_config (list, optional): Reference minimum gaps
            [min_edge_distance_x, min_edge_distance_y]. An empty value means
            [float('inf'), float('inf')].
        tolerance_len (float, optional): Tolerance used when comparing a gap with
            its reference minimum. Defaults to 10.

    Returns:
        Tuple[float, List[float]]: A tuple containing:
            - The weighted distance between the boxes.
            - The raw (unweighted) gaps along x and y; ``float('inf')`` for an
              axis on which the boxes are neither separated nor overlapping.
    """
    overlap_ratio = _calculate_overlap_area_div_minbox_area_ratio(
        input_bbox,
        match_bbox,
    )
    # Overlapping boxes are at distance zero unless the label opts out.
    if overlap_ratio > 0 and label not in no_mask_labels:
        return 0, [0, 0]

    reference = min_edge_distance_config or [float("inf"), float("inf")]
    reference_x, reference_y = reference

    left, top, right, bottom = input_bbox
    match_left, match_top, match_right, match_bottom = match_bbox

    def weighted(gap, reference_gap, slot):
        # Snap the gap to the reference when both are within tolerance.
        if abs(gap - reference_gap) <= tolerance_len:
            return reference_gap * weight[slot]
        return gap * weight[slot]

    separated_axes = 0
    gap_x = gap_y = float("inf")
    cost_x = cost_y = float("inf")

    if right < match_left:
        # input is to the left of the match
        separated_axes += 1
        gap_x = match_left - right
        cost_x = weighted(gap_x, reference_x, 0)
    elif left > match_right:
        # input is to the right of the match
        separated_axes += 1
        gap_x = left - match_right
        cost_x = weighted(gap_x, reference_x, 1)
    elif overlap_ratio > 0:
        gap_x = 0
        cost_x = 0

    if bottom < match_top:
        # input is above the match
        separated_axes += 1
        gap_y = match_top - bottom
        cost_y = weighted(gap_y, reference_y, 2)
        if label in no_mask_labels:
            cost_y = max(0.1, cost_y) * 10  # for abstract
    elif top > match_bottom:
        # input is below the match
        separated_axes += 1
        gap_y = top - match_bottom
        cost_y = weighted(gap_y, reference_y, 3)
    elif overlap_ratio > 0:
        gap_y = 0
        cost_y = 0

    if separated_axes == 2:
        total = cost_x + cost_y
    else:
        total = min(cost_x, cost_y)
    return total, [gap_x, gap_y]
2183
+
2184
+
2185
+ def _get_weights(label, horizontal):
2186
+ """Define weights based on the label and orientation."""
2187
+ if label == "doc_title":
2188
+ return (
2189
+ [1, 0.1, 0.1, 1] if horizontal else [0.2, 0.1, 1, 1]
2190
+ ) # left-down , right-left
2191
+ elif label in [
2192
+ "paragraph_title",
2193
+ "table_title",
2194
+ "abstract",
2195
+ "image",
2196
+ "seal",
2197
+ "chart",
2198
+ "figure",
2199
+ ]:
2200
+ return [1, 1, 0.1, 1] # down
2201
+ else:
2202
+ return [1, 1, 1, 0.1] # up
2203
+
2204
+
2205
def _nearest_iou_edge_distance(
    input_bbox: List[int],
    match_bbox: List[int],
    label: str,
    vision_labels: List[str],
    no_mask_labels: List[str],
    median_width: int = -1,
    title_labels: List[str] = [],
    title_text: List[Tuple[int, List[int]]] = [],
    sub_title: List[List[int]] = [],
    min_distance_config: List[float] = [],
    tolerance_len: float = 10.0,
) -> Tuple[float, List[float]]:
    """
    Score how far ``match_bbox`` is from ``input_bbox`` as a single weighted distance.

    The score is the sum of four terms with strongly separated weights, so each
    term only breaks ties left by the previous one:

        iou_distance * 10**8 + edge_distance * 10**4
        + up_edge_distance * 1 + left_edge_distance * 0.0001

    * ``iou_distance`` is 1 when the two boxes have different directions or
      their projection IoU is below 0.01, otherwise 0.
    * ``edge_distance`` comes from ``_nearest_edge_distance`` using the weights
      returned by ``_get_weights(label, horizontal)``.
    * ``up_edge_distance`` / ``left_edge_distance`` are the top / left
      coordinates of ``match_bbox`` (negated bottom / right coordinates when the
      label is a no-mask, title or vision label and the input box lies below
      the matched box).

    Args:
        input_bbox (List[int]): The bounding box coordinates [x1, y1, x2, y2] of the input object.
        match_bbox (List[int]): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
        label (str): The label/type of the object in the bounding box (e.g., 'image', 'text', etc.).
        vision_labels (List[str]): Labels treated as vision objects; both boxes are then considered horizontal.
        no_mask_labels (List[str]): Labels for which no masking is applied when calculating edge distances.
        median_width (int, optional): For 'doc_title', ``max(1, median_width)`` is a lower bound on the tolerance. Defaults to -1.
        title_labels (List[str], optional): Labels that indicate the object is a title. Defaults to an empty list.
        title_text (List[Tuple[int, List[int]]], optional): Entries of the form (position_indicator, [x1, y1, x2, y2]);
            only the box part is read, to adjust ``input_bbox``. Defaults to an empty list.
        sub_title (List[List[int]], optional): Subtitle bounding boxes used to adjust ``input_bbox``. Defaults to an empty list.
        min_distance_config (List[float], optional): Best candidate so far, as
            [min_edge_distance_config, up_edge_distances_config, total_distance].
            An empty (or ``None``) value means "no candidate yet" and is treated as
            ``[[inf, inf], inf, inf]``. Defaults to an empty list.
        tolerance_len (float, optional): The tolerance length for adjusting edge distances. Defaults to 10.0.

    Returns:
        Tuple[float, List[float]]: A tuple containing:
            - The weighted distance described above.
            - The minimum distance configuration, replaced by this candidate's
              values when its distance is smaller than the recorded total.
    """

    x1, y1, x2, y2 = input_bbox
    x1_prime, y1_prime, x2_prime, y2_prime = match_bbox

    # The declared default (an empty list) cannot be unpacked into three
    # values. Treat "empty" as "nothing recorded yet", mirroring how
    # _nearest_edge_distance falls back to [inf, inf] for an empty edge config.
    # A fresh list is built so the shared default object is never returned.
    if min_distance_config is None or len(min_distance_config) == 0:
        min_distance_config = [
            [float("inf"), float("inf")],
            float("inf"),
            float("inf"),
        ]

    min_edge_distance_config, up_edge_distances_config, total_distance = (
        min_distance_config
    )

    iou_distance = 0

    if label in vision_labels:
        horizontal1 = horizontal2 = True
    else:
        horizontal1 = _get_bbox_direction(input_bbox)
        horizontal2 = _get_bbox_direction(match_bbox, 3)

    # Penalise candidates whose direction differs or that barely overlap the
    # input box along the projection axis.
    if (
        horizontal1 != horizontal2
        or _get_projection_iou(input_bbox, match_bbox, horizontal1) < 0.01
    ):
        iou_distance = 1

    if label == "doc_title":
        # Calculate distance for titles
        disperse = max(1, median_width)
        tolerance_len = max(tolerance_len, disperse)

    # Adjust input_bbox based on sub_title
    # NOTE(review): x2 is merged with min() rather than max(), so this is not a
    # plain bounding-box union; kept as-is -- confirm this is intended.
    if sub_title:
        for sub in sub_title:
            x1_, y1_, x2_, y2_ = sub
            x1, y1, x2, y2 = (
                min(x1, x1_),
                min(y1, y1_),
                min(x2, x2_),
                max(y2, y2_),
            )
        input_bbox = [x1, y1, x2, y2]

    if title_text:
        for sub in title_text:
            x1_, y1_, x2_, y2_ = sub[1]
            if horizontal1:
                # Horizontal input: grow downwards only.
                x1, y1, x2, y2 = (
                    min(x1, x1_),
                    min(y1, y1_),
                    min(x2, x2_),
                    max(y2, y2_),
                )
            else:
                # Non-horizontal input: grow to the right only.
                x1, y1, x2, y2 = (
                    min(x1, x1_),
                    min(y1, y1_),
                    max(x2, x2_),
                    min(y2, y2_),
                )
        input_bbox = [x1, y1, x2, y2]

    # Calculate edge distance
    weight = _get_weights(label, horizontal1)
    if label == "abstract":
        tolerance_len *= 2

    edge_distance, edge_distance_config = _nearest_edge_distance(
        input_bbox,
        match_bbox,
        weight,
        label=label,
        no_mask_labels=no_mask_labels,
        min_edge_distance_config=min_edge_distance_config,
        tolerance_len=tolerance_len,
    )

    # Weights for combining distances
    iou_edge_weight = [10**8, 10**4, 1, 0.0001]

    # Calculate up and left edge distances
    up_edge_distance = y1_prime
    left_edge_distance = x1_prime
    if (
        label in no_mask_labels or label in title_labels or label in vision_labels
    ) and y1 > y2_prime:
        # Input lies below the match: use the negated bottom/right coordinates.
        up_edge_distance = -y2_prime
        left_edge_distance = -x2_prime

    # Snap to the recorded best value when within tolerance so that nearly
    # aligned candidates compare as equal on this term.
    min_up_edge_distance = up_edge_distances_config
    if abs(min_up_edge_distance - up_edge_distance) <= tolerance_len:
        up_edge_distance = min_up_edge_distance

    # Calculate total distance
    distance = (
        iou_distance * iou_edge_weight[0]
        + edge_distance * iou_edge_weight[1]
        + up_edge_distance * iou_edge_weight[2]
        + left_edge_distance * iou_edge_weight[3]
    )

    # Update minimum distance configuration if a smaller distance is found
    if total_distance > distance:
        edge_distance_config = [
            edge_distance_config[0],
            edge_distance_config[1],
        ]
        min_distance_config = [
            edge_distance_config,
            up_edge_distance,
            distance,
        ]

    return distance, min_distance_config
2351
+
2352
+
2353
def get_show_color(label: str) -> Tuple:
    """Look up the display color for a layout label.

    Args:
        label: Layout label such as ``"text"``, ``"table"`` or ``"doc_title"``.

    Returns:
        A 4-tuple of ints. The first three components vary per label and the
        fourth is always 100 (presumably R, G, B and alpha -- confirm against
        the drawing code). Unknown labels get the neutral gray
        ``(158, 158, 158, 100)``.
    """
    shared_fourth = 100  # identical for every entry, including the fallback
    neutral_gray = (158, 158, 158)

    first_three_by_label = {
        # Titles and captions
        "paragraph_title": (102, 102, 255),  # Medium Blue ('titles_list')
        "doc_title": (255, 248, 220),  # Cornsilk
        "table_title": (255, 255, 102),  # Light Yellow ('tables_caption_list')
        "figure_title": (102, 178, 255),  # Sky Blue ('imgs_caption_list')
        "chart_title": (221, 160, 221),  # Plum
        "vision_footnote": (144, 238, 144),  # Light Green
        # Textual content
        "text": (153, 0, 76),  # Deep Purple ('texts_list')
        "formula": (0, 255, 0),  # Bright Green ('interequations_list')
        "abstract": (255, 239, 213),  # Papaya Whip
        "content": (40, 169, 92),  # Medium Green ('lists_list', 'indexs_list')
        "reference": (229, 255, 204),  # Pale Yellow-Green ('tables_footnote_list')
        "algorithm": (255, 250, 240),  # Floral White
        # Visual regions
        "seal": neutral_gray,  # Neutral Gray ('dropped_bbox_list')
        "table": (204, 204, 0),  # Olive Yellow ('tables_body_list')
        "image": (153, 255, 51),  # Bright Green ('imgs_body_list')
        "figure": (153, 255, 51),  # Bright Green ('imgs_body_list')
        "chart": (216, 191, 216),  # Thistle
    }

    first, second, third = first_three_by_label.get(label, neutral_gray)
    return (first, second, third, shared_fourth)