paddlex 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (940):
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +1 -0
  3. paddlex/__main__.py +3 -4
  4. paddlex/configs/modules/3d_bev_detection/BEVFusion.yaml +38 -0
  5. paddlex/configs/modules/face_feature/MobileFaceNet.yaml +41 -0
  6. paddlex/configs/modules/face_feature/ResNet50_face.yaml +41 -0
  7. paddlex/configs/modules/formula_recognition/LaTeX_OCR_rec.yaml +40 -0
  8. paddlex/configs/modules/formula_recognition/PP-FormulaNet-L.yaml +40 -0
  9. paddlex/configs/modules/formula_recognition/PP-FormulaNet-S.yaml +40 -0
  10. paddlex/configs/modules/formula_recognition/UniMERNet.yaml +40 -0
  11. paddlex/configs/modules/image_classification/CLIP_vit_base_patch16_224.yaml +41 -0
  12. paddlex/configs/modules/image_classification/CLIP_vit_large_patch14_224.yaml +41 -0
  13. paddlex/configs/modules/image_classification/ConvNeXt_large_384.yaml +41 -0
  14. paddlex/configs/modules/keypoint_detection/PP-TinyPose_128x96.yaml +40 -0
  15. paddlex/configs/modules/keypoint_detection/PP-TinyPose_256x192.yaml +40 -0
  16. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +40 -0
  17. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +40 -0
  18. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +40 -0
  19. paddlex/configs/modules/multilingual_speech_recognition/whisper_base.yaml +12 -0
  20. paddlex/configs/modules/multilingual_speech_recognition/whisper_large.yaml +12 -0
  21. paddlex/configs/modules/multilingual_speech_recognition/whisper_medium.yaml +12 -0
  22. paddlex/configs/modules/multilingual_speech_recognition/whisper_small.yaml +12 -0
  23. paddlex/configs/modules/multilingual_speech_recognition/whisper_tiny.yaml +12 -0
  24. paddlex/configs/modules/object_detection/Co-DINO-R50.yaml +40 -0
  25. paddlex/configs/modules/object_detection/Co-DINO-Swin-L.yaml +40 -0
  26. paddlex/configs/modules/object_detection/Co-Deformable-DETR-R50.yaml +40 -0
  27. paddlex/configs/modules/object_detection/Co-Deformable-DETR-Swin-T.yaml +40 -0
  28. paddlex/configs/modules/object_detection/YOLOX-X.yaml +40 -0
  29. paddlex/configs/modules/open_vocabulary_detection/GroundingDINO-T.yaml +13 -0
  30. paddlex/configs/modules/open_vocabulary_segmentation/SAM-H_box.yaml +17 -0
  31. paddlex/configs/modules/open_vocabulary_segmentation/SAM-H_point.yaml +15 -0
  32. paddlex/configs/modules/rotated_object_detection/PP-YOLOE-R-L.yaml +40 -0
  33. paddlex/configs/modules/semantic_segmentation/MaskFormer_small.yaml +42 -0
  34. paddlex/configs/modules/semantic_segmentation/MaskFormer_tiny.yaml +42 -0
  35. paddlex/configs/modules/semantic_segmentation/SeaFormer_base.yaml +40 -0
  36. paddlex/configs/modules/semantic_segmentation/SeaFormer_large.yaml +40 -0
  37. paddlex/configs/modules/semantic_segmentation/SeaFormer_small.yaml +40 -0
  38. paddlex/configs/modules/semantic_segmentation/SeaFormer_tiny.yaml +40 -0
  39. paddlex/configs/modules/table_cells_detection/RT-DETR-L_wired_table_cell_det.yaml +40 -0
  40. paddlex/configs/modules/table_cells_detection/RT-DETR-L_wireless_table_cell_det.yaml +40 -0
  41. paddlex/configs/modules/table_classification/PP-LCNet_x1_0_table_cls.yaml +41 -0
  42. paddlex/configs/modules/table_structure_recognition/SLANeXt_wired.yaml +39 -0
  43. paddlex/configs/modules/table_structure_recognition/SLANeXt_wireless.yaml +39 -0
  44. paddlex/configs/modules/text_detection/PP-OCRv3_mobile_det.yaml +40 -0
  45. paddlex/configs/modules/text_detection/PP-OCRv3_server_det.yaml +40 -0
  46. paddlex/configs/modules/text_recognition/PP-OCRv3_mobile_rec.yaml +39 -0
  47. paddlex/configs/modules/text_recognition/PP-OCRv4_server_rec_doc.yaml +39 -0
  48. paddlex/configs/modules/text_recognition/arabic_PP-OCRv3_mobile_rec.yaml +39 -0
  49. paddlex/configs/modules/text_recognition/chinese_cht_PP-OCRv3_mobile_rec.yaml +39 -0
  50. paddlex/configs/modules/text_recognition/cyrillic_PP-OCRv3_mobile_rec.yaml +39 -0
  51. paddlex/configs/modules/text_recognition/devanagari_PP-OCRv3_mobile_rec.yaml +39 -0
  52. paddlex/configs/modules/text_recognition/en_PP-OCRv3_mobile_rec.yaml +39 -0
  53. paddlex/configs/modules/text_recognition/en_PP-OCRv4_mobile_rec.yaml +39 -0
  54. paddlex/configs/modules/text_recognition/japan_PP-OCRv3_mobile_rec.yaml +39 -0
  55. paddlex/configs/modules/text_recognition/ka_PP-OCRv3_mobile_rec.yaml +39 -0
  56. paddlex/configs/modules/text_recognition/korean_PP-OCRv3_mobile_rec.yaml +39 -0
  57. paddlex/configs/modules/text_recognition/latin_PP-OCRv3_mobile_rec.yaml +39 -0
  58. paddlex/configs/modules/text_recognition/ta_PP-OCRv3_mobile_rec.yaml +39 -0
  59. paddlex/configs/modules/text_recognition/te_PP-OCRv3_mobile_rec.yaml +39 -0
  60. paddlex/configs/modules/textline_orientation/PP-LCNet_x0_25_textline_ori.yaml +41 -0
  61. paddlex/configs/modules/video_classification/PP-TSM-R50_8frames_uniform.yaml +42 -0
  62. paddlex/configs/modules/video_classification/PP-TSMv2-LCNetV2_16frames_uniform.yaml +42 -0
  63. paddlex/configs/modules/video_classification/PP-TSMv2-LCNetV2_8frames_uniform.yaml +42 -0
  64. paddlex/configs/modules/video_detection/YOWO.yaml +40 -0
  65. paddlex/configs/pipelines/3d_bev_detection.yaml +9 -0
  66. paddlex/configs/pipelines/OCR.yaml +44 -0
  67. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +149 -0
  68. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +184 -0
  69. paddlex/configs/pipelines/PP-ShiTuV2.yaml +18 -0
  70. paddlex/configs/pipelines/PP-StructureV3.yaml +226 -0
  71. paddlex/configs/pipelines/anomaly_detection.yaml +8 -0
  72. paddlex/configs/pipelines/doc_preprocessor.yaml +15 -0
  73. paddlex/configs/pipelines/face_recognition.yaml +18 -0
  74. paddlex/configs/pipelines/formula_recognition.yaml +39 -0
  75. paddlex/configs/pipelines/human_keypoint_detection.yaml +17 -0
  76. paddlex/configs/pipelines/image_classification.yaml +10 -0
  77. paddlex/configs/pipelines/image_multilabel_classification.yaml +9 -0
  78. paddlex/configs/pipelines/instance_segmentation.yaml +10 -0
  79. paddlex/configs/pipelines/layout_parsing.yaml +101 -0
  80. paddlex/configs/pipelines/multilingual_speech_recognition.yaml +9 -0
  81. paddlex/configs/pipelines/object_detection.yaml +10 -0
  82. paddlex/configs/pipelines/open_vocabulary_detection.yaml +12 -0
  83. paddlex/configs/pipelines/open_vocabulary_segmentation.yaml +13 -0
  84. paddlex/configs/pipelines/pedestrian_attribute_recognition.yaml +15 -0
  85. paddlex/configs/pipelines/rotated_object_detection.yaml +10 -0
  86. paddlex/configs/pipelines/seal_recognition.yaml +51 -0
  87. paddlex/configs/pipelines/semantic_segmentation.yaml +10 -0
  88. paddlex/configs/pipelines/small_object_detection.yaml +10 -0
  89. paddlex/configs/pipelines/table_recognition.yaml +56 -0
  90. paddlex/configs/pipelines/table_recognition_v2.yaml +76 -0
  91. paddlex/configs/pipelines/ts_anomaly_detection.yaml +8 -0
  92. paddlex/configs/pipelines/ts_classification.yaml +8 -0
  93. paddlex/configs/pipelines/ts_forecast.yaml +8 -0
  94. paddlex/configs/pipelines/vehicle_attribute_recognition.yaml +15 -0
  95. paddlex/configs/pipelines/video_classification.yaml +9 -0
  96. paddlex/configs/pipelines/video_detection.yaml +10 -0
  97. paddlex/engine.py +1 -1
  98. paddlex/hpip_links.html +19 -0
  99. paddlex/inference/__init__.py +3 -1
  100. paddlex/inference/common/batch_sampler/__init__.py +20 -0
  101. paddlex/inference/common/batch_sampler/audio_batch_sampler.py +84 -0
  102. paddlex/inference/common/batch_sampler/base_batch_sampler.py +90 -0
  103. paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +147 -0
  104. paddlex/inference/common/batch_sampler/image_batch_sampler.py +136 -0
  105. paddlex/inference/common/batch_sampler/ts_batch_sampler.py +110 -0
  106. paddlex/inference/common/batch_sampler/video_batch_sampler.py +94 -0
  107. paddlex/inference/common/reader/__init__.py +19 -0
  108. paddlex/inference/common/reader/audio_reader.py +46 -0
  109. paddlex/inference/common/reader/det_3d_reader.py +239 -0
  110. paddlex/inference/common/reader/image_reader.py +69 -0
  111. paddlex/inference/common/reader/ts_reader.py +45 -0
  112. paddlex/inference/common/reader/video_reader.py +42 -0
  113. paddlex/inference/common/result/__init__.py +29 -0
  114. paddlex/inference/common/result/base_cv_result.py +31 -0
  115. paddlex/inference/common/result/base_result.py +70 -0
  116. paddlex/inference/common/result/base_ts_result.py +42 -0
  117. paddlex/inference/common/result/base_video_result.py +36 -0
  118. paddlex/inference/common/result/mixin.py +703 -0
  119. paddlex/inference/models/3d_bev_detection/__init__.py +15 -0
  120. paddlex/inference/models/3d_bev_detection/predictor.py +314 -0
  121. paddlex/inference/models/3d_bev_detection/processors.py +978 -0
  122. paddlex/inference/models/3d_bev_detection/result.py +65 -0
  123. paddlex/inference/models/3d_bev_detection/visualizer_3d.py +131 -0
  124. paddlex/inference/models/__init__.py +37 -13
  125. paddlex/inference/models/anomaly_detection/__init__.py +15 -0
  126. paddlex/inference/models/anomaly_detection/predictor.py +145 -0
  127. paddlex/inference/models/anomaly_detection/processors.py +46 -0
  128. paddlex/inference/models/anomaly_detection/result.py +70 -0
  129. paddlex/inference/models/base/__init__.py +1 -2
  130. paddlex/inference/models/base/predictor/__init__.py +16 -0
  131. paddlex/inference/models/base/predictor/base_predictor.py +175 -0
  132. paddlex/inference/models/base/predictor/basic_predictor.py +139 -0
  133. paddlex/inference/models/common/__init__.py +35 -0
  134. paddlex/inference/models/common/static_infer.py +329 -0
  135. paddlex/inference/models/common/tokenizer/__init__.py +17 -0
  136. paddlex/inference/models/common/tokenizer/bert_tokenizer.py +655 -0
  137. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +451 -0
  138. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +2141 -0
  139. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3504 -0
  140. paddlex/inference/models/common/tokenizer/utils.py +66 -0
  141. paddlex/inference/models/common/tokenizer/vocab.py +647 -0
  142. paddlex/inference/models/common/ts/__init__.py +15 -0
  143. paddlex/inference/models/common/ts/funcs.py +533 -0
  144. paddlex/inference/models/common/ts/processors.py +313 -0
  145. paddlex/inference/models/common/vision/__init__.py +23 -0
  146. paddlex/inference/models/common/vision/funcs.py +93 -0
  147. paddlex/inference/models/common/vision/processors.py +270 -0
  148. paddlex/inference/models/face_feature/__init__.py +15 -0
  149. paddlex/inference/models/face_feature/predictor.py +65 -0
  150. paddlex/inference/models/formula_recognition/__init__.py +15 -0
  151. paddlex/inference/models/formula_recognition/predictor.py +203 -0
  152. paddlex/inference/models/formula_recognition/processors.py +986 -0
  153. paddlex/inference/models/formula_recognition/result.py +403 -0
  154. paddlex/inference/models/image_classification/__init__.py +15 -0
  155. paddlex/inference/models/image_classification/predictor.py +182 -0
  156. paddlex/inference/models/image_classification/processors.py +87 -0
  157. paddlex/inference/models/image_classification/result.py +92 -0
  158. paddlex/inference/models/image_feature/__init__.py +15 -0
  159. paddlex/inference/models/image_feature/predictor.py +156 -0
  160. paddlex/inference/models/image_feature/processors.py +29 -0
  161. paddlex/inference/models/image_feature/result.py +33 -0
  162. paddlex/inference/models/image_multilabel_classification/__init__.py +15 -0
  163. paddlex/inference/models/image_multilabel_classification/predictor.py +94 -0
  164. paddlex/inference/models/image_multilabel_classification/processors.py +85 -0
  165. paddlex/inference/models/image_multilabel_classification/result.py +95 -0
  166. paddlex/inference/models/image_unwarping/__init__.py +15 -0
  167. paddlex/inference/models/image_unwarping/predictor.py +105 -0
  168. paddlex/inference/models/image_unwarping/processors.py +88 -0
  169. paddlex/inference/models/image_unwarping/result.py +45 -0
  170. paddlex/inference/models/instance_segmentation/__init__.py +15 -0
  171. paddlex/inference/models/instance_segmentation/predictor.py +210 -0
  172. paddlex/inference/models/instance_segmentation/processors.py +105 -0
  173. paddlex/inference/models/instance_segmentation/result.py +161 -0
  174. paddlex/inference/models/keypoint_detection/__init__.py +15 -0
  175. paddlex/inference/models/keypoint_detection/predictor.py +188 -0
  176. paddlex/inference/models/keypoint_detection/processors.py +359 -0
  177. paddlex/inference/models/keypoint_detection/result.py +192 -0
  178. paddlex/inference/models/multilingual_speech_recognition/__init__.py +15 -0
  179. paddlex/inference/models/multilingual_speech_recognition/predictor.py +141 -0
  180. paddlex/inference/models/multilingual_speech_recognition/processors.py +1941 -0
  181. paddlex/inference/models/multilingual_speech_recognition/result.py +21 -0
  182. paddlex/inference/models/object_detection/__init__.py +15 -0
  183. paddlex/inference/models/object_detection/predictor.py +348 -0
  184. paddlex/inference/models/object_detection/processors.py +855 -0
  185. paddlex/inference/models/object_detection/result.py +113 -0
  186. paddlex/inference/models/object_detection/utils.py +68 -0
  187. paddlex/inference/models/open_vocabulary_detection/__init__.py +15 -0
  188. paddlex/inference/models/open_vocabulary_detection/predictor.py +155 -0
  189. paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +15 -0
  190. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +485 -0
  191. paddlex/inference/models/open_vocabulary_segmentation/__init__.py +15 -0
  192. paddlex/inference/models/open_vocabulary_segmentation/predictor.py +120 -0
  193. paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +15 -0
  194. paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +249 -0
  195. paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +15 -0
  196. paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +147 -0
  197. paddlex/inference/models/semantic_segmentation/__init__.py +15 -0
  198. paddlex/inference/models/semantic_segmentation/predictor.py +167 -0
  199. paddlex/inference/models/semantic_segmentation/processors.py +114 -0
  200. paddlex/inference/models/semantic_segmentation/result.py +72 -0
  201. paddlex/inference/models/table_structure_recognition/__init__.py +15 -0
  202. paddlex/inference/models/table_structure_recognition/predictor.py +171 -0
  203. paddlex/inference/models/table_structure_recognition/processors.py +235 -0
  204. paddlex/inference/models/table_structure_recognition/result.py +70 -0
  205. paddlex/inference/models/text_detection/__init__.py +15 -0
  206. paddlex/inference/models/text_detection/predictor.py +191 -0
  207. paddlex/inference/models/text_detection/processors.py +466 -0
  208. paddlex/inference/models/text_detection/result.py +51 -0
  209. paddlex/inference/models/text_recognition/__init__.py +15 -0
  210. paddlex/inference/models/text_recognition/predictor.py +106 -0
  211. paddlex/inference/models/text_recognition/processors.py +231 -0
  212. paddlex/inference/models/text_recognition/result.py +75 -0
  213. paddlex/inference/models/ts_anomaly_detection/__init__.py +15 -0
  214. paddlex/inference/models/ts_anomaly_detection/predictor.py +146 -0
  215. paddlex/inference/models/ts_anomaly_detection/processors.py +94 -0
  216. paddlex/inference/models/ts_anomaly_detection/result.py +72 -0
  217. paddlex/inference/models/ts_classification/__init__.py +15 -0
  218. paddlex/inference/models/ts_classification/predictor.py +135 -0
  219. paddlex/inference/models/ts_classification/processors.py +117 -0
  220. paddlex/inference/models/ts_classification/result.py +78 -0
  221. paddlex/inference/models/ts_forecasting/__init__.py +15 -0
  222. paddlex/inference/models/ts_forecasting/predictor.py +159 -0
  223. paddlex/inference/models/ts_forecasting/processors.py +149 -0
  224. paddlex/inference/models/ts_forecasting/result.py +83 -0
  225. paddlex/inference/models/video_classification/__init__.py +15 -0
  226. paddlex/inference/models/video_classification/predictor.py +147 -0
  227. paddlex/inference/models/video_classification/processors.py +409 -0
  228. paddlex/inference/models/video_classification/result.py +92 -0
  229. paddlex/inference/models/video_detection/__init__.py +15 -0
  230. paddlex/inference/models/video_detection/predictor.py +136 -0
  231. paddlex/inference/models/video_detection/processors.py +450 -0
  232. paddlex/inference/models/video_detection/result.py +104 -0
  233. paddlex/inference/pipelines/3d_bev_detection/__init__.py +15 -0
  234. paddlex/inference/pipelines/3d_bev_detection/pipeline.py +67 -0
  235. paddlex/inference/pipelines/__init__.py +174 -73
  236. paddlex/inference/pipelines/anomaly_detection/__init__.py +15 -0
  237. paddlex/inference/pipelines/anomaly_detection/pipeline.py +62 -0
  238. paddlex/inference/pipelines/attribute_recognition/__init__.py +15 -0
  239. paddlex/inference/pipelines/attribute_recognition/pipeline.py +105 -0
  240. paddlex/inference/pipelines/attribute_recognition/result.py +100 -0
  241. paddlex/inference/pipelines/base.py +103 -57
  242. paddlex/inference/pipelines/components/__init__.py +23 -0
  243. paddlex/inference/pipelines/components/chat_server/__init__.py +16 -0
  244. paddlex/inference/pipelines/components/chat_server/base.py +39 -0
  245. paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +236 -0
  246. paddlex/inference/pipelines/components/common/__init__.py +18 -0
  247. paddlex/inference/pipelines/components/common/base_operator.py +36 -0
  248. paddlex/inference/pipelines/components/common/base_result.py +65 -0
  249. paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +46 -0
  250. paddlex/inference/pipelines/components/common/crop_image_regions.py +550 -0
  251. paddlex/inference/pipelines/components/common/seal_det_warp.py +941 -0
  252. paddlex/inference/pipelines/components/common/sort_boxes.py +83 -0
  253. paddlex/inference/pipelines/components/faisser.py +352 -0
  254. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +16 -0
  255. paddlex/inference/pipelines/components/prompt_engineering/base.py +35 -0
  256. paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +127 -0
  257. paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +148 -0
  258. paddlex/inference/pipelines/components/retriever/__init__.py +16 -0
  259. paddlex/inference/pipelines/components/retriever/base.py +226 -0
  260. paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +70 -0
  261. paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +163 -0
  262. paddlex/inference/pipelines/components/utils/__init__.py +13 -0
  263. paddlex/inference/pipelines/components/utils/mixin.py +206 -0
  264. paddlex/inference/pipelines/doc_preprocessor/__init__.py +15 -0
  265. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +190 -0
  266. paddlex/inference/pipelines/doc_preprocessor/result.py +103 -0
  267. paddlex/inference/pipelines/face_recognition/__init__.py +15 -0
  268. paddlex/inference/pipelines/face_recognition/pipeline.py +61 -0
  269. paddlex/inference/pipelines/face_recognition/result.py +43 -0
  270. paddlex/inference/pipelines/formula_recognition/__init__.py +15 -0
  271. paddlex/inference/pipelines/formula_recognition/pipeline.py +303 -0
  272. paddlex/inference/pipelines/formula_recognition/result.py +291 -0
  273. paddlex/inference/pipelines/image_classification/__init__.py +15 -0
  274. paddlex/inference/pipelines/image_classification/pipeline.py +71 -0
  275. paddlex/inference/pipelines/image_multilabel_classification/__init__.py +15 -0
  276. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +78 -0
  277. paddlex/inference/pipelines/instance_segmentation/__init__.py +15 -0
  278. paddlex/inference/pipelines/instance_segmentation/pipeline.py +70 -0
  279. paddlex/inference/pipelines/keypoint_detection/__init__.py +15 -0
  280. paddlex/inference/pipelines/keypoint_detection/pipeline.py +137 -0
  281. paddlex/inference/pipelines/layout_parsing/__init__.py +2 -1
  282. paddlex/inference/pipelines/layout_parsing/pipeline.py +570 -0
  283. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +739 -0
  284. paddlex/inference/pipelines/layout_parsing/result.py +203 -0
  285. paddlex/inference/pipelines/layout_parsing/result_v2.py +470 -0
  286. paddlex/inference/pipelines/layout_parsing/utils.py +2385 -0
  287. paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +15 -0
  288. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +67 -0
  289. paddlex/inference/pipelines/object_detection/__init__.py +15 -0
  290. paddlex/inference/pipelines/object_detection/pipeline.py +95 -0
  291. paddlex/inference/pipelines/ocr/__init__.py +15 -0
  292. paddlex/inference/pipelines/ocr/pipeline.py +389 -0
  293. paddlex/inference/pipelines/ocr/result.py +248 -0
  294. paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +15 -0
  295. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +75 -0
  296. paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +15 -0
  297. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +89 -0
  298. paddlex/inference/pipelines/pp_chatocr/__init__.py +16 -0
  299. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +102 -0
  300. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +773 -0
  301. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +977 -0
  302. paddlex/inference/pipelines/pp_shitu_v2/__init__.py +15 -0
  303. paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +152 -0
  304. paddlex/inference/pipelines/pp_shitu_v2/result.py +126 -0
  305. paddlex/inference/pipelines/rotated_object_detection/__init__.py +15 -0
  306. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +74 -0
  307. paddlex/inference/pipelines/seal_recognition/__init__.py +15 -0
  308. paddlex/inference/pipelines/seal_recognition/pipeline.py +271 -0
  309. paddlex/inference/pipelines/seal_recognition/result.py +87 -0
  310. paddlex/inference/pipelines/semantic_segmentation/__init__.py +15 -0
  311. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +74 -0
  312. paddlex/inference/pipelines/small_object_detection/__init__.py +15 -0
  313. paddlex/inference/pipelines/small_object_detection/pipeline.py +74 -0
  314. paddlex/inference/pipelines/table_recognition/__init__.py +2 -1
  315. paddlex/inference/pipelines/table_recognition/pipeline.py +462 -0
  316. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +792 -0
  317. paddlex/inference/pipelines/table_recognition/result.py +216 -0
  318. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +362 -0
  319. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +470 -0
  320. paddlex/inference/pipelines/table_recognition/utils.py +23 -436
  321. paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +15 -0
  322. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +62 -0
  323. paddlex/inference/pipelines/ts_classification/__init__.py +15 -0
  324. paddlex/inference/pipelines/ts_classification/pipeline.py +62 -0
  325. paddlex/inference/pipelines/ts_forecasting/__init__.py +15 -0
  326. paddlex/inference/pipelines/ts_forecasting/pipeline.py +62 -0
  327. paddlex/inference/pipelines/video_classification/__init__.py +15 -0
  328. paddlex/inference/pipelines/video_classification/pipeline.py +68 -0
  329. paddlex/inference/pipelines/video_detection/__init__.py +15 -0
  330. paddlex/inference/pipelines/video_detection/pipeline.py +73 -0
  331. paddlex/inference/serving/__init__.py +13 -0
  332. paddlex/inference/serving/basic_serving/__init__.py +18 -0
  333. paddlex/inference/serving/basic_serving/_app.py +209 -0
  334. paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +41 -0
  335. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +13 -0
  336. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +96 -0
  337. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +36 -0
  338. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +90 -0
  339. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +64 -0
  340. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +97 -0
  341. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +223 -0
  342. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +97 -0
  343. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +78 -0
  344. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +66 -0
  345. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +70 -0
  346. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +81 -0
  347. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +115 -0
  348. paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +76 -0
  349. paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +89 -0
  350. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +74 -0
  351. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +99 -0
  352. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +78 -0
  353. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +85 -0
  354. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +81 -0
  355. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +191 -0
  356. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +221 -0
  357. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +218 -0
  358. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +136 -0
  359. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +78 -0
  360. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +103 -0
  361. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +64 -0
  362. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +69 -0
  363. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +105 -0
  364. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +107 -0
  365. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +62 -0
  366. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +61 -0
  367. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +62 -0
  368. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +81 -0
  369. paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +73 -0
  370. paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +89 -0
  371. paddlex/inference/serving/basic_serving/_server.py +35 -0
  372. paddlex/inference/serving/infra/__init__.py +13 -0
  373. paddlex/inference/serving/infra/config.py +36 -0
  374. paddlex/inference/serving/infra/models.py +72 -0
  375. paddlex/inference/serving/infra/storage.py +175 -0
  376. paddlex/inference/serving/infra/utils.py +259 -0
  377. paddlex/inference/serving/schemas/__init__.py +13 -0
  378. paddlex/inference/serving/schemas/anomaly_detection.py +39 -0
  379. paddlex/inference/serving/schemas/doc_preprocessor.py +54 -0
  380. paddlex/inference/serving/schemas/face_recognition.py +124 -0
  381. paddlex/inference/serving/schemas/formula_recognition.py +56 -0
  382. paddlex/inference/serving/schemas/human_keypoint_detection.py +55 -0
  383. paddlex/inference/serving/schemas/image_classification.py +45 -0
  384. paddlex/inference/serving/schemas/image_multilabel_classification.py +47 -0
  385. paddlex/inference/serving/schemas/instance_segmentation.py +53 -0
  386. paddlex/inference/serving/schemas/layout_parsing.py +72 -0
  387. paddlex/inference/serving/schemas/m_3d_bev_detection.py +48 -0
  388. paddlex/inference/serving/schemas/multilingual_speech_recognition.py +57 -0
  389. paddlex/inference/serving/schemas/object_detection.py +52 -0
  390. paddlex/inference/serving/schemas/ocr.py +60 -0
  391. paddlex/inference/serving/schemas/open_vocabulary_detection.py +52 -0
  392. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +52 -0
  393. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +61 -0
  394. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +134 -0
  395. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +151 -0
  396. paddlex/inference/serving/schemas/pp_shituv2.py +124 -0
  397. paddlex/inference/serving/schemas/pp_structurev3.py +84 -0
  398. paddlex/inference/serving/schemas/rotated_object_detection.py +52 -0
  399. paddlex/inference/serving/schemas/seal_recognition.py +62 -0
  400. paddlex/inference/serving/schemas/semantic_segmentation.py +45 -0
  401. paddlex/inference/serving/schemas/shared/__init__.py +13 -0
  402. paddlex/inference/serving/schemas/shared/classification.py +23 -0
  403. paddlex/inference/serving/schemas/shared/image_segmentation.py +28 -0
  404. paddlex/inference/serving/schemas/shared/object_detection.py +24 -0
  405. paddlex/inference/serving/schemas/shared/ocr.py +25 -0
  406. paddlex/inference/serving/schemas/small_object_detection.py +52 -0
  407. paddlex/inference/serving/schemas/table_recognition.py +64 -0
  408. paddlex/inference/serving/schemas/table_recognition_v2.py +66 -0
  409. paddlex/inference/serving/schemas/ts_anomaly_detection.py +37 -0
  410. paddlex/inference/serving/schemas/ts_classification.py +38 -0
  411. paddlex/inference/serving/schemas/ts_forecast.py +37 -0
  412. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +61 -0
  413. paddlex/inference/serving/schemas/video_classification.py +44 -0
  414. paddlex/inference/serving/schemas/video_detection.py +56 -0
  415. paddlex/inference/utils/benchmark.py +23 -11
  416. paddlex/inference/utils/get_pipeline_path.py +2 -1
  417. paddlex/inference/utils/io/__init__.py +3 -0
  418. paddlex/inference/utils/io/readers.py +164 -17
  419. paddlex/inference/utils/io/writers.py +85 -2
  420. paddlex/inference/utils/new_ir_blacklist.py +6 -0
  421. paddlex/inference/utils/official_models.py +277 -211
  422. paddlex/inference/utils/pp_option.py +24 -4
  423. paddlex/model.py +12 -5
  424. paddlex/modules/3d_bev_detection/__init__.py +18 -0
  425. paddlex/modules/3d_bev_detection/dataset_checker/__init__.py +95 -0
  426. paddlex/modules/3d_bev_detection/dataset_checker/dataset_src/__init__.py +17 -0
  427. paddlex/modules/3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +106 -0
  428. paddlex/modules/3d_bev_detection/dataset_checker/dataset_src/check_dataset.py +102 -0
  429. paddlex/modules/3d_bev_detection/evaluator.py +46 -0
  430. paddlex/modules/3d_bev_detection/exportor.py +22 -0
  431. paddlex/modules/3d_bev_detection/model_list.py +18 -0
  432. paddlex/modules/3d_bev_detection/trainer.py +70 -0
  433. paddlex/modules/__init__.py +34 -1
  434. paddlex/modules/base/build_model.py +1 -1
  435. paddlex/modules/base/dataset_checker/dataset_checker.py +6 -1
  436. paddlex/modules/base/evaluator.py +20 -4
  437. paddlex/modules/base/exportor.py +30 -5
  438. paddlex/modules/base/trainer.py +29 -6
  439. paddlex/modules/face_recognition/trainer.py +1 -23
  440. paddlex/modules/formula_recognition/__init__.py +5 -0
  441. paddlex/modules/formula_recognition/dataset_checker/__init__.py +113 -0
  442. paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +19 -0
  443. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +157 -0
  444. paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +80 -0
  445. paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +94 -0
  446. paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +81 -0
  447. paddlex/modules/formula_recognition/evaluator.py +77 -0
  448. paddlex/modules/formula_recognition/exportor.py +22 -0
  449. paddlex/modules/formula_recognition/model_list.py +3 -0
  450. paddlex/modules/formula_recognition/trainer.py +121 -0
  451. paddlex/modules/image_classification/model_list.py +2 -0
  452. paddlex/modules/instance_segmentation/dataset_checker/__init__.py +15 -0
  453. paddlex/modules/keypoint_detection/__init__.py +18 -0
  454. paddlex/modules/keypoint_detection/dataset_checker/__init__.py +56 -0
  455. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +15 -0
  456. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +86 -0
  457. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +13 -0
  458. paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +119 -0
  459. paddlex/modules/keypoint_detection/evaluator.py +41 -0
  460. paddlex/modules/keypoint_detection/exportor.py +22 -0
  461. paddlex/modules/keypoint_detection/model_list.py +16 -0
  462. paddlex/modules/keypoint_detection/trainer.py +39 -0
  463. paddlex/modules/multilingual_speech_recognition/__init__.py +18 -0
  464. paddlex/modules/multilingual_speech_recognition/dataset_checker.py +27 -0
  465. paddlex/modules/multilingual_speech_recognition/evaluator.py +27 -0
  466. paddlex/modules/multilingual_speech_recognition/exportor.py +27 -0
  467. paddlex/modules/multilingual_speech_recognition/model_list.py +22 -0
  468. paddlex/modules/multilingual_speech_recognition/trainer.py +40 -0
  469. paddlex/modules/object_detection/evaluator.py +12 -1
  470. paddlex/modules/object_detection/model_list.py +10 -0
  471. paddlex/modules/object_detection/trainer.py +15 -1
  472. paddlex/modules/open_vocabulary_detection/__init__.py +18 -0
  473. paddlex/modules/open_vocabulary_detection/dataset_checker.py +29 -0
  474. paddlex/modules/open_vocabulary_detection/evaluator.py +29 -0
  475. paddlex/modules/open_vocabulary_detection/exportor.py +29 -0
  476. paddlex/modules/open_vocabulary_detection/model_list.py +18 -0
  477. paddlex/modules/open_vocabulary_detection/trainer.py +42 -0
  478. paddlex/modules/open_vocabulary_segmentation/__init__.py +18 -0
  479. paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +29 -0
  480. paddlex/modules/open_vocabulary_segmentation/evaluator.py +29 -0
  481. paddlex/modules/open_vocabulary_segmentation/exportor.py +29 -0
  482. paddlex/modules/open_vocabulary_segmentation/model_list.py +19 -0
  483. paddlex/modules/open_vocabulary_segmentation/trainer.py +42 -0
  484. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +15 -0
  485. paddlex/modules/semantic_segmentation/exportor.py +9 -0
  486. paddlex/modules/semantic_segmentation/model_list.py +2 -0
  487. paddlex/modules/semantic_segmentation/trainer.py +2 -0
  488. paddlex/modules/table_recognition/dataset_checker/__init__.py +16 -1
  489. paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +13 -14
  490. paddlex/modules/table_recognition/model_list.py +2 -0
  491. paddlex/modules/text_detection/dataset_checker/__init__.py +16 -1
  492. paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +13 -3
  493. paddlex/modules/text_detection/model_list.py +2 -0
  494. paddlex/modules/text_recognition/dataset_checker/__init__.py +16 -4
  495. paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +13 -3
  496. paddlex/modules/text_recognition/evaluator.py +4 -3
  497. paddlex/modules/text_recognition/exportor.py +0 -3
  498. paddlex/modules/text_recognition/model_list.py +14 -0
  499. paddlex/modules/text_recognition/trainer.py +4 -3
  500. paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +15 -0
  501. paddlex/modules/ts_anomaly_detection/trainer.py +17 -1
  502. paddlex/modules/ts_classification/dataset_checker/__init__.py +15 -0
  503. paddlex/modules/ts_classification/trainer.py +17 -1
  504. paddlex/modules/ts_forecast/dataset_checker/__init__.py +15 -0
  505. paddlex/modules/ts_forecast/trainer.py +17 -1
  506. paddlex/modules/video_classification/__init__.py +18 -0
  507. paddlex/modules/video_classification/dataset_checker/__init__.py +93 -0
  508. paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +18 -0
  509. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +93 -0
  510. paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +121 -0
  511. paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +82 -0
  512. paddlex/modules/video_classification/evaluator.py +44 -0
  513. paddlex/modules/video_classification/exportor.py +22 -0
  514. paddlex/modules/video_classification/model_list.py +19 -0
  515. paddlex/modules/video_classification/trainer.py +88 -0
  516. paddlex/modules/video_detection/__init__.py +18 -0
  517. paddlex/modules/video_detection/dataset_checker/__init__.py +86 -0
  518. paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +17 -0
  519. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +101 -0
  520. paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +134 -0
  521. paddlex/modules/video_detection/evaluator.py +42 -0
  522. paddlex/modules/video_detection/exportor.py +22 -0
  523. paddlex/modules/video_detection/model_list.py +15 -0
  524. paddlex/modules/video_detection/trainer.py +82 -0
  525. paddlex/ops/__init__.py +149 -0
  526. paddlex/ops/iou3d_nms/iou3d_cpu.cpp +264 -0
  527. paddlex/ops/iou3d_nms/iou3d_cpu.h +27 -0
  528. paddlex/ops/iou3d_nms/iou3d_nms.cpp +204 -0
  529. paddlex/ops/iou3d_nms/iou3d_nms.h +33 -0
  530. paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +108 -0
  531. paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +482 -0
  532. paddlex/ops/setup.py +37 -0
  533. paddlex/ops/voxel/voxelize_op.cc +191 -0
  534. paddlex/ops/voxel/voxelize_op.cu +346 -0
  535. paddlex/paddle2onnx_requirements.txt +1 -0
  536. paddlex/paddlex_cli.py +339 -72
  537. paddlex/repo_apis/Paddle3D_api/__init__.py +17 -0
  538. paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +18 -0
  539. paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +118 -0
  540. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +238 -0
  541. paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +55 -0
  542. paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +104 -0
  543. paddlex/repo_apis/Paddle3D_api/pp3d_config.py +144 -0
  544. paddlex/repo_apis/PaddleClas_api/cls/model.py +6 -0
  545. paddlex/repo_apis/PaddleClas_api/cls/register.py +20 -2
  546. paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +8 -4
  547. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +6 -0
  548. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +27 -5
  549. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +6 -0
  550. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +81 -0
  551. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +182 -3
  552. paddlex/repo_apis/PaddleOCR_api/__init__.py +1 -0
  553. paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +16 -0
  554. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +570 -0
  555. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +402 -0
  556. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +73 -0
  557. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +240 -0
  558. paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +18 -0
  559. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
  560. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +21 -0
  561. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +6 -0
  562. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +126 -7
  563. paddlex/repo_apis/PaddleSeg_api/seg/config.py +9 -0
  564. paddlex/repo_apis/PaddleSeg_api/seg/model.py +10 -0
  565. paddlex/repo_apis/PaddleSeg_api/seg/register.py +20 -0
  566. paddlex/repo_apis/PaddleTS_api/ts_base/config.py +24 -0
  567. paddlex/repo_apis/PaddleTS_api/ts_base/model.py +11 -7
  568. paddlex/repo_apis/PaddleVideo_api/__init__.py +17 -0
  569. paddlex/repo_apis/PaddleVideo_api/config_utils.py +51 -0
  570. paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +19 -0
  571. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +547 -0
  572. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +346 -0
  573. paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +71 -0
  574. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +205 -0
  575. paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +19 -0
  576. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +548 -0
  577. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +298 -0
  578. paddlex/repo_apis/PaddleVideo_api/video_det/register.py +45 -0
  579. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +200 -0
  580. paddlex/repo_apis/base/runner.py +2 -1
  581. paddlex/repo_manager/meta.py +29 -2
  582. paddlex/repo_manager/repo.py +24 -5
  583. paddlex/repo_manager/requirements.txt +10 -7
  584. paddlex/repo_manager/utils.py +62 -1
  585. paddlex/serving_requirements.txt +9 -0
  586. paddlex/utils/config.py +4 -3
  587. paddlex/utils/custom_device_whitelist.py +457 -0
  588. paddlex/utils/device.py +74 -26
  589. paddlex/utils/env.py +28 -0
  590. paddlex/utils/flags.py +4 -0
  591. paddlex/utils/fonts/__init__.py +48 -5
  592. paddlex/utils/lazy_loader.py +2 -0
  593. paddlex/utils/logging.py +1 -2
  594. paddlex/utils/pipeline_arguments.py +711 -0
  595. paddlex-3.0.0rc0.dist-info/METADATA +1035 -0
  596. paddlex-3.0.0rc0.dist-info/RECORD +1015 -0
  597. paddlex-3.0.0rc0.dist-info/WHEEL +5 -0
  598. paddlex/configs/face_recognition/MobileFaceNet.yaml +0 -44
  599. paddlex/configs/face_recognition/ResNet50_face.yaml +0 -44
  600. paddlex/configs/formula_recognition/LaTeX_OCR_rec.yaml +0 -40
  601. paddlex/configs/image_classification/CLIP_vit_base_patch16_224.yaml +0 -41
  602. paddlex/configs/image_classification/CLIP_vit_large_patch14_224.yaml +0 -41
  603. paddlex/configs/image_classification/ConvNeXt_large_384.yaml +0 -41
  604. paddlex/configs/object_detection/YOLOX-X.yaml +0 -40
  605. paddlex/configs/semantic_segmentation/SeaFormer_base.yaml +0 -40
  606. paddlex/configs/semantic_segmentation/SeaFormer_large.yaml +0 -40
  607. paddlex/configs/semantic_segmentation/SeaFormer_small.yaml +0 -40
  608. paddlex/configs/semantic_segmentation/SeaFormer_tiny.yaml +0 -40
  609. paddlex/inference/components/__init__.py +0 -18
  610. paddlex/inference/components/base.py +0 -292
  611. paddlex/inference/components/llm/__init__.py +0 -25
  612. paddlex/inference/components/llm/base.py +0 -65
  613. paddlex/inference/components/llm/erniebot.py +0 -212
  614. paddlex/inference/components/paddle_predictor/__init__.py +0 -20
  615. paddlex/inference/components/paddle_predictor/predictor.py +0 -332
  616. paddlex/inference/components/retrieval/__init__.py +0 -15
  617. paddlex/inference/components/retrieval/faiss.py +0 -359
  618. paddlex/inference/components/task_related/__init__.py +0 -33
  619. paddlex/inference/components/task_related/clas.py +0 -124
  620. paddlex/inference/components/task_related/det.py +0 -284
  621. paddlex/inference/components/task_related/instance_seg.py +0 -89
  622. paddlex/inference/components/task_related/seal_det_warp.py +0 -940
  623. paddlex/inference/components/task_related/seg.py +0 -40
  624. paddlex/inference/components/task_related/table_rec.py +0 -191
  625. paddlex/inference/components/task_related/text_det.py +0 -895
  626. paddlex/inference/components/task_related/text_rec.py +0 -353
  627. paddlex/inference/components/task_related/warp.py +0 -43
  628. paddlex/inference/components/transforms/__init__.py +0 -16
  629. paddlex/inference/components/transforms/image/__init__.py +0 -15
  630. paddlex/inference/components/transforms/image/common.py +0 -598
  631. paddlex/inference/components/transforms/image/funcs.py +0 -58
  632. paddlex/inference/components/transforms/read_data.py +0 -67
  633. paddlex/inference/components/transforms/ts/__init__.py +0 -15
  634. paddlex/inference/components/transforms/ts/common.py +0 -393
  635. paddlex/inference/components/transforms/ts/funcs.py +0 -424
  636. paddlex/inference/models/anomaly_detection.py +0 -87
  637. paddlex/inference/models/base/base_predictor.py +0 -76
  638. paddlex/inference/models/base/basic_predictor.py +0 -122
  639. paddlex/inference/models/face_recognition.py +0 -21
  640. paddlex/inference/models/formula_recognition.py +0 -55
  641. paddlex/inference/models/general_recognition.py +0 -99
  642. paddlex/inference/models/image_classification.py +0 -101
  643. paddlex/inference/models/image_unwarping.py +0 -43
  644. paddlex/inference/models/instance_segmentation.py +0 -66
  645. paddlex/inference/models/multilabel_classification.py +0 -33
  646. paddlex/inference/models/object_detection.py +0 -129
  647. paddlex/inference/models/semantic_segmentation.py +0 -86
  648. paddlex/inference/models/table_recognition.py +0 -106
  649. paddlex/inference/models/text_detection.py +0 -105
  650. paddlex/inference/models/text_recognition.py +0 -78
  651. paddlex/inference/models/ts_ad.py +0 -68
  652. paddlex/inference/models/ts_cls.py +0 -57
  653. paddlex/inference/models/ts_fc.py +0 -73
  654. paddlex/inference/pipelines/attribute_recognition.py +0 -92
  655. paddlex/inference/pipelines/face_recognition.py +0 -49
  656. paddlex/inference/pipelines/formula_recognition.py +0 -102
  657. paddlex/inference/pipelines/layout_parsing/layout_parsing.py +0 -362
  658. paddlex/inference/pipelines/ocr.py +0 -80
  659. paddlex/inference/pipelines/pp_shitu_v2.py +0 -152
  660. paddlex/inference/pipelines/ppchatocrv3/__init__.py +0 -15
  661. paddlex/inference/pipelines/ppchatocrv3/ch_prompt.yaml +0 -14
  662. paddlex/inference/pipelines/ppchatocrv3/ppchatocrv3.py +0 -717
  663. paddlex/inference/pipelines/ppchatocrv3/utils.py +0 -168
  664. paddlex/inference/pipelines/seal_recognition.py +0 -152
  665. paddlex/inference/pipelines/serving/__init__.py +0 -17
  666. paddlex/inference/pipelines/serving/_pipeline_apps/__init__.py +0 -205
  667. paddlex/inference/pipelines/serving/_pipeline_apps/anomaly_detection.py +0 -80
  668. paddlex/inference/pipelines/serving/_pipeline_apps/face_recognition.py +0 -317
  669. paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py +0 -119
  670. paddlex/inference/pipelines/serving/_pipeline_apps/image_classification.py +0 -101
  671. paddlex/inference/pipelines/serving/_pipeline_apps/instance_segmentation.py +0 -112
  672. paddlex/inference/pipelines/serving/_pipeline_apps/layout_parsing.py +0 -205
  673. paddlex/inference/pipelines/serving/_pipeline_apps/multi_label_image_classification.py +0 -90
  674. paddlex/inference/pipelines/serving/_pipeline_apps/object_detection.py +0 -90
  675. paddlex/inference/pipelines/serving/_pipeline_apps/ocr.py +0 -98
  676. paddlex/inference/pipelines/serving/_pipeline_apps/pedestrian_attribute_recognition.py +0 -102
  677. paddlex/inference/pipelines/serving/_pipeline_apps/pp_shitu_v2.py +0 -319
  678. paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py +0 -445
  679. paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py +0 -110
  680. paddlex/inference/pipelines/serving/_pipeline_apps/semantic_segmentation.py +0 -82
  681. paddlex/inference/pipelines/serving/_pipeline_apps/small_object_detection.py +0 -92
  682. paddlex/inference/pipelines/serving/_pipeline_apps/table_recognition.py +0 -110
  683. paddlex/inference/pipelines/serving/_pipeline_apps/ts_ad.py +0 -68
  684. paddlex/inference/pipelines/serving/_pipeline_apps/ts_cls.py +0 -68
  685. paddlex/inference/pipelines/serving/_pipeline_apps/ts_fc.py +0 -68
  686. paddlex/inference/pipelines/serving/_pipeline_apps/vehicle_attribute_recognition.py +0 -102
  687. paddlex/inference/pipelines/serving/app.py +0 -164
  688. paddlex/inference/pipelines/serving/models.py +0 -30
  689. paddlex/inference/pipelines/serving/server.py +0 -25
  690. paddlex/inference/pipelines/serving/storage.py +0 -161
  691. paddlex/inference/pipelines/serving/utils.py +0 -190
  692. paddlex/inference/pipelines/single_model_pipeline.py +0 -76
  693. paddlex/inference/pipelines/table_recognition/table_recognition.py +0 -193
  694. paddlex/inference/results/__init__.py +0 -31
  695. paddlex/inference/results/attribute_rec.py +0 -89
  696. paddlex/inference/results/base.py +0 -43
  697. paddlex/inference/results/chat_ocr.py +0 -158
  698. paddlex/inference/results/clas.py +0 -133
  699. paddlex/inference/results/det.py +0 -86
  700. paddlex/inference/results/face_rec.py +0 -34
  701. paddlex/inference/results/formula_rec.py +0 -363
  702. paddlex/inference/results/instance_seg.py +0 -152
  703. paddlex/inference/results/ocr.py +0 -157
  704. paddlex/inference/results/seal_rec.py +0 -50
  705. paddlex/inference/results/seg.py +0 -72
  706. paddlex/inference/results/shitu.py +0 -35
  707. paddlex/inference/results/table_rec.py +0 -109
  708. paddlex/inference/results/text_det.py +0 -33
  709. paddlex/inference/results/text_rec.py +0 -66
  710. paddlex/inference/results/ts.py +0 -37
  711. paddlex/inference/results/utils/mixin.py +0 -204
  712. paddlex/inference/results/warp.py +0 -31
  713. paddlex/inference/utils/process_hook.py +0 -54
  714. paddlex/pipelines/OCR.yaml +0 -8
  715. paddlex/pipelines/PP-ChatOCRv3-doc.yaml +0 -27
  716. paddlex/pipelines/PP-ShiTuV2.yaml +0 -13
  717. paddlex/pipelines/anomaly_detection.yaml +0 -7
  718. paddlex/pipelines/face_recognition.yaml +0 -13
  719. paddlex/pipelines/formula_recognition.yaml +0 -8
  720. paddlex/pipelines/image_classification.yaml +0 -7
  721. paddlex/pipelines/instance_segmentation.yaml +0 -7
  722. paddlex/pipelines/layout_parsing.yaml +0 -14
  723. paddlex/pipelines/multi_label_image_classification.yaml +0 -7
  724. paddlex/pipelines/object_detection.yaml +0 -7
  725. paddlex/pipelines/pedestrian_attribute_recognition.yaml +0 -7
  726. paddlex/pipelines/seal_recognition.yaml +0 -10
  727. paddlex/pipelines/semantic_segmentation.yaml +0 -7
  728. paddlex/pipelines/small_object_detection.yaml +0 -7
  729. paddlex/pipelines/table_recognition.yaml +0 -12
  730. paddlex/pipelines/ts_ad.yaml +0 -7
  731. paddlex/pipelines/ts_cls.yaml +0 -7
  732. paddlex/pipelines/ts_fc.yaml +0 -7
  733. paddlex/pipelines/vehicle_attribute_recognition.yaml +0 -7
  734. paddlex/utils/fonts/PingFang-SC-Regular.ttf +0 -0
  735. paddlex-3.0.0b2.dist-info/METADATA +0 -760
  736. paddlex-3.0.0b2.dist-info/RECORD +0 -646
  737. paddlex-3.0.0b2.dist-info/WHEEL +0 -5
  738. /paddlex/configs/{doc_text_orientation → modules/doc_text_orientation}/PP-LCNet_x1_0_doc_ori.yaml +0 -0
  739. /paddlex/configs/{face_detection → modules/face_detection}/BlazeFace-FPN-SSH.yaml +0 -0
  740. /paddlex/configs/{face_detection → modules/face_detection}/BlazeFace.yaml +0 -0
  741. /paddlex/configs/{face_detection → modules/face_detection}/PP-YOLOE_plus-S_face.yaml +0 -0
  742. /paddlex/configs/{face_detection → modules/face_detection}/PicoDet_LCNet_x2_5_face.yaml +0 -0
  743. /paddlex/configs/{human_detection → modules/human_detection}/PP-YOLOE-L_human.yaml +0 -0
  744. /paddlex/configs/{human_detection → modules/human_detection}/PP-YOLOE-S_human.yaml +0 -0
  745. /paddlex/configs/{anomaly_detection → modules/image_anomaly_detection}/STFPM.yaml +0 -0
  746. /paddlex/configs/{image_classification → modules/image_classification}/ConvNeXt_base_224.yaml +0 -0
  747. /paddlex/configs/{image_classification → modules/image_classification}/ConvNeXt_base_384.yaml +0 -0
  748. /paddlex/configs/{image_classification → modules/image_classification}/ConvNeXt_large_224.yaml +0 -0
  749. /paddlex/configs/{image_classification → modules/image_classification}/ConvNeXt_small.yaml +0 -0
  750. /paddlex/configs/{image_classification → modules/image_classification}/ConvNeXt_tiny.yaml +0 -0
  751. /paddlex/configs/{image_classification → modules/image_classification}/FasterNet-L.yaml +0 -0
  752. /paddlex/configs/{image_classification → modules/image_classification}/FasterNet-M.yaml +0 -0
  753. /paddlex/configs/{image_classification → modules/image_classification}/FasterNet-S.yaml +0 -0
  754. /paddlex/configs/{image_classification → modules/image_classification}/FasterNet-T0.yaml +0 -0
  755. /paddlex/configs/{image_classification → modules/image_classification}/FasterNet-T1.yaml +0 -0
  756. /paddlex/configs/{image_classification → modules/image_classification}/FasterNet-T2.yaml +0 -0
  757. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV1_x0_25.yaml +0 -0
  758. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV1_x0_5.yaml +0 -0
  759. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV1_x0_75.yaml +0 -0
  760. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV1_x1_0.yaml +0 -0
  761. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV2_x0_25.yaml +0 -0
  762. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV2_x0_5.yaml +0 -0
  763. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV2_x1_0.yaml +0 -0
  764. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV2_x1_5.yaml +0 -0
  765. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV2_x2_0.yaml +0 -0
  766. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_large_x0_35.yaml +0 -0
  767. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_large_x0_5.yaml +0 -0
  768. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_large_x0_75.yaml +0 -0
  769. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_large_x1_0.yaml +0 -0
  770. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_large_x1_25.yaml +0 -0
  771. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_small_x0_35.yaml +0 -0
  772. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_small_x0_5.yaml +0 -0
  773. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_small_x0_75.yaml +0 -0
  774. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_small_x1_0.yaml +0 -0
  775. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV3_small_x1_25.yaml +0 -0
  776. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV4_conv_large.yaml +0 -0
  777. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV4_conv_medium.yaml +0 -0
  778. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV4_conv_small.yaml +0 -0
  779. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV4_hybrid_large.yaml +0 -0
  780. /paddlex/configs/{image_classification → modules/image_classification}/MobileNetV4_hybrid_medium.yaml +0 -0
  781. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNetV2-B0.yaml +0 -0
  782. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNetV2-B1.yaml +0 -0
  783. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNetV2-B2.yaml +0 -0
  784. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNetV2-B3.yaml +0 -0
  785. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNetV2-B4.yaml +0 -0
  786. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNetV2-B5.yaml +0 -0
  787. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNetV2-B6.yaml +0 -0
  788. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNet_base.yaml +0 -0
  789. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNet_small.yaml +0 -0
  790. /paddlex/configs/{image_classification → modules/image_classification}/PP-HGNet_tiny.yaml +0 -0
  791. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNetV2_base.yaml +0 -0
  792. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNetV2_large.yaml +0 -0
  793. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNetV2_small.yaml +0 -0
  794. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x0_25.yaml +0 -0
  795. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x0_35.yaml +0 -0
  796. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x0_5.yaml +0 -0
  797. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x0_75.yaml +0 -0
  798. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x1_0.yaml +0 -0
  799. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x1_5.yaml +0 -0
  800. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x2_0.yaml +0 -0
  801. /paddlex/configs/{image_classification → modules/image_classification}/PP-LCNet_x2_5.yaml +0 -0
  802. /paddlex/configs/{image_classification → modules/image_classification}/ResNet101.yaml +0 -0
  803. /paddlex/configs/{image_classification → modules/image_classification}/ResNet101_vd.yaml +0 -0
  804. /paddlex/configs/{image_classification → modules/image_classification}/ResNet152.yaml +0 -0
  805. /paddlex/configs/{image_classification → modules/image_classification}/ResNet152_vd.yaml +0 -0
  806. /paddlex/configs/{image_classification → modules/image_classification}/ResNet18.yaml +0 -0
  807. /paddlex/configs/{image_classification → modules/image_classification}/ResNet18_vd.yaml +0 -0
  808. /paddlex/configs/{image_classification → modules/image_classification}/ResNet200_vd.yaml +0 -0
  809. /paddlex/configs/{image_classification → modules/image_classification}/ResNet34.yaml +0 -0
  810. /paddlex/configs/{image_classification → modules/image_classification}/ResNet34_vd.yaml +0 -0
  811. /paddlex/configs/{image_classification → modules/image_classification}/ResNet50.yaml +0 -0
  812. /paddlex/configs/{image_classification → modules/image_classification}/ResNet50_vd.yaml +0 -0
  813. /paddlex/configs/{image_classification → modules/image_classification}/StarNet-S1.yaml +0 -0
  814. /paddlex/configs/{image_classification → modules/image_classification}/StarNet-S2.yaml +0 -0
  815. /paddlex/configs/{image_classification → modules/image_classification}/StarNet-S3.yaml +0 -0
  816. /paddlex/configs/{image_classification → modules/image_classification}/StarNet-S4.yaml +0 -0
  817. /paddlex/configs/{image_classification → modules/image_classification}/SwinTransformer_base_patch4_window12_384.yaml +0 -0
  818. /paddlex/configs/{image_classification → modules/image_classification}/SwinTransformer_base_patch4_window7_224.yaml +0 -0
  819. /paddlex/configs/{image_classification → modules/image_classification}/SwinTransformer_large_patch4_window12_384.yaml +0 -0
  820. /paddlex/configs/{image_classification → modules/image_classification}/SwinTransformer_large_patch4_window7_224.yaml +0 -0
  821. /paddlex/configs/{image_classification → modules/image_classification}/SwinTransformer_small_patch4_window7_224.yaml +0 -0
  822. /paddlex/configs/{image_classification → modules/image_classification}/SwinTransformer_tiny_patch4_window7_224.yaml +0 -0
  823. /paddlex/configs/{general_recognition → modules/image_feature}/PP-ShiTuV2_rec.yaml +0 -0
  824. /paddlex/configs/{general_recognition → modules/image_feature}/PP-ShiTuV2_rec_CLIP_vit_base.yaml +0 -0
  825. /paddlex/configs/{general_recognition → modules/image_feature}/PP-ShiTuV2_rec_CLIP_vit_large.yaml +0 -0
  826. /paddlex/configs/{multilabel_classification → modules/image_multilabel_classification}/CLIP_vit_base_patch16_448_ML.yaml +0 -0
  827. /paddlex/configs/{multilabel_classification → modules/image_multilabel_classification}/PP-HGNetV2-B0_ML.yaml +0 -0
  828. /paddlex/configs/{multilabel_classification → modules/image_multilabel_classification}/PP-HGNetV2-B4_ML.yaml +0 -0
  829. /paddlex/configs/{multilabel_classification → modules/image_multilabel_classification}/PP-HGNetV2-B6_ML.yaml +0 -0
  830. /paddlex/configs/{multilabel_classification → modules/image_multilabel_classification}/PP-LCNet_x1_0_ML.yaml +0 -0
  831. /paddlex/configs/{multilabel_classification → modules/image_multilabel_classification}/ResNet50_ML.yaml +0 -0
  832. /paddlex/configs/{image_unwarping → modules/image_unwarping}/UVDoc.yaml +0 -0
  833. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/Cascade-MaskRCNN-ResNet50-FPN.yaml +0 -0
  834. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN.yaml +0 -0
  835. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/Mask-RT-DETR-H.yaml +0 -0
  836. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/Mask-RT-DETR-L.yaml +0 -0
  837. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/Mask-RT-DETR-M.yaml +0 -0
  838. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/Mask-RT-DETR-S.yaml +0 -0
  839. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/Mask-RT-DETR-X.yaml +0 -0
  840. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/MaskRCNN-ResNeXt101-vd-FPN.yaml +0 -0
  841. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/MaskRCNN-ResNet101-FPN.yaml +0 -0
  842. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/MaskRCNN-ResNet101-vd-FPN.yaml +0 -0
  843. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/MaskRCNN-ResNet50-FPN.yaml +0 -0
  844. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/MaskRCNN-ResNet50-vd-FPN.yaml +0 -0
  845. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/MaskRCNN-ResNet50.yaml +0 -0
  846. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/PP-YOLOE_seg-S.yaml +0 -0
  847. /paddlex/configs/{instance_segmentation → modules/instance_segmentation}/SOLOv2.yaml +0 -0
  848. /paddlex/configs/{structure_analysis → modules/layout_detection}/PicoDet-L_layout_17cls.yaml +0 -0
  849. /paddlex/configs/{structure_analysis → modules/layout_detection}/PicoDet-L_layout_3cls.yaml +0 -0
  850. /paddlex/configs/{structure_analysis → modules/layout_detection}/PicoDet-S_layout_17cls.yaml +0 -0
  851. /paddlex/configs/{structure_analysis → modules/layout_detection}/PicoDet-S_layout_3cls.yaml +0 -0
  852. /paddlex/configs/{structure_analysis → modules/layout_detection}/PicoDet_layout_1x.yaml +0 -0
  853. /paddlex/configs/{structure_analysis → modules/layout_detection}/PicoDet_layout_1x_table.yaml +0 -0
  854. /paddlex/configs/{structure_analysis → modules/layout_detection}/RT-DETR-H_layout_17cls.yaml +0 -0
  855. /paddlex/configs/{structure_analysis → modules/layout_detection}/RT-DETR-H_layout_3cls.yaml +0 -0
  856. /paddlex/configs/{mainbody_detection → modules/mainbody_detection}/PP-ShiTuV2_det.yaml +0 -0
  857. /paddlex/configs/{object_detection → modules/object_detection}/Cascade-FasterRCNN-ResNet50-FPN.yaml +0 -0
  858. /paddlex/configs/{object_detection → modules/object_detection}/Cascade-FasterRCNN-ResNet50-vd-SSLDv2-FPN.yaml +0 -0
  859. /paddlex/configs/{object_detection → modules/object_detection}/CenterNet-DLA-34.yaml +0 -0
  860. /paddlex/configs/{object_detection → modules/object_detection}/CenterNet-ResNet50.yaml +0 -0
  861. /paddlex/configs/{object_detection → modules/object_detection}/DETR-R50.yaml +0 -0
  862. /paddlex/configs/{object_detection → modules/object_detection}/FCOS-ResNet50.yaml +0 -0
  863. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNeXt101-vd-FPN.yaml +0 -0
  864. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNet101-FPN.yaml +0 -0
  865. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNet101.yaml +0 -0
  866. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNet34-FPN.yaml +0 -0
  867. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNet50-FPN.yaml +0 -0
  868. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNet50-vd-FPN.yaml +0 -0
  869. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNet50-vd-SSLDv2-FPN.yaml +0 -0
  870. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-ResNet50.yaml +0 -0
  871. /paddlex/configs/{object_detection → modules/object_detection}/FasterRCNN-Swin-Tiny-FPN.yaml +0 -0
  872. /paddlex/configs/{object_detection → modules/object_detection}/PP-YOLOE_plus-L.yaml +0 -0
  873. /paddlex/configs/{object_detection → modules/object_detection}/PP-YOLOE_plus-M.yaml +0 -0
  874. /paddlex/configs/{object_detection → modules/object_detection}/PP-YOLOE_plus-S.yaml +0 -0
  875. /paddlex/configs/{object_detection → modules/object_detection}/PP-YOLOE_plus-X.yaml +0 -0
  876. /paddlex/configs/{object_detection → modules/object_detection}/PicoDet-L.yaml +0 -0
  877. /paddlex/configs/{object_detection → modules/object_detection}/PicoDet-M.yaml +0 -0
  878. /paddlex/configs/{object_detection → modules/object_detection}/PicoDet-S.yaml +0 -0
  879. /paddlex/configs/{object_detection → modules/object_detection}/PicoDet-XS.yaml +0 -0
  880. /paddlex/configs/{object_detection → modules/object_detection}/RT-DETR-H.yaml +0 -0
  881. /paddlex/configs/{object_detection → modules/object_detection}/RT-DETR-L.yaml +0 -0
  882. /paddlex/configs/{object_detection → modules/object_detection}/RT-DETR-R18.yaml +0 -0
  883. /paddlex/configs/{object_detection → modules/object_detection}/RT-DETR-R50.yaml +0 -0
  884. /paddlex/configs/{object_detection → modules/object_detection}/RT-DETR-X.yaml +0 -0
  885. /paddlex/configs/{object_detection → modules/object_detection}/YOLOX-L.yaml +0 -0
  886. /paddlex/configs/{object_detection → modules/object_detection}/YOLOX-M.yaml +0 -0
  887. /paddlex/configs/{object_detection → modules/object_detection}/YOLOX-N.yaml +0 -0
  888. /paddlex/configs/{object_detection → modules/object_detection}/YOLOX-S.yaml +0 -0
  889. /paddlex/configs/{object_detection → modules/object_detection}/YOLOX-T.yaml +0 -0
  890. /paddlex/configs/{object_detection → modules/object_detection}/YOLOv3-DarkNet53.yaml +0 -0
  891. /paddlex/configs/{object_detection → modules/object_detection}/YOLOv3-MobileNetV3.yaml +0 -0
  892. /paddlex/configs/{object_detection → modules/object_detection}/YOLOv3-ResNet50_vd_DCN.yaml +0 -0
  893. /paddlex/configs/{pedestrian_attribute → modules/pedestrian_attribute_recognition}/PP-LCNet_x1_0_pedestrian_attribute.yaml +0 -0
  894. /paddlex/configs/{text_detection_seal → modules/seal_text_detection}/PP-OCRv4_mobile_seal_det.yaml +0 -0
  895. /paddlex/configs/{text_detection_seal → modules/seal_text_detection}/PP-OCRv4_server_seal_det.yaml +0 -0
  896. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/Deeplabv3-R101.yaml +0 -0
  897. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/Deeplabv3-R50.yaml +0 -0
  898. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/Deeplabv3_Plus-R101.yaml +0 -0
  899. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/Deeplabv3_Plus-R50.yaml +0 -0
  900. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/OCRNet_HRNet-W18.yaml +0 -0
  901. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/OCRNet_HRNet-W48.yaml +0 -0
  902. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/PP-LiteSeg-B.yaml +0 -0
  903. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/PP-LiteSeg-T.yaml +0 -0
  904. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/SegFormer-B0.yaml +0 -0
  905. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/SegFormer-B1.yaml +0 -0
  906. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/SegFormer-B2.yaml +0 -0
  907. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/SegFormer-B3.yaml +0 -0
  908. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/SegFormer-B4.yaml +0 -0
  909. /paddlex/configs/{semantic_segmentation → modules/semantic_segmentation}/SegFormer-B5.yaml +0 -0
  910. /paddlex/configs/{small_object_detection → modules/small_object_detection}/PP-YOLOE_plus_SOD-L.yaml +0 -0
  911. /paddlex/configs/{small_object_detection → modules/small_object_detection}/PP-YOLOE_plus_SOD-S.yaml +0 -0
  912. /paddlex/configs/{small_object_detection → modules/small_object_detection}/PP-YOLOE_plus_SOD-largesize-L.yaml +0 -0
  913. /paddlex/configs/{table_recognition → modules/table_structure_recognition}/SLANet.yaml +0 -0
  914. /paddlex/configs/{table_recognition → modules/table_structure_recognition}/SLANet_plus.yaml +0 -0
  915. /paddlex/configs/{text_detection → modules/text_detection}/PP-OCRv4_mobile_det.yaml +0 -0
  916. /paddlex/configs/{text_detection → modules/text_detection}/PP-OCRv4_server_det.yaml +0 -0
  917. /paddlex/configs/{text_recognition → modules/text_recognition}/PP-OCRv4_mobile_rec.yaml +0 -0
  918. /paddlex/configs/{text_recognition → modules/text_recognition}/PP-OCRv4_server_rec.yaml +0 -0
  919. /paddlex/configs/{text_recognition → modules/text_recognition}/ch_RepSVTR_rec.yaml +0 -0
  920. /paddlex/configs/{text_recognition → modules/text_recognition}/ch_SVTRv2_rec.yaml +0 -0
  921. /paddlex/configs/{ts_anomaly_detection → modules/ts_anomaly_detection}/AutoEncoder_ad.yaml +0 -0
  922. /paddlex/configs/{ts_anomaly_detection → modules/ts_anomaly_detection}/DLinear_ad.yaml +0 -0
  923. /paddlex/configs/{ts_anomaly_detection → modules/ts_anomaly_detection}/Nonstationary_ad.yaml +0 -0
  924. /paddlex/configs/{ts_anomaly_detection → modules/ts_anomaly_detection}/PatchTST_ad.yaml +0 -0
  925. /paddlex/configs/{ts_anomaly_detection → modules/ts_anomaly_detection}/TimesNet_ad.yaml +0 -0
  926. /paddlex/configs/{ts_classification → modules/ts_classification}/TimesNet_cls.yaml +0 -0
  927. /paddlex/configs/{ts_forecast → modules/ts_forecast}/DLinear.yaml +0 -0
  928. /paddlex/configs/{ts_forecast → modules/ts_forecast}/NLinear.yaml +0 -0
  929. /paddlex/configs/{ts_forecast → modules/ts_forecast}/Nonstationary.yaml +0 -0
  930. /paddlex/configs/{ts_forecast → modules/ts_forecast}/PatchTST.yaml +0 -0
  931. /paddlex/configs/{ts_forecast → modules/ts_forecast}/RLinear.yaml +0 -0
  932. /paddlex/configs/{ts_forecast → modules/ts_forecast}/TiDE.yaml +0 -0
  933. /paddlex/configs/{ts_forecast → modules/ts_forecast}/TimesNet.yaml +0 -0
  934. /paddlex/configs/{vehicle_attribute → modules/vehicle_attribute_recognition}/PP-LCNet_x1_0_vehicle_attribute.yaml +0 -0
  935. /paddlex/configs/{vehicle_detection → modules/vehicle_detection}/PP-YOLOE-L_vehicle.yaml +0 -0
  936. /paddlex/configs/{vehicle_detection → modules/vehicle_detection}/PP-YOLOE-S_vehicle.yaml +0 -0
  937. /paddlex/inference/{results/utils → common}/__init__.py +0 -0
  938. {paddlex-3.0.0b2.dist-info → paddlex-3.0.0rc0.dist-info}/LICENSE +0 -0
  939. {paddlex-3.0.0b2.dist-info → paddlex-3.0.0rc0.dist-info}/entry_points.txt +0 -0
  940. {paddlex-3.0.0b2.dist-info → paddlex-3.0.0rc0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,986 @@
1
+ # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import os
17
+ import os.path as osp
18
+
19
+ import re
20
+ import numpy as np
21
+ from PIL import Image, ImageOps, ImageDraw
22
+ import cv2
23
+ import math
24
+ import json
25
+ import tempfile
26
+ from tokenizers import Tokenizer as TokenizerFast
27
+ from tokenizers import AddedToken
28
+ from typing import List, Tuple, Optional, Any, Dict, Union
29
+
30
+ from ....utils import logging
31
+
32
+
33
class MinMaxResize:
    """Resize images so that they fit between minimum and maximum dimensions.

    Images whose width and height already lie inside the configured range are
    returned untouched. Every other image is cropped to its content, padded
    with white to a multiple of 32 pixels, shrunk to fit ``max_dimensions``
    and finally padded with white up to ``min_dimensions``.
    """

    def __init__(
        self,
        min_dimensions: Optional[Union[List[int], Tuple[int, int]]] = (32, 32),
        max_dimensions: Optional[Union[List[int], Tuple[int, int]]] = (672, 192),
        **kwargs,
    ) -> None:
        """Store the size limits.

        Args:
            min_dimensions (list of int, optional): Minimum dimensions (width, height).
                ``None`` disables the lower bound. Defaults to [32, 32].
            max_dimensions (list of int, optional): Maximum dimensions (width, height).
                ``None`` disables the upper bound. Defaults to [672, 192].
            **kwargs: Accepted and ignored.
        """
        # Copy into fresh lists so that instances never share (or mutate) the
        # default values or a sequence owned by the caller.
        self.min_dimensions = (
            None if min_dimensions is None else list(min_dimensions)
        )
        self.max_dimensions = (
            None if max_dimensions is None else list(max_dimensions)
        )

    def pad_(self, img: "Image.Image", divable: int = 32) -> "Image.Image":
        """Crop the image to its content and pad it to a multiple of ``divable``.

        Args:
            img (PIL.Image.Image): The input image.
            divable (int, optional): Value the output width and height must be
                divisible by. Defaults to 32.

        Returns:
            PIL.Image.Image: Single-channel ("L") image with the content in the
                top-left corner and white padding on the right and bottom.
        """
        threshold = 128
        data = np.array(img.convert("LA"))
        if data[..., -1].var() == 0:
            # Constant alpha channel: use the luminance channel.
            data = data[..., 0].astype(np.uint8)
        else:
            # Varying alpha channel: use inverted alpha as the intensity.
            data = (255 - data[..., -1]).astype(np.uint8)

        lowest, highest = data.min(), data.max()
        if highest > lowest:
            # Stretch the contrast to the full 0..255 range. A flat image is
            # left as is; stretching it would divide by zero.
            data = (data - lowest) / (highest - lowest) * 255

        if data.mean() > threshold:
            # Mostly bright image: pixels below the threshold are the content.
            gray = 255 * (data < threshold).astype(np.uint8)
        else:
            # Mostly dark image: bright pixels are the content; invert the
            # data so that the output is always dark-on-white.
            gray = 255 * (data > threshold).astype(np.uint8)
            data = 255 - data

        coords = cv2.findNonZero(gray)  # all content pixels, None if there are none
        if coords is None:
            # No content found (flat image): keep the whole frame.
            a, b = 0, 0
            h, w = data.shape[:2]
        else:
            a, b, w, h = cv2.boundingRect(coords)  # minimum spanning bounding box
        rect = data[b : b + h, a : a + w]
        im = Image.fromarray(rect).convert("L")
        # Round both sides up to the next multiple of ``divable``.
        dims = [divable * -(-side // divable) for side in (w, h)]
        padded = Image.new("L", dims, 255)
        padded.paste(im, (0, 0, im.size[0], im.size[1]))
        return padded

    def minmax_size_(
        self,
        img: "Image.Image",
        max_dimensions: Optional[List[int]],
        min_dimensions: Optional[List[int]],
    ) -> "Image.Image":
        """Shrink the image to the maximum size, then pad it to the minimum size.

        Args:
            img (PIL.Image.Image): The input image.
            max_dimensions (list of int or None): Maximum dimensions (width, height).
            min_dimensions (list of int or None): Minimum dimensions (width, height).

        Returns:
            PIL.Image.Image: The resized image.
        """
        if max_dimensions is not None:
            ratios = [a / b for a, b in zip(img.size, max_dimensions)]
            if any(r > 1 for r in ratios):
                size = np.array(img.size) // max(ratios)
                # An extreme aspect ratio can floor a side to 0, which PIL
                # rejects; keep at least one pixel per side.
                size = np.maximum(size.astype(int), 1)
                img = img.resize(tuple(int(v) for v in size), Image.BILINEAR)
        if min_dimensions is not None:
            padded_size = [
                max(img_dim, min_dim)
                for img_dim, min_dim in zip(img.size, min_dimensions)
            ]
            if padded_size != list(img.size):
                padded_im = Image.new("L", padded_size, 255)
                # Paste at the top-left corner. ``img.getbbox()`` must not be
                # used as the box: it is None for an all-zero image and is
                # smaller than the image when a border row/column is all zero,
                # in which case ``paste`` raises because the sizes differ.
                padded_im.paste(img, (0, 0))
                img = padded_im
        return img

    def _within_limits(self, width: int, height: int) -> bool:
        """Return True when (width, height) satisfies every configured bound."""
        if self.min_dimensions is not None and (
            width < self.min_dimensions[0] or height < self.min_dimensions[1]
        ):
            return False
        if self.max_dimensions is not None and (
            width > self.max_dimensions[0] or height > self.max_dimensions[1]
        ):
            return False
        return True

    def resize(self, img: np.ndarray) -> np.ndarray:
        """Resize one image according to the configured dimensions.

        Args:
            img (np.ndarray): The input image; its first two axes are read as
                (height, width).

        Returns:
            np.ndarray: ``img`` itself when it already fits, otherwise the
                processed grayscale image replicated into three channels.
        """
        h, w = img.shape[:2]
        if self._within_limits(w, h):
            return img
        pil_img = Image.fromarray(np.uint8(img))
        pil_img = self.minmax_size_(
            self.pad_(pil_img), self.max_dimensions, self.min_dimensions
        )
        gray = np.array(pil_img)
        return np.dstack((gray, gray, gray))

    def __call__(self, imgs: List[np.ndarray]) -> List[np.ndarray]:
        """Apply :meth:`resize` to every image of a list.

        Args:
            imgs (list of np.ndarray): The input images.

        Returns:
            list of np.ndarray: The resized images, in input order.
        """
        return [self.resize(img) for img in imgs]
155
+
156
+
157
class LatexTestTransform:
    """Grayscale conversion used by the LaTeX-OCR inference pipeline.

    Every image is converted to a single gray plane which is then replicated
    into ``num_output_channels`` identical channels.
    """

    def __init__(self, **kwargs) -> None:
        """Set the number of output channels to 3; keyword arguments are ignored."""
        super().__init__()
        self.num_output_channels = 3

    def transform(self, img: np.ndarray) -> np.ndarray:
        """Turn one image into a gray image with replicated channels.

        Args:
            img (np.ndarray): The input image, passed to
                ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

        Returns:
            np.ndarray: The gray plane merged ``num_output_channels`` times.
        """
        gray_plane = np.squeeze(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
        planes = [gray_plane for _ in range(self.num_output_channels)]
        return cv2.merge(planes)

    def __call__(self, imgs: List[np.ndarray]) -> List[np.ndarray]:
        """Apply :meth:`transform` to every image of a list.

        Args:
            imgs (list of np.ndarray): The input images.

        Returns:
            list of np.ndarray: The transformed images, in input order.
        """
        results = []
        for image in imgs:
            results.append(self.transform(image))
        return results
195
+
196
+
197
class LatexImageFormat:
    """Pad an image to a multiple of 16 pixels and reshape it to (1, 1, H, W)."""

    def __init__(self, **kwargs) -> None:
        """Create the formatter; keyword arguments are accepted and ignored."""
        super().__init__()

    def format(self, img: np.ndarray) -> np.ndarray:
        """Format one image.

        Only channel 0 of the input is kept. It is padded on the bottom and
        right with the constant value 1 until both sides are multiples of 16,
        then two leading axes of length 1 are added.

        Args:
            img (numpy.ndarray): The input image, indexed as (height, width, channel).

        Returns:
            numpy.ndarray: Array of shape (1, 1, padded_height, padded_width).
        """
        rows, cols = img.shape[:2]
        # Distance from each side to the next multiple of 16 (0 if already aligned).
        extra_rows = -rows % 16
        extra_cols = -cols % 16
        plane = np.pad(
            img[:, :, 0],
            ((0, extra_rows), (0, extra_cols)),
            constant_values=1,
        )
        return plane[np.newaxis, np.newaxis, :, :]

    def __call__(self, imgs: List[np.ndarray]) -> List[np.ndarray]:
        """Apply :meth:`format` to every image of a list.

        Args:
            imgs (list of numpy.ndarray): The input images.

        Returns:
            list of numpy.ndarray: The formatted images, in input order.
        """
        formatted = []
        for image in imgs:
            formatted.append(self.format(image))
        return formatted
233
+
234
+
235
class NormalizeImage(object):
    """Normalize an image: ``(img * scale - mean) / std``.

    Args:
        scale (float or str): The scale factor to apply to the image. If a string is
            provided, it is evaluated as a Python expression. Defaults to 1/255.
        mean (list of float): The per-channel mean values to subtract. Defaults to
            [0.485, 0.456, 0.406].
        std (list of float): The per-channel standard deviations to divide by.
            Defaults to [0.229, 0.224, 0.225].
        order (str): 'chw' lays mean/std out as (C, 1, 1); any other value lays them
            out as (1, 1, C). Defaults to 'chw'.
        **kwargs: Accepted and ignored.

    Attributes:
        scale (numpy.float32): The scale factor applied to the image.
        mean (numpy.ndarray): float32 mean values reshaped according to ``order``.
        std (numpy.ndarray): float32 standard deviations reshaped according to ``order``.
    """

    def __init__(
        self,
        scale: Optional[Union[float, str]] = None,
        mean: Optional[List[float]] = None,
        std: Optional[List[float]] = None,
        order: str = "chw",
        **kwargs,
    ) -> None:
        if isinstance(scale, str):
            # SECURITY NOTE(review): eval() executes arbitrary code. It is kept
            # because configurations pass expressions such as "1./255.", but
            # ``scale`` strings must only ever come from trusted configuration.
            scale = eval(scale)
        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
        mean = mean if mean is not None else [0.485, 0.456, 0.406]
        std = std if std is not None else [0.229, 0.224, 0.225]

        # -1 lets the channel axis follow len(mean)/len(std) instead of being
        # fixed to 3, so single-channel statistics work as well.
        shape = (-1, 1, 1) if order == "chw" else (1, 1, -1)
        self.mean = np.array(mean).reshape(shape).astype("float32")
        self.std = np.array(std).reshape(shape).astype("float32")

    def normalize(self, img: Union[np.ndarray, "Image.Image"]) -> np.ndarray:
        """Normalize one image.

        Args:
            img (numpy.ndarray or PIL.Image.Image): The input image. Its layout
                must broadcast against ``self.mean`` / ``self.std``.

        Returns:
            numpy.ndarray: The normalized float32 image.

        Raises:
            AssertionError: If ``img`` is neither an ndarray nor a PIL image.
        """
        if not isinstance(img, np.ndarray):
            # PIL is only needed for non-array input, so import it lazily.
            from PIL import Image

            if isinstance(img, Image.Image):
                img = np.array(img)
        # Kept as an assert so callers see the same exception type as before.
        assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage"
        return (img.astype("float32") * self.scale - self.mean) / self.std

    def __call__(
        self, imgs: List[Union[np.ndarray, "Image.Image"]]
    ) -> List[np.ndarray]:
        """Apply normalization to a list of images."""
        return [self.normalize(img) for img in imgs]
281
+
282
+
283
class ToBatch(object):
    """Join a list of image arrays into one batch array."""

    def __init__(self, **kwargs) -> None:
        """Create the batcher; keyword arguments are accepted and ignored."""
        super(ToBatch, self).__init__()

    def __call__(self, imgs: List[np.ndarray]) -> List[np.ndarray]:
        """Concatenate the images along their first axis.

        Args:
            imgs (list): The image arrays to concatenate.

        Returns:
            list: A one-element list holding a copy of the concatenated batch.
        """
        joined = np.concatenate(imgs, axis=0)
        return [joined.copy()]
303
+
304
+
305
class LaTeXOCRDecode(object):
    """Decode LaTeX-OCR token ids into cleaned-up LaTeX strings."""

    # Building blocks of the whitespace clean-up done by ``post_process``.
    # ``_NOLETTER`` is a raw string: "\W" and "\d" are not valid escape
    # sequences in a normal string literal.
    _LETTER = "[a-zA-Z]"
    _NOLETTER = r"[\W_^\d]"
    _TEXT_PATTERN = re.compile(r"(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})")
    _NOLETTER_NOLETTER = re.compile(
        r"(?!\\ )(%s)\s+?(%s)" % (_NOLETTER, _NOLETTER)
    )
    _NOLETTER_LETTER = re.compile(r"(?!\\ )(%s)\s+?(%s)" % (_NOLETTER, _LETTER))
    _LETTER_NOLETTER = re.compile(r"(%s)\s+?(%s)" % (_LETTER, _NOLETTER))

    def __init__(self, character_list: List[str], **kwargs) -> None:
        """Initializes the LaTeXOCRDecode object.

        Args:
            character_list: JSON-serialisable tokenizer definition. It is
                serialised with ``json.dumps`` and handed to the ``tokenizers``
                library to build the tokenizer.
            **kwargs: Accepted and ignored.
        """
        from tokenizers import Tokenizer as TokenizerFast

        super(LaTeXOCRDecode, self).__init__()
        # Build the tokenizer from an in-memory JSON string. Going through a
        # fixed file name in the shared temp directory let concurrent processes
        # overwrite each other's file and, when writing failed, silently loaded
        # a stale or missing file.
        self.tokenizer = TokenizerFast.from_str(json.dumps(character_list))

    def post_process(self, s: str) -> str:
        """Post-processes the decoded LaTeX string.

        Spaces inside ``\\operatorname``/``\\mathrm``/``\\text``/``\\mathbf``
        groups are removed, then whitespace between two non-letters, between a
        non-letter and a letter, and between a letter and a non-letter is
        removed repeatedly until the string stops changing.

        Args:
            s (str): The decoded LaTeX string to post-process.

        Returns:
            str: The post-processed LaTeX string.
        """
        s = self._TEXT_PATTERN.sub(lambda m: m.group(0).replace(" ", ""), s)
        news = s
        while True:
            s = news
            news = self._NOLETTER_NOLETTER.sub(r"\1\2", s)
            news = self._NOLETTER_LETTER.sub(r"\1\2", news)
            news = self._LETTER_NOLETTER.sub(r"\1\2", news)
            if news == s:
                break
        return s

    def decode(self, tokens: np.ndarray) -> List[str]:
        """Decodes the provided tokens into LaTeX strings.

        Args:
            tokens (np.array): Token ids; a 1-D array is treated as a single
                sequence, otherwise each row is one sequence.

        Returns:
            list: One post-processed LaTeX string per sequence.
        """
        if len(tokens.shape) == 1:
            tokens = tokens[None, :]
        decoded = [self.tokenizer.decode(tok) for tok in tokens]
        cleaned = [
            detok.replace(" ", "")
            .replace("Ġ", " ")
            .replace("[EOS]", "")
            .replace("[BOS]", "")
            .replace("[PAD]", "")
            .strip()
            for detok in decoded
        ]
        return [self.post_process(text) for text in cleaned]

    def __call__(
        self,
        preds: np.ndarray,
        label: Optional[np.ndarray] = None,
        mode: str = "eval",
        *args,
        **kwargs,
    ) -> Union[List[str], Tuple[List[str], List[str]]]:
        """Decode predictions and, optionally, labels.

        Args:
            preds (np.array): The predictions to decode. In 'train' mode the
                argmax over axis 2 is taken first; otherwise they are decoded
                as token ids directly.
            label (np.array, optional): The labels to decode. Defaults to None.
            mode (str): 'train' or anything else (treated as 'eval'). Defaults to 'eval'.
            *args: Accepted and ignored.
            **kwargs: Accepted and ignored.

        Returns:
            list or tuple: The decoded texts when ``label`` is None, otherwise
                the pair ``(decoded texts, decoded labels)``.
        """
        if mode == "train":
            preds_idx = np.array(preds.argmax(axis=2))
            text = self.decode(preds_idx)
        else:
            text = self.decode(np.array(preds))
        if label is None:
            return text
        label = self.decode(np.array(label))
        return text, label
403
+
404
+
405
class UniMERNetImgDecode(object):
    """Class for decoding images for UniMERNet, including cropping margins, resizing, and padding."""

    def __init__(
        self, input_size: Tuple[int, int], random_padding: bool = False, **kwargs
    ) -> None:
        """Initializes the UniMERNetImgDecode class with input size and random padding options.

        Args:
            input_size (tuple): The desired input size for the images (height, width).
            random_padding (bool): Whether the padding is split randomly between
                the two sides of each axis instead of evenly.
            **kwargs: Accepted and ignored."""
        self.input_size = input_size
        self.random_padding = random_padding

    def crop_margin(self, img: "Image.Image") -> "Image.Image":
        """Crops the margin of the image based on grayscale thresholding.

        Args:
            img (PIL.Image.Image): The input image.

        Returns:
            PIL.Image.Image: The cropped image, or ``img`` itself when every
                pixel has the same gray value."""
        data = np.array(img.convert("L"))
        data = data.astype(np.uint8)
        max_val = data.max()
        min_val = data.min()
        if max_val == min_val:
            # Flat image: nothing to crop, and stretching would divide by zero.
            return img
        # Stretch to 0..255, then treat everything darker than 200 as content.
        data = (data - min_val) / (max_val - min_val) * 255
        gray = 255 * (data < 200).astype(np.uint8)
        coords = cv2.findNonZero(gray)  # Find all non-zero points (text)
        a, b, w, h = cv2.boundingRect(coords)  # Find minimum spanning bounding box
        return img.crop((a, b, w + a, h + b))

    def get_dimensions(self, img: Union["Image.Image", np.ndarray]) -> List[int]:
        """Gets the dimensions of the image.

        Args:
            img (PIL.Image.Image or numpy.ndarray): The input image. Objects
                without ``getbands`` must provide ``channels``; in both cases
                ``size`` is unpacked as (width, height).
                NOTE(review): a plain numpy array provides neither - confirm
                whether ndarray input is really expected here.

        Returns:
            list: A list containing the number of channels, height, and width."""
        if hasattr(img, "getbands"):
            channels = len(img.getbands())
        else:
            channels = img.channels
        width, height = img.size
        return [channels, height, width]

    def _compute_resized_output_size(
        self,
        image_size: Tuple[int, int],
        size: Union[int, Tuple[int, int]],
        max_size: Optional[int] = None,
    ) -> List[int]:
        """Computes the resized output size of the image.

        Args:
            image_size (tuple): The original size of the image (height, width).
            size (int or tuple): An int or a one-element sequence gives the
                target length of the shorter edge (aspect ratio is kept); a
                two-element sequence gives (height, width) directly.
            max_size (int, optional): The maximum allowed size for the longer edge.

        Returns:
            list: A list containing the new height and width.

        Raises:
            ValueError: If ``max_size`` is not strictly greater than the
                requested shorter-edge length."""
        # A bare int used to fail on ``len(size)`` although the signature
        # allows it; treat it like a one-element sequence.
        size_seq = (size,) if isinstance(size, int) else size
        if len(size_seq) == 1:  # specified size only for the smallest edge
            h, w = image_size
            short, long = (w, h) if w <= h else (h, w)
            requested_new_short = size_seq[0]

            new_short, new_long = requested_new_short, int(
                requested_new_short * long / short
            )

            if max_size is not None:
                if max_size <= requested_new_short:
                    raise ValueError(
                        f"max_size = {max_size} must be strictly greater than the requested "
                        f"size for the smaller edge size = {size}"
                    )
                if new_long > max_size:
                    # Shrink further so that the longer edge equals max_size.
                    new_short, new_long = int(max_size * new_short / new_long), max_size

            new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short)
        else:  # specified both h and w
            new_w, new_h = size_seq[1], size_seq[0]
        return [new_h, new_w]

    def resize(
        self, img: "Image.Image", size: Union[int, Tuple[int, int]]
    ) -> "Image.Image":
        """Resizes the image to the specified size.

        Args:
            img (PIL.Image.Image): The input image.
            size (int or tuple): The desired size for the smallest edge or both height and width.

        Returns:
            PIL.Image.Image: The resized image."""
        _, image_height, image_width = self.get_dimensions(img)
        if isinstance(size, int):
            size = [size]
        max_size = None
        output_size = self._compute_resized_output_size(
            (image_height, image_width), size, max_size
        )
        # output_size is [height, width]; PIL wants (width, height).
        img = img.resize(tuple(output_size[::-1]), resample=2)
        return img

    def img_decode(self, img: np.ndarray) -> Optional[np.ndarray]:
        """Decodes the image by cropping margins, resizing, and adding padding.

        Args:
            img (numpy.ndarray): The input image array.

        Returns:
            numpy.ndarray or None: The decoded image array of size
                ``input_size``; None when the image cannot be converted
                (OSError) or has a zero-sized side after cropping."""
        try:
            img = self.crop_margin(Image.fromarray(img).convert("RGB"))
        except OSError:
            return None
        if img.height == 0 or img.width == 0:
            return None
        # Scale the shorter edge to the smaller target side, then shrink in
        # place (if needed) so the image fits inside (width, height).
        img = self.resize(img, min(self.input_size))
        img.thumbnail((self.input_size[1], self.input_size[0]))
        delta_width = self.input_size[1] - img.width
        delta_height = self.input_size[0] - img.height
        if self.random_padding:
            pad_width = np.random.randint(low=0, high=delta_width + 1)
            pad_height = np.random.randint(low=0, high=delta_height + 1)
        else:
            pad_width = delta_width // 2
            pad_height = delta_height // 2
        # (left, top, right, bottom) border widths.
        padding = (
            pad_width,
            pad_height,
            delta_width - pad_width,
            delta_height - pad_height,
        )
        return np.array(ImageOps.expand(img, padding))

    def __call__(self, imgs: List[np.ndarray]) -> List[Optional[np.ndarray]]:
        """Calls the img_decode method on a list of images.

        Args:
            imgs (list of numpy.ndarray): The list of input image arrays.

        Returns:
            list: One decoded image array (or None) per input image."""
        return [self.img_decode(img) for img in imgs]
555
+
556
+
557
+ class UniMERNetDecode(object):
558
+ """Class for decoding tokenized inputs using UniMERNet tokenizer.
559
+
560
+ Attributes:
561
+ SPECIAL_TOKENS_ATTRIBUTES (List[str]): List of special token attributes.
562
+ model_input_names (List[str]): List of model input names.
563
+ max_seq_len (int): Maximum sequence length.
564
+ pad_token_id (int): ID for the padding token.
565
+ bos_token_id (int): ID for the beginning-of-sequence token.
566
+ eos_token_id (int): ID for the end-of-sequence token.
567
+ padding_side (str): Padding side, either 'left' or 'right'.
568
+ pad_token (str): Padding token.
569
+ pad_token_type_id (int): Type ID for the padding token.
570
+ pad_to_multiple_of (Optional[int]): If set, pad to a multiple of this value.
571
+ tokenizer (TokenizerFast): Fast tokenizer instance.
572
+
573
+ Args:
574
+ character_list (Dict[str, Any]): Dictionary containing tokenizer configuration.
575
+ **kwargs: Additional keyword arguments.
576
+ """
577
+
578
+ SPECIAL_TOKENS_ATTRIBUTES = [
579
+ "bos_token",
580
+ "eos_token",
581
+ "unk_token",
582
+ "sep_token",
583
+ "pad_token",
584
+ "cls_token",
585
+ "mask_token",
586
+ "additional_special_tokens",
587
+ ]
588
+
589
def __init__(
    self,
    character_list: Dict[str, Any],
    **kwargs,
) -> None:
    """Initializes the UniMERNetDecode class.

    Args:
        character_list (Dict[str, Any]): Dictionary containing tokenizer configuration.
            The entries ``"fast_tokenizer_file"`` and ``"tokenizer_config_file"``
            are read and must be JSON-serialisable.
        **kwargs: Additional keyword arguments (not used by this initializer).

    Raises:
        KeyError: If one of the two required entries is missing.
        ValueError: If an ``added_tokens_decoder`` entry is neither a dictionary
            nor an ``AddedToken``.
    """

    self._unk_token = "<unk>"
    self._bos_token = "<s>"
    self._eos_token = "</s>"
    self._pad_token = "<pad>"
    self._sep_token = None
    self._cls_token = None
    self._mask_token = None
    self._additional_special_tokens = []
    self.model_input_names = ["input_ids", "token_type_ids", "attention_mask"]
    self.max_seq_len = 2048
    self.pad_token_id = 1
    self.bos_token_id = 0
    self.eos_token_id = 2
    self.padding_side = "right"
    self.pad_token = "<pad>"
    self.pad_token_type_id = 0
    self.pad_to_multiple_of = None

    # Fail fast on a malformed configuration instead of continuing with a
    # missing (or stale) file as the previous print-and-continue path did.
    fast_tokenizer_payload = character_list["fast_tokenizer_file"]
    tokenizer_config_payload = character_list["tokenizer_config_file"]

    # The tokenizer is loaded through `from_file`, so the payload has to exist
    # on disk. A private temporary directory keeps concurrent instances from
    # overwriting each other's `tokenizer.json` and is removed afterwards.
    with tempfile.TemporaryDirectory() as temp_path:
        fast_tokenizer_file = os.path.join(temp_path, "tokenizer.json")
        with open(fast_tokenizer_file, "w", encoding="utf-8") as f:
            json.dump(fast_tokenizer_payload, f)
        self.tokenizer = TokenizerFast.from_file(fast_tokenizer_file)

    # An in-memory JSON round trip yields exactly what writing and re-reading
    # `tokenizer_config.json` produced (e.g. string keys), without the file.
    init_kwargs = json.loads(json.dumps(tokenizer_config_payload))

    added_tokens_decoder = {}
    for idx, token in init_kwargs.pop("added_tokens_decoder", {}).items():
        if isinstance(token, dict):
            token = AddedToken(**token)
        if not isinstance(token, AddedToken):
            raise ValueError(
                f"Found a {token.__class__} in the saved `added_tokens_decoder`, should be a dictionary or an AddedToken instance"
            )
        added_tokens_decoder[int(idx)] = token

    # NOTE(review): this membership test compares tokens against the integer
    # keys of the dict, so it presumably never filters anything out; kept
    # unchanged pending confirmation of the intended behaviour.
    tokens_to_add = [
        token
        for _, token in sorted(added_tokens_decoder.items(), key=lambda x: x[0])
        if token not in added_tokens_decoder
    ]
    added_tokens_encoder = self.added_tokens_encoder(added_tokens_decoder)
    encoder = list(added_tokens_encoder.keys()) + [
        str(token) for token in tokens_to_add
    ]
    tokens_to_add += [
        token
        for token in self.all_special_tokens_extended
        if token not in encoder and token not in tokens_to_add
    ]
    if tokens_to_add:
        # Register the tokens in their original order, batching consecutive
        # tokens that share the same "special" flag into a single call.
        is_last_special = None
        tokens = []
        special_tokens = self.all_special_tokens
        for token in tokens_to_add:
            is_special = (
                (token.special or str(token) in special_tokens)
                if isinstance(token, AddedToken)
                else str(token) in special_tokens
            )
            if is_last_special is None or is_last_special == is_special:
                tokens.append(token)
            else:
                self._add_tokens(tokens, special_tokens=is_last_special)
                tokens = [token]
            is_last_special = is_special
        if tokens:
            self._add_tokens(tokens, special_tokens=is_last_special)
688
+
689
def _add_tokens(
    self, new_tokens: List[Union[AddedToken, str]], special_tokens: bool = False
) -> List[Union[AddedToken, str]]:
    """Registers ``new_tokens`` with the wrapped tokenizer.

    Args:
        new_tokens (List[Union[AddedToken, str]]): Tokens to register.
        special_tokens (bool): When true the tokens are registered through
            ``add_special_tokens``; otherwise through ``add_tokens``.

    Returns:
        The value returned by the underlying tokenizer call.
    """
    register = (
        self.tokenizer.add_special_tokens
        if special_tokens
        else self.tokenizer.add_tokens
    )
    return register(new_tokens)
705
+
706
def added_tokens_encoder(
    self, added_tokens_decoder: Dict[int, AddedToken]
) -> Dict[str, int]:
    """Inverts an id-to-token mapping into a content-to-id mapping.

    Args:
        added_tokens_decoder (Dict[int, AddedToken]): Mapping from token id to
            a token object exposing a ``content`` attribute.

    Returns:
        Dict[str, int]: Mapping from token content to id, filled in ascending
        id order (for duplicate contents the highest id wins).
    """
    encoder = {}
    for token_id in sorted(added_tokens_decoder):
        encoder[added_tokens_decoder[token_id].content] = token_id
    return encoder
721
+
722
@property
def all_special_tokens(self) -> List[str]:
    """Lists every special token as a plain string.

    Returns:
        List[str]: ``str()`` of each entry of ``all_special_tokens_extended``,
        in the same order.
    """
    return list(map(str, self.all_special_tokens_extended))
731
+
732
@property
def all_special_tokens_extended(self) -> List[Union[str, AddedToken]]:
    """Collects the special tokens from ``special_tokens_map_extended``.

    Values are visited in map order. A token is dropped when its string form
    was already contributed by an earlier map value; repeats inside one
    list/tuple value are kept, because the seen-set is only updated after the
    whole value has been processed.

    Returns:
        List[Union[str, AddedToken]]: List of all special tokens.
    """
    collected = []
    already_seen = set()
    for entry in self.special_tokens_map_extended.values():
        candidates = entry if isinstance(entry, (list, tuple)) else [entry]
        fresh = [tok for tok in candidates if str(tok) not in already_seen]
        already_seen.update(str(tok) for tok in fresh)
        collected.extend(fresh)
    return collected
749
+
750
@property
def special_tokens_map_extended(self) -> Dict[str, Union[str, List[str]]]:
    """Builds the map of special-token attributes that are currently set.

    Returns:
        Dict[str, Union[str, List[str]]]: For every name in
        ``SPECIAL_TOKENS_ATTRIBUTES`` whose ``_<name>`` attribute is truthy,
        the name mapped to that attribute's value.
    """
    candidates = (
        (name, getattr(self, "_" + name)) for name in self.SPECIAL_TOKENS_ATTRIBUTES
    )
    return {name: value for name, value in candidates if value}
763
+
764
def convert_ids_to_tokens(
    self, ids: Union[int, List[int]], skip_special_tokens: bool = False
) -> Union[str, List[str]]:
    """Converts token IDs to token strings.

    Args:
        ids (Union[int, List[int]]): A single token ID (Python or NumPy
            integer) or an iterable of token IDs.
        skip_special_tokens (bool): Whether to drop special-token IDs when an
            iterable is given. Ignored for a single ID.

    Returns:
        Union[str, List[str]]: The token for a single ID, otherwise the list of
        tokens as returned by ``self.tokenizer.id_to_token``.
    """
    # NumPy integer scalars are not `int` instances; treat them as single IDs
    # too instead of failing when trying to iterate over them.
    if isinstance(ids, (int, np.integer)):
        return self.tokenizer.id_to_token(int(ids))

    special_ids = set()
    if skip_special_tokens:
        # `all_special_ids` is not defined in the visible part of this class,
        # so fall back to resolving the special tokens through the tokenizer.
        # NOTE(review): assumes the tokenizer exposes `token_to_id` — confirm.
        known_ids = getattr(self, "all_special_ids", None)
        if known_ids is None:
            known_ids = [
                self.tokenizer.token_to_id(token)
                for token in self.all_special_tokens
            ]
        # Resolved once, outside the loop, and stored for O(1) membership tests.
        special_ids = set(known_ids)

    tokens = []
    for index in ids:
        index = int(index)
        if index in special_ids:
            continue
        tokens.append(self.tokenizer.id_to_token(index))
    return tokens
785
+
786
def detokenize(self, tokens: List[List[int]]) -> List[List[str]]:
    """Turns batches of token IDs into lists of cleaned token strings.

    Each ID row is converted with ``convert_ids_to_tokens``; missing tokens
    become empty strings, the "Ġ" marker is replaced by a space, surrounding
    whitespace is stripped, and entries equal to the bos/eos/pad token are
    removed.

    Args:
        tokens (List[List[int]]): List of token ID lists.

    Returns:
        List[List[str]]: One list of cleaned token strings per input row.
    """
    backend = self.tokenizer
    backend.bos_token = "<s>"
    backend.eos_token = "</s>"
    backend.pad_token = "<pad>"
    decoded = [self.convert_ids_to_tokens(row) for row in tokens]
    for row in decoded:
        # Walk backwards so deleting an entry never shifts a pending index.
        for pos in range(len(row) - 1, -1, -1):
            piece = row[pos]
            if piece is None:
                piece = ""
            piece = piece.replace("Ġ", " ").strip()
            row[pos] = piece
            if piece in [backend.bos_token, backend.eos_token, backend.pad_token]:
                del row[pos]
    return decoded
813
+
814
def token2str(self, token_ids: List[List[int]]) -> List[str]:
    """Converts a list of token IDs to strings.

    Every row is cut right after its first end-of-sequence ID (if any),
    decoded with ``self.tokenizer.decode(..., skip_special_tokens=True)`` and
    passed through ``post_process``.

    Args:
        token_ids (List[List[int]]): Rows of token IDs; each row may be a list
            or a NumPy array.

    Returns:
        List[str]: List of converted strings.
    """
    # Use the configured EOS id instead of a magic number; 2 is the value
    # `__init__` assigns and serves as fallback for bare instances.
    eos_id = getattr(self, "eos_token_id", 2)
    generated_text = []
    for tok_id in token_ids:
        # `np.asarray` makes the comparison element-wise for plain lists too;
        # comparing a list to an int yields a single False and skipped the cut.
        end_idx = np.argwhere(np.asarray(tok_id) == eos_id)
        if len(end_idx) > 0:
            tok_id = tok_id[: int(end_idx[0][0]) + 1]
        decoded = self.tokenizer.decode(tok_id, skip_special_tokens=True)
        generated_text.append(self.post_process(decoded))
    return generated_text
834
+
835
def normalize(self, s: str) -> str:
    """Normalizes a string by removing unnecessary spaces.

    Spaces inside ``\\operatorname``/``\\mathrm``/``\\text``/``\\mathbf`` groups
    are removed first. Afterwards whitespace between two neighbouring
    characters is removed repeatedly until nothing changes, unless both
    neighbours are ASCII letters.

    Args:
        s (str): String to normalize.

    Returns:
        str: Normalized string.
    """
    text_reg = r"(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})"
    letter = "[a-zA-Z]"
    # Raw string: "\W" and "\d" are invalid escape sequences in a normal literal.
    noletter = r"[\W_^\d]"

    # Strip the spaces directly inside each match; this replaces the former
    # findall + list.pop(0) bookkeeping with the same result.
    s = re.sub(text_reg, lambda match: match.group(1).replace(" ", ""), s)

    # Compile once; the rules are applied in this order on every pass.
    squeeze_rules = [
        re.compile(r"(?!\\ )(%s)\s+?(%s)" % (noletter, noletter)),
        re.compile(r"(?!\\ )(%s)\s+?(%s)" % (noletter, letter)),
        re.compile(r"(%s)\s+?(%s)" % (letter, noletter)),
    ]
    while True:
        news = s
        for rule in squeeze_rules:
            news = rule.sub(r"\1\2", news)
        if news == s:
            return s
        s = news
858
+
859
def post_process(self, text: str) -> str:
    """Cleans up decoded text.

    The text is first passed through ``ftfy.fix_text`` and the result is then
    handed to ``normalize``.

    Args:
        text (str): String to post-process.

    Returns:
        str: Post-processed string.
    """
    from ftfy import fix_text

    return self.normalize(fix_text(text))
873
+
874
def __call__(
    self,
    preds: np.ndarray,
    label: Optional[np.ndarray] = None,
    mode: str = "eval",
    *args,
    **kwargs,
) -> Union[List[str], tuple]:
    """Decodes predictions, and labels when given, into text.

    Args:
        preds (np.ndarray): Model predictions. In ``"train"`` mode the argmax
            over axis 2 is decoded; in any other mode ``preds`` is decoded as
            token IDs directly.
        label (Optional[np.ndarray]): Label token IDs, if available.
        mode (str): Mode of operation, either 'train' or 'eval'.

    Returns:
        Union[List[str], tuple]: The decoded predictions, or a
        ``(decoded predictions, decoded labels)`` tuple when ``label`` is given.
    """
    token_ids = np.array(preds.argmax(axis=2)) if mode == "train" else np.array(preds)
    text = self.token2str(token_ids)
    if label is not None:
        return text, self.token2str(np.array(label))
    return text
901
+
902
+
903
class UniMERNetTestTransform:
    """
    Image transform used for UniMERNet at test time.
    """

    def __init__(self, **kwargs) -> None:
        """
        Sets up the transform; keyword arguments are accepted but not used.
        """
        super().__init__()
        self.num_output_channels = 3

    def transform(self, img: np.ndarray) -> np.ndarray:
        """
        Normalizes one image and replicates its grayscale version per channel.

        The image is scaled by 1/255, normalized with a fixed mean and std,
        converted with ``cv2.COLOR_BGR2GRAY`` and the squeezed result is
        merged ``num_output_channels`` times.

        Args:
            img (numpy.ndarray): The input image.

        Returns:
            numpy.ndarray: The transformed image.
        """
        channel_shape = (1, 1, 3)
        mean = (
            np.array([0.7931, 0.7931, 0.7931]).reshape(channel_shape).astype("float32")
        )
        std = (
            np.array([0.1738, 0.1738, 0.1738]).reshape(channel_shape).astype("float32")
        )
        normalized = (img.astype("float32") * float(1 / 255.0) - mean) / std
        gray = np.squeeze(cv2.cvtColor(normalized, cv2.COLOR_BGR2GRAY))
        return cv2.merge([gray for _ in range(self.num_output_channels)])

    def __call__(self, imgs: List[np.ndarray]) -> List[np.ndarray]:
        """
        Runs ``transform`` on every image of a list.

        Args:
            imgs (list of numpy.ndarray): The list of input images.

        Returns:
            list of numpy.ndarray: The list of transformed images.
        """
        return list(map(self.transform, imgs))
948
+
949
+
950
class UniMERNetImageFormat:
    """Brings images into the layout UniMERNet expects."""

    def __init__(self, **kwargs) -> None:
        """Creates the formatter; keyword arguments are accepted but not used."""

    def format(self, img: np.ndarray) -> np.ndarray:
        """Pads the first channel of an image and adds two leading axes.

        Channel 0 is padded with ones at the bottom and right so that height
        and width become multiples of 32.

        Args:
            img (numpy.ndarray): The input image, indexed as (H, W, C).

        Returns:
            numpy.ndarray: Array of shape (1, 1, padded H, padded W).
        """
        height, width = img.shape[:2]
        pad_bottom = math.ceil(height / 32) * 32 - height
        pad_right = math.ceil(width / 32) * 32 - width
        first_channel = img[:, :, 0]
        padded = np.pad(
            first_channel,
            ((0, pad_bottom), (0, pad_right)),
            constant_values=(1, 1),
        )
        return padded[np.newaxis, np.newaxis, :, :]

    def __call__(self, imgs: List[np.ndarray]) -> List[np.ndarray]:
        """Formats every image of a list.

        Args:
            imgs (list of numpy.ndarray): The list of input images to be formatted.

        Returns:
            list of numpy.ndarray: The list of formatted images.
        """
        return list(map(self.format, imgs))