paddlex 3.0.0rc1__py3-none-any.whl → 3.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240)
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +1 -1
  3. paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
  4. paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
  5. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
  6. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
  7. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
  8. paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
  9. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
  10. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
  11. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
  12. paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
  13. paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
  14. paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
  15. paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
  16. paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
  17. paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
  18. paddlex/configs/pipelines/OCR.yaml +7 -6
  19. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
  20. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
  21. paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
  22. paddlex/configs/pipelines/doc_understanding.yaml +1 -1
  23. paddlex/configs/pipelines/formula_recognition.yaml +2 -2
  24. paddlex/configs/pipelines/layout_parsing.yaml +3 -2
  25. paddlex/configs/pipelines/seal_recognition.yaml +1 -0
  26. paddlex/configs/pipelines/table_recognition.yaml +2 -1
  27. paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
  28. paddlex/hpip_links.html +20 -20
  29. paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
  30. paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
  31. paddlex/inference/common/result/mixin.py +19 -12
  32. paddlex/inference/models/base/predictor/base_predictor.py +2 -8
  33. paddlex/inference/models/common/static_infer.py +29 -73
  34. paddlex/inference/models/common/tokenizer/__init__.py +2 -0
  35. paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
  36. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
  37. paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
  38. paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
  39. paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
  40. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
  41. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
  42. paddlex/inference/models/common/tokenizer/vocab.py +7 -7
  43. paddlex/inference/models/common/ts/funcs.py +19 -8
  44. paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
  45. paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
  46. paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
  47. paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
  48. paddlex/inference/models/common/vlm/generation/utils.py +1 -1
  49. paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
  50. paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
  51. paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
  52. paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
  53. paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
  54. paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
  55. paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
  56. paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
  57. paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
  58. paddlex/inference/models/doc_vlm/predictor.py +79 -24
  59. paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
  60. paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
  61. paddlex/inference/models/doc_vlm/processors/common.py +189 -0
  62. paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
  63. paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
  64. paddlex/inference/models/formula_recognition/predictor.py +8 -2
  65. paddlex/inference/models/formula_recognition/processors.py +90 -77
  66. paddlex/inference/models/formula_recognition/result.py +28 -27
  67. paddlex/inference/models/image_feature/processors.py +3 -4
  68. paddlex/inference/models/keypoint_detection/predictor.py +3 -0
  69. paddlex/inference/models/object_detection/predictor.py +2 -0
  70. paddlex/inference/models/object_detection/processors.py +28 -3
  71. paddlex/inference/models/object_detection/utils.py +2 -0
  72. paddlex/inference/models/table_structure_recognition/result.py +0 -10
  73. paddlex/inference/models/text_detection/predictor.py +8 -0
  74. paddlex/inference/models/text_detection/processors.py +44 -10
  75. paddlex/inference/models/text_detection/result.py +0 -10
  76. paddlex/inference/models/text_recognition/result.py +1 -1
  77. paddlex/inference/pipelines/__init__.py +9 -5
  78. paddlex/inference/pipelines/_parallel.py +172 -0
  79. paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
  80. paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
  81. paddlex/inference/pipelines/base.py +14 -4
  82. paddlex/inference/pipelines/components/faisser.py +1 -1
  83. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
  84. paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
  85. paddlex/inference/pipelines/formula_recognition/result.py +1 -11
  86. paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
  87. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
  88. paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
  89. paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
  90. paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
  91. paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
  92. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +832 -260
  93. paddlex/inference/pipelines/layout_parsing/result.py +4 -17
  94. paddlex/inference/pipelines/layout_parsing/result_v2.py +259 -245
  95. paddlex/inference/pipelines/layout_parsing/setting.py +88 -0
  96. paddlex/inference/pipelines/layout_parsing/utils.py +391 -2028
  97. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
  98. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1199 -0
  99. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +615 -0
  100. paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
  101. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
  102. paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
  103. paddlex/inference/pipelines/ocr/pipeline.py +127 -70
  104. paddlex/inference/pipelines/ocr/result.py +21 -18
  105. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
  106. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
  107. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
  108. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
  109. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +6 -6
  110. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
  111. paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
  112. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
  113. paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
  114. paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
  115. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
  116. paddlex/inference/pipelines/table_recognition/result.py +1 -1
  117. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
  118. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
  119. paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
  120. paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
  121. paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
  122. paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
  123. paddlex/inference/serving/basic_serving/_app.py +46 -13
  124. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
  125. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
  126. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
  127. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
  128. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
  129. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
  130. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
  131. paddlex/inference/serving/infra/utils.py +20 -22
  132. paddlex/inference/serving/schemas/formula_recognition.py +1 -1
  133. paddlex/inference/serving/schemas/layout_parsing.py +1 -2
  134. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
  135. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
  136. paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
  137. paddlex/inference/serving/schemas/seal_recognition.py +1 -1
  138. paddlex/inference/serving/schemas/table_recognition.py +2 -6
  139. paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
  140. paddlex/inference/utils/hpi.py +30 -16
  141. paddlex/inference/utils/hpi_model_info_collection.json +666 -162
  142. paddlex/inference/utils/io/readers.py +12 -12
  143. paddlex/inference/utils/misc.py +20 -0
  144. paddlex/inference/utils/mkldnn_blocklist.py +59 -0
  145. paddlex/inference/utils/official_models.py +140 -5
  146. paddlex/inference/utils/pp_option.py +74 -9
  147. paddlex/model.py +2 -2
  148. paddlex/modules/__init__.py +1 -1
  149. paddlex/modules/anomaly_detection/evaluator.py +2 -2
  150. paddlex/modules/base/__init__.py +1 -1
  151. paddlex/modules/base/evaluator.py +5 -5
  152. paddlex/modules/base/trainer.py +1 -1
  153. paddlex/modules/doc_vlm/dataset_checker.py +2 -2
  154. paddlex/modules/doc_vlm/evaluator.py +2 -2
  155. paddlex/modules/doc_vlm/exportor.py +2 -2
  156. paddlex/modules/doc_vlm/model_list.py +1 -1
  157. paddlex/modules/doc_vlm/trainer.py +2 -2
  158. paddlex/modules/face_recognition/evaluator.py +2 -2
  159. paddlex/modules/formula_recognition/evaluator.py +5 -2
  160. paddlex/modules/formula_recognition/model_list.py +3 -0
  161. paddlex/modules/formula_recognition/trainer.py +3 -0
  162. paddlex/modules/general_recognition/evaluator.py +1 -1
  163. paddlex/modules/image_classification/evaluator.py +2 -2
  164. paddlex/modules/image_classification/model_list.py +1 -0
  165. paddlex/modules/instance_segmentation/evaluator.py +1 -1
  166. paddlex/modules/keypoint_detection/evaluator.py +1 -1
  167. paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
  168. paddlex/modules/multilabel_classification/evaluator.py +2 -2
  169. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
  170. paddlex/modules/object_detection/evaluator.py +2 -2
  171. paddlex/modules/object_detection/model_list.py +2 -0
  172. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
  173. paddlex/modules/semantic_segmentation/evaluator.py +2 -2
  174. paddlex/modules/table_recognition/evaluator.py +2 -2
  175. paddlex/modules/text_detection/evaluator.py +2 -2
  176. paddlex/modules/text_detection/model_list.py +2 -0
  177. paddlex/modules/text_recognition/evaluator.py +2 -2
  178. paddlex/modules/text_recognition/model_list.py +2 -0
  179. paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
  180. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  181. paddlex/modules/ts_classification/evaluator.py +2 -2
  182. paddlex/modules/ts_forecast/evaluator.py +2 -2
  183. paddlex/modules/video_classification/evaluator.py +2 -2
  184. paddlex/modules/video_detection/evaluator.py +2 -2
  185. paddlex/ops/__init__.py +8 -5
  186. paddlex/paddlex_cli.py +19 -13
  187. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
  188. paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
  189. paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
  190. paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
  191. paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
  192. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
  193. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
  194. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
  195. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
  196. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
  197. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
  198. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
  199. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
  200. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
  201. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
  202. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
  203. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
  204. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
  205. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
  206. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
  207. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
  208. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
  209. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
  210. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
  211. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
  212. paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
  213. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
  214. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
  215. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
  216. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
  217. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
  218. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
  219. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
  220. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
  221. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
  222. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
  223. paddlex/repo_apis/base/config.py +1 -1
  224. paddlex/repo_manager/core.py +3 -3
  225. paddlex/repo_manager/meta.py +6 -2
  226. paddlex/repo_manager/repo.py +17 -16
  227. paddlex/utils/custom_device_list.py +26 -2
  228. paddlex/utils/deps.py +3 -3
  229. paddlex/utils/device.py +5 -13
  230. paddlex/utils/env.py +4 -0
  231. paddlex/utils/flags.py +11 -4
  232. paddlex/utils/fonts/__init__.py +34 -4
  233. paddlex/utils/misc.py +1 -1
  234. paddlex/utils/subclass_register.py +2 -2
  235. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/METADATA +349 -208
  236. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/RECORD +240 -211
  237. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/WHEEL +1 -1
  238. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/entry_points.txt +1 -0
  239. {paddlex-3.0.0rc1.dist-info/licenses → paddlex-3.0.2.dist-info}/LICENSE +0 -0
  240. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/top_level.txt +0 -0
@@ -18,14 +18,26 @@ from .base_batch_sampler import BaseBatchSampler
18
18
 
19
19
 
20
20
  class DocVLMBatchSampler(BaseBatchSampler):
21
- def __init__(self):
21
+
22
+ model_names_only_supports_batchsize_of_one = {"PP-DocBee-2B", "PP-DocBee-7B"}
23
+
24
+ def __init__(self, model_name, batch_size: int = 1) -> None:
22
25
  """Initializes the BaseBatchSampler.
23
26
 
24
27
  Args:
28
+ model_name (str): The name of the model.
25
29
  batch_size (int, optional): The size of each batch. Only support 1.
26
30
  """
27
- super().__init__()
28
- self.batch_size = 1
31
+ self.model_name = model_name
32
+ if (
33
+ self.model_name in self.model_names_only_supports_batchsize_of_one
34
+ and batch_size != 1
35
+ ):
36
+ logging.warning(
37
+ f"doc vlm batch sampler only support batch size 1 for {self.model_name}, but got {batch_size} and it will not take effect."
38
+ )
39
+ batch_size = 1
40
+ super().__init__(batch_size)
29
41
 
30
42
  def sample(self, inputs):
31
43
  """Generate list of input file path.
@@ -37,14 +49,22 @@ class DocVLMBatchSampler(BaseBatchSampler):
37
49
  list: list of file path.
38
50
  """
39
51
  if isinstance(inputs, dict):
40
- yield [inputs]
41
- elif isinstance(inputs, list) and all(isinstance(i, dict) for i in inputs):
42
- yield inputs
43
- else:
52
+ inputs = [inputs]
53
+ if not (isinstance(inputs, list) and all(isinstance(i, dict) for i in inputs)):
44
54
  raise TypeError(
45
- f"Not supported input data type! Only `dict` are supported, but got: {type(inputs)}."
55
+ f"Not supported input data type! Only `Dict` or `List[Dict]` are supported, but got: {type(inputs)}."
46
56
  )
47
57
 
58
+ batch = []
59
+ for input_ in inputs:
60
+ batch.append(input_)
61
+ if len(batch) == self.batch_size:
62
+ yield batch
63
+ batch = []
64
+
65
+ if len(batch) > 0:
66
+ yield batch
67
+
48
68
  @BaseBatchSampler.batch_size.setter
49
69
  def batch_size(self, batch_size):
50
70
  """Sets the batch size.
@@ -56,9 +76,12 @@ class DocVLMBatchSampler(BaseBatchSampler):
56
76
  Warning: If the batch size is not equal 1.
57
77
  """
58
78
  # only support batch size 1
59
- if batch_size != 1:
79
+ if (
80
+ self.model_name in self.model_names_only_supports_batchsize_of_one
81
+ and batch_size != 1
82
+ ):
60
83
  logging.warning(
61
- f"doc vlm batch sampler only support batch size 1, but got {batch_size}."
84
+ f"doc vlm batch sampler only support batch size 1 for {self.model_name}, but got {batch_size} and it will not take effect."
62
85
  )
63
86
  else:
64
87
  self._batch_size = batch_size
@@ -40,7 +40,8 @@ class ImgBatch(Batch):
40
40
 
41
41
  class ImageBatchSampler(BaseBatchSampler):
42
42
 
43
- SUFFIX = ["jpg", "png", "jpeg", "JPEG", "JPG", "bmp"]
43
+ IMG_SUFFIX = ["jpg", "png", "jpeg", "bmp"]
44
+ PDF_SUFFIX = ["pdf"]
44
45
 
45
46
  def __init__(self, *args, **kwargs):
46
47
  super().__init__(*args, **kwargs)
@@ -54,16 +55,19 @@ class ImageBatchSampler(BaseBatchSampler):
54
55
  return save_path.as_posix()
55
56
 
56
57
  def _get_files_list(self, fp):
57
- file_list = []
58
58
  if fp is None or not os.path.exists(fp):
59
- raise Exception(f"Not found any img file in path: {fp}")
59
+ raise Exception(f"Not found any files in path: {fp}")
60
+ if os.path.isfile(fp):
61
+ return [fp]
60
62
 
61
- if os.path.isfile(fp) and fp.split(".")[-1] in self.SUFFIX:
62
- file_list.append(fp)
63
- elif os.path.isdir(fp):
63
+ file_list = []
64
+ if os.path.isdir(fp):
64
65
  for root, dirs, files in os.walk(fp):
65
66
  for single_file in files:
66
- if single_file.split(".")[-1] in self.SUFFIX:
67
+ if (
68
+ single_file.split(".")[-1].lower()
69
+ in self.IMG_SUFFIX + self.PDF_SUFFIX
70
+ ):
67
71
  file_list.append(os.path.join(root, single_file))
68
72
  if len(file_list) == 0:
69
73
  raise Exception("Not found any file in {}".format(fp))
@@ -81,29 +85,34 @@ class ImageBatchSampler(BaseBatchSampler):
81
85
  if len(batch) == self.batch_size:
82
86
  yield batch
83
87
  batch = ImgBatch()
84
- elif isinstance(input, str) and input.split(".")[-1] in ("PDF", "pdf"):
85
- file_path = (
86
- self._download_from_url(input)
87
- if input.startswith("http")
88
- else input
89
- )
90
- for page_idx, page_img in enumerate(self.pdf_reader.read(file_path)):
91
- batch.append(page_img, file_path, page_idx)
92
- if len(batch) == self.batch_size:
93
- yield batch
94
- batch = ImgBatch()
95
88
  elif isinstance(input, str):
96
- file_path = (
97
- self._download_from_url(input)
98
- if input.startswith("http")
99
- else input
100
- )
101
- file_list = self._get_files_list(file_path)
102
- for file_path in file_list:
89
+ suffix = input.split(".")[-1].lower()
90
+ if suffix in self.PDF_SUFFIX:
91
+ file_path = (
92
+ self._download_from_url(input)
93
+ if input.startswith("http")
94
+ else input
95
+ )
96
+ for page_idx, page_img in enumerate(
97
+ self.pdf_reader.read(file_path)
98
+ ):
99
+ batch.append(page_img, file_path, page_idx)
100
+ if len(batch) == self.batch_size:
101
+ yield batch
102
+ batch = ImgBatch()
103
+ elif suffix in self.IMG_SUFFIX:
104
+ file_path = (
105
+ self._download_from_url(input)
106
+ if input.startswith("http")
107
+ else input
108
+ )
103
109
  batch.append(file_path, file_path, None)
104
110
  if len(batch) == self.batch_size:
105
111
  yield batch
106
112
  batch = ImgBatch()
113
+ else:
114
+ file_list = self._get_files_list(input)
115
+ yield from self.sample(file_list)
107
116
  else:
108
117
  logging.warning(
109
118
  f"Not supported input data type! Only `numpy.ndarray` and `str` are supported! So has been ignored: {input}."
@@ -161,7 +161,7 @@ class JsonMixin:
161
161
  else:
162
162
  if len(json_data) > 1:
163
163
  logging.warning(
164
- f"The result has multiple json files need to be saved. But the `save_path` has been specfied as `{save_path}`!"
164
+ f"The result has multiple json files need to be saved. But the `save_path` has been specified as `{save_path}`!"
165
165
  )
166
166
  self._json_writer.write(
167
167
  save_path,
@@ -264,7 +264,7 @@ class Base64Mixin:
264
264
  else:
265
265
  if len(base64) > 1:
266
266
  logging.warning(
267
- f"The result has multiple base64 files need to be saved. But the `save_path` has been specfied as `{save_path}`!"
267
+ f"The result has multiple base64 files need to be saved. But the `save_path` has been specified as `{save_path}`!"
268
268
  )
269
269
  self._base64_writer.write(
270
270
  save_path, base64[list(base64.keys())[0]], *args, **kwargs
@@ -328,7 +328,7 @@ class ImgMixin:
328
328
  else:
329
329
  if len(img) > 1:
330
330
  logging.warning(
331
- f"The result has multiple img files need to be saved. But the `save_path` has been specfied as `{save_path}`!"
331
+ f"The result has multiple img files need to be saved. But the `save_path` has been specified as `{save_path}`!"
332
332
  )
333
333
  self._img_writer.write(save_path, img[list(img.keys())[0]], *args, **kwargs)
334
334
 
@@ -392,7 +392,7 @@ class CSVMixin:
392
392
  else:
393
393
  if len(csv) > 1:
394
394
  logging.warning(
395
- f"The result has multiple csv files need to be saved. But the `save_path` has been specfied as `{save_path}`!"
395
+ f"The result has multiple csv files need to be saved. But the `save_path` has been specified as `{save_path}`!"
396
396
  )
397
397
  self._csv_writer.write(save_path, csv[list(csv.keys())[0]], *args, **kwargs)
398
398
 
@@ -455,7 +455,7 @@ class HtmlMixin:
455
455
  else:
456
456
  if len(html) > 1:
457
457
  logging.warning(
458
- f"The result has multiple html files need to be saved. But the `save_path` has been specfied as `{save_path}`!"
458
+ f"The result has multiple html files need to be saved. But the `save_path` has been specified as `{save_path}`!"
459
459
  )
460
460
  self._html_writer.write(
461
461
  save_path, html[list(html.keys())[0]], *args, **kwargs
@@ -524,7 +524,7 @@ class XlsxMixin:
524
524
  else:
525
525
  if len(xlsx) > 1:
526
526
  logging.warning(
527
- f"The result has multiple xlsx files need to be saved. But the `save_path` has been specfied as `{save_path}`!"
527
+ f"The result has multiple xlsx files need to be saved. But the `save_path` has been specified as `{save_path}`!"
528
528
  )
529
529
  self._xlsx_writer.write(
530
530
  save_path, xlsx[list(xlsx.keys())[0]], *args, **kwargs
@@ -589,7 +589,7 @@ class VideoMixin:
589
589
  else:
590
590
  if len(video) > 1:
591
591
  logging.warning(
592
- f"The result has multiple video files need to be saved. But the `save_path` has been specfied as `{save_path}`!"
592
+ f"The result has multiple video files need to be saved. But the `save_path` has been specified as `{save_path}`!"
593
593
  )
594
594
  video_writer.write(save_path, video[list(video.keys())[0]], *args, **kwargs)
595
595
 
@@ -609,10 +609,13 @@ class MarkdownMixin:
609
609
  self._save_funcs.append(self.save_to_markdown)
610
610
 
611
611
  @abstractmethod
612
- def _to_markdown(self) -> Dict[str, Union[str, Dict[str, Any]]]:
612
+ def _to_markdown(self, pretty=True) -> Dict[str, Union[str, Dict[str, Any]]]:
613
613
  """
614
614
  Convert the result to markdown format.
615
615
 
616
+ Args:
617
+ pretty (Optional[bool]): whether to pretty markdown by HTML, default by True.
618
+
616
619
  Returns:
617
620
  Dict[str, Union[str, Dict[str, Any]]]: A dictionary containing markdown text and image data.
618
621
  """
@@ -627,7 +630,7 @@ class MarkdownMixin:
627
630
  """
628
631
  return self._to_markdown()
629
632
 
630
- def save_to_markdown(self, save_path, *args, **kwargs) -> None:
633
+ def save_to_markdown(self, save_path, pretty=True, *args, **kwargs) -> None:
631
634
  """Save the markdown data to a file.
632
635
 
633
636
  Args:
@@ -665,7 +668,7 @@ class MarkdownMixin:
665
668
  self._markdown_writer.write,
666
669
  self._img_writer.write,
667
670
  self.save_path,
668
- self._to_markdown(),
671
+ self._to_markdown(pretty=pretty),
669
672
  *args,
670
673
  **kwargs,
671
674
  )
@@ -698,5 +701,9 @@ class MarkdownMixin:
698
701
  if isinstance(value, dict):
699
702
  base_save_path = save_path.parent
700
703
  for img_path, img_data in value.items():
701
- save_path = base_save_path / img_path
702
- save_img_func(save_path.as_posix(), img_data, *args, **kwargs)
704
+ save_img_func(
705
+ (base_save_path / img_path).as_posix(),
706
+ img_data,
707
+ *args,
708
+ **kwargs,
709
+ )
@@ -118,17 +118,9 @@ class BasePredictor(
118
118
  self.batch_sampler.batch_size = batch_size
119
119
  self._use_hpip = use_hpip
120
120
  if not use_hpip:
121
- if hpi_config is not None:
122
- logging.warning(
123
- "`hpi_config` will be ignored when not using the high-performance inference plugin."
124
- )
125
121
  self._pp_option = self._prepare_pp_option(pp_option, device)
126
122
  else:
127
123
  require_hpip()
128
- if pp_option is not None:
129
- logging.warning(
130
- "`pp_option` will be ignored when using the high-performance inference plugin."
131
- )
132
124
  self._hpi_config = self._prepare_hpi_config(hpi_config, device)
133
125
 
134
126
  logging.debug(f"{self.__class__.__name__}: {self.model_dir}")
@@ -343,6 +335,8 @@ class BasePredictor(
343
335
  device_info = None
344
336
  if pp_option is None:
345
337
  pp_option = PaddlePredictorOption(model_name=self.model_name)
338
+ elif pp_option.model_name is None:
339
+ pp_option.model_name = self.model_name
346
340
  if device_info:
347
341
  pp_option.device_type = device_info[0]
348
342
  pp_option.device_id = device_info[1]
@@ -22,8 +22,7 @@ import numpy as np
22
22
 
23
23
  from ....utils import logging
24
24
  from ....utils.deps import class_requires_deps
25
- from ....utils.device import constr_device
26
- from ....utils.flags import DEBUG, INFER_BENCHMARK_USE_NEW_INFER_API, USE_PIR_TRT
25
+ from ....utils.flags import DEBUG, USE_PIR_TRT
27
26
  from ...utils.benchmark import benchmark, set_inference_operations
28
27
  from ...utils.hpi import (
29
28
  HPIConfig,
@@ -34,15 +33,12 @@ from ...utils.hpi import (
34
33
  suggest_inference_backend_and_config,
35
34
  )
36
35
  from ...utils.model_paths import get_model_paths
37
- from ...utils.pp_option import PaddlePredictorOption
36
+ from ...utils.pp_option import PaddlePredictorOption, get_default_run_mode
38
37
  from ...utils.trt_config import DISABLE_TRT_HALF_OPS_CONFIG
39
38
 
40
39
  CACHE_DIR = ".cache"
41
40
 
42
41
  INFERENCE_OPERATIONS = [
43
- "PaddleCopyToDevice",
44
- "PaddleCopyToHost",
45
- "PaddleModelInfer",
46
42
  "PaddleInferChainLegacy",
47
43
  "MultiBackendInfer",
48
44
  ]
@@ -233,47 +229,6 @@ def _sort_inputs(inputs, names):
233
229
  return inputs
234
230
 
235
231
 
236
- def _concatenate(*callables):
237
- def _chain(x):
238
- for c in callables:
239
- x = c(x)
240
- return x
241
-
242
- return _chain
243
-
244
-
245
- @benchmark.timeit
246
- class PaddleCopyToDevice:
247
- def __init__(self, device_type, device_id):
248
- self.device_type = device_type
249
- self.device_id = device_id
250
-
251
- def __call__(self, arrs):
252
- import paddle
253
-
254
- device_id = [self.device_id] if self.device_id is not None else self.device_id
255
- device = constr_device(self.device_type, device_id)
256
- paddle_tensors = [paddle.to_tensor(i, place=device) for i in arrs]
257
- return paddle_tensors
258
-
259
-
260
- @benchmark.timeit
261
- class PaddleCopyToHost:
262
- def __call__(self, paddle_tensors):
263
- arrs = [i.numpy() for i in paddle_tensors]
264
- return arrs
265
-
266
-
267
- @benchmark.timeit
268
- class PaddleModelInfer:
269
- def __init__(self, predictor):
270
- super().__init__()
271
- self.predictor = predictor
272
-
273
- def __call__(self, x):
274
- return self.predictor.run(x)
275
-
276
-
277
232
  # FIXME: Name might be misleading
278
233
  @benchmark.timeit
279
234
  class PaddleInferChainLegacy:
@@ -317,15 +272,7 @@ class PaddleInfer(StaticInfer):
317
272
  self.model_file_prefix = model_file_prefix
318
273
  self._option = option
319
274
  self.predictor = self._create()
320
- if INFER_BENCHMARK_USE_NEW_INFER_API:
321
- device_type = self._option.device_type
322
- device_type = "gpu" if device_type == "dcu" else device_type
323
- copy_to_device = PaddleCopyToDevice(device_type, self._option.device_id)
324
- copy_to_host = PaddleCopyToHost()
325
- model_infer = PaddleModelInfer(self.predictor)
326
- self.infer = _concatenate(copy_to_device, model_infer, copy_to_host)
327
- else:
328
- self.infer = PaddleInferChainLegacy(self.predictor)
275
+ self.infer = PaddleInferChainLegacy(self.predictor)
329
276
 
330
277
  def __call__(self, x: Sequence[np.ndarray]) -> List[np.ndarray]:
331
278
  names = self.predictor.get_input_names()
@@ -373,7 +320,7 @@ class PaddleInfer(StaticInfer):
373
320
  logging.debug("`device_id` has been set to None")
374
321
 
375
322
  if (
376
- self._option.device_type in ("gpu", "dcu")
323
+ self._option.device_type in ("gpu", "dcu", "npu", "mlu", "gcu", "xpu")
377
324
  and self._option.device_id is None
378
325
  ):
379
326
  self._option.device_id = 0
@@ -402,6 +349,7 @@ class PaddleInfer(StaticInfer):
402
349
  if self._option.run_mode == "paddle_fp16"
403
350
  else PrecisionType.Float32
404
351
  )
352
+ config.disable_mkldnn()
405
353
  config.enable_use_gpu(100, self._option.device_id, precision)
406
354
  if hasattr(config, "enable_new_ir"):
407
355
  config.enable_new_ir(self._option.enable_new_ir)
@@ -417,12 +365,16 @@ class PaddleInfer(StaticInfer):
417
365
  if hasattr(config, "enable_new_executor"):
418
366
  config.enable_new_executor()
419
367
  elif self._option.device_type == "xpu":
368
+ config.enable_xpu()
369
+ config.set_xpu_device_id(self._option.device_id)
420
370
  if hasattr(config, "enable_new_ir"):
421
371
  config.enable_new_ir(self._option.enable_new_ir)
422
372
  if hasattr(config, "enable_new_executor"):
423
373
  config.enable_new_executor()
374
+ config.delete_pass("conv2d_bn_xpu_fuse_pass")
375
+ config.delete_pass("transfer_layout_pass")
424
376
  elif self._option.device_type == "mlu":
425
- config.enable_custom_device("mlu")
377
+ config.enable_custom_device("mlu", self._option.device_id)
426
378
  if hasattr(config, "enable_new_ir"):
427
379
  config.enable_new_ir(self._option.enable_new_ir)
428
380
  if hasattr(config, "enable_new_executor"):
@@ -431,7 +383,7 @@ class PaddleInfer(StaticInfer):
431
383
  from paddle_custom_device.gcu import passes as gcu_passes
432
384
 
433
385
  gcu_passes.setUp()
434
- config.enable_custom_device("gcu")
386
+ config.enable_custom_device("gcu", self._option.device_id)
435
387
  if hasattr(config, "enable_new_ir"):
436
388
  config.enable_new_ir()
437
389
  if hasattr(config, "enable_new_executor"):
@@ -455,15 +407,10 @@ class PaddleInfer(StaticInfer):
455
407
  assert self._option.device_type == "cpu"
456
408
  config.disable_gpu()
457
409
  if "mkldnn" in self._option.run_mode:
458
- try:
459
- config.enable_mkldnn()
460
- if "bf16" in self._option.run_mode:
461
- config.enable_mkldnn_bfloat16()
462
- except Exception:
463
- logging.warning(
464
- "MKL-DNN is not available. We will disable MKL-DNN."
465
- )
466
- config.set_mkldnn_cache_capacity(-1)
410
+ config.enable_mkldnn()
411
+ if "bf16" in self._option.run_mode:
412
+ config.enable_mkldnn_bfloat16()
413
+ config.set_mkldnn_cache_capacity(self._option.mkldnn_cache_capacity)
467
414
  else:
468
415
  if hasattr(config, "disable_mkldnn"):
469
416
  config.disable_mkldnn()
@@ -687,10 +634,19 @@ class HPInfer(StaticInfer):
687
634
  )
688
635
  backend_config = self._config.backend_config or {}
689
636
 
690
- if backend == "paddle" and not backend_config:
691
- logging.warning(
692
- "The Paddle Inference backend is selected with the default configuration. This may not provide optimal performance."
693
- )
637
+ if backend == "paddle":
638
+ if not backend_config:
639
+ is_default_config = True
640
+ elif backend_config.keys() != {"run_mode"}:
641
+ is_default_config = False
642
+ else:
643
+ is_default_config = backend_config["run_mode"] == get_default_run_mode(
644
+ self._config.pdx_model_name, self._config.device_type
645
+ )
646
+ if is_default_config:
647
+ logging.warning(
648
+ "The Paddle Inference backend is selected with the default configuration. This may not provide optimal performance."
649
+ )
694
650
 
695
651
  return backend, backend_config
696
652
 
@@ -833,7 +789,7 @@ class HPInfer(StaticInfer):
833
789
  for name, shapes in backend_config.dynamic_shapes.items():
834
790
  ui_option.trt_option.set_shape(name, *shapes)
835
791
  else:
836
- logging.warning(
792
+ logging.info(
837
793
  "TensorRT dynamic shapes will be loaded from the file."
838
794
  )
839
795
  elif backend == "om":
@@ -15,5 +15,7 @@
15
15
  from .bert_tokenizer import BertTokenizer
16
16
  from .clip_tokenizer import CLIPTokenizer
17
17
  from .gpt_tokenizer import GPTTokenizer
18
+ from .qwen2_5_tokenizer import MIXQwen2_5_Tokenizer
18
19
  from .qwen2_tokenizer import MIXQwen2Tokenizer, Qwen2Tokenizer
20
+ from .qwen_tokenizer import QWenTokenizer
19
21
  from .tokenizer_utils import PretrainedTokenizer
@@ -403,7 +403,7 @@ class CLIPTokenizer(PretrainedTokenizer):
403
403
  Returns the size of vocabulary.
404
404
 
405
405
  Returns:
406
- int: The sum of size of vocabulary and the size of speical tokens.
406
+ int: The sum of size of vocabulary and the size of special tokens.
407
407
 
408
408
  """
409
409
  return len(self.encoder)
@@ -41,7 +41,7 @@ def bytes_to_unicode():
41
41
  The reversible bpe codes work on unicode strings.
42
42
  This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
43
43
  When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
44
- This is a signficant percentage of your normal, say, 32K bpe vocab.
44
+ This is a significant percentage of your normal, say, 32K bpe vocab.
45
45
  To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
46
46
  And avoids mapping to whitespace/control characters the bpe code barfs on.
47
47
  """
@@ -241,7 +241,7 @@ class GPTTokenizer(PretrainedTokenizer):
241
241
  Returns the size of vocabulary.
242
242
 
243
243
  Returns:
244
- int: The sum of size of vocabulary and the size of speical tokens.
244
+ int: The sum of size of vocabulary and the size of special tokens.
245
245
 
246
246
  """
247
247
 
@@ -0,0 +1,112 @@
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import re
16
+ from typing import List
17
+
18
+ from .qwen2_tokenizer import Qwen2Tokenizer
19
+ from .tokenizer_utils_base import AddedToken, TextInput
20
+
21
+
22
class MIXQwen2_5_Tokenizer(Qwen2Tokenizer):
    """Qwen2 tokenizer variant whose `tokenize` honours special added tokens.

    Before splitting the text, every entry of `added_tokens_decoder` that is
    an `AddedToken` flagged as `special` is registered as a no-split token,
    so it is emitted as a single token instead of being passed to
    `_tokenize`.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to `Qwen2Tokenizer.__init__`."""
        super().__init__(*args, **kwargs)

    def tokenize(self, text: TextInput, **kwargs) -> List[str]:
        """
        Convert a string into a sequence of tokens.

        Text is first cut around the no-split (added/special) tokens; those
        are kept verbatim, and every other non-empty fragment is handed to
        `_tokenize`.

        Args:
            text (`str`):
                The sequence to be encoded.
            **kwargs (additional keyword arguments):
                `split_special_tokens` is consumed here (falling back to
                `self.split_special_tokens`); the remaining keyword
                arguments are passed to `prepare_for_tokenization`.

        Returns:
            `List[str]`: The list of tokens.
        """
        split_special_tokens = kwargs.pop(
            "split_special_tokens", self.split_special_tokens
        )

        # Map "token text" -> AddedToken so strip flags can be looked up later.
        special_lookup = {
            str(tok): tok
            for tok in self.all_special_tokens_extended
            if isinstance(tok, AddedToken)
        }

        # Register special added tokens as no-split tokens.
        for token_id in self.added_tokens_decoder:
            added = self.added_tokens_decoder[token_id]
            if not (isinstance(added, AddedToken) and added.special):
                continue
            content = str(added)
            special_lookup[content] = added
            # NOTE(review): if `all_special_tokens` is a computed property this
            # append may not persist between calls - confirm in the base class.
            if content not in self.all_special_tokens:
                self.all_special_tokens.append(content)
            if content not in self.unique_no_split_tokens:
                self.unique_no_split_tokens.append(content)

        # NOTE(review): called on every `tokenize` invocation; presumably this
        # rebuilds `self.tokens_trie` - confirm whether it can be cached.
        self._create_trie(self.unique_no_split_tokens)

        text, kwargs = self.prepare_for_tokenization(text, **kwargs)

        # TODO: should this be in the base class?
        if getattr(self, "do_lower_case", False):
            # Lowercase everything except the special / no-split tokens:
            # group 1 matches such a token (kept as is), group 2 matches any
            # other single character (lowercased).
            escaped_special_toks = [
                re.escape(special)
                for special in (self.unique_no_split_tokens + self.all_special_tokens)
            ]
            pattern = r"(" + r"|".join(escaped_special_toks) + r")|" + r"(.+?)"
            text = re.sub(
                pattern, lambda m: m.group(1) or m.group(2).lower(), text
            )

        if split_special_tokens:
            # Treat the whole text as one ordinary fragment.
            no_split_token = []
            tokens = [text]
        else:
            # Don't split on any of the added tokens.
            no_split_token = set(self.unique_no_split_tokens)
            tokens = self.tokens_trie.split(text)

        for idx, piece in enumerate(tokens):
            if piece not in no_split_token:
                continue
            tok_extended = special_lookup.get(piece)
            if not isinstance(tok_extended, AddedToken):
                continue
            left = tokens[idx - 1] if idx > 0 else None
            right = tokens[idx + 1] if idx < len(tokens) - 1 else None
            if tok_extended.rstrip and right:
                # Counter-intuitive: `rstrip` on the special token means it
                # swallows the whitespace on its right, i.e. the *left* side
                # of the following fragment.
                tokens[idx + 1] = right.lstrip()
            if tok_extended.lstrip and left:
                # Mirror case: trim the right side of the preceding fragment.
                tokens[idx - 1] = left.rstrip()

        tokenized_text = []
        for piece in tokens:
            # Fragments that were stripped down to nothing are dropped.
            if not piece:
                continue
            if piece in no_split_token:
                tokenized_text.append(piece)
            else:
                tokenized_text.extend(self._tokenize(piece))

        return tokenized_text
@@ -18,6 +18,7 @@ import unicodedata
18
18
  from functools import lru_cache
19
19
  from typing import List, Optional, Tuple
20
20
 
21
+ from .....utils import logging
21
22
  from .....utils.deps import is_dep_available
22
23
  from .tokenizer_utils import PretrainedTokenizer
23
24
  from .tokenizer_utils_base import AddedToken, TextInput
@@ -146,7 +147,12 @@ class Qwen2Tokenizer(PretrainedTokenizer):
146
147
  split_special_tokens=False,
147
148
  **kwargs,
148
149
  ):
149
- super().__init__(**kwargs)
150
+ if unk_token is None:
151
+ logging.info(
152
+ "The `unk_token` parameter needs to be defined: we use `eos_token` by default."
153
+ )
154
+ unk_token = eos_token
155
+
150
156
  # Qwen vocab does not contain control tokens; added tokens need to be special
151
157
  bos_token = (
152
158
  AddedToken(