paddlex 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. paddlex/.version +1 -1
  2. paddlex/configs/modules/text_recognition/eslav_PP-OCRv5_mobile_rec.yaml +39 -0
  3. paddlex/configs/modules/text_recognition/korean_PP-OCRv5_mobile_rec.yaml +39 -0
  4. paddlex/configs/modules/text_recognition/latin_PP-OCRv5_mobile_rec.yaml +39 -0
  5. paddlex/configs/pipelines/PP-DocTranslation.yaml +261 -0
  6. paddlex/inference/common/batch_sampler/__init__.py +1 -0
  7. paddlex/inference/common/batch_sampler/markdown_batch_sampler.py +116 -0
  8. paddlex/inference/common/result/base_cv_result.py +2 -3
  9. paddlex/inference/common/result/mixin.py +3 -1
  10. paddlex/inference/models/base/predictor/base_predictor.py +2 -0
  11. paddlex/inference/models/common/static_infer.py +2 -0
  12. paddlex/inference/models/common/vlm/generation/utils.py +2 -2
  13. paddlex/inference/models/formula_recognition/result.py +2 -2
  14. paddlex/inference/models/image_classification/result.py +3 -5
  15. paddlex/inference/models/image_multilabel_classification/result.py +2 -2
  16. paddlex/inference/models/object_detection/result.py +2 -2
  17. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +3 -0
  18. paddlex/inference/models/text_recognition/predictor.py +51 -1
  19. paddlex/inference/models/text_recognition/result.py +5 -2
  20. paddlex/inference/models/video_classification/result.py +3 -3
  21. paddlex/inference/models/video_detection/result.py +2 -4
  22. paddlex/inference/pipelines/__init__.py +1 -0
  23. paddlex/inference/pipelines/attribute_recognition/result.py +2 -2
  24. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +1 -0
  25. paddlex/inference/pipelines/components/prompt_engineering/generate_translate_prompt.py +179 -0
  26. paddlex/inference/pipelines/doc_preprocessor/result.py +2 -2
  27. paddlex/inference/pipelines/formula_recognition/result.py +2 -2
  28. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +2 -0
  29. paddlex/inference/pipelines/layout_parsing/result_v2.py +11 -4
  30. paddlex/inference/pipelines/ocr/pipeline.py +2 -0
  31. paddlex/inference/pipelines/ocr/result.py +11 -7
  32. paddlex/inference/pipelines/pp_doctranslation/__init__.py +15 -0
  33. paddlex/inference/pipelines/pp_doctranslation/pipeline.py +523 -0
  34. paddlex/inference/pipelines/pp_doctranslation/result.py +39 -0
  35. paddlex/inference/pipelines/pp_doctranslation/utils.py +260 -0
  36. paddlex/inference/pipelines/pp_shitu_v2/result.py +2 -2
  37. paddlex/inference/serving/basic_serving/_app.py +1 -0
  38. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +4 -2
  39. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +5 -1
  40. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +4 -2
  41. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +4 -2
  42. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +4 -2
  43. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +4 -2
  44. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +4 -2
  45. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +4 -2
  46. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +4 -2
  47. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +4 -2
  48. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +4 -2
  49. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +4 -2
  50. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +4 -2
  51. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +4 -2
  52. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -24
  53. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +16 -26
  54. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py +203 -0
  55. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +4 -2
  56. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +4 -2
  57. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +4 -2
  58. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +4 -2
  59. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +4 -2
  60. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +4 -2
  61. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +4 -2
  62. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -2
  63. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +4 -2
  64. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +4 -2
  65. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +4 -2
  66. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +4 -2
  67. paddlex/inference/serving/infra/utils.py +22 -17
  68. paddlex/inference/serving/schemas/anomaly_detection.py +1 -0
  69. paddlex/inference/serving/schemas/doc_preprocessor.py +1 -0
  70. paddlex/inference/serving/schemas/face_recognition.py +1 -0
  71. paddlex/inference/serving/schemas/formula_recognition.py +1 -0
  72. paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -0
  73. paddlex/inference/serving/schemas/image_classification.py +1 -0
  74. paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -0
  75. paddlex/inference/serving/schemas/instance_segmentation.py +1 -0
  76. paddlex/inference/serving/schemas/layout_parsing.py +1 -0
  77. paddlex/inference/serving/schemas/object_detection.py +1 -0
  78. paddlex/inference/serving/schemas/ocr.py +1 -0
  79. paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -0
  80. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -0
  81. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -0
  82. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +5 -4
  83. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +6 -5
  84. paddlex/inference/serving/schemas/pp_doctranslation.py +115 -0
  85. paddlex/inference/serving/schemas/pp_shituv2.py +1 -0
  86. paddlex/inference/serving/schemas/pp_structurev3.py +2 -9
  87. paddlex/inference/serving/schemas/rotated_object_detection.py +1 -0
  88. paddlex/inference/serving/schemas/seal_recognition.py +1 -0
  89. paddlex/inference/serving/schemas/semantic_segmentation.py +1 -0
  90. paddlex/inference/serving/schemas/shared/ocr.py +8 -1
  91. paddlex/inference/serving/schemas/small_object_detection.py +1 -0
  92. paddlex/inference/serving/schemas/table_recognition.py +1 -0
  93. paddlex/inference/serving/schemas/table_recognition_v2.py +1 -0
  94. paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -0
  95. paddlex/inference/serving/schemas/ts_classification.py +1 -0
  96. paddlex/inference/serving/schemas/ts_forecast.py +1 -0
  97. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -0
  98. paddlex/inference/utils/hpi.py +42 -14
  99. paddlex/inference/utils/hpi_model_info_collection.json +0 -2
  100. paddlex/inference/utils/io/__init__.py +1 -0
  101. paddlex/inference/utils/io/readers.py +46 -0
  102. paddlex/inference/utils/io/writers.py +2 -0
  103. paddlex/inference/utils/official_models.py +7 -0
  104. paddlex/inference/utils/pp_option.py +34 -18
  105. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -2
  106. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  107. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  108. paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  109. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  110. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  111. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  112. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  113. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  114. paddlex/modules/m_3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  115. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  116. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  117. paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  118. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  119. paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  120. paddlex/modules/text_recognition/model_list.py +3 -0
  121. paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  122. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  123. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  124. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +27 -0
  125. paddlex/repo_manager/meta.py +3 -3
  126. paddlex/utils/device.py +4 -1
  127. paddlex/utils/download.py +10 -7
  128. paddlex/utils/{fonts/__init__.py → fonts.py} +45 -26
  129. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/METADATA +25 -1
  130. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/RECORD +134 -122
  131. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/LICENSE +0 -0
  132. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/WHEEL +0 -0
  133. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/entry_points.txt +0 -0
  134. {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/top_level.txt +0 -0
paddlex/.version CHANGED
@@ -1 +1 @@
1
- 3.0.2
1
+ 3.1.0
@@ -0,0 +1,39 @@
1
+ Global:
2
+ model: eslav_PP-OCRv5_mobile_rec
3
+ mode: check_dataset # check_dataset/train/evaluate/predict
4
+ dataset_dir: "/paddle/dataset/paddlex/ocr_rec/ocr_rec_dataset_examples"
5
+ device: gpu:0,1,2,3
6
+ output: "output"
7
+
8
+ CheckDataset:
9
+ convert:
10
+ enable: False
11
+ src_dataset_type: null
12
+ split:
13
+ enable: False
14
+ train_percent: null
15
+ val_percent: null
16
+
17
+ Train:
18
+ epochs_iters: 20
19
+ batch_size: 8
20
+ learning_rate: 0.001
21
+ pretrain_weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/eslav_PP-OCRv5_mobile_rec_pretrained.pdparams
22
+ resume_path: null
23
+ log_interval: 20
24
+ eval_interval: 1
25
+ save_interval: 1
26
+
27
+ Evaluate:
28
+ weight_path: "output/best_accuracy/best_accuracy.pdparams"
29
+ log_interval: 1
30
+
31
+ Export:
32
+ weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/eslav_PP-OCRv5_mobile_rec_pretrained.pdparams
33
+
34
+ Predict:
35
+ batch_size: 1
36
+ model_dir: "output/best_accuracy/inference"
37
+ input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_rec_001.png"
38
+ kernel_option:
39
+ run_mode: paddle
@@ -0,0 +1,39 @@
1
+ Global:
2
+ model: korean_PP-OCRv5_mobile_rec
3
+ mode: check_dataset # check_dataset/train/evaluate/predict
4
+ dataset_dir: "/paddle/dataset/paddlex/ocr_rec/ocr_rec_dataset_examples"
5
+ device: gpu:0,1,2,3
6
+ output: "output"
7
+
8
+ CheckDataset:
9
+ convert:
10
+ enable: False
11
+ src_dataset_type: null
12
+ split:
13
+ enable: False
14
+ train_percent: null
15
+ val_percent: null
16
+
17
+ Train:
18
+ epochs_iters: 20
19
+ batch_size: 8
20
+ learning_rate: 0.001
21
+ pretrain_weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/korean_PP-OCRv5_mobile_rec_pretrained.pdparams
22
+ resume_path: null
23
+ log_interval: 20
24
+ eval_interval: 1
25
+ save_interval: 1
26
+
27
+ Evaluate:
28
+ weight_path: "output/best_accuracy/best_accuracy.pdparams"
29
+ log_interval: 1
30
+
31
+ Export:
32
+ weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/korean_PP-OCRv5_mobile_rec_pretrained.pdparams
33
+
34
+ Predict:
35
+ batch_size: 1
36
+ model_dir: "output/best_accuracy/inference"
37
+ input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_rec_003_korean.png"
38
+ kernel_option:
39
+ run_mode: paddle
@@ -0,0 +1,39 @@
1
+ Global:
2
+ model: latin_PP-OCRv5_mobile_rec
3
+ mode: check_dataset # check_dataset/train/evaluate/predict
4
+ dataset_dir: "/paddle/dataset/paddlex/ocr_rec/ocr_rec_dataset_examples"
5
+ device: gpu:0,1,2,3
6
+ output: "output"
7
+
8
+ CheckDataset:
9
+ convert:
10
+ enable: False
11
+ src_dataset_type: null
12
+ split:
13
+ enable: False
14
+ train_percent: null
15
+ val_percent: null
16
+
17
+ Train:
18
+ epochs_iters: 20
19
+ batch_size: 8
20
+ learning_rate: 0.001
21
+ pretrain_weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/latin_PP-OCRv5_mobile_rec_pretrained.pdparams
22
+ resume_path: null
23
+ log_interval: 20
24
+ eval_interval: 1
25
+ save_interval: 1
26
+
27
+ Evaluate:
28
+ weight_path: "output/best_accuracy/best_accuracy.pdparams"
29
+ log_interval: 1
30
+
31
+ Export:
32
+ weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/latin_PP-OCRv5_mobile_rec_pretrained.pdparams
33
+
34
+ Predict:
35
+ batch_size: 1
36
+ model_dir: "output/best_accuracy/inference"
37
+ input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_rec_009_latin.png"
38
+ kernel_option:
39
+ run_mode: paddle
@@ -0,0 +1,261 @@
1
+
2
+ pipeline_name: PP-DocTranslation
3
+
4
+ use_layout_parser: True
5
+
6
+ SubModules:
7
+ LLM_Chat:
8
+ module_name: chat_bot
9
+ model_name: ernie-3.5-8k
10
+ base_url: "https://qianfan.baidubce.com/v2"
11
+ api_type: openai
12
+ api_key: "api_key" # Set this to a real API key
13
+
14
+ PromptEngneering:
15
+ Translate_CommonText:
16
+ module_name: prompt_engneering
17
+ task_type: translate_prompt
18
+
19
+ task_description: '你是一位资深的多语种语言翻译专家,精通多种语言的语法、词汇、文化背景以及语言风格。你的任务是将文本从一种语言准确地转换为另一种语言,同时精准地保留原文的语义、风格和语调,确保翻译内容在目标语言中自然流畅且富有文化适应性。'
20
+
21
+ output_format: '输出应为翻译后的文本,并与原文保持格式一致,包括标点符号和段落结构。如果原文中包含特定的格式(如表格、公式、列表等),翻译后的文本也应保持相同的格式。'
22
+
23
+ rules_str: '通用规则:
24
+ 1. 翻译应确保语义准确完整,并符合目标语言的表达习惯。
25
+ 2. 保留原文的风格和语调,以传达相同的情感和意图。
26
+ 3. 专有名词(如人名、地名、品牌名等)应保持不变,除非它们在目标语言中有公认的翻译。
27
+ 4. 文化特定的表达或成语需根据目标语言的文化背景进行适当的转换或解释。
28
+ 5. 避免使用机器翻译工具的简单直译,需根据上下文进行调整和优化。
29
+ 6. 原文中可能包含的非文本元素(如HTML语法中的图片、表格、公式等)应保持不变。
30
+ 7. 原文中可能包含的代码块,如编程语言代码等,应保持代码块的完整性,不要对代码进行调整。
31
+ 8. 翻译完成后,应仔细校对,确保没有语法和拼写错误。'
32
+ few_shot_demo_text_content:
33
+ few_shot_demo_key_value_list:
34
+
35
+ SubPipelines:
36
+ LayoutParser:
37
+ pipeline_name: PP-StructureV3
38
+
39
+ batch_size: 8
40
+
41
+ use_doc_preprocessor: True
42
+ use_seal_recognition: True
43
+ use_table_recognition: True
44
+ use_formula_recognition: True
45
+ use_chart_recognition: True
46
+ use_region_detection: True
47
+
48
+ SubModules:
49
+ LayoutDetection:
50
+ module_name: layout_detection
51
+ model_name: PP-DocLayout_plus-L
52
+ model_dir: null
53
+ batch_size: 8
54
+ threshold:
55
+ 0: 0.3 # paragraph_title
56
+ 1: 0.5 # image
57
+ 2: 0.4 # text
58
+ 3: 0.5 # number
59
+ 4: 0.5 # abstract
60
+ 5: 0.5 # content
61
+ 6: 0.5 # figure_table_chart_title
62
+ 7: 0.3 # formula
63
+ 8: 0.5 # table
64
+ 9: 0.5 # reference
65
+ 10: 0.5 # doc_title
66
+ 11: 0.5 # footnote
67
+ 12: 0.5 # header
68
+ 13: 0.5 # algorithm
69
+ 14: 0.5 # footer
70
+ 15: 0.45 # seal
71
+ 16: 0.5 # chart
72
+ 17: 0.5 # formula_number
73
+ 18: 0.5 # aside_text
74
+ 19: 0.5 # reference_content
75
+ layout_nms: True
76
+ layout_unclip_ratio: [1.0, 1.0]
77
+ layout_merge_bboxes_mode:
78
+ 0: "large" # paragraph_title
79
+ 1: "large" # image
80
+ 2: "union" # text
81
+ 3: "union" # number
82
+ 4: "union" # abstract
83
+ 5: "union" # content
84
+ 6: "union" # figure_table_chart_title
85
+ 7: "large" # formula
86
+ 8: "union" # table
87
+ 9: "union" # reference
88
+ 10: "union" # doc_title
89
+ 11: "union" # footnote
90
+ 12: "union" # header
91
+ 13: "union" # algorithm
92
+ 14: "union" # footer
93
+ 15: "union" # seal
94
+ 16: "large" # chart
95
+ 17: "union" # formula_number
96
+ 18: "union" # aside_text
97
+ 19: "union" # reference_content
98
+ ChartRecognition:
99
+ module_name: chart_recognition
100
+ model_name: PP-Chart2Table
101
+ model_dir: null
102
+ batch_size: 1
103
+ RegionDetection:
104
+ module_name: layout_detection
105
+ model_name: PP-DocBlockLayout
106
+ model_dir: null
107
+ layout_nms: True
108
+ layout_merge_bboxes_mode: "small"
109
+
110
+ SubPipelines:
111
+ DocPreprocessor:
112
+ pipeline_name: doc_preprocessor
113
+ batch_size: 8
114
+ use_doc_orientation_classify: True
115
+ use_doc_unwarping: True
116
+ SubModules:
117
+ DocOrientationClassify:
118
+ module_name: doc_text_orientation
119
+ model_name: PP-LCNet_x1_0_doc_ori
120
+ model_dir: null
121
+ batch_size: 8
122
+ DocUnwarping:
123
+ module_name: image_unwarping
124
+ model_name: UVDoc
125
+ model_dir: null
126
+
127
+ GeneralOCR:
128
+ pipeline_name: OCR
129
+ batch_size: 8
130
+ text_type: general
131
+ use_doc_preprocessor: False
132
+ use_textline_orientation: True
133
+ SubModules:
134
+ TextDetection:
135
+ module_name: text_detection
136
+ model_name: PP-OCRv5_server_det
137
+ model_dir: null
138
+ limit_side_len: 736
139
+ limit_type: min
140
+ max_side_limit: 4000
141
+ thresh: 0.3
142
+ box_thresh: 0.6
143
+ unclip_ratio: 1.5
144
+ TextLineOrientation:
145
+ module_name: textline_orientation
146
+ model_name: PP-LCNet_x1_0_textline_ori
147
+ model_dir: null
148
+ batch_size: 8
149
+ TextRecognition:
150
+ module_name: text_recognition
151
+ model_name: PP-OCRv5_server_rec
152
+ model_dir: null
153
+ batch_size: 8
154
+ score_thresh: 0.0
155
+
156
+
157
+ TableRecognition:
158
+ pipeline_name: table_recognition_v2
159
+ use_layout_detection: False
160
+ use_doc_preprocessor: False
161
+ use_ocr_model: False
162
+ SubModules:
163
+ TableClassification:
164
+ module_name: table_classification
165
+ model_name: PP-LCNet_x1_0_table_cls
166
+ model_dir: null
167
+
168
+ WiredTableStructureRecognition:
169
+ module_name: table_structure_recognition
170
+ model_name: SLANeXt_wired
171
+ model_dir: null
172
+
173
+ WirelessTableStructureRecognition:
174
+ module_name: table_structure_recognition
175
+ model_name: SLANet_plus
176
+ model_dir: null
177
+
178
+ WiredTableCellsDetection:
179
+ module_name: table_cells_detection
180
+ model_name: RT-DETR-L_wired_table_cell_det
181
+ model_dir: null
182
+
183
+ WirelessTableCellsDetection:
184
+ module_name: table_cells_detection
185
+ model_name: RT-DETR-L_wireless_table_cell_det
186
+ model_dir: null
187
+
188
+ TableOrientationClassify:
189
+ module_name: doc_text_orientation
190
+ model_name: PP-LCNet_x1_0_doc_ori
191
+ model_dir: null
192
+ SubPipelines:
193
+ GeneralOCR:
194
+ pipeline_name: OCR
195
+ text_type: general
196
+ use_doc_preprocessor: False
197
+ use_textline_orientation: True
198
+ SubModules:
199
+ TextDetection:
200
+ module_name: text_detection
201
+ model_name: PP-OCRv5_server_det
202
+ model_dir: null
203
+ limit_side_len: 736
204
+ limit_type: min
205
+ max_side_limit: 4000
206
+ thresh: 0.3
207
+ box_thresh: 0.4
208
+ unclip_ratio: 1.5
209
+ TextLineOrientation:
210
+ module_name: textline_orientation
211
+ model_name: PP-LCNet_x1_0_textline_ori
212
+ model_dir: null
213
+ batch_size: 8
214
+ TextRecognition:
215
+ module_name: text_recognition
216
+ model_name: PP-OCRv5_server_rec
217
+ model_dir: null
218
+ batch_size: 8
219
+ score_thresh: 0.0
220
+
221
+ SealRecognition:
222
+ pipeline_name: seal_recognition
223
+ batch_size: 8
224
+ use_layout_detection: False
225
+ use_doc_preprocessor: False
226
+ SubPipelines:
227
+ SealOCR:
228
+ pipeline_name: OCR
229
+ batch_size: 8
230
+ text_type: seal
231
+ use_doc_preprocessor: False
232
+ use_textline_orientation: False
233
+ SubModules:
234
+ TextDetection:
235
+ module_name: seal_text_detection
236
+ model_name: PP-OCRv4_server_seal_det
237
+ model_dir: null
238
+ limit_side_len: 736
239
+ limit_type: min
240
+ max_side_limit: 4000
241
+ thresh: 0.2
242
+ box_thresh: 0.6
243
+ unclip_ratio: 0.5
244
+ TextRecognition:
245
+ module_name: text_recognition
246
+ model_name: PP-OCRv5_server_rec
247
+ model_dir: null
248
+ batch_size: 8
249
+ score_thresh: 0
250
+
251
+ FormulaRecognition:
252
+ pipeline_name: formula_recognition
253
+ batch_size: 8
254
+ use_layout_detection: False
255
+ use_doc_preprocessor: False
256
+ SubModules:
257
+ FormulaRecognition:
258
+ module_name: formula_recognition
259
+ model_name: PP-FormulaNet_plus-L
260
+ model_dir: null
261
+ batch_size: 8
@@ -17,5 +17,6 @@ from .base_batch_sampler import BaseBatchSampler
17
17
  from .det_3d_batch_sampler import Det3DBatchSampler
18
18
  from .doc_vlm_batch_sampler import DocVLMBatchSampler
19
19
  from .image_batch_sampler import ImageBatchSampler
20
+ from .markdown_batch_sampler import MarkDownBatchSampler
20
21
  from .ts_batch_sampler import TSBatchSampler
21
22
  from .video_batch_sampler import VideoBatchSampler
@@ -0,0 +1,116 @@
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ from pathlib import Path
17
+
18
+ from ....utils import logging
19
+ from ....utils.cache import CACHE_DIR
20
+ from ....utils.download import download
21
+ from ...utils.io import MarkDownReader
22
+ from .base_batch_sampler import BaseBatchSampler, Batch
23
+
24
+
25
class MarkDownBatchSampler(BaseBatchSampler):
    """Batch sampler for markdown data, supporting markdown file inputs.

    Each input may be a local markdown file, a directory that is searched
    recursively for markdown files, or an ``http``-prefixed URL that is
    downloaded into the cache directory first.  File contents are read with
    ``MarkDownReader`` and grouped into ``Batch`` objects of ``self.batch_size``
    items (``batch_size`` is presumably provided by ``BaseBatchSampler`` --
    confirm against the base class).
    """

    # Recognised markdown extensions: lower case, without the leading dot.
    SUFFIX = ["md", "markdown", "mdown", "mkd"]

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base sampler and create the reader."""
        super().__init__(*args, **kwargs)
        self.md_reader = MarkDownReader()

    @staticmethod
    def _url_path(url: str) -> str:
        """Return only the path component of ``url`` (no query or fragment)."""
        # Imported locally so this class does not depend on a file-level import.
        from urllib.parse import urlparse

        return urlparse(url).path

    @staticmethod
    def _suffix_of(path: str) -> str:
        """Return the lower-cased text after the last ``.`` in ``path``.

        A path without any dot is returned whole (lower-cased), which mirrors
        the ``split(".")[-1]`` rule this sampler has always used.
        """
        return path.split(".")[-1].lower()

    def _download_from_url(self, in_path: str) -> str:
        """Download a file from a URL to a cache directory.

        Args:
            in_path (str): URL of the file to be downloaded.

        Returns:
            str: Path to the downloaded file.
        """
        # Name the cached file after the URL *path* so a query string or
        # fragment ("a.md?token=x", "a.md#intro") does not leak into the file
        # name.  Fall back to the raw URL's last segment when the path is empty.
        file_name = Path(self._url_path(in_path)).name or Path(in_path).name
        save_path = Path(CACHE_DIR) / "predict_input" / file_name
        download(in_path, save_path, overwrite=True)
        return save_path.as_posix()

    def _get_files_list(self, fp: str) -> list:
        """Get a list of markdown files from a directory or a single file path.

        Extensions are compared case-insensitively, consistent with ``sample``.

        Args:
            fp (str): Path to a directory or a single markdown file.

        Returns:
            list: Sorted list of markdown file paths.

        Raises:
            Exception: If the path is ``None`` or does not exist, or if no
                markdown file is found under it.
        """
        if fp is None or not os.path.exists(fp):
            raise Exception(f"Not found any markdown file in path: {fp}")

        file_list = []
        if os.path.isfile(fp):
            if self._suffix_of(fp) in self.SUFFIX:
                file_list.append(fp)
        elif os.path.isdir(fp):
            for root, _dirs, files in os.walk(fp):
                file_list.extend(
                    os.path.join(root, single_file)
                    for single_file in files
                    if self._suffix_of(single_file) in self.SUFFIX
                )
        if not file_list:
            raise Exception("Not found any file in {}".format(fp))
        return sorted(file_list)

    def sample(self, inputs: list) -> list:
        """Generate batches of data from inputs, which can only be file paths.

        This is a generator: despite the ``list`` annotation it yields ``Batch``
        objects one at a time.

        Args:
            inputs (list): List of markdown file paths, directories or URLs.
                A single non-list value is treated as a one-element list.

        Yields:
            Batch: Up to ``self.batch_size`` entries, each appended as
                ``(markdown_text, file_path)``.  The last batch may be smaller.

        Raises:
            Exception: Propagated from ``_get_files_list`` when a non-markdown
                input is not an existing path or contains no markdown files.
        """
        if not isinstance(inputs, list):
            inputs = [inputs]

        batch = Batch()
        for item in inputs:
            if not isinstance(item, str):
                logging.warning(
                    f"Not supported input data type! Only `str` is supported! So has been ignored: {item}."
                )
                continue

            if item.startswith("http"):
                # Decide on the extension from the URL path only, so that
                # "doc.md?x=1" is still recognised as markdown.
                suffix = self._suffix_of(self._url_path(item))
                local_path = self._download_from_url(item)
            else:
                suffix = self._suffix_of(item)
                local_path = item

            # A markdown-looking input is read directly; anything else is
            # treated as a file or directory to search for markdown files.
            if suffix in self.SUFFIX:
                md_paths = [local_path]
            else:
                md_paths = self._get_files_list(local_path)

            for md_path in md_paths:
                markdown_text = self.md_reader.read(md_path)
                batch.append(markdown_text, md_path)
                if len(batch) == self.batch_size:
                    yield batch
                    batch = Batch()

        # Flush the final, possibly partial batch.
        if len(batch) > 0:
            yield batch
@@ -36,6 +36,5 @@ class BaseCVResult(BaseResult, ImgMixin):
36
36
  if (page_idx := self.get("page_index", None)) is not None:
37
37
  fp = Path(fn)
38
38
  stem, suffix = fp.stem, fp.suffix
39
- return f"{stem}_{page_idx}{suffix}"
40
- else:
41
- return fn
39
+ fn = f"{stem}_{page_idx}{suffix}"
40
+ return fn
@@ -597,6 +597,8 @@ class VideoMixin:
597
597
  class MarkdownMixin:
598
598
  """Mixin class for adding Markdown handling capabilities."""
599
599
 
600
+ MARKDOWN_SAVE_KEYS = ["markdown_texts"]
601
+
600
602
  def __init__(self, *args: list, **kwargs: dict):
601
603
  """Initializes the Markdown writer and appends the save_to_markdown method to the save functions.
602
604
 
@@ -696,7 +698,7 @@ class MarkdownMixin:
696
698
  if data is None:
697
699
  return
698
700
  for key, value in data.items():
699
- if isinstance(value, str):
701
+ if key in self.MARKDOWN_SAVE_KEYS:
700
702
  save_mkd_func(save_path.as_posix(), value, *args, **kwargs)
701
703
  if isinstance(value, dict):
702
704
  base_save_path = save_path.parent
@@ -337,9 +337,11 @@ class BasePredictor(
337
337
  pp_option = PaddlePredictorOption(model_name=self.model_name)
338
338
  elif pp_option.model_name is None:
339
339
  pp_option.model_name = self.model_name
340
+ pp_option.reset_run_mode_by_default(model_name=self.model_name)
340
341
  if device_info:
341
342
  pp_option.device_type = device_info[0]
342
343
  pp_option.device_id = device_info[1]
344
+ pp_option.reset_run_mode_by_default(device_type=device_info[0])
343
345
  hpi_info = self.get_hpi_info()
344
346
  if hpi_info is not None:
345
347
  hpi_info = hpi_info.model_dump(exclude_unset=True)
@@ -687,6 +687,8 @@ class HPInfer(StaticInfer):
687
687
  return PaddleInfer(self._model_dir, self._model_file_prefix, option=pp_option)
688
688
 
689
689
  def _build_ui_runtime(self, backend, backend_config, ui_option=None):
690
+ # TODO: Validate the compatibility of backends with device types
691
+
690
692
  from ultra_infer import ModelFormat, Runtime, RuntimeOption
691
693
 
692
694
  if ui_option is None:
@@ -14,7 +14,7 @@
14
14
 
15
15
  import copy
16
16
  import inspect
17
- from typing import Optional, Union
17
+ from typing import List, Optional, Union
18
18
 
19
19
  import paddle
20
20
  import paddle.distributed as dist
@@ -86,7 +86,7 @@ def get_scale_by_dtype(dtype: str = None, return_positive: bool = True) -> float
86
86
  def get_unfinished_flag(
87
87
  input_ids: Tensor,
88
88
  unfinished_flag: Tensor,
89
- eos_token_id: Union[int, list[int], list[list[int]]],
89
+ eos_token_id: Union[int, List[int], List[List[int]]],
90
90
  ) -> Tensor:
91
91
  """get unfinished flag for generation step
92
92
 
@@ -27,7 +27,7 @@ from PIL import Image, ImageDraw, ImageFont
27
27
  from ....utils import logging
28
28
  from ....utils.deps import function_requires_deps, is_dep_available
29
29
  from ....utils.file_interface import custom_open
30
- from ....utils.fonts import PINGFANG_FONT_FILE_PATH
30
+ from ....utils.fonts import PINGFANG_FONT
31
31
  from ...common.result import BaseCVResult, JsonMixin
32
32
 
33
33
  if is_dep_available("opencv-contrib-python"):
@@ -308,7 +308,7 @@ def draw_formula_module(
308
308
  return formula_img
309
309
  else:
310
310
  img_right_text = draw_box_txt_fine(
311
- img_size, box, "Rendering Failed", PINGFANG_FONT_FILE_PATH
311
+ img_size, box, "Rendering Failed", PINGFANG_FONT.path
312
312
  )
313
313
  return img_right_text
314
314
 
@@ -19,7 +19,7 @@ import numpy as np
19
19
  import PIL
20
20
  from PIL import Image, ImageDraw, ImageFont
21
21
 
22
- from ....utils.fonts import PINGFANG_FONT_FILE_PATH
22
+ from ....utils.fonts import PINGFANG_FONT
23
23
  from ...common.result import BaseCVResult, JsonMixin
24
24
  from ...utils.color_map import get_colormap
25
25
 
@@ -47,9 +47,7 @@ class TopkResult(BaseCVResult):
47
47
  min_font_size = int(image_size[0] * 0.02)
48
48
  max_font_size = int(image_size[0] * 0.05)
49
49
  for font_size in range(max_font_size, min_font_size - 1, -1):
50
- font = ImageFont.truetype(
51
- PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8"
52
- )
50
+ font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
53
51
  if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
54
52
  text_width_tmp, text_height_tmp = draw.textsize(label_str, font)
55
53
  else:
@@ -58,7 +56,7 @@ class TopkResult(BaseCVResult):
58
56
  if text_width_tmp <= image_size[0]:
59
57
  break
60
58
  else:
61
- font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, min_font_size)
59
+ font = ImageFont.truetype(PINGFANG_FONT.path, min_font_size)
62
60
  color_list = get_colormap(rgb=True)
63
61
  color = tuple(color_list[0])
64
62
  font_color = tuple(self._get_font_colormap(3))
@@ -18,7 +18,7 @@ import numpy as np
18
18
  import PIL
19
19
  from PIL import Image, ImageDraw, ImageFont
20
20
 
21
- from ....utils.fonts import PINGFANG_FONT_FILE_PATH
21
+ from ....utils.fonts import PINGFANG_FONT
22
22
  from ...common.result import BaseCVResult, JsonMixin
23
23
  from ...utils.color_map import get_colormap
24
24
 
@@ -43,7 +43,7 @@ class MLClassResult(BaseCVResult):
43
43
  image_width, image_height = image.size
44
44
  font_size = int(image_width * 0.06)
45
45
 
46
- font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size)
46
+ font = ImageFont.truetype(PINGFANG_FONT.path, font_size)
47
47
  text_lines = []
48
48
  row_width = 0
49
49
  row_height = 0
@@ -18,7 +18,7 @@ from typing import List
18
18
  import PIL
19
19
  from PIL import Image, ImageDraw, ImageFont
20
20
 
21
- from ....utils.fonts import PINGFANG_FONT_FILE_PATH
21
+ from ....utils.fonts import PINGFANG_FONT
22
22
  from ...common.result import BaseCVResult, JsonMixin
23
23
  from ...utils.color_map import font_colormap, get_colormap
24
24
 
@@ -32,7 +32,7 @@ def draw_box(img: Image.Image, boxes: List[dict]) -> Image.Image:
32
32
  img (PIL.Image.Image): visualized image
33
33
  """
34
34
  font_size = int(0.018 * int(img.width)) + 2
35
- font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
35
+ font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
36
36
 
37
37
  draw_thickness = int(max(img.size) * 0.002)
38
38
  draw = ImageDraw.Draw(img)
@@ -199,6 +199,9 @@ class GroundingDINOPostProcessor(object):
199
199
  tokenized = self.tokenizer(prompt)
200
200
  if posmap.dim() == 1:
201
201
  non_zero_idx = posmap.nonzero(as_tuple=True)[0].squeeze(-1).tolist()
202
+ non_zero_idx = (
203
+ [non_zero_idx] if not isinstance(non_zero_idx, list) else non_zero_idx
204
+ )
202
205
  token_ids = [tokenized["input_ids"][i] for i in non_zero_idx]
203
206
  return self.tokenizer.decode(token_ids)
204
207
  else: