paddlex 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/configs/modules/text_recognition/eslav_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/korean_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/latin_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/pipelines/PP-DocTranslation.yaml +261 -0
- paddlex/inference/common/batch_sampler/__init__.py +1 -0
- paddlex/inference/common/batch_sampler/markdown_batch_sampler.py +116 -0
- paddlex/inference/common/result/base_cv_result.py +2 -3
- paddlex/inference/common/result/mixin.py +3 -1
- paddlex/inference/models/base/predictor/base_predictor.py +2 -0
- paddlex/inference/models/common/static_infer.py +2 -0
- paddlex/inference/models/common/vlm/generation/utils.py +2 -2
- paddlex/inference/models/formula_recognition/result.py +2 -2
- paddlex/inference/models/image_classification/result.py +3 -5
- paddlex/inference/models/image_multilabel_classification/result.py +2 -2
- paddlex/inference/models/object_detection/result.py +2 -2
- paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +3 -0
- paddlex/inference/models/text_recognition/predictor.py +51 -1
- paddlex/inference/models/text_recognition/result.py +5 -2
- paddlex/inference/models/video_classification/result.py +3 -3
- paddlex/inference/models/video_detection/result.py +2 -4
- paddlex/inference/pipelines/__init__.py +1 -0
- paddlex/inference/pipelines/attribute_recognition/result.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/__init__.py +1 -0
- paddlex/inference/pipelines/components/prompt_engineering/generate_translate_prompt.py +179 -0
- paddlex/inference/pipelines/doc_preprocessor/result.py +2 -2
- paddlex/inference/pipelines/formula_recognition/result.py +2 -2
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +2 -0
- paddlex/inference/pipelines/layout_parsing/result_v2.py +11 -4
- paddlex/inference/pipelines/ocr/pipeline.py +2 -0
- paddlex/inference/pipelines/ocr/result.py +11 -7
- paddlex/inference/pipelines/pp_doctranslation/__init__.py +15 -0
- paddlex/inference/pipelines/pp_doctranslation/pipeline.py +523 -0
- paddlex/inference/pipelines/pp_doctranslation/result.py +39 -0
- paddlex/inference/pipelines/pp_doctranslation/utils.py +260 -0
- paddlex/inference/pipelines/pp_shitu_v2/result.py +2 -2
- paddlex/inference/serving/basic_serving/_app.py +1 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +5 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -24
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +16 -26
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py +203 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +4 -2
- paddlex/inference/serving/infra/utils.py +22 -17
- paddlex/inference/serving/schemas/anomaly_detection.py +1 -0
- paddlex/inference/serving/schemas/doc_preprocessor.py +1 -0
- paddlex/inference/serving/schemas/face_recognition.py +1 -0
- paddlex/inference/serving/schemas/formula_recognition.py +1 -0
- paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -0
- paddlex/inference/serving/schemas/image_classification.py +1 -0
- paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -0
- paddlex/inference/serving/schemas/instance_segmentation.py +1 -0
- paddlex/inference/serving/schemas/layout_parsing.py +1 -0
- paddlex/inference/serving/schemas/object_detection.py +1 -0
- paddlex/inference/serving/schemas/ocr.py +1 -0
- paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -0
- paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -0
- paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -0
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +5 -4
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +6 -5
- paddlex/inference/serving/schemas/pp_doctranslation.py +115 -0
- paddlex/inference/serving/schemas/pp_shituv2.py +1 -0
- paddlex/inference/serving/schemas/pp_structurev3.py +2 -9
- paddlex/inference/serving/schemas/rotated_object_detection.py +1 -0
- paddlex/inference/serving/schemas/seal_recognition.py +1 -0
- paddlex/inference/serving/schemas/semantic_segmentation.py +1 -0
- paddlex/inference/serving/schemas/shared/ocr.py +8 -1
- paddlex/inference/serving/schemas/small_object_detection.py +1 -0
- paddlex/inference/serving/schemas/table_recognition.py +1 -0
- paddlex/inference/serving/schemas/table_recognition_v2.py +1 -0
- paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -0
- paddlex/inference/serving/schemas/ts_classification.py +1 -0
- paddlex/inference/serving/schemas/ts_forecast.py +1 -0
- paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -0
- paddlex/inference/utils/hpi.py +42 -14
- paddlex/inference/utils/hpi_model_info_collection.json +0 -2
- paddlex/inference/utils/io/__init__.py +1 -0
- paddlex/inference/utils/io/readers.py +46 -0
- paddlex/inference/utils/io/writers.py +2 -0
- paddlex/inference/utils/official_models.py +7 -0
- paddlex/inference/utils/pp_option.py +34 -18
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/m_3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/text_recognition/model_list.py +3 -0
- paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +27 -0
- paddlex/repo_manager/meta.py +3 -3
- paddlex/utils/device.py +4 -1
- paddlex/utils/download.py +10 -7
- paddlex/utils/{fonts/__init__.py → fonts.py} +45 -26
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/METADATA +25 -1
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/RECORD +134 -122
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/LICENSE +0 -0
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/WHEEL +0 -0
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/top_level.txt +0 -0
paddlex/.version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.0.2
|
1
|
+
3.1.0
|
@@ -0,0 +1,39 @@
|
|
1
|
+
Global:
|
2
|
+
model: eslav_PP-OCRv5_mobile_rec
|
3
|
+
mode: check_dataset # check_dataset/train/evaluate/predict
|
4
|
+
dataset_dir: "/paddle/dataset/paddlex/ocr_rec/ocr_rec_dataset_examples"
|
5
|
+
device: gpu:0,1,2,3
|
6
|
+
output: "output"
|
7
|
+
|
8
|
+
CheckDataset:
|
9
|
+
convert:
|
10
|
+
enable: False
|
11
|
+
src_dataset_type: null
|
12
|
+
split:
|
13
|
+
enable: False
|
14
|
+
train_percent: null
|
15
|
+
val_percent: null
|
16
|
+
|
17
|
+
Train:
|
18
|
+
epochs_iters: 20
|
19
|
+
batch_size: 8
|
20
|
+
learning_rate: 0.001
|
21
|
+
pretrain_weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/eslav_PP-OCRv5_mobile_rec_pretrained.pdparams
|
22
|
+
resume_path: null
|
23
|
+
log_interval: 20
|
24
|
+
eval_interval: 1
|
25
|
+
save_interval: 1
|
26
|
+
|
27
|
+
Evaluate:
|
28
|
+
weight_path: "output/best_accuracy/best_accuracy.pdparams"
|
29
|
+
log_interval: 1
|
30
|
+
|
31
|
+
Export:
|
32
|
+
weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/eslav_PP-OCRv5_mobile_rec_pretrained.pdparams
|
33
|
+
|
34
|
+
Predict:
|
35
|
+
batch_size: 1
|
36
|
+
model_dir: "output/best_accuracy/inference"
|
37
|
+
input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_rec_001.png"
|
38
|
+
kernel_option:
|
39
|
+
run_mode: paddle
|
@@ -0,0 +1,39 @@
|
|
1
|
+
Global:
|
2
|
+
model: korean_PP-OCRv5_mobile_rec
|
3
|
+
mode: check_dataset # check_dataset/train/evaluate/predict
|
4
|
+
dataset_dir: "/paddle/dataset/paddlex/ocr_rec/ocr_rec_dataset_examples"
|
5
|
+
device: gpu:0,1,2,3
|
6
|
+
output: "output"
|
7
|
+
|
8
|
+
CheckDataset:
|
9
|
+
convert:
|
10
|
+
enable: False
|
11
|
+
src_dataset_type: null
|
12
|
+
split:
|
13
|
+
enable: False
|
14
|
+
train_percent: null
|
15
|
+
val_percent: null
|
16
|
+
|
17
|
+
Train:
|
18
|
+
epochs_iters: 20
|
19
|
+
batch_size: 8
|
20
|
+
learning_rate: 0.001
|
21
|
+
pretrain_weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/korean_PP-OCRv5_mobile_rec_pretrained.pdparams
|
22
|
+
resume_path: null
|
23
|
+
log_interval: 20
|
24
|
+
eval_interval: 1
|
25
|
+
save_interval: 1
|
26
|
+
|
27
|
+
Evaluate:
|
28
|
+
weight_path: "output/best_accuracy/best_accuracy.pdparams"
|
29
|
+
log_interval: 1
|
30
|
+
|
31
|
+
Export:
|
32
|
+
weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/korean_PP-OCRv5_mobile_rec_pretrained.pdparams
|
33
|
+
|
34
|
+
Predict:
|
35
|
+
batch_size: 1
|
36
|
+
model_dir: "output/best_accuracy/inference"
|
37
|
+
input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_rec_003_korean.png"
|
38
|
+
kernel_option:
|
39
|
+
run_mode: paddle
|
@@ -0,0 +1,39 @@
|
|
1
|
+
Global:
|
2
|
+
model: latin_PP-OCRv5_mobile_rec
|
3
|
+
mode: check_dataset # check_dataset/train/evaluate/predict
|
4
|
+
dataset_dir: "/paddle/dataset/paddlex/ocr_rec/ocr_rec_dataset_examples"
|
5
|
+
device: gpu:0,1,2,3
|
6
|
+
output: "output"
|
7
|
+
|
8
|
+
CheckDataset:
|
9
|
+
convert:
|
10
|
+
enable: False
|
11
|
+
src_dataset_type: null
|
12
|
+
split:
|
13
|
+
enable: False
|
14
|
+
train_percent: null
|
15
|
+
val_percent: null
|
16
|
+
|
17
|
+
Train:
|
18
|
+
epochs_iters: 20
|
19
|
+
batch_size: 8
|
20
|
+
learning_rate: 0.001
|
21
|
+
pretrain_weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/latin_PP-OCRv5_mobile_rec_pretrained.pdparams
|
22
|
+
resume_path: null
|
23
|
+
log_interval: 20
|
24
|
+
eval_interval: 1
|
25
|
+
save_interval: 1
|
26
|
+
|
27
|
+
Evaluate:
|
28
|
+
weight_path: "output/best_accuracy/best_accuracy.pdparams"
|
29
|
+
log_interval: 1
|
30
|
+
|
31
|
+
Export:
|
32
|
+
weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/latin_PP-OCRv5_mobile_rec_pretrained.pdparams
|
33
|
+
|
34
|
+
Predict:
|
35
|
+
batch_size: 1
|
36
|
+
model_dir: "output/best_accuracy/inference"
|
37
|
+
input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_ocr_rec_009_latin.png"
|
38
|
+
kernel_option:
|
39
|
+
run_mode: paddle
|
@@ -0,0 +1,261 @@
|
|
1
|
+
|
2
|
+
pipeline_name: PP-DocTranslation
|
3
|
+
|
4
|
+
use_layout_parser: True
|
5
|
+
|
6
|
+
SubModules:
|
7
|
+
LLM_Chat:
|
8
|
+
module_name: chat_bot
|
9
|
+
model_name: ernie-3.5-8k
|
10
|
+
base_url: "https://qianfan.baidubce.com/v2"
|
11
|
+
api_type: openai
|
12
|
+
api_key: "api_key" # Set this to a real API key
|
13
|
+
|
14
|
+
PromptEngneering:
|
15
|
+
Translate_CommonText:
|
16
|
+
module_name: prompt_engneering
|
17
|
+
task_type: translate_prompt
|
18
|
+
|
19
|
+
task_description: '你是一位资深的多语种语言翻译专家,精通多种语言的语法、词汇、文化背景以及语言风格。你的任务是将文本从一种语言准确地转换为另一种语言,同时精准地保留原文的语义、风格和语调,确保翻译内容在目标语言中自然流畅且富有文化适应性。'
|
20
|
+
|
21
|
+
output_format: '输出应为翻译后的文本,并与原文保持格式一致,包括标点符号和段落结构。如果原文中包含特定的格式(如表格、公式、列表等),翻译后的文本也应保持相同的格式。'
|
22
|
+
|
23
|
+
rules_str: '通用规则:
|
24
|
+
1. 翻译应确保语义准确完整,并符合目标语言的表达习惯。
|
25
|
+
2. 保留原文的风格和语调,以传达相同的情感和意图。
|
26
|
+
3. 专有名词(如人名、地名、品牌名等)应保持不变,除非它们在目标语言中有公认的翻译。
|
27
|
+
4. 文化特定的表达或成语需根据目标语言的文化背景进行适当的转换或解释。
|
28
|
+
5. 避免使用机器翻译工具的简单直译,需根据上下文进行调整和优化。
|
29
|
+
6. 原文中可能包含的非文本元素(如HTML语法中的图片、表格、公式等)应保持不变。
|
30
|
+
7. 原文中可能包含的代码块,如编程语言代码等,应保持代码块的完整性,不要对代码进行调整。
|
31
|
+
8. 翻译完成后,应仔细校对,确保没有语法和拼写错误。'
|
32
|
+
few_shot_demo_text_content:
|
33
|
+
few_shot_demo_key_value_list:
|
34
|
+
|
35
|
+
SubPipelines:
|
36
|
+
LayoutParser:
|
37
|
+
pipeline_name: PP-StructureV3
|
38
|
+
|
39
|
+
batch_size: 8
|
40
|
+
|
41
|
+
use_doc_preprocessor: True
|
42
|
+
use_seal_recognition: True
|
43
|
+
use_table_recognition: True
|
44
|
+
use_formula_recognition: True
|
45
|
+
use_chart_recognition: True
|
46
|
+
use_region_detection: True
|
47
|
+
|
48
|
+
SubModules:
|
49
|
+
LayoutDetection:
|
50
|
+
module_name: layout_detection
|
51
|
+
model_name: PP-DocLayout_plus-L
|
52
|
+
model_dir: null
|
53
|
+
batch_size: 8
|
54
|
+
threshold:
|
55
|
+
0: 0.3 # paragraph_title
|
56
|
+
1: 0.5 # image
|
57
|
+
2: 0.4 # text
|
58
|
+
3: 0.5 # number
|
59
|
+
4: 0.5 # abstract
|
60
|
+
5: 0.5 # content
|
61
|
+
6: 0.5 # figure_table_chart_title
|
62
|
+
7: 0.3 # formula
|
63
|
+
8: 0.5 # table
|
64
|
+
9: 0.5 # reference
|
65
|
+
10: 0.5 # doc_title
|
66
|
+
11: 0.5 # footnote
|
67
|
+
12: 0.5 # header
|
68
|
+
13: 0.5 # algorithm
|
69
|
+
14: 0.5 # footer
|
70
|
+
15: 0.45 # seal
|
71
|
+
16: 0.5 # chart
|
72
|
+
17: 0.5 # formula_number
|
73
|
+
18: 0.5 # aside_text
|
74
|
+
19: 0.5 # reference_content
|
75
|
+
layout_nms: True
|
76
|
+
layout_unclip_ratio: [1.0, 1.0]
|
77
|
+
layout_merge_bboxes_mode:
|
78
|
+
0: "large" # paragraph_title
|
79
|
+
1: "large" # image
|
80
|
+
2: "union" # text
|
81
|
+
3: "union" # number
|
82
|
+
4: "union" # abstract
|
83
|
+
5: "union" # content
|
84
|
+
6: "union" # figure_table_chart_title
|
85
|
+
7: "large" # formula
|
86
|
+
8: "union" # table
|
87
|
+
9: "union" # reference
|
88
|
+
10: "union" # doc_title
|
89
|
+
11: "union" # footnote
|
90
|
+
12: "union" # header
|
91
|
+
13: "union" # algorithm
|
92
|
+
14: "union" # footer
|
93
|
+
15: "union" # seal
|
94
|
+
16: "large" # chart
|
95
|
+
17: "union" # formula_number
|
96
|
+
18: "union" # aside_text
|
97
|
+
19: "union" # reference_content
|
98
|
+
ChartRecognition:
|
99
|
+
module_name: chart_recognition
|
100
|
+
model_name: PP-Chart2Table
|
101
|
+
model_dir: null
|
102
|
+
batch_size: 1
|
103
|
+
RegionDetection:
|
104
|
+
module_name: layout_detection
|
105
|
+
model_name: PP-DocBlockLayout
|
106
|
+
model_dir: null
|
107
|
+
layout_nms: True
|
108
|
+
layout_merge_bboxes_mode: "small"
|
109
|
+
|
110
|
+
SubPipelines:
|
111
|
+
DocPreprocessor:
|
112
|
+
pipeline_name: doc_preprocessor
|
113
|
+
batch_size: 8
|
114
|
+
use_doc_orientation_classify: True
|
115
|
+
use_doc_unwarping: True
|
116
|
+
SubModules:
|
117
|
+
DocOrientationClassify:
|
118
|
+
module_name: doc_text_orientation
|
119
|
+
model_name: PP-LCNet_x1_0_doc_ori
|
120
|
+
model_dir: null
|
121
|
+
batch_size: 8
|
122
|
+
DocUnwarping:
|
123
|
+
module_name: image_unwarping
|
124
|
+
model_name: UVDoc
|
125
|
+
model_dir: null
|
126
|
+
|
127
|
+
GeneralOCR:
|
128
|
+
pipeline_name: OCR
|
129
|
+
batch_size: 8
|
130
|
+
text_type: general
|
131
|
+
use_doc_preprocessor: False
|
132
|
+
use_textline_orientation: True
|
133
|
+
SubModules:
|
134
|
+
TextDetection:
|
135
|
+
module_name: text_detection
|
136
|
+
model_name: PP-OCRv5_server_det
|
137
|
+
model_dir: null
|
138
|
+
limit_side_len: 736
|
139
|
+
limit_type: min
|
140
|
+
max_side_limit: 4000
|
141
|
+
thresh: 0.3
|
142
|
+
box_thresh: 0.6
|
143
|
+
unclip_ratio: 1.5
|
144
|
+
TextLineOrientation:
|
145
|
+
module_name: textline_orientation
|
146
|
+
model_name: PP-LCNet_x1_0_textline_ori
|
147
|
+
model_dir: null
|
148
|
+
batch_size: 8
|
149
|
+
TextRecognition:
|
150
|
+
module_name: text_recognition
|
151
|
+
model_name: PP-OCRv5_server_rec
|
152
|
+
model_dir: null
|
153
|
+
batch_size: 8
|
154
|
+
score_thresh: 0.0
|
155
|
+
|
156
|
+
|
157
|
+
TableRecognition:
|
158
|
+
pipeline_name: table_recognition_v2
|
159
|
+
use_layout_detection: False
|
160
|
+
use_doc_preprocessor: False
|
161
|
+
use_ocr_model: False
|
162
|
+
SubModules:
|
163
|
+
TableClassification:
|
164
|
+
module_name: table_classification
|
165
|
+
model_name: PP-LCNet_x1_0_table_cls
|
166
|
+
model_dir: null
|
167
|
+
|
168
|
+
WiredTableStructureRecognition:
|
169
|
+
module_name: table_structure_recognition
|
170
|
+
model_name: SLANeXt_wired
|
171
|
+
model_dir: null
|
172
|
+
|
173
|
+
WirelessTableStructureRecognition:
|
174
|
+
module_name: table_structure_recognition
|
175
|
+
model_name: SLANet_plus
|
176
|
+
model_dir: null
|
177
|
+
|
178
|
+
WiredTableCellsDetection:
|
179
|
+
module_name: table_cells_detection
|
180
|
+
model_name: RT-DETR-L_wired_table_cell_det
|
181
|
+
model_dir: null
|
182
|
+
|
183
|
+
WirelessTableCellsDetection:
|
184
|
+
module_name: table_cells_detection
|
185
|
+
model_name: RT-DETR-L_wireless_table_cell_det
|
186
|
+
model_dir: null
|
187
|
+
|
188
|
+
TableOrientationClassify:
|
189
|
+
module_name: doc_text_orientation
|
190
|
+
model_name: PP-LCNet_x1_0_doc_ori
|
191
|
+
model_dir: null
|
192
|
+
SubPipelines:
|
193
|
+
GeneralOCR:
|
194
|
+
pipeline_name: OCR
|
195
|
+
text_type: general
|
196
|
+
use_doc_preprocessor: False
|
197
|
+
use_textline_orientation: True
|
198
|
+
SubModules:
|
199
|
+
TextDetection:
|
200
|
+
module_name: text_detection
|
201
|
+
model_name: PP-OCRv5_server_det
|
202
|
+
model_dir: null
|
203
|
+
limit_side_len: 736
|
204
|
+
limit_type: min
|
205
|
+
max_side_limit: 4000
|
206
|
+
thresh: 0.3
|
207
|
+
box_thresh: 0.4
|
208
|
+
unclip_ratio: 1.5
|
209
|
+
TextLineOrientation:
|
210
|
+
module_name: textline_orientation
|
211
|
+
model_name: PP-LCNet_x1_0_textline_ori
|
212
|
+
model_dir: null
|
213
|
+
batch_size: 8
|
214
|
+
TextRecognition:
|
215
|
+
module_name: text_recognition
|
216
|
+
model_name: PP-OCRv5_server_rec
|
217
|
+
model_dir: null
|
218
|
+
batch_size: 8
|
219
|
+
score_thresh: 0.0
|
220
|
+
|
221
|
+
SealRecognition:
|
222
|
+
pipeline_name: seal_recognition
|
223
|
+
batch_size: 8
|
224
|
+
use_layout_detection: False
|
225
|
+
use_doc_preprocessor: False
|
226
|
+
SubPipelines:
|
227
|
+
SealOCR:
|
228
|
+
pipeline_name: OCR
|
229
|
+
batch_size: 8
|
230
|
+
text_type: seal
|
231
|
+
use_doc_preprocessor: False
|
232
|
+
use_textline_orientation: False
|
233
|
+
SubModules:
|
234
|
+
TextDetection:
|
235
|
+
module_name: seal_text_detection
|
236
|
+
model_name: PP-OCRv4_server_seal_det
|
237
|
+
model_dir: null
|
238
|
+
limit_side_len: 736
|
239
|
+
limit_type: min
|
240
|
+
max_side_limit: 4000
|
241
|
+
thresh: 0.2
|
242
|
+
box_thresh: 0.6
|
243
|
+
unclip_ratio: 0.5
|
244
|
+
TextRecognition:
|
245
|
+
module_name: text_recognition
|
246
|
+
model_name: PP-OCRv5_server_rec
|
247
|
+
model_dir: null
|
248
|
+
batch_size: 8
|
249
|
+
score_thresh: 0
|
250
|
+
|
251
|
+
FormulaRecognition:
|
252
|
+
pipeline_name: formula_recognition
|
253
|
+
batch_size: 8
|
254
|
+
use_layout_detection: False
|
255
|
+
use_doc_preprocessor: False
|
256
|
+
SubModules:
|
257
|
+
FormulaRecognition:
|
258
|
+
module_name: formula_recognition
|
259
|
+
model_name: PP-FormulaNet_plus-L
|
260
|
+
model_dir: null
|
261
|
+
batch_size: 8
|
@@ -17,5 +17,6 @@ from .base_batch_sampler import BaseBatchSampler
|
|
17
17
|
from .det_3d_batch_sampler import Det3DBatchSampler
|
18
18
|
from .doc_vlm_batch_sampler import DocVLMBatchSampler
|
19
19
|
from .image_batch_sampler import ImageBatchSampler
|
20
|
+
from .markdown_batch_sampler import MarkDownBatchSampler
|
20
21
|
from .ts_batch_sampler import TSBatchSampler
|
21
22
|
from .video_batch_sampler import VideoBatchSampler
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
import os
|
16
|
+
from pathlib import Path
|
17
|
+
|
18
|
+
from ....utils import logging
|
19
|
+
from ....utils.cache import CACHE_DIR
|
20
|
+
from ....utils.download import download
|
21
|
+
from ...utils.io import MarkDownReader
|
22
|
+
from .base_batch_sampler import BaseBatchSampler, Batch
|
23
|
+
|
24
|
+
|
25
|
+
class MarkDownBatchSampler(BaseBatchSampler):
|
26
|
+
"""Batch sampler for markdown data, supporting markdown file inputs."""
|
27
|
+
|
28
|
+
SUFFIX = ["md", "markdown", "mdown", "mkd"]
|
29
|
+
|
30
|
+
def __init__(self, *args, **kwargs):
|
31
|
+
super().__init__(*args, **kwargs)
|
32
|
+
self.md_reader = MarkDownReader()
|
33
|
+
|
34
|
+
def _download_from_url(self, in_path: str) -> str:
|
35
|
+
"""Download a file from a URL to a cache directory.
|
36
|
+
|
37
|
+
Args:
|
38
|
+
in_path (str): URL of the file to be downloaded.
|
39
|
+
|
40
|
+
Returns:
|
41
|
+
str: Path to the downloaded file.
|
42
|
+
"""
|
43
|
+
file_name = Path(in_path).name
|
44
|
+
save_path = Path(CACHE_DIR) / "predict_input" / file_name
|
45
|
+
download(in_path, save_path, overwrite=True)
|
46
|
+
return save_path.as_posix()
|
47
|
+
|
48
|
+
def _get_files_list(self, fp: str) -> list:
|
49
|
+
"""Get a list of markdown files from a directory or a single file path.
|
50
|
+
|
51
|
+
Args:
|
52
|
+
fp (str): Path to a directory or a single markdown file.
|
53
|
+
|
54
|
+
Returns:
|
55
|
+
list: Sorted list of markdown file paths.
|
56
|
+
|
57
|
+
Raises:
|
58
|
+
Exception: If no markdown file is found in the path.
|
59
|
+
"""
|
60
|
+
file_list = []
|
61
|
+
if fp is None or not os.path.exists(fp):
|
62
|
+
raise Exception(f"Not found any markdown file in path: {fp}")
|
63
|
+
|
64
|
+
if os.path.isfile(fp) and fp.split(".")[-1] in self.SUFFIX:
|
65
|
+
file_list.append(fp)
|
66
|
+
elif os.path.isdir(fp):
|
67
|
+
for root, dirs, files in os.walk(fp):
|
68
|
+
for single_file in files:
|
69
|
+
if single_file.split(".")[-1] in self.SUFFIX:
|
70
|
+
file_list.append(os.path.join(root, single_file))
|
71
|
+
if len(file_list) == 0:
|
72
|
+
raise Exception("Not found any file in {}".format(fp))
|
73
|
+
file_list = sorted(file_list)
|
74
|
+
return file_list
|
75
|
+
|
76
|
+
def sample(self, inputs: list) -> list:
|
77
|
+
"""Generate batches of data from inputs, which can only be file paths.
|
78
|
+
|
79
|
+
Args:
|
80
|
+
inputs (list): List of markdown file paths.
|
81
|
+
|
82
|
+
Yields:
|
83
|
+
list: A batch of data which is a list of markdown file paths.
|
84
|
+
"""
|
85
|
+
if not isinstance(inputs, list):
|
86
|
+
inputs = [inputs]
|
87
|
+
|
88
|
+
batch = Batch()
|
89
|
+
for input in inputs:
|
90
|
+
if isinstance(input, str):
|
91
|
+
suffix = input.split(".")[-1].lower()
|
92
|
+
file_path = (
|
93
|
+
self._download_from_url(input)
|
94
|
+
if input.startswith("http")
|
95
|
+
else input
|
96
|
+
)
|
97
|
+
if suffix in self.SUFFIX:
|
98
|
+
markdown_text = self.md_reader.read(file_path)
|
99
|
+
batch.append(markdown_text, file_path)
|
100
|
+
if len(batch) == self.batch_size:
|
101
|
+
yield batch
|
102
|
+
batch = Batch()
|
103
|
+
else:
|
104
|
+
file_list = self._get_files_list(file_path)
|
105
|
+
for file_path in file_list:
|
106
|
+
markdown_text = self.md_reader.read(file_path)
|
107
|
+
batch.append(markdown_text, file_path)
|
108
|
+
if len(batch) == self.batch_size:
|
109
|
+
yield batch
|
110
|
+
batch = Batch()
|
111
|
+
else:
|
112
|
+
logging.warning(
|
113
|
+
f"Not supported input data type! Only `str` is supported! So has been ignored: {input}."
|
114
|
+
)
|
115
|
+
if len(batch) > 0:
|
116
|
+
yield batch
|
@@ -36,6 +36,5 @@ class BaseCVResult(BaseResult, ImgMixin):
|
|
36
36
|
if (page_idx := self.get("page_index", None)) is not None:
|
37
37
|
fp = Path(fn)
|
38
38
|
stem, suffix = fp.stem, fp.suffix
|
39
|
-
|
40
|
-
|
41
|
-
return fn
|
39
|
+
fn = f"{stem}_{page_idx}{suffix}"
|
40
|
+
return fn
|
@@ -597,6 +597,8 @@ class VideoMixin:
|
|
597
597
|
class MarkdownMixin:
|
598
598
|
"""Mixin class for adding Markdown handling capabilities."""
|
599
599
|
|
600
|
+
MARKDOWN_SAVE_KEYS = ["markdown_texts"]
|
601
|
+
|
600
602
|
def __init__(self, *args: list, **kwargs: dict):
|
601
603
|
"""Initializes the Markdown writer and appends the save_to_markdown method to the save functions.
|
602
604
|
|
@@ -696,7 +698,7 @@ class MarkdownMixin:
|
|
696
698
|
if data is None:
|
697
699
|
return
|
698
700
|
for key, value in data.items():
|
699
|
-
if
|
701
|
+
if key in self.MARKDOWN_SAVE_KEYS:
|
700
702
|
save_mkd_func(save_path.as_posix(), value, *args, **kwargs)
|
701
703
|
if isinstance(value, dict):
|
702
704
|
base_save_path = save_path.parent
|
@@ -337,9 +337,11 @@ class BasePredictor(
|
|
337
337
|
pp_option = PaddlePredictorOption(model_name=self.model_name)
|
338
338
|
elif pp_option.model_name is None:
|
339
339
|
pp_option.model_name = self.model_name
|
340
|
+
pp_option.reset_run_mode_by_default(model_name=self.model_name)
|
340
341
|
if device_info:
|
341
342
|
pp_option.device_type = device_info[0]
|
342
343
|
pp_option.device_id = device_info[1]
|
344
|
+
pp_option.reset_run_mode_by_default(device_type=device_info[0])
|
343
345
|
hpi_info = self.get_hpi_info()
|
344
346
|
if hpi_info is not None:
|
345
347
|
hpi_info = hpi_info.model_dump(exclude_unset=True)
|
@@ -687,6 +687,8 @@ class HPInfer(StaticInfer):
|
|
687
687
|
return PaddleInfer(self._model_dir, self._model_file_prefix, option=pp_option)
|
688
688
|
|
689
689
|
def _build_ui_runtime(self, backend, backend_config, ui_option=None):
|
690
|
+
# TODO: Validate the compatibility of backends with device types
|
691
|
+
|
690
692
|
from ultra_infer import ModelFormat, Runtime, RuntimeOption
|
691
693
|
|
692
694
|
if ui_option is None:
|
@@ -14,7 +14,7 @@
|
|
14
14
|
|
15
15
|
import copy
|
16
16
|
import inspect
|
17
|
-
from typing import Optional, Union
|
17
|
+
from typing import List, Optional, Union
|
18
18
|
|
19
19
|
import paddle
|
20
20
|
import paddle.distributed as dist
|
@@ -86,7 +86,7 @@ def get_scale_by_dtype(dtype: str = None, return_positive: bool = True) -> float
|
|
86
86
|
def get_unfinished_flag(
|
87
87
|
input_ids: Tensor,
|
88
88
|
unfinished_flag: Tensor,
|
89
|
-
eos_token_id: Union[int,
|
89
|
+
eos_token_id: Union[int, List[int], List[List[int]]],
|
90
90
|
) -> Tensor:
|
91
91
|
"""get unfinished flag for generation step
|
92
92
|
|
@@ -27,7 +27,7 @@ from PIL import Image, ImageDraw, ImageFont
|
|
27
27
|
from ....utils import logging
|
28
28
|
from ....utils.deps import function_requires_deps, is_dep_available
|
29
29
|
from ....utils.file_interface import custom_open
|
30
|
-
from ....utils.fonts import PINGFANG_FONT_FILE_PATH
|
30
|
+
from ....utils.fonts import PINGFANG_FONT
|
31
31
|
from ...common.result import BaseCVResult, JsonMixin
|
32
32
|
|
33
33
|
if is_dep_available("opencv-contrib-python"):
|
@@ -308,7 +308,7 @@ def draw_formula_module(
|
|
308
308
|
return formula_img
|
309
309
|
else:
|
310
310
|
img_right_text = draw_box_txt_fine(
|
311
|
-
img_size, box, "Rendering Failed", PINGFANG_FONT_FILE_PATH
|
311
|
+
img_size, box, "Rendering Failed", PINGFANG_FONT.path
|
312
312
|
)
|
313
313
|
return img_right_text
|
314
314
|
|
@@ -19,7 +19,7 @@ import numpy as np
|
|
19
19
|
import PIL
|
20
20
|
from PIL import Image, ImageDraw, ImageFont
|
21
21
|
|
22
|
-
from ....utils.fonts import PINGFANG_FONT_FILE_PATH
|
22
|
+
from ....utils.fonts import PINGFANG_FONT
|
23
23
|
from ...common.result import BaseCVResult, JsonMixin
|
24
24
|
from ...utils.color_map import get_colormap
|
25
25
|
|
@@ -47,9 +47,7 @@ class TopkResult(BaseCVResult):
|
|
47
47
|
min_font_size = int(image_size[0] * 0.02)
|
48
48
|
max_font_size = int(image_size[0] * 0.05)
|
49
49
|
for font_size in range(max_font_size, min_font_size - 1, -1):
|
50
|
-
font = ImageFont.truetype(
|
51
|
-
PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8"
|
52
|
-
)
|
50
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
|
53
51
|
if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
|
54
52
|
text_width_tmp, text_height_tmp = draw.textsize(label_str, font)
|
55
53
|
else:
|
@@ -58,7 +56,7 @@ class TopkResult(BaseCVResult):
|
|
58
56
|
if text_width_tmp <= image_size[0]:
|
59
57
|
break
|
60
58
|
else:
|
61
|
-
font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, min_font_size)
|
59
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, min_font_size)
|
62
60
|
color_list = get_colormap(rgb=True)
|
63
61
|
color = tuple(color_list[0])
|
64
62
|
font_color = tuple(self._get_font_colormap(3))
|
@@ -18,7 +18,7 @@ import numpy as np
|
|
18
18
|
import PIL
|
19
19
|
from PIL import Image, ImageDraw, ImageFont
|
20
20
|
|
21
|
-
from ....utils.fonts import PINGFANG_FONT_FILE_PATH
|
21
|
+
from ....utils.fonts import PINGFANG_FONT
|
22
22
|
from ...common.result import BaseCVResult, JsonMixin
|
23
23
|
from ...utils.color_map import get_colormap
|
24
24
|
|
@@ -43,7 +43,7 @@ class MLClassResult(BaseCVResult):
|
|
43
43
|
image_width, image_height = image.size
|
44
44
|
font_size = int(image_width * 0.06)
|
45
45
|
|
46
|
-
font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size)
|
46
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, font_size)
|
47
47
|
text_lines = []
|
48
48
|
row_width = 0
|
49
49
|
row_height = 0
|
@@ -18,7 +18,7 @@ from typing import List
|
|
18
18
|
import PIL
|
19
19
|
from PIL import Image, ImageDraw, ImageFont
|
20
20
|
|
21
|
-
from ....utils.fonts import PINGFANG_FONT_FILE_PATH
|
21
|
+
from ....utils.fonts import PINGFANG_FONT
|
22
22
|
from ...common.result import BaseCVResult, JsonMixin
|
23
23
|
from ...utils.color_map import font_colormap, get_colormap
|
24
24
|
|
@@ -32,7 +32,7 @@ def draw_box(img: Image.Image, boxes: List[dict]) -> Image.Image:
|
|
32
32
|
img (PIL.Image.Image): visualized image
|
33
33
|
"""
|
34
34
|
font_size = int(0.018 * int(img.width)) + 2
|
35
|
-
font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
|
35
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
|
36
36
|
|
37
37
|
draw_thickness = int(max(img.size) * 0.002)
|
38
38
|
draw = ImageDraw.Draw(img)
|
@@ -199,6 +199,9 @@ class GroundingDINOPostProcessor(object):
|
|
199
199
|
tokenized = self.tokenizer(prompt)
|
200
200
|
if posmap.dim() == 1:
|
201
201
|
non_zero_idx = posmap.nonzero(as_tuple=True)[0].squeeze(-1).tolist()
|
202
|
+
non_zero_idx = (
|
203
|
+
[non_zero_idx] if not isinstance(non_zero_idx, list) else non_zero_idx
|
204
|
+
)
|
202
205
|
token_ids = [tokenized["input_ids"][i] for i in non_zero_idx]
|
203
206
|
return self.tokenizer.decode(token_ids)
|
204
207
|
else:
|