paddlex 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/configs/modules/text_recognition/eslav_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/korean_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/latin_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/pipelines/PP-DocTranslation.yaml +261 -0
- paddlex/inference/common/batch_sampler/__init__.py +1 -0
- paddlex/inference/common/batch_sampler/markdown_batch_sampler.py +116 -0
- paddlex/inference/common/result/base_cv_result.py +2 -3
- paddlex/inference/common/result/mixin.py +3 -1
- paddlex/inference/models/base/predictor/base_predictor.py +2 -0
- paddlex/inference/models/common/static_infer.py +2 -0
- paddlex/inference/models/common/vlm/generation/utils.py +2 -2
- paddlex/inference/models/formula_recognition/result.py +2 -2
- paddlex/inference/models/image_classification/result.py +3 -5
- paddlex/inference/models/image_multilabel_classification/result.py +2 -2
- paddlex/inference/models/object_detection/result.py +2 -2
- paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +3 -0
- paddlex/inference/models/text_recognition/predictor.py +51 -1
- paddlex/inference/models/text_recognition/result.py +5 -2
- paddlex/inference/models/video_classification/result.py +3 -3
- paddlex/inference/models/video_detection/result.py +2 -4
- paddlex/inference/pipelines/__init__.py +1 -0
- paddlex/inference/pipelines/attribute_recognition/result.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/__init__.py +1 -0
- paddlex/inference/pipelines/components/prompt_engineering/generate_translate_prompt.py +179 -0
- paddlex/inference/pipelines/doc_preprocessor/result.py +2 -2
- paddlex/inference/pipelines/formula_recognition/result.py +2 -2
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +2 -0
- paddlex/inference/pipelines/layout_parsing/result_v2.py +11 -4
- paddlex/inference/pipelines/ocr/pipeline.py +2 -0
- paddlex/inference/pipelines/ocr/result.py +11 -7
- paddlex/inference/pipelines/pp_doctranslation/__init__.py +15 -0
- paddlex/inference/pipelines/pp_doctranslation/pipeline.py +523 -0
- paddlex/inference/pipelines/pp_doctranslation/result.py +39 -0
- paddlex/inference/pipelines/pp_doctranslation/utils.py +260 -0
- paddlex/inference/pipelines/pp_shitu_v2/result.py +2 -2
- paddlex/inference/serving/basic_serving/_app.py +1 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +5 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -24
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +16 -26
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py +203 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +4 -2
- paddlex/inference/serving/infra/utils.py +22 -17
- paddlex/inference/serving/schemas/anomaly_detection.py +1 -0
- paddlex/inference/serving/schemas/doc_preprocessor.py +1 -0
- paddlex/inference/serving/schemas/face_recognition.py +1 -0
- paddlex/inference/serving/schemas/formula_recognition.py +1 -0
- paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -0
- paddlex/inference/serving/schemas/image_classification.py +1 -0
- paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -0
- paddlex/inference/serving/schemas/instance_segmentation.py +1 -0
- paddlex/inference/serving/schemas/layout_parsing.py +1 -0
- paddlex/inference/serving/schemas/object_detection.py +1 -0
- paddlex/inference/serving/schemas/ocr.py +1 -0
- paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -0
- paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -0
- paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -0
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +5 -4
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +6 -5
- paddlex/inference/serving/schemas/pp_doctranslation.py +115 -0
- paddlex/inference/serving/schemas/pp_shituv2.py +1 -0
- paddlex/inference/serving/schemas/pp_structurev3.py +2 -9
- paddlex/inference/serving/schemas/rotated_object_detection.py +1 -0
- paddlex/inference/serving/schemas/seal_recognition.py +1 -0
- paddlex/inference/serving/schemas/semantic_segmentation.py +1 -0
- paddlex/inference/serving/schemas/shared/ocr.py +8 -1
- paddlex/inference/serving/schemas/small_object_detection.py +1 -0
- paddlex/inference/serving/schemas/table_recognition.py +1 -0
- paddlex/inference/serving/schemas/table_recognition_v2.py +1 -0
- paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -0
- paddlex/inference/serving/schemas/ts_classification.py +1 -0
- paddlex/inference/serving/schemas/ts_forecast.py +1 -0
- paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -0
- paddlex/inference/utils/hpi.py +42 -14
- paddlex/inference/utils/hpi_model_info_collection.json +0 -2
- paddlex/inference/utils/io/__init__.py +1 -0
- paddlex/inference/utils/io/readers.py +46 -0
- paddlex/inference/utils/io/writers.py +2 -0
- paddlex/inference/utils/official_models.py +7 -0
- paddlex/inference/utils/pp_option.py +34 -18
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/m_3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/text_recognition/model_list.py +3 -0
- paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +27 -0
- paddlex/repo_manager/meta.py +3 -3
- paddlex/utils/device.py +4 -1
- paddlex/utils/download.py +10 -7
- paddlex/utils/{fonts/__init__.py → fonts.py} +45 -26
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/METADATA +25 -1
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/RECORD +134 -122
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/LICENSE +0 -0
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/WHEEL +0 -0
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/top_level.txt +0 -0
paddlex/utils/download.py
CHANGED
@@ -39,14 +39,14 @@ class _ProgressPrinter(object):
|
|
39
39
|
str_ += "\n"
|
40
40
|
self._last_time = 0
|
41
41
|
if time.time() - self._last_time >= self._flush_intvl:
|
42
|
-
sys.stdout.write(f"\r{str_}")
|
42
|
+
sys.stderr.write(f"\r{str_}")
|
43
43
|
self._last_time = time.time()
|
44
|
-
sys.stdout.flush()
|
44
|
+
sys.stderr.flush()
|
45
45
|
|
46
46
|
|
47
47
|
def _download(url, save_path, print_progress):
|
48
48
|
if print_progress:
|
49
|
-
print(f"Connecting to {url} ...")
|
49
|
+
print(f"Connecting to {url} ...", file=sys.stderr)
|
50
50
|
|
51
51
|
with requests.get(url, stream=True, timeout=15) as r:
|
52
52
|
r.raise_for_status()
|
@@ -62,7 +62,10 @@ def _download(url, save_path, print_progress):
|
|
62
62
|
total_length = int(total_length)
|
63
63
|
if print_progress:
|
64
64
|
printer = _ProgressPrinter()
|
65
|
-
print(f"Downloading {os.path.basename(save_path)} ...")
|
65
|
+
print(
|
66
|
+
f"Downloading {os.path.basename(save_path)} ...",
|
67
|
+
file=sys.stderr,
|
68
|
+
)
|
66
69
|
for data in r.iter_content(chunk_size=4096):
|
67
70
|
dl += len(data)
|
68
71
|
f.write(data)
|
@@ -95,17 +98,17 @@ def _extract_tar_file(file_path, extd_dir):
|
|
95
98
|
try:
|
96
99
|
f.extract(file, extd_dir)
|
97
100
|
except KeyError:
|
98
|
-
print(f"File {file} not found in the archive.")
|
101
|
+
print(f"File {file} not found in the archive.", file=sys.stderr)
|
99
102
|
yield total_num, index
|
100
103
|
except Exception as e:
|
101
|
-
print(f"An error occurred: {e}")
|
104
|
+
print(f"An error occurred: {e}", file=sys.stderr)
|
102
105
|
|
103
106
|
|
104
107
|
def _extract(file_path, extd_dir, print_progress):
|
105
108
|
"""extract"""
|
106
109
|
if print_progress:
|
107
110
|
printer = _ProgressPrinter()
|
108
|
-
print(f"Extracting {os.path.basename(file_path)}")
|
111
|
+
print(f"Extracting {os.path.basename(file_path)}", file=sys.stderr)
|
109
112
|
|
110
113
|
if zipfile.is_zipfile(file_path):
|
111
114
|
handler = _extract_zip_file
|
paddlex/utils/{fonts/__init__.py → fonts.py}
CHANGED
@@ -17,27 +17,10 @@ from pathlib import Path
|
|
17
17
|
import PIL
|
18
18
|
from PIL import ImageFont
|
19
19
|
|
20
|
-
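from .. import logging
|
21
|
-
from ..cache import CACHE_DIR
|
22
|
-
from ..download import download
|
23
|
-
from ..flags import LOCAL_FONT_FILE_PATH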
|
24
|
-
|
25
|
-
|
26
|
-
def get_font_file_path(file_name: str) -> str:
|
27
|
-
"""
|
28
|
-
Get the path of the font file.
|
29
|
-
|
30
|
-
Returns:
|
31
|
-
str: The path to the font file.
|
32
|
-
"""
|
33
|
-
font_path = (Path(CACHE_DIR) / "fonts" / file_name).resolve().as_posix()
|
34
|
-
if not Path(font_path).is_file():
|
35
|
-
download(
|
36
|
-
url=f"https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/fonts/{file_name}",
|
37
|
-
save_path=font_path,
|
38
|
-
)
|
39
|
-
|
40
|
-
return font_path
|
20
|
+
from . import logging
|
21
|
+
from .cache import CACHE_DIR
|
22
|
+
from .download import download
|
23
|
+
from .flags import LOCAL_FONT_FILE_PATH
|
41
24
|
|
42
25
|
|
43
26
|
def create_font(txt: str, sz: tuple, font_path: str) -> ImageFont:
|
@@ -87,12 +70,48 @@ def create_font_vertical(
|
|
87
70
|
return font
|
88
71
|
|
89
72
|
|
73
|
+
class Font:
|
74
|
+
def __init__(self, font_name=None, local_path=None):
|
75
|
+
if local_path is None:
|
76
|
+
if Path(str(LOCAL_FONT_FILE_PATH)).is_file():
|
77
|
+
local_path = str(LOCAL_FONT_FILE_PATH)
|
78
|
+
self._local_path = local_path
|
79
|
+
if not local_path:
|
80
|
+
assert font_name is not None
|
81
|
+
self._font_name = font_name
|
82
|
+
|
83
|
+
@property
|
84
|
+
def path(self):
|
85
|
+
# HACK: download font file when needed only
|
86
|
+
if not self._local_path:
|
87
|
+
self._get_offical_font()
|
88
|
+
return self._local_path
|
89
|
+
|
90
|
+
def _get_offical_font(self):
|
91
|
+
"""
|
92
|
+
Download the official font file.
|
93
|
+
"""
|
94
|
+
font_path = (Path(CACHE_DIR) / "fonts" / self._font_name).resolve().as_posix()
|
95
|
+
if not Path(font_path).is_file():
|
96
|
+
download(
|
97
|
+
url=f"https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/fonts/{self._font_name}",
|
98
|
+
save_path=font_path,
|
99
|
+
)
|
100
|
+
self._local_path = font_path
|
101
|
+
|
102
|
+
|
90
103
|
if Path(str(LOCAL_FONT_FILE_PATH)).is_file():
|
91
104
|
logging.warning(
|
92
105
|
f"Using the local font file(`{LOCAL_FONT_FILE_PATH}`) specified by `LOCAL_FONT_FILE_PATH`!"
|
93
106
|
)
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
107
|
+
|
108
|
+
PINGFANG_FONT = Font(font_name="PingFang-SC-Regular.ttf")
|
109
|
+
SIMFANG_FONT = Font(font_name="simfang.ttf")
|
110
|
+
LATIN_FONT = Font(font_name="latin.ttf")
|
111
|
+
KOREAN_FONT = Font(font_name="korean.ttf")
|
112
|
+
ARABIC_FONT = Font(font_name="arabic.ttf")
|
113
|
+
CYRILLIC_FONT = Font(font_name="cyrillic.ttf")
|
114
|
+
KANNADA_FONT = Font(font_name="kannada.ttf")
|
115
|
+
TELUGU_FONT = Font(font_name="telugu.ttf")
|
116
|
+
TAMIL_FONT = Font(font_name="tamil.ttf")
|
117
|
+
DEVANAGARI_FONT = Font(font_name="devanagari.ttf")
|
{paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: paddlex
|
3
|
-
Version: 3.0.2
|
3
|
+
Version: 3.1.0
|
4
4
|
Summary: Low-code development tool based on PaddlePaddle.
|
5
5
|
Home-page: UNKNOWN
|
6
6
|
Author: PaddlePaddle Authors
|
@@ -45,6 +45,7 @@ Requires-Dist: ujson
|
|
45
45
|
Provides-Extra: all
|
46
46
|
Requires-Dist: aiohttp>=3.9; extra == "all"
|
47
47
|
Requires-Dist: bce-python-sdk>=0.9; extra == "all"
|
48
|
+
Requires-Dist: beautifulsoup4; extra == "all"
|
48
49
|
Requires-Dist: chinese-calendar; extra == "all"
|
49
50
|
Requires-Dist: einops; extra == "all"
|
50
51
|
Requires-Dist: faiss-cpu; extra == "all"
|
@@ -81,6 +82,7 @@ Requires-Dist: uvicorn>=0.16; extra == "all"
|
|
81
82
|
Requires-Dist: yarl>=1.9; extra == "all"
|
82
83
|
Requires-Dist: decord==0.6.0; ((platform_machine == "x86_64" or platform_machine == "AMD64") and sys_platform != "darwin") and extra == "all"
|
83
84
|
Provides-Extra: base
|
85
|
+
Requires-Dist: beautifulsoup4; extra == "base"
|
84
86
|
Requires-Dist: chinese-calendar; extra == "base"
|
85
87
|
Requires-Dist: einops; extra == "base"
|
86
88
|
Requires-Dist: faiss-cpu; extra == "base"
|
@@ -182,6 +184,20 @@ Requires-Dist: Jinja2; extra == "speech"
|
|
182
184
|
Requires-Dist: regex; extra == "speech"
|
183
185
|
Requires-Dist: soundfile; extra == "speech"
|
184
186
|
Requires-Dist: tqdm; extra == "speech"
|
187
|
+
Provides-Extra: trans
|
188
|
+
Requires-Dist: beautifulsoup4; extra == "trans"
|
189
|
+
Requires-Dist: ftfy; extra == "trans"
|
190
|
+
Requires-Dist: imagesize; extra == "trans"
|
191
|
+
Requires-Dist: lxml; extra == "trans"
|
192
|
+
Requires-Dist: openai>=1.63; extra == "trans"
|
193
|
+
Requires-Dist: opencv-contrib-python==4.10.0.84; extra == "trans"
|
194
|
+
Requires-Dist: openpyxl; extra == "trans"
|
195
|
+
Requires-Dist: premailer; extra == "trans"
|
196
|
+
Requires-Dist: pyclipper; extra == "trans"
|
197
|
+
Requires-Dist: pypdfium2>=4; extra == "trans"
|
198
|
+
Requires-Dist: scikit-learn; extra == "trans"
|
199
|
+
Requires-Dist: shapely; extra == "trans"
|
200
|
+
Requires-Dist: tokenizers>=0.19; extra == "trans"
|
185
201
|
Provides-Extra: ts
|
186
202
|
Requires-Dist: chinese-calendar; extra == "ts"
|
187
203
|
Requires-Dist: joblib; extra == "ts"
|
@@ -228,6 +244,14 @@ PaddleX 3.0 是基于飞桨框架构建的低代码开发工具,它集成了
|
|
228
244
|
|
229
245
|
## 📣 近期更新
|
230
246
|
|
247
|
+
🔥🔥 **2025.6.28,发布 PaddleX v3.1.0**,新增能力如下:
|
248
|
+
|
249
|
+
- **重要模型:**
|
250
|
+
- **新增PP-OCRv5多语种文本识别模型**,支持法语、西班牙语、葡萄牙语、俄语、韩语等37种语言的文字识别模型的训推流程。**平均精度涨幅超30%。**
|
251
|
+
- 升级PP-StructureV3中的**PP-Chart2Table模型**,图表转表能力进一步升级,在内部自建测评集合上指标(RMS-F1)**提升9.36个百分点(71.24% -> 80.60%)**
|
252
|
+
- **重要产线:**
|
253
|
+
- 新增基于PP-StructureV3和ERNIE 4.5 Turbo的**文档翻译产线PP-DocTranslation,支持翻译Markdown文档、各种复杂版式的PDF文档和文档图像,结果保存为Markdown格式文档。**
|
254
|
+
|
231
255
|
|
232
256
|
🔥🔥 **2025.5.20,发布 PaddleX v3.0.0**,相比PaddleX v2.x,核心升级如下:
|
233
257
|
|