paddlex 3.0.3__py3-none-any.whl → 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/configs/modules/text_recognition/eslav_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/korean_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/latin_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/pipelines/PP-DocTranslation.yaml +261 -0
- paddlex/inference/common/batch_sampler/__init__.py +1 -0
- paddlex/inference/common/batch_sampler/markdown_batch_sampler.py +116 -0
- paddlex/inference/common/result/base_cv_result.py +2 -3
- paddlex/inference/common/result/mixin.py +3 -1
- paddlex/inference/models/common/vlm/generation/utils.py +2 -2
- paddlex/inference/models/formula_recognition/result.py +2 -2
- paddlex/inference/models/image_classification/result.py +3 -5
- paddlex/inference/models/image_multilabel_classification/result.py +2 -2
- paddlex/inference/models/object_detection/result.py +2 -2
- paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +3 -0
- paddlex/inference/models/text_recognition/predictor.py +51 -1
- paddlex/inference/models/text_recognition/result.py +5 -2
- paddlex/inference/models/video_classification/result.py +3 -3
- paddlex/inference/models/video_detection/result.py +2 -4
- paddlex/inference/pipelines/__init__.py +1 -0
- paddlex/inference/pipelines/attribute_recognition/result.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/__init__.py +1 -0
- paddlex/inference/pipelines/components/prompt_engineering/generate_translate_prompt.py +179 -0
- paddlex/inference/pipelines/doc_preprocessor/result.py +2 -2
- paddlex/inference/pipelines/formula_recognition/result.py +2 -2
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +2 -0
- paddlex/inference/pipelines/layout_parsing/result_v2.py +4 -2
- paddlex/inference/pipelines/ocr/pipeline.py +2 -0
- paddlex/inference/pipelines/ocr/result.py +11 -7
- paddlex/inference/pipelines/pp_doctranslation/__init__.py +15 -0
- paddlex/inference/pipelines/pp_doctranslation/pipeline.py +523 -0
- paddlex/inference/pipelines/pp_doctranslation/result.py +39 -0
- paddlex/inference/pipelines/pp_doctranslation/utils.py +260 -0
- paddlex/inference/pipelines/pp_shitu_v2/result.py +2 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +5 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -24
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +16 -26
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py +203 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +4 -2
- paddlex/inference/serving/schemas/anomaly_detection.py +1 -0
- paddlex/inference/serving/schemas/doc_preprocessor.py +1 -0
- paddlex/inference/serving/schemas/face_recognition.py +1 -0
- paddlex/inference/serving/schemas/formula_recognition.py +1 -0
- paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -0
- paddlex/inference/serving/schemas/image_classification.py +1 -0
- paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -0
- paddlex/inference/serving/schemas/instance_segmentation.py +1 -0
- paddlex/inference/serving/schemas/layout_parsing.py +1 -0
- paddlex/inference/serving/schemas/object_detection.py +1 -0
- paddlex/inference/serving/schemas/ocr.py +1 -0
- paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -0
- paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -0
- paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -0
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +5 -4
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +6 -5
- paddlex/inference/serving/schemas/pp_doctranslation.py +115 -0
- paddlex/inference/serving/schemas/pp_shituv2.py +1 -0
- paddlex/inference/serving/schemas/pp_structurev3.py +2 -9
- paddlex/inference/serving/schemas/rotated_object_detection.py +1 -0
- paddlex/inference/serving/schemas/seal_recognition.py +1 -0
- paddlex/inference/serving/schemas/semantic_segmentation.py +1 -0
- paddlex/inference/serving/schemas/shared/ocr.py +8 -1
- paddlex/inference/serving/schemas/small_object_detection.py +1 -0
- paddlex/inference/serving/schemas/table_recognition.py +1 -0
- paddlex/inference/serving/schemas/table_recognition_v2.py +1 -0
- paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -0
- paddlex/inference/serving/schemas/ts_classification.py +1 -0
- paddlex/inference/serving/schemas/ts_forecast.py +1 -0
- paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -0
- paddlex/inference/utils/io/__init__.py +1 -0
- paddlex/inference/utils/io/readers.py +46 -0
- paddlex/inference/utils/io/writers.py +2 -0
- paddlex/inference/utils/official_models.py +7 -0
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/m_3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/text_recognition/model_list.py +3 -0
- paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +27 -0
- paddlex/repo_manager/meta.py +3 -3
- paddlex/utils/device.py +4 -1
- paddlex/utils/{fonts/__init__.py → fonts.py} +45 -26
- {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/METADATA +25 -1
- {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/RECORD +126 -114
- {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/LICENSE +0 -0
- {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/WHEEL +0 -0
- {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/top_level.txt +0 -0
@@ -57,6 +57,7 @@ class AnalyzeImagesRequest(ocr.BaseInferRequest):
|
|
57
57
|
sealDetBoxThresh: Optional[float] = None
|
58
58
|
sealDetUnclipRatio: Optional[float] = None
|
59
59
|
sealRecScoreThresh: Optional[float] = None
|
60
|
+
visualize: Optional[bool] = None
|
60
61
|
|
61
62
|
|
62
63
|
class LayoutParsingResult(BaseModel):
|
@@ -78,8 +79,8 @@ BUILD_VECTOR_STORE_ENDPOINT: Final[str] = "/chatocr-vector"
|
|
78
79
|
|
79
80
|
class BuildVectorStoreRequest(BaseModel):
|
80
81
|
visualInfo: List[dict]
|
81
|
-
minCharacters:
|
82
|
-
blockSize:
|
82
|
+
minCharacters: int = 3500
|
83
|
+
blockSize: int = 300
|
83
84
|
retrieverConfig: Optional[dict] = None
|
84
85
|
|
85
86
|
|
@@ -93,9 +94,9 @@ CHAT_ENDPOINT: Final[str] = "/chatocr-chat"
|
|
93
94
|
class ChatRequest(BaseModel):
|
94
95
|
keyList: List[str]
|
95
96
|
visualInfo: List[dict]
|
96
|
-
useVectorRetrieval:
|
97
|
+
useVectorRetrieval: bool = True
|
97
98
|
vectorInfo: Optional[dict] = None
|
98
|
-
minCharacters:
|
99
|
+
minCharacters: int = 3500
|
99
100
|
textTaskDescription: Optional[str] = None
|
100
101
|
textOutputFormat: Optional[str] = None
|
101
102
|
# Is the "Str" in the name unnecessary? Keep the names consistent with the
|
@@ -61,6 +61,7 @@ class AnalyzeImagesRequest(ocr.BaseInferRequest):
|
|
61
61
|
sealDetBoxThresh: Optional[float] = None
|
62
62
|
sealDetUnclipRatio: Optional[float] = None
|
63
63
|
sealRecScoreThresh: Optional[float] = None
|
64
|
+
visualize: Optional[bool] = None
|
64
65
|
|
65
66
|
|
66
67
|
class LayoutParsingResult(BaseModel):
|
@@ -80,8 +81,8 @@ BUILD_VECTOR_STORE_ENDPOINT: Final[str] = "/chatocr-vector"
|
|
80
81
|
|
81
82
|
class BuildVectorStoreRequest(BaseModel):
|
82
83
|
visualInfo: List[dict]
|
83
|
-
minCharacters:
|
84
|
-
blockSize:
|
84
|
+
minCharacters: int = 3500
|
85
|
+
blockSize: int = 300
|
85
86
|
retrieverConfig: Optional[dict] = None
|
86
87
|
|
87
88
|
|
@@ -108,9 +109,9 @@ CHAT_ENDPOINT: Final[str] = "/chatocr-chat"
|
|
108
109
|
class ChatRequest(BaseModel):
|
109
110
|
keyList: List[str]
|
110
111
|
visualInfo: List[dict]
|
111
|
-
useVectorRetrieval:
|
112
|
+
useVectorRetrieval: bool = True
|
112
113
|
vectorInfo: Optional[dict] = None
|
113
|
-
minCharacters:
|
114
|
+
minCharacters: int = 3500
|
114
115
|
textTaskDescription: Optional[str] = None
|
115
116
|
textOutputFormat: Optional[str] = None
|
116
117
|
textRulesStr: Optional[str] = None
|
@@ -122,7 +123,7 @@ class ChatRequest(BaseModel):
|
|
122
123
|
tableFewShotDemoTextContent: Optional[str] = None
|
123
124
|
tableFewShotDemoKeyValueList: Optional[str] = None
|
124
125
|
mllmPredictInfo: Optional[dict] = None
|
125
|
-
mllmIntegrationStrategy:
|
126
|
+
mllmIntegrationStrategy: str = "integration"
|
126
127
|
chatBotConfig: Optional[dict] = None
|
127
128
|
retrieverConfig: Optional[dict] = None
|
128
129
|
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from typing import Dict, Final, List, Optional, Tuple, Union
|
16
|
+
|
17
|
+
from pydantic import BaseModel
|
18
|
+
|
19
|
+
from ..infra.models import DataInfo, PrimaryOperations
|
20
|
+
from .shared import ocr
|
21
|
+
|
22
|
+
__all__ = [
|
23
|
+
"ANALYZE_IMAGES_ENDPOINT",
|
24
|
+
"AnalyzeImagesRequest",
|
25
|
+
"LayoutParsingResult",
|
26
|
+
"AnalyzeImagesResult",
|
27
|
+
"TRANSLATE_ENDPOINT",
|
28
|
+
"TranslateRequest",
|
29
|
+
"TranslationResult",
|
30
|
+
"TranslateResult",
|
31
|
+
"PRIMARY_OPERATIONS",
|
32
|
+
]
|
33
|
+
|
34
|
+
ANALYZE_IMAGES_ENDPOINT: Final[str] = "/doctrans-visual"
|
35
|
+
|
36
|
+
|
37
|
+
class AnalyzeImagesRequest(ocr.BaseInferRequest):
|
38
|
+
useDocOrientationClassify: Optional[bool] = False
|
39
|
+
useDocUnwarping: Optional[bool] = False
|
40
|
+
useTextlineOrientation: Optional[bool] = None
|
41
|
+
useSealRecognition: Optional[bool] = None
|
42
|
+
useTableRecognition: Optional[bool] = None
|
43
|
+
useFormulaRecognition: Optional[bool] = None
|
44
|
+
useChartRecognition: Optional[bool] = False
|
45
|
+
useRegionDetection: Optional[bool] = None
|
46
|
+
layoutThreshold: Optional[Union[float, dict]] = None
|
47
|
+
layoutNms: Optional[bool] = None
|
48
|
+
layoutUnclipRatio: Optional[Union[float, Tuple[float, float], dict]] = None
|
49
|
+
layoutMergeBboxesMode: Optional[Union[str, dict]] = None
|
50
|
+
textDetLimitSideLen: Optional[int] = None
|
51
|
+
textDetLimitType: Optional[str] = None
|
52
|
+
textDetThresh: Optional[float] = None
|
53
|
+
textDetBoxThresh: Optional[float] = None
|
54
|
+
textDetUnclipRatio: Optional[float] = None
|
55
|
+
textRecScoreThresh: Optional[float] = None
|
56
|
+
sealDetLimitSideLen: Optional[int] = None
|
57
|
+
sealDetLimitType: Optional[str] = None
|
58
|
+
sealDetThresh: Optional[float] = None
|
59
|
+
sealDetBoxThresh: Optional[float] = None
|
60
|
+
sealDetUnclipRatio: Optional[float] = None
|
61
|
+
sealRecScoreThresh: Optional[float] = None
|
62
|
+
useWiredTableCellsTransToHtml: bool = False
|
63
|
+
useWirelessTableCellsTransToHtml: bool = False
|
64
|
+
useTableOrientationClassify: bool = True
|
65
|
+
useOcrResultsWithTableCells: bool = True
|
66
|
+
useE2eWiredTableRecModel: bool = False
|
67
|
+
useE2eWirelessTableRecModel: bool = True
|
68
|
+
visualize: Optional[bool] = None
|
69
|
+
|
70
|
+
|
71
|
+
class LayoutParsingResult(BaseModel):
|
72
|
+
prunedResult: dict
|
73
|
+
markdown: ocr.MarkdownData
|
74
|
+
outputImages: Optional[Dict[str, str]] = None
|
75
|
+
inputImage: Optional[str] = None
|
76
|
+
|
77
|
+
|
78
|
+
class AnalyzeImagesResult(BaseModel):
|
79
|
+
layoutParsingResults: List[LayoutParsingResult]
|
80
|
+
dataInfo: DataInfo
|
81
|
+
|
82
|
+
|
83
|
+
TRANSLATE_ENDPOINT: Final[str] = "/doctrans-translate"
|
84
|
+
|
85
|
+
|
86
|
+
class TranslateRequest(BaseModel):
|
87
|
+
markdownList: List[ocr.MarkdownData]
|
88
|
+
targetLanguage: str = "zh"
|
89
|
+
chunkSize: int = 5000
|
90
|
+
taskDescription: Optional[str] = None
|
91
|
+
outputFormat: Optional[str] = None
|
92
|
+
rulesStr: Optional[str] = None
|
93
|
+
fewShotDemoTextContent: Optional[str] = None
|
94
|
+
fewShotDemoKeyValueList: Optional[str] = None
|
95
|
+
chatBotConfig: Optional[dict] = None
|
96
|
+
sleepInterval: float = 0
|
97
|
+
|
98
|
+
|
99
|
+
class TranslationResult(BaseModel):
|
100
|
+
language: str
|
101
|
+
markdown: ocr.MarkdownData
|
102
|
+
|
103
|
+
|
104
|
+
class TranslateResult(BaseModel):
|
105
|
+
translationResults: List[TranslationResult]
|
106
|
+
|
107
|
+
|
108
|
+
PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
|
109
|
+
"analyzeImages": (
|
110
|
+
ANALYZE_IMAGES_ENDPOINT,
|
111
|
+
AnalyzeImagesRequest,
|
112
|
+
AnalyzeImagesResult,
|
113
|
+
),
|
114
|
+
"translate": (TRANSLATE_ENDPOINT, TranslateRequest, TranslateResult),
|
115
|
+
}
|
@@ -22,7 +22,6 @@ from .shared import ocr
|
|
22
22
|
__all__ = [
|
23
23
|
"INFER_ENDPOINT",
|
24
24
|
"InferRequest",
|
25
|
-
"MarkdownData",
|
26
25
|
"LayoutParsingResult",
|
27
26
|
"InferResult",
|
28
27
|
"PRIMARY_OPERATIONS",
|
@@ -62,18 +61,12 @@ class InferRequest(ocr.BaseInferRequest):
|
|
62
61
|
useOcrResultsWithTableCells: bool = True
|
63
62
|
useE2eWiredTableRecModel: bool = False
|
64
63
|
useE2eWirelessTableRecModel: bool = True
|
65
|
-
|
66
|
-
|
67
|
-
class MarkdownData(BaseModel):
|
68
|
-
text: str
|
69
|
-
images: Dict[str, str]
|
70
|
-
isStart: bool
|
71
|
-
isEnd: bool
|
64
|
+
visualize: Optional[bool] = None
|
72
65
|
|
73
66
|
|
74
67
|
class LayoutParsingResult(BaseModel):
|
75
68
|
prunedResult: dict
|
76
|
-
markdown: MarkdownData
|
69
|
+
markdown: ocr.MarkdownData
|
77
70
|
outputImages: Optional[Dict[str, str]] = None
|
78
71
|
inputImage: Optional[str] = None
|
79
72
|
|
@@ -12,7 +12,7 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
|
-
from typing import Optional
|
15
|
+
from typing import Dict, Optional
|
16
16
|
|
17
17
|
from pydantic import BaseModel
|
18
18
|
from typing_extensions import Literal, TypeAlias
|
@@ -23,3 +23,10 @@ FileType: TypeAlias = Literal[0, 1]
|
|
23
23
|
class BaseInferRequest(BaseModel):
|
24
24
|
file: str
|
25
25
|
fileType: Optional[FileType] = None
|
26
|
+
|
27
|
+
|
28
|
+
class MarkdownData(BaseModel):
|
29
|
+
text: str
|
30
|
+
isStart: bool
|
31
|
+
isEnd: bool
|
32
|
+
images: Optional[Dict[str, str]] = None
|
@@ -35,6 +35,7 @@ INFER_ENDPOINT: Final[str] = "/vehicle-attribute-recognition"
|
|
35
35
|
class InferRequest(BaseModel):
|
36
36
|
image: str
|
37
37
|
detThreshold: Optional[float] = None
|
38
|
+
visualize: Optional[bool] = None
|
38
39
|
clsThreshold: Optional[
|
39
40
|
Union[float, Dict[Union[Literal["default"], int], float], List[float]]
|
40
41
|
] = None
|
@@ -52,6 +52,8 @@ class ReaderType(enum.Enum):
|
|
52
52
|
TS = 5
|
53
53
|
PDF = 6
|
54
54
|
YAML = 8
|
55
|
+
MARKDOWN = 9
|
56
|
+
TXT = 10
|
55
57
|
|
56
58
|
|
57
59
|
class _BaseReader(object):
|
@@ -206,6 +208,41 @@ class YAMLReader(_BaseReader):
|
|
206
208
|
return ReaderType.YAML
|
207
209
|
|
208
210
|
|
211
|
+
class MarkDownReader(_BaseReader):
|
212
|
+
|
213
|
+
def __init__(self, backend="Markdown", **bk_args):
|
214
|
+
super().__init__(backend, **bk_args)
|
215
|
+
|
216
|
+
def read(self, in_path):
|
217
|
+
return self._backend.read_file(str(in_path))
|
218
|
+
|
219
|
+
def _init_backend(self, bk_type, bk_args):
|
220
|
+
if bk_type == "Markdown":
|
221
|
+
return TXTReaderBackend(**bk_args)
|
222
|
+
else:
|
223
|
+
raise ValueError("Unsupported backend type")
|
224
|
+
|
225
|
+
def get_type(self):
|
226
|
+
return ReaderType.MARKDOWN
|
227
|
+
|
228
|
+
|
229
|
+
class TXTReader(_BaseReader):
|
230
|
+
"""TXTReader"""
|
231
|
+
|
232
|
+
def __init__(self, backend="txt", **bk_args):
|
233
|
+
super().__init__(backend, **bk_args)
|
234
|
+
|
235
|
+
def read(self, in_path):
|
236
|
+
return self._backend.read_file(str(in_path))
|
237
|
+
|
238
|
+
def _init_backend(self, bk_type, bk_args):
|
239
|
+
if bk_type == "txt":
|
240
|
+
return TXTReaderBackend(**bk_args)
|
241
|
+
|
242
|
+
def get_type(self):
|
243
|
+
return ReaderType.TXT
|
244
|
+
|
245
|
+
|
209
246
|
class _BaseReaderBackend(object):
|
210
247
|
"""_BaseReaderBackend"""
|
211
248
|
|
@@ -261,6 +298,15 @@ class PDFReaderBackend(_BaseReaderBackend):
|
|
261
298
|
yield img_cv
|
262
299
|
|
263
300
|
|
301
|
+
class TXTReaderBackend(_BaseReaderBackend):
|
302
|
+
"""TXTReaderBackend"""
|
303
|
+
|
304
|
+
def read_file(self, in_path):
|
305
|
+
with open(in_path, "r") as f:
|
306
|
+
data = f.read()
|
307
|
+
return data
|
308
|
+
|
309
|
+
|
264
310
|
class _VideoReaderBackend(_BaseReaderBackend):
|
265
311
|
"""_VideoReaderBackend"""
|
266
312
|
|
@@ -359,7 +359,11 @@ PP-LCNet_x1_0_vehicle_attribute_infer.tar",
|
|
359
359
|
PP-OCRv5_server_rec_infer.tar",
|
360
360
|
"PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/\
|
361
361
|
PP-OCRv5_mobile_rec_infer.tar",
|
362
|
+
"eslav_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/\
|
363
|
+
eslav_PP-OCRv5_mobile_rec_infer.tar",
|
362
364
|
"PP-DocBee2-3B": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-DocBee2-3B_infer.tar",
|
365
|
+
"latin_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/latin_PP-OCRv5_mobile_rec_infer.tar",
|
366
|
+
"korean_PP-OCRv5_mobile_rec": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/korean_PP-OCRv5_mobile_rec_infer.tar",
|
363
367
|
}
|
364
368
|
|
365
369
|
|
@@ -384,6 +388,7 @@ HUGGINGFACE_MODELS = [
|
|
384
388
|
"PicoDet-S_layout_17cls",
|
385
389
|
"PicoDet-S_layout_3cls",
|
386
390
|
"PP-DocBee2-3B",
|
391
|
+
"PP-Chart2Table",
|
387
392
|
"PP-DocBee-2B",
|
388
393
|
"PP-DocBee-7B",
|
389
394
|
"PP-DocBlockLayout",
|
@@ -396,8 +401,10 @@ HUGGINGFACE_MODELS = [
|
|
396
401
|
"PP-FormulaNet_plus-M",
|
397
402
|
"PP-FormulaNet_plus-S",
|
398
403
|
"PP-FormulaNet-S",
|
404
|
+
"PP-LCNet_x0_25_textline_ori",
|
399
405
|
"PP-LCNet_x1_0_doc_ori",
|
400
406
|
"PP-LCNet_x1_0_table_cls",
|
407
|
+
"PP-LCNet_x1_0_textline_ori",
|
401
408
|
"PP-OCRv3_mobile_det",
|
402
409
|
"PP-OCRv3_mobile_rec",
|
403
410
|
"PP-OCRv3_server_det",
|
@@ -100,9 +100,9 @@ def convert_labelme_dataset(input_dir):
|
|
100
100
|
if not os.path.exists(img_path):
|
101
101
|
logging.info("%s is not existed, skip this image" % img_path)
|
102
102
|
continue
|
103
|
-
img_name =
|
103
|
+
img_name = osp.basename(img_path)
|
104
104
|
img_file_list.append(f"images/{img_name}")
|
105
|
-
label_img_name =
|
105
|
+
label_img_name = osp.basename(annotated_img_path)
|
106
106
|
label_file_list.append(f"annotations/{label_img_name}")
|
107
107
|
|
108
108
|
img = np.asarray(cv2.imread(img_path))
|
@@ -16,7 +16,7 @@ import numpy as np
|
|
16
16
|
import PIL
|
17
17
|
from PIL import ImageDraw, ImageFont
|
18
18
|
|
19
|
-
from ......utils.fonts import
|
19
|
+
from ......utils.fonts import PINGFANG_FONT
|
20
20
|
|
21
21
|
|
22
22
|
def colormap(rgb=False):
|
@@ -114,7 +114,7 @@ def draw_label(image, label, label_map_dict):
|
|
114
114
|
min_font_size = int(image_size[0] * 0.02)
|
115
115
|
max_font_size = int(image_size[0] * 0.05)
|
116
116
|
for font_size in range(max_font_size, min_font_size - 1, -1):
|
117
|
-
font = ImageFont.truetype(
|
117
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
|
118
118
|
if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
|
119
119
|
text_width_tmp, text_height_tmp = draw.textsize(
|
120
120
|
label_map_dict[int(label)], font
|
@@ -127,7 +127,7 @@ def draw_label(image, label, label_map_dict):
|
|
127
127
|
if text_width_tmp <= image_size[0]:
|
128
128
|
break
|
129
129
|
else:
|
130
|
-
font = ImageFont.truetype(
|
130
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, min_font_size)
|
131
131
|
color_list = colormap(rgb=True)
|
132
132
|
color = tuple(color_list[0])
|
133
133
|
font_color = tuple(font_colormap(3))
|
@@ -22,7 +22,7 @@ import numpy as np
|
|
22
22
|
|
23
23
|
from .....utils.deps import function_requires_deps, is_dep_available
|
24
24
|
from .....utils.file_interface import custom_open
|
25
|
-
from .....utils.fonts import
|
25
|
+
from .....utils.fonts import PINGFANG_FONT
|
26
26
|
from .....utils.logging import warning
|
27
27
|
|
28
28
|
if is_dep_available("opencv-contrib-python"):
|
@@ -128,7 +128,7 @@ def deep_analyse(dataset_path, output, datatype="FormulaRecDataset"):
|
|
128
128
|
if os_system == "windows":
|
129
129
|
plt.rcParams["font.sans-serif"] = "FangSong"
|
130
130
|
else:
|
131
|
-
font = font_manager.FontProperties(fname=
|
131
|
+
font = font_manager.FontProperties(fname=PINGFANG_FONT.path, size=15)
|
132
132
|
|
133
133
|
fig, ax = plt.subplots(figsize=(15, 9), dpi=120)
|
134
134
|
xlabel_name = "公式长度区间"
|
@@ -19,7 +19,7 @@ import numpy as np
|
|
19
19
|
|
20
20
|
from .....utils.deps import function_requires_deps, is_dep_available
|
21
21
|
from .....utils.file_interface import custom_open
|
22
|
-
from .....utils.fonts import
|
22
|
+
from .....utils.fonts import PINGFANG_FONT
|
23
23
|
|
24
24
|
if is_dep_available("matplotlib"):
|
25
25
|
import matplotlib.pyplot as plt
|
@@ -52,7 +52,7 @@ def deep_analyse(dataset_path, output, dataset_type="ShiTuRec"):
|
|
52
52
|
if os_system == "windows":
|
53
53
|
plt.rcParams["font.sans-serif"] = "FangSong"
|
54
54
|
else:
|
55
|
-
font = font_manager.FontProperties(fname=
|
55
|
+
font = font_manager.FontProperties(fname=PINGFANG_FONT.path, size=10)
|
56
56
|
|
57
57
|
x = np.arange(len(categories)) # 标签位置
|
58
58
|
width = 0.35 # 每个条形的宽度
|
@@ -16,7 +16,7 @@ import numpy as np
|
|
16
16
|
import PIL
|
17
17
|
from PIL import ImageDraw, ImageFont
|
18
18
|
|
19
|
-
from ......utils.fonts import
|
19
|
+
from ......utils.fonts import PINGFANG_FONT
|
20
20
|
|
21
21
|
|
22
22
|
def colormap(rgb=False):
|
@@ -114,7 +114,7 @@ def draw_label(image, label):
|
|
114
114
|
min_font_size = int(image_size[0] * 0.02)
|
115
115
|
max_font_size = int(image_size[0] * 0.05)
|
116
116
|
for font_size in range(max_font_size, min_font_size - 1, -1):
|
117
|
-
font = ImageFont.truetype(
|
117
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
|
118
118
|
if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
|
119
119
|
text_width_tmp, text_height_tmp = draw.textsize(label, font)
|
120
120
|
else:
|
@@ -123,7 +123,7 @@ def draw_label(image, label):
|
|
123
123
|
if text_width_tmp <= image_size[0]:
|
124
124
|
break
|
125
125
|
else:
|
126
|
-
font = ImageFont.truetype(
|
126
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, min_font_size)
|
127
127
|
color_list = colormap(rgb=True)
|
128
128
|
color = tuple(color_list[0])
|
129
129
|
font_color = tuple(font_colormap(3))
|
@@ -20,7 +20,7 @@ import numpy as np
|
|
20
20
|
|
21
21
|
from .....utils.deps import function_requires_deps, is_dep_available
|
22
22
|
from .....utils.file_interface import custom_open
|
23
|
-
from .....utils.fonts import
|
23
|
+
from .....utils.fonts import PINGFANG_FONT
|
24
24
|
|
25
25
|
if is_dep_available("matplotlib"):
|
26
26
|
import matplotlib.pyplot as plt
|
@@ -68,7 +68,7 @@ def deep_analyse(dataset_path, output):
|
|
68
68
|
if os_system == "windows":
|
69
69
|
plt.rcParams["font.sans-serif"] = "FangSong"
|
70
70
|
else:
|
71
|
-
font = font_manager.FontProperties(fname=
|
71
|
+
font = font_manager.FontProperties(fname=PINGFANG_FONT.path, size=10)
|
72
72
|
fig, ax = plt.subplots(figsize=(max(8, int(len(classes) / 5)), 5), dpi=300)
|
73
73
|
ax.bar(x, cnts_train_sorted, width=0.5, label="train")
|
74
74
|
ax.bar(x + width, cnts_val_sorted, width=0.5, label="val")
|
@@ -16,7 +16,7 @@ import numpy as np
|
|
16
16
|
import PIL
|
17
17
|
from PIL import ImageDraw, ImageFont
|
18
18
|
|
19
|
-
from ......utils.fonts import
|
19
|
+
from ......utils.fonts import PINGFANG_FONT
|
20
20
|
|
21
21
|
|
22
22
|
def colormap(rgb=False):
|
@@ -114,7 +114,7 @@ def draw_label(image, label, label_map_dict):
|
|
114
114
|
min_font_size = int(image_size[0] * 0.02)
|
115
115
|
max_font_size = int(image_size[0] * 0.05)
|
116
116
|
for font_size in range(max_font_size, min_font_size - 1, -1):
|
117
|
-
font = ImageFont.truetype(
|
117
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
|
118
118
|
if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
|
119
119
|
text_width_tmp, text_height_tmp = draw.textsize(
|
120
120
|
label_map_dict[int(label)], font
|
@@ -127,7 +127,7 @@ def draw_label(image, label, label_map_dict):
|
|
127
127
|
if text_width_tmp <= image_size[0]:
|
128
128
|
break
|
129
129
|
else:
|
130
|
-
font = ImageFont.truetype(
|
130
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, min_font_size)
|
131
131
|
color_list = colormap(rgb=True)
|
132
132
|
color = tuple(color_list[0])
|
133
133
|
font_color = tuple(font_colormap(3))
|
@@ -20,7 +20,7 @@ from collections import defaultdict
|
|
20
20
|
import numpy as np
|
21
21
|
|
22
22
|
from .....utils.deps import function_requires_deps, is_dep_available
|
23
|
-
from .....utils.fonts import
|
23
|
+
from .....utils.fonts import PINGFANG_FONT
|
24
24
|
|
25
25
|
if is_dep_available("matplotlib"):
|
26
26
|
import matplotlib.pyplot as plt
|
@@ -64,7 +64,7 @@ def deep_analyse(dataset_dir, output):
|
|
64
64
|
if os_system == "windows":
|
65
65
|
plt.rcParams["font.sans-serif"] = "FangSong"
|
66
66
|
else:
|
67
|
-
font = font_manager.FontProperties(fname=
|
67
|
+
font = font_manager.FontProperties(fname=PINGFANG_FONT.path)
|
68
68
|
fig, ax = plt.subplots(figsize=(max(8, int(len(classes) / 5)), 5), dpi=120)
|
69
69
|
ax.bar(x, cnts_train_sorted, width=0.5, label="train")
|
70
70
|
ax.bar(x + width, cnts_val_sorted, width=0.5, label="val")
|
@@ -18,7 +18,7 @@ from PIL import Image, ImageDraw, ImageFont
|
|
18
18
|
|
19
19
|
from ......utils import logging
|
20
20
|
from ......utils.deps import function_requires_deps, is_dep_available
|
21
|
-
from ......utils.fonts import
|
21
|
+
from ......utils.fonts import PINGFANG_FONT
|
22
22
|
|
23
23
|
if is_dep_available("pycocotools"):
|
24
24
|
from pycocotools.coco import COCO
|
@@ -124,7 +124,7 @@ def draw_bbox(image, coco_info: "COCO", img_id):
|
|
124
124
|
font_size = int(0.024 * int(image_info["width"])) + 2
|
125
125
|
except:
|
126
126
|
font_size = 12
|
127
|
-
font = ImageFont.truetype(
|
127
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
|
128
128
|
|
129
129
|
image = image.convert("RGB")
|
130
130
|
draw = ImageDraw.Draw(image)
|
@@ -21,7 +21,7 @@ from collections import defaultdict
|
|
21
21
|
import numpy as np
|
22
22
|
|
23
23
|
from paddlex.utils.deps import function_requires_deps, is_dep_available
|
24
|
-
from paddlex.utils.fonts import
|
24
|
+
from paddlex.utils.fonts import PINGFANG_FONT
|
25
25
|
|
26
26
|
if is_dep_available("matplotlib"):
|
27
27
|
import matplotlib.pyplot as plt
|
@@ -88,7 +88,7 @@ def deep_analyse(dataset_dir, output):
|
|
88
88
|
if os_system == "windows":
|
89
89
|
plt.rcParams["font.sans-serif"] = "FangSong"
|
90
90
|
else:
|
91
|
-
font = font_manager.FontProperties(fname=
|
91
|
+
font = font_manager.FontProperties(fname=PINGFANG_FONT.path)
|
92
92
|
fig, ax = plt.subplots(figsize=(max(8, int(len(classes) / 5)), 5), dpi=120)
|
93
93
|
ax.bar(x, cnts_train_sorted, width=0.5, label="train")
|
94
94
|
ax.bar(x + width, cnts_val_sorted, width=0.5, label="val")
|