paddlex 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/configs/modules/text_recognition/eslav_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/korean_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/latin_PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/pipelines/PP-DocTranslation.yaml +261 -0
- paddlex/inference/common/batch_sampler/__init__.py +1 -0
- paddlex/inference/common/batch_sampler/markdown_batch_sampler.py +116 -0
- paddlex/inference/common/result/base_cv_result.py +2 -3
- paddlex/inference/common/result/mixin.py +3 -1
- paddlex/inference/models/base/predictor/base_predictor.py +2 -0
- paddlex/inference/models/common/static_infer.py +2 -0
- paddlex/inference/models/common/vlm/generation/utils.py +2 -2
- paddlex/inference/models/formula_recognition/result.py +2 -2
- paddlex/inference/models/image_classification/result.py +3 -5
- paddlex/inference/models/image_multilabel_classification/result.py +2 -2
- paddlex/inference/models/object_detection/result.py +2 -2
- paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +3 -0
- paddlex/inference/models/text_recognition/predictor.py +51 -1
- paddlex/inference/models/text_recognition/result.py +5 -2
- paddlex/inference/models/video_classification/result.py +3 -3
- paddlex/inference/models/video_detection/result.py +2 -4
- paddlex/inference/pipelines/__init__.py +1 -0
- paddlex/inference/pipelines/attribute_recognition/result.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/__init__.py +1 -0
- paddlex/inference/pipelines/components/prompt_engineering/generate_translate_prompt.py +179 -0
- paddlex/inference/pipelines/doc_preprocessor/result.py +2 -2
- paddlex/inference/pipelines/formula_recognition/result.py +2 -2
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +2 -0
- paddlex/inference/pipelines/layout_parsing/result_v2.py +11 -4
- paddlex/inference/pipelines/ocr/pipeline.py +2 -0
- paddlex/inference/pipelines/ocr/result.py +11 -7
- paddlex/inference/pipelines/pp_doctranslation/__init__.py +15 -0
- paddlex/inference/pipelines/pp_doctranslation/pipeline.py +523 -0
- paddlex/inference/pipelines/pp_doctranslation/result.py +39 -0
- paddlex/inference/pipelines/pp_doctranslation/utils.py +260 -0
- paddlex/inference/pipelines/pp_shitu_v2/result.py +2 -2
- paddlex/inference/serving/basic_serving/_app.py +1 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +5 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -24
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +16 -26
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py +203 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +4 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +4 -2
- paddlex/inference/serving/infra/utils.py +22 -17
- paddlex/inference/serving/schemas/anomaly_detection.py +1 -0
- paddlex/inference/serving/schemas/doc_preprocessor.py +1 -0
- paddlex/inference/serving/schemas/face_recognition.py +1 -0
- paddlex/inference/serving/schemas/formula_recognition.py +1 -0
- paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -0
- paddlex/inference/serving/schemas/image_classification.py +1 -0
- paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -0
- paddlex/inference/serving/schemas/instance_segmentation.py +1 -0
- paddlex/inference/serving/schemas/layout_parsing.py +1 -0
- paddlex/inference/serving/schemas/object_detection.py +1 -0
- paddlex/inference/serving/schemas/ocr.py +1 -0
- paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -0
- paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -0
- paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -0
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +5 -4
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +6 -5
- paddlex/inference/serving/schemas/pp_doctranslation.py +115 -0
- paddlex/inference/serving/schemas/pp_shituv2.py +1 -0
- paddlex/inference/serving/schemas/pp_structurev3.py +2 -9
- paddlex/inference/serving/schemas/rotated_object_detection.py +1 -0
- paddlex/inference/serving/schemas/seal_recognition.py +1 -0
- paddlex/inference/serving/schemas/semantic_segmentation.py +1 -0
- paddlex/inference/serving/schemas/shared/ocr.py +8 -1
- paddlex/inference/serving/schemas/small_object_detection.py +1 -0
- paddlex/inference/serving/schemas/table_recognition.py +1 -0
- paddlex/inference/serving/schemas/table_recognition_v2.py +1 -0
- paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -0
- paddlex/inference/serving/schemas/ts_classification.py +1 -0
- paddlex/inference/serving/schemas/ts_forecast.py +1 -0
- paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -0
- paddlex/inference/utils/hpi.py +42 -14
- paddlex/inference/utils/hpi_model_info_collection.json +0 -2
- paddlex/inference/utils/io/__init__.py +1 -0
- paddlex/inference/utils/io/readers.py +46 -0
- paddlex/inference/utils/io/writers.py +2 -0
- paddlex/inference/utils/official_models.py +7 -0
- paddlex/inference/utils/pp_option.py +34 -18
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +3 -3
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/m_3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/text_recognition/model_list.py +3 -0
- paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +27 -0
- paddlex/repo_manager/meta.py +3 -3
- paddlex/utils/device.py +4 -1
- paddlex/utils/download.py +10 -7
- paddlex/utils/{fonts/__init__.py → fonts.py} +45 -26
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/METADATA +25 -1
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/RECORD +134 -122
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/LICENSE +0 -0
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/WHEEL +0 -0
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.2.dist-info → paddlex-3.1.0.dist-info}/top_level.txt +0 -0
@@ -13,6 +13,17 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
from ....modules.text_recognition.model_list import MODELS
|
16
|
+
from ....utils.fonts import (
|
17
|
+
ARABIC_FONT,
|
18
|
+
CYRILLIC_FONT,
|
19
|
+
DEVANAGARI_FONT,
|
20
|
+
KANNADA_FONT,
|
21
|
+
KOREAN_FONT,
|
22
|
+
LATIN_FONT,
|
23
|
+
SIMFANG_FONT,
|
24
|
+
TAMIL_FONT,
|
25
|
+
TELUGU_FONT,
|
26
|
+
)
|
16
27
|
from ....utils.func_register import FuncRegister
|
17
28
|
from ...common.batch_sampler import ImageBatchSampler
|
18
29
|
from ...common.reader import ReadImage
|
@@ -31,6 +42,7 @@ class TextRecPredictor(BasePredictor):
|
|
31
42
|
def __init__(self, *args, input_shape=None, **kwargs):
|
32
43
|
super().__init__(*args, **kwargs)
|
33
44
|
self.input_shape = input_shape
|
45
|
+
self.vis_font = self.get_vis_font()
|
34
46
|
self.pre_tfs, self.infer, self.post_op = self._build()
|
35
47
|
|
36
48
|
def _build_batch_sampler(self):
|
@@ -68,6 +80,7 @@ class TextRecPredictor(BasePredictor):
|
|
68
80
|
"input_img": batch_raw_imgs,
|
69
81
|
"rec_text": texts,
|
70
82
|
"rec_score": scores,
|
83
|
+
"vis_font": [self.vis_font] * len(batch_raw_imgs),
|
71
84
|
}
|
72
85
|
|
73
86
|
@register("DecodeImage")
|
@@ -76,7 +89,7 @@ class TextRecPredictor(BasePredictor):
|
|
76
89
|
return "Read", ReadImage(format=img_mode)
|
77
90
|
|
78
91
|
@register("RecResizeImg")
|
79
|
-
def build_resize(self, image_shape):
|
92
|
+
def build_resize(self, image_shape, **kwargs):
|
80
93
|
return "ReisizeNorm", OCRReisizeNormImg(
|
81
94
|
rec_image_shape=image_shape, input_shape=self.input_shape
|
82
95
|
)
|
@@ -96,3 +109,40 @@ class TextRecPredictor(BasePredictor):
|
|
96
109
|
@register("KeepKeys")
|
97
110
|
def foo(self, *args, **kwargs):
|
98
111
|
return None, None
|
112
|
+
|
113
|
+
def get_vis_font(self):
|
114
|
+
if self.model_name.startswith("PP-OCR"):
|
115
|
+
return SIMFANG_FONT
|
116
|
+
|
117
|
+
if self.model_name in (
|
118
|
+
"latin_PP-OCRv3_mobile_rec",
|
119
|
+
"latin_PP-OCRv5_mobile_rec",
|
120
|
+
):
|
121
|
+
return LATIN_FONT
|
122
|
+
|
123
|
+
if self.model_name in (
|
124
|
+
"cyrillic_PP-OCRv3_mobile_rec",
|
125
|
+
"eslav_PP-OCRv5_mobile_rec",
|
126
|
+
):
|
127
|
+
return CYRILLIC_FONT
|
128
|
+
|
129
|
+
if self.model_name in (
|
130
|
+
"korean_PP-OCRv3_mobile_rec",
|
131
|
+
"korean_PP-OCRv5_mobile_rec",
|
132
|
+
):
|
133
|
+
return KOREAN_FONT
|
134
|
+
|
135
|
+
if self.model_name == "arabic_PP-OCRv3_mobile_rec":
|
136
|
+
return ARABIC_FONT
|
137
|
+
|
138
|
+
if self.model_name == "ka_PP-OCRv3_mobile_rec":
|
139
|
+
return KANNADA_FONT
|
140
|
+
|
141
|
+
if self.model_name == "te_PP-OCRv3_mobile_rec":
|
142
|
+
return TELUGU_FONT
|
143
|
+
|
144
|
+
if self.model_name == "ta_PP-OCRv3_mobile_rec":
|
145
|
+
return TAMIL_FONT
|
146
|
+
|
147
|
+
if self.model_name == "devanagari_PP-OCRv3_mobile_rec":
|
148
|
+
return DEVANAGARI_FONT
|
@@ -17,7 +17,7 @@ import copy
|
|
17
17
|
import PIL
|
18
18
|
from PIL import Image, ImageDraw, ImageFont
|
19
19
|
|
20
|
-
from ....utils.fonts import
|
20
|
+
from ....utils.fonts import SIMFANG_FONT
|
21
21
|
from ...common.result import BaseCVResult, JsonMixin
|
22
22
|
|
23
23
|
|
@@ -26,11 +26,13 @@ class TextRecResult(BaseCVResult):
|
|
26
26
|
def _to_str(self, *args, **kwargs):
|
27
27
|
data = copy.deepcopy(self)
|
28
28
|
data.pop("input_img")
|
29
|
+
data.pop("vis_font")
|
29
30
|
return JsonMixin._to_str(data, *args, **kwargs)
|
30
31
|
|
31
32
|
def _to_json(self, *args, **kwargs):
|
32
33
|
data = copy.deepcopy(self)
|
33
34
|
data.pop("input_img")
|
35
|
+
data.pop("vis_font")
|
34
36
|
return JsonMixin._to_json(data, *args, **kwargs)
|
35
37
|
|
36
38
|
def _to_img(self):
|
@@ -38,10 +40,11 @@ class TextRecResult(BaseCVResult):
|
|
38
40
|
image = Image.fromarray(self["input_img"][:, :, ::-1])
|
39
41
|
rec_text = self["rec_text"]
|
40
42
|
rec_score = self["rec_score"]
|
43
|
+
vis_font = self["vis_font"] if self["vis_font"] is not None else SIMFANG_FONT
|
41
44
|
image = image.convert("RGB")
|
42
45
|
image_width, image_height = image.size
|
43
46
|
text = f"{rec_text} ({rec_score})"
|
44
|
-
font = self.adjust_font_size(image_width, text,
|
47
|
+
font = self.adjust_font_size(image_width, text, vis_font.path)
|
45
48
|
row_height = font.getbbox(text)[3]
|
46
49
|
new_image_height = image_height + int(row_height * 1.2)
|
47
50
|
new_image = Image.new("RGB", (image_width, new_image_height), (255, 255, 255))
|
@@ -17,7 +17,7 @@ import PIL
|
|
17
17
|
from PIL import Image, ImageDraw, ImageFont
|
18
18
|
|
19
19
|
from ....utils.deps import class_requires_deps, is_dep_available
|
20
|
-
from ....utils.fonts import
|
20
|
+
from ....utils.fonts import PINGFANG_FONT
|
21
21
|
from ...common.result import BaseVideoResult
|
22
22
|
from ...utils.color_map import get_colormap
|
23
23
|
from ...utils.io import VideoReader
|
@@ -47,7 +47,7 @@ class TopkVideoResult(BaseVideoResult):
|
|
47
47
|
max_font_size = int(image_size[0] * 0.05)
|
48
48
|
for font_size in range(max_font_size, min_font_size - 1, -1):
|
49
49
|
font = ImageFont.truetype(
|
50
|
-
|
50
|
+
PINGFANG_FONT.path, font_size, encoding="utf-8"
|
51
51
|
)
|
52
52
|
if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
|
53
53
|
text_width_tmp, text_height_tmp = draw.textsize(label_str, font)
|
@@ -57,7 +57,7 @@ class TopkVideoResult(BaseVideoResult):
|
|
57
57
|
if text_width_tmp <= image_size[0]:
|
58
58
|
break
|
59
59
|
else:
|
60
|
-
font = ImageFont.truetype(
|
60
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, min_font_size)
|
61
61
|
color_list = get_colormap(rgb=True)
|
62
62
|
color = tuple(color_list[0])
|
63
63
|
font_color = tuple(self._get_font_colormap(3))
|
@@ -19,7 +19,7 @@ import PIL
|
|
19
19
|
from PIL import Image, ImageDraw, ImageFont
|
20
20
|
|
21
21
|
from ....utils.deps import class_requires_deps, is_dep_available
|
22
|
-
from ....utils.fonts import PINGFANG_FONT_FILE_PATH
|
22
|
+
from ....utils.fonts import PINGFANG_FONT
|
23
23
|
from ...common.result import BaseVideoResult
|
24
24
|
from ...utils.color_map import get_colormap
|
25
25
|
from ...utils.io import VideoReader
|
@@ -46,9 +46,7 @@ class DetVideoResult(BaseVideoResult):
|
|
46
46
|
image = Image.fromarray(video[i].asnumpy())
|
47
47
|
image.size
|
48
48
|
font_size = int(0.018 * int(image.width)) + 2
|
49
|
-
font = ImageFont.truetype(
|
50
|
-
PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8"
|
51
|
-
)
|
49
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
|
52
50
|
draw_thickness = int(max(image.size) * 0.002)
|
53
51
|
draw = ImageDraw.Draw(image)
|
54
52
|
results = self["result"][i]
|
@@ -42,6 +42,7 @@ from .ocr import OCRPipeline
|
|
42
42
|
from .open_vocabulary_detection import OpenVocabularyDetectionPipeline
|
43
43
|
from .open_vocabulary_segmentation import OpenVocabularySegmentationPipeline
|
44
44
|
from .pp_chatocr import PP_ChatOCRv3_Pipeline, PP_ChatOCRv4_Pipeline
|
45
|
+
from .pp_doctranslation import PP_DocTranslation_Pipeline
|
45
46
|
from .pp_shitu_v2 import ShiTuV2Pipeline
|
46
47
|
from .rotated_object_detection import RotatedObjectDetectionPipeline
|
47
48
|
from .seal_recognition import SealRecognitionPipeline
|
@@ -18,7 +18,7 @@ import PIL
|
|
18
18
|
from PIL import Image, ImageDraw, ImageFont
|
19
19
|
|
20
20
|
from ....utils.deps import class_requires_deps, is_dep_available
|
21
|
-
from ....utils.fonts import
|
21
|
+
from ....utils.fonts import PINGFANG_FONT
|
22
22
|
from ...common.result import BaseCVResult, JsonMixin
|
23
23
|
from ...utils.color_map import font_colormap, get_colormap
|
24
24
|
|
@@ -35,7 +35,7 @@ def draw_attribute_result(img, boxes):
|
|
35
35
|
img (PIL.Image.Image): visualized image
|
36
36
|
"""
|
37
37
|
font_size = int((0.024 * int(img.width) + 2) * 0.7)
|
38
|
-
font = ImageFont.truetype(
|
38
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
|
39
39
|
|
40
40
|
draw_thickness = int(max(img.size) * 0.005)
|
41
41
|
draw = ImageDraw.Draw(img)
|
@@ -0,0 +1,179 @@
|
|
1
|
+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from typing import Dict
|
16
|
+
|
17
|
+
from .base import BaseGeneratePrompt
|
18
|
+
|
19
|
+
|
20
|
+
class GenerateTranslatePrompt(BaseGeneratePrompt):
|
21
|
+
"""Generate Ensemble Prompt"""
|
22
|
+
|
23
|
+
entities = ["translate_prompt"]
|
24
|
+
|
25
|
+
def __init__(self, config: Dict) -> None:
|
26
|
+
"""Initializes the GenerateTranslatePrompt instance with the given configuration.
|
27
|
+
|
28
|
+
Args:
|
29
|
+
config (Dict): A dictionary containing configuration settings.
|
30
|
+
- task_type (str): The type of task to generate a prompt for, in the support entities list.
|
31
|
+
- task_description (str, optional): A description of the task. Defaults to an empty string.
|
32
|
+
- output_format (str, optional): The desired output format. Defaults to an empty string.
|
33
|
+
- rules_str (str, optional): A string representing rules for the task. Defaults to an empty string.
|
34
|
+
- few_shot_demo_text_content (str, optional): Text content for few-shot demos. Defaults to an empty string.
|
35
|
+
- few_shot_demo_key_value_list (str, optional): A key-value list for few-shot demos. Defaults to an empty string.
|
36
|
+
|
37
|
+
Raises:
|
38
|
+
ValueError: If the task type is not in the allowed entities for GenerateKIEPrompt.
|
39
|
+
"""
|
40
|
+
super().__init__()
|
41
|
+
|
42
|
+
task_type = config.get("task_type", "")
|
43
|
+
task_description = config.get("task_description", "")
|
44
|
+
output_format = config.get("output_format", "")
|
45
|
+
rules_str = config.get("rules_str", "")
|
46
|
+
few_shot_demo_text_content = config.get("few_shot_demo_text_content", "")
|
47
|
+
few_shot_demo_key_value_list = config.get("few_shot_demo_key_value_list", "")
|
48
|
+
|
49
|
+
if task_description is None:
|
50
|
+
task_description = ""
|
51
|
+
|
52
|
+
if output_format is None:
|
53
|
+
output_format = ""
|
54
|
+
|
55
|
+
if rules_str is None:
|
56
|
+
rules_str = ""
|
57
|
+
|
58
|
+
if few_shot_demo_text_content is None:
|
59
|
+
few_shot_demo_text_content = ""
|
60
|
+
|
61
|
+
if few_shot_demo_key_value_list is None:
|
62
|
+
few_shot_demo_key_value_list = ""
|
63
|
+
|
64
|
+
if task_type not in self.entities:
|
65
|
+
raise ValueError(
|
66
|
+
f"task type must be in {self.entities} of GenerateEnsemblePrompt."
|
67
|
+
)
|
68
|
+
|
69
|
+
self.task_type = task_type
|
70
|
+
self.task_description = task_description
|
71
|
+
self.output_format = output_format
|
72
|
+
self.rules_str = rules_str
|
73
|
+
self.few_shot_demo_text_content = few_shot_demo_text_content
|
74
|
+
self.few_shot_demo_key_value_list = few_shot_demo_key_value_list
|
75
|
+
|
76
|
+
def generate_prompt(
|
77
|
+
self,
|
78
|
+
original_text: str,
|
79
|
+
language: str,
|
80
|
+
task_description: str = None,
|
81
|
+
output_format: str = None,
|
82
|
+
rules_str: str = None,
|
83
|
+
few_shot_demo_text_content: str = None,
|
84
|
+
few_shot_demo_key_value_list: str = None,
|
85
|
+
) -> str:
|
86
|
+
"""Generates a prompt based on the given parameters.
|
87
|
+
Args:
|
88
|
+
key (str): the input question.
|
89
|
+
result_methodA (str): the result of method A.
|
90
|
+
result_methodB (str): the result of method B.
|
91
|
+
task_description (str, optional): A description of the task. Defaults to None.
|
92
|
+
output_format (str, optional): The desired output format. Defaults to None.
|
93
|
+
rules_str (str, optional): A string containing rules or instructions. Defaults to None.
|
94
|
+
few_shot_demo_text_content (str, optional): Text content for few-shot demos. Defaults to None.
|
95
|
+
few_shot_demo_key_value_list (str, optional): Key-value list for few-shot demos. Defaults to None.
|
96
|
+
Returns:
|
97
|
+
str: The generated prompt.
|
98
|
+
|
99
|
+
Raises:
|
100
|
+
ValueError: If the task_type is not supported.
|
101
|
+
"""
|
102
|
+
language_map = {
|
103
|
+
"chinese": "简体中文",
|
104
|
+
"zh": "简体中文",
|
105
|
+
"english": "英语",
|
106
|
+
"en": "英语",
|
107
|
+
"french": "法语",
|
108
|
+
"fr": "法语",
|
109
|
+
"spanish": "西班牙语",
|
110
|
+
"es": "西班牙语",
|
111
|
+
"german": "德语",
|
112
|
+
"de": "德语",
|
113
|
+
"japanese": "日语",
|
114
|
+
"ja": "日语",
|
115
|
+
"korean": "韩语",
|
116
|
+
"ko": "韩语",
|
117
|
+
"russian": "俄语",
|
118
|
+
"ru": "俄语",
|
119
|
+
"italian": "意大利语",
|
120
|
+
"it": "意大利语",
|
121
|
+
"portuguese": "葡萄牙语",
|
122
|
+
"pt": "葡萄牙语",
|
123
|
+
"arabic": "阿拉伯语",
|
124
|
+
"ar": "阿拉伯语",
|
125
|
+
"hindi": "印地语",
|
126
|
+
"hi": "印地语",
|
127
|
+
"dutch": "荷兰语",
|
128
|
+
"nl": "荷兰语",
|
129
|
+
"swedish": "瑞典语",
|
130
|
+
"sv": "瑞典语",
|
131
|
+
"turkish": "土耳其语",
|
132
|
+
"tr": "土耳其语",
|
133
|
+
"thai": "泰语",
|
134
|
+
"th": "泰语",
|
135
|
+
"vietnamese": "越南语",
|
136
|
+
"vi": "越南语",
|
137
|
+
"hebrew": "希伯来语",
|
138
|
+
"he": "希伯来语",
|
139
|
+
"greek": "希腊语",
|
140
|
+
"el": "希腊语",
|
141
|
+
"polish": "波兰语",
|
142
|
+
"pl": "波兰语",
|
143
|
+
}
|
144
|
+
|
145
|
+
if task_description is None:
|
146
|
+
task_description = self.task_description
|
147
|
+
|
148
|
+
if output_format is None:
|
149
|
+
output_format = self.output_format
|
150
|
+
|
151
|
+
if rules_str is None:
|
152
|
+
rules_str = self.rules_str
|
153
|
+
|
154
|
+
if few_shot_demo_text_content is None:
|
155
|
+
few_shot_demo_text_content = self.few_shot_demo_text_content
|
156
|
+
|
157
|
+
if few_shot_demo_text_content:
|
158
|
+
few_shot_demo_text_content = (
|
159
|
+
f"这里是一些示例:\n{few_shot_demo_text_content}\n"
|
160
|
+
)
|
161
|
+
|
162
|
+
if few_shot_demo_key_value_list is None:
|
163
|
+
few_shot_demo_key_value_list = self.few_shot_demo_key_value_list
|
164
|
+
|
165
|
+
if few_shot_demo_key_value_list:
|
166
|
+
few_shot_demo_key_value_list = f"这里是一些专业术语对照表,对照表中单词要参考对照表翻译:\n{few_shot_demo_key_value_list}\n"
|
167
|
+
|
168
|
+
prompt = f"""{task_description}{rules_str}{output_format}{few_shot_demo_text_content}{few_shot_demo_key_value_list}"""
|
169
|
+
|
170
|
+
language_name = language_map.get(language, language)
|
171
|
+
task_type = self.task_type
|
172
|
+
if task_type == "translate_prompt":
|
173
|
+
prompt += f"""下面正式开始:
|
174
|
+
\n将以下内容翻译成:{language_name}
|
175
|
+
\n原文:{original_text}
|
176
|
+
"""
|
177
|
+
else:
|
178
|
+
raise ValueError(f"{self.task_type} is currently not supported.")
|
179
|
+
return prompt
|
@@ -16,7 +16,7 @@ from typing import Dict
|
|
16
16
|
|
17
17
|
from PIL import Image, ImageDraw
|
18
18
|
|
19
|
-
from ....utils.fonts import
|
19
|
+
from ....utils.fonts import PINGFANG_FONT, create_font
|
20
20
|
from ...common.result import BaseCVResult, JsonMixin
|
21
21
|
|
22
22
|
|
@@ -55,7 +55,7 @@ class DocPreprocessorResult(BaseCVResult):
|
|
55
55
|
beg_w_list = [0, w1, w1 + w2]
|
56
56
|
for tno in range(len(txt_list)):
|
57
57
|
txt = txt_list[tno]
|
58
|
-
font = create_font(txt, (region_w_list[tno], 20),
|
58
|
+
font = create_font(txt, (region_w_list[tno], 20), PINGFANG_FONT.path)
|
59
59
|
draw_text.text(
|
60
60
|
[10 + beg_w_list[tno], h + 2], txt, fill=(0, 0, 0), font=font
|
61
61
|
)
|
@@ -24,7 +24,7 @@ from PIL import Image, ImageDraw
|
|
24
24
|
|
25
25
|
from ....utils import logging
|
26
26
|
from ....utils.deps import class_requires_deps, function_requires_deps, is_dep_available
|
27
|
-
from ....utils.fonts import
|
27
|
+
from ....utils.fonts import PINGFANG_FONT
|
28
28
|
from ...common.result import BaseCVResult, JsonMixin
|
29
29
|
from ...models.formula_recognition.result import (
|
30
30
|
crop_white_area,
|
@@ -277,6 +277,6 @@ def draw_box_formula_fine(
|
|
277
277
|
)
|
278
278
|
else:
|
279
279
|
img_right_text = draw_box_txt_fine(
|
280
|
-
img_size, box, "Rendering Failed",
|
280
|
+
img_size, box, "Rendering Failed", PINGFANG_FONT.path
|
281
281
|
)
|
282
282
|
return img_right_text
|
@@ -926,6 +926,8 @@ class _LayoutParsingPipelineV2(BasePipeline):
|
|
926
926
|
Predicts the layout parsing result for the given input.
|
927
927
|
|
928
928
|
Args:
|
929
|
+
input (Union[str, list[str], np.ndarray, list[np.ndarray]]): Input image path, list of image paths,
|
930
|
+
numpy array of an image, or list of numpy arrays.
|
929
931
|
use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
|
930
932
|
use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
|
931
933
|
use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
|
@@ -21,7 +21,7 @@ from typing import List
|
|
21
21
|
import numpy as np
|
22
22
|
from PIL import Image, ImageDraw, ImageFont
|
23
23
|
|
24
|
-
from ....utils.fonts import
|
24
|
+
from ....utils.fonts import PINGFANG_FONT
|
25
25
|
from ...common.result import (
|
26
26
|
BaseCVResult,
|
27
27
|
HtmlMixin,
|
@@ -194,7 +194,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
194
194
|
image = Image.fromarray(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
|
195
195
|
draw = ImageDraw.Draw(image, "RGBA")
|
196
196
|
font_size = int(0.018 * int(image.width)) + 2
|
197
|
-
font = ImageFont.truetype(
|
197
|
+
font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
|
198
198
|
parsing_result: List[LayoutBlock] = self["parsing_res_list"]
|
199
199
|
for block in parsing_result:
|
200
200
|
bbox = block.bbox
|
@@ -435,8 +435,8 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
435
435
|
|
436
436
|
markdown_content = ""
|
437
437
|
last_label = None
|
438
|
-
seg_start_flag =
|
439
|
-
seg_end_flag =
|
438
|
+
seg_start_flag = True
|
439
|
+
seg_end_flag = True
|
440
440
|
prev_block = None
|
441
441
|
page_first_element_seg_start_flag = None
|
442
442
|
page_last_element_seg_end_flag = None
|
@@ -468,8 +468,15 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
468
468
|
else handle_func(block)
|
469
469
|
)
|
470
470
|
last_label = label
|
471
|
+
page_first_element_seg_start_flag = (
|
472
|
+
True
|
473
|
+
if page_first_element_seg_start_flag is None
|
474
|
+
else page_first_element_seg_start_flag
|
475
|
+
)
|
471
476
|
page_last_element_seg_end_flag = seg_end_flag
|
472
477
|
|
478
|
+
markdown_info["page_index"] = self["page_index"]
|
479
|
+
markdown_info["input_path"] = self["input_path"]
|
473
480
|
markdown_info["markdown_texts"] = markdown_content
|
474
481
|
markdown_info["page_continuation_flags"] = (
|
475
482
|
page_first_element_seg_start_flag,
|
@@ -368,6 +368,7 @@ class _OCRPipeline(BasePipeline):
|
|
368
368
|
"rec_texts": [],
|
369
369
|
"rec_scores": [],
|
370
370
|
"rec_polys": [],
|
371
|
+
"vis_fonts": [],
|
371
372
|
}
|
372
373
|
for input_path, page_index, doc_preprocessor_res, dt_polys in zip(
|
373
374
|
batch_data.input_paths,
|
@@ -439,6 +440,7 @@ class _OCRPipeline(BasePipeline):
|
|
439
440
|
if rec_res["rec_score"] >= text_rec_score_thresh:
|
440
441
|
res["rec_texts"].append(rec_res["rec_text"])
|
441
442
|
res["rec_scores"].append(rec_res["rec_score"])
|
443
|
+
res["vis_fonts"].append(rec_res["vis_font"])
|
442
444
|
res["rec_polys"].append(dt_polys[sno])
|
443
445
|
|
444
446
|
for res in results:
|
@@ -20,7 +20,7 @@ import numpy as np
|
|
20
20
|
from PIL import Image, ImageDraw
|
21
21
|
|
22
22
|
from ....utils.deps import class_requires_deps, function_requires_deps, is_dep_available
|
23
|
-
from ....utils.fonts import SIMFANG_FONT_FILE_PATH, create_font, create_font_vertical
|
23
|
+
from ....utils.fonts import SIMFANG_FONT, create_font, create_font_vertical
|
24
24
|
from ...common.result import BaseCVResult, JsonMixin
|
25
25
|
|
26
26
|
if is_dep_available("opencv-contrib-python"):
|
@@ -82,6 +82,11 @@ class OCRResult(BaseCVResult):
|
|
82
82
|
random.seed(0)
|
83
83
|
draw_left = ImageDraw.Draw(img_left)
|
84
84
|
for idx, (box, txt) in enumerate(zip(boxes, txts)):
|
85
|
+
vis_font = (
|
86
|
+
self["vis_fonts"][idx]
|
87
|
+
if self["vis_fonts"][idx] is not None
|
88
|
+
else SIMFANG_FONT
|
89
|
+
)
|
85
90
|
try:
|
86
91
|
color = (
|
87
92
|
random.randint(0, 255),
|
@@ -91,17 +96,16 @@ class OCRResult(BaseCVResult):
|
|
91
96
|
box = np.array(box)
|
92
97
|
if len(box) > 4:
|
93
98
|
pts = [(x, y) for x, y in box.tolist()]
|
94
|
-
draw_left.polygon(pts, outline=color, width=8)
|
99
|
+
draw_left.polygon(pts, outline=color, width=8, fill=color)
|
95
100
|
box = self.get_minarea_rect(box)
|
96
101
|
height = int(0.5 * (max(box[:, 1]) - min(box[:, 1])))
|
97
102
|
box[:2, 1] = np.mean(box[:, 1])
|
98
103
|
box[2:, 1] = np.mean(box[:, 1]) + min(20, height)
|
99
|
-
|
100
|
-
|
104
|
+
else:
|
105
|
+
box_pts = [(int(x), int(y)) for x, y in box.tolist()]
|
106
|
+
draw_left.polygon(box_pts, fill=color)
|
101
107
|
|
102
|
-
img_right_text = draw_box_txt_fine(
|
103
|
-
(w, h), box, txt, SIMFANG_FONT_FILE_PATH
|
104
|
-
)
|
108
|
+
img_right_text = draw_box_txt_fine((w, h), box, txt, vis_font.path)
|
105
109
|
pts = np.array(box, np.int32).reshape((-1, 1, 2))
|
106
110
|
cv2.polylines(img_right_text, [pts], True, color, 1)
|
107
111
|
img_right = cv2.bitwise_and(img_right, img_right_text)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from .pipeline import PP_DocTranslation_Pipeline
|