paddlex 3.0.3__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. paddlex/.version +1 -1
  2. paddlex/configs/modules/text_recognition/eslav_PP-OCRv5_mobile_rec.yaml +39 -0
  3. paddlex/configs/modules/text_recognition/korean_PP-OCRv5_mobile_rec.yaml +39 -0
  4. paddlex/configs/modules/text_recognition/latin_PP-OCRv5_mobile_rec.yaml +39 -0
  5. paddlex/configs/pipelines/PP-DocTranslation.yaml +261 -0
  6. paddlex/inference/common/batch_sampler/__init__.py +1 -0
  7. paddlex/inference/common/batch_sampler/markdown_batch_sampler.py +116 -0
  8. paddlex/inference/common/result/base_cv_result.py +2 -3
  9. paddlex/inference/common/result/mixin.py +3 -1
  10. paddlex/inference/models/common/vlm/generation/utils.py +2 -2
  11. paddlex/inference/models/formula_recognition/result.py +2 -2
  12. paddlex/inference/models/image_classification/result.py +3 -5
  13. paddlex/inference/models/image_multilabel_classification/result.py +2 -2
  14. paddlex/inference/models/object_detection/result.py +2 -2
  15. paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +3 -0
  16. paddlex/inference/models/text_recognition/predictor.py +51 -1
  17. paddlex/inference/models/text_recognition/result.py +5 -2
  18. paddlex/inference/models/video_classification/result.py +3 -3
  19. paddlex/inference/models/video_detection/result.py +2 -4
  20. paddlex/inference/pipelines/__init__.py +1 -0
  21. paddlex/inference/pipelines/attribute_recognition/result.py +2 -2
  22. paddlex/inference/pipelines/components/prompt_engineering/__init__.py +1 -0
  23. paddlex/inference/pipelines/components/prompt_engineering/generate_translate_prompt.py +179 -0
  24. paddlex/inference/pipelines/doc_preprocessor/result.py +2 -2
  25. paddlex/inference/pipelines/formula_recognition/result.py +2 -2
  26. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +2 -0
  27. paddlex/inference/pipelines/layout_parsing/result_v2.py +4 -2
  28. paddlex/inference/pipelines/ocr/pipeline.py +2 -0
  29. paddlex/inference/pipelines/ocr/result.py +11 -7
  30. paddlex/inference/pipelines/pp_doctranslation/__init__.py +15 -0
  31. paddlex/inference/pipelines/pp_doctranslation/pipeline.py +523 -0
  32. paddlex/inference/pipelines/pp_doctranslation/result.py +39 -0
  33. paddlex/inference/pipelines/pp_doctranslation/utils.py +260 -0
  34. paddlex/inference/pipelines/pp_shitu_v2/result.py +2 -2
  35. paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +4 -2
  36. paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +5 -1
  37. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +4 -2
  38. paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +4 -2
  39. paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +4 -2
  40. paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +4 -2
  41. paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +4 -2
  42. paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +4 -2
  43. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +4 -2
  44. paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +4 -2
  45. paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +4 -2
  46. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +4 -2
  47. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +4 -2
  48. paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +4 -2
  49. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -24
  50. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +16 -26
  51. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_doctranslation.py +203 -0
  52. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +4 -2
  53. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +4 -2
  54. paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +4 -2
  55. paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +4 -2
  56. paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +4 -2
  57. paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +4 -2
  58. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +4 -2
  59. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -2
  60. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +4 -2
  61. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +4 -2
  62. paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +4 -2
  63. paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +4 -2
  64. paddlex/inference/serving/schemas/anomaly_detection.py +1 -0
  65. paddlex/inference/serving/schemas/doc_preprocessor.py +1 -0
  66. paddlex/inference/serving/schemas/face_recognition.py +1 -0
  67. paddlex/inference/serving/schemas/formula_recognition.py +1 -0
  68. paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -0
  69. paddlex/inference/serving/schemas/image_classification.py +1 -0
  70. paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -0
  71. paddlex/inference/serving/schemas/instance_segmentation.py +1 -0
  72. paddlex/inference/serving/schemas/layout_parsing.py +1 -0
  73. paddlex/inference/serving/schemas/object_detection.py +1 -0
  74. paddlex/inference/serving/schemas/ocr.py +1 -0
  75. paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -0
  76. paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -0
  77. paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -0
  78. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +5 -4
  79. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +6 -5
  80. paddlex/inference/serving/schemas/pp_doctranslation.py +115 -0
  81. paddlex/inference/serving/schemas/pp_shituv2.py +1 -0
  82. paddlex/inference/serving/schemas/pp_structurev3.py +2 -9
  83. paddlex/inference/serving/schemas/rotated_object_detection.py +1 -0
  84. paddlex/inference/serving/schemas/seal_recognition.py +1 -0
  85. paddlex/inference/serving/schemas/semantic_segmentation.py +1 -0
  86. paddlex/inference/serving/schemas/shared/ocr.py +8 -1
  87. paddlex/inference/serving/schemas/small_object_detection.py +1 -0
  88. paddlex/inference/serving/schemas/table_recognition.py +1 -0
  89. paddlex/inference/serving/schemas/table_recognition_v2.py +1 -0
  90. paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -0
  91. paddlex/inference/serving/schemas/ts_classification.py +1 -0
  92. paddlex/inference/serving/schemas/ts_forecast.py +1 -0
  93. paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -0
  94. paddlex/inference/utils/io/__init__.py +1 -0
  95. paddlex/inference/utils/io/readers.py +46 -0
  96. paddlex/inference/utils/io/writers.py +2 -0
  97. paddlex/inference/utils/official_models.py +7 -0
  98. paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -2
  99. paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  100. paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  101. paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  102. paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  103. paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  104. paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +3 -3
  105. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  106. paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  107. paddlex/modules/m_3d_bev_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  108. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  109. paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  110. paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  111. paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +2 -2
  112. paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  113. paddlex/modules/text_recognition/model_list.py +3 -0
  114. paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  115. paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  116. paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +2 -2
  117. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +27 -0
  118. paddlex/repo_manager/meta.py +3 -3
  119. paddlex/utils/device.py +4 -1
  120. paddlex/utils/{fonts/__init__.py → fonts.py} +45 -26
  121. {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/METADATA +25 -1
  122. {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/RECORD +126 -114
  123. {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/LICENSE +0 -0
  124. {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/WHEEL +0 -0
  125. {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/entry_points.txt +0 -0
  126. {paddlex-3.0.3.dist-info → paddlex-3.1.0.dist-info}/top_level.txt +0 -0
@@ -13,6 +13,17 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from ....modules.text_recognition.model_list import MODELS
16
+ from ....utils.fonts import (
17
+ ARABIC_FONT,
18
+ CYRILLIC_FONT,
19
+ DEVANAGARI_FONT,
20
+ KANNADA_FONT,
21
+ KOREAN_FONT,
22
+ LATIN_FONT,
23
+ SIMFANG_FONT,
24
+ TAMIL_FONT,
25
+ TELUGU_FONT,
26
+ )
16
27
  from ....utils.func_register import FuncRegister
17
28
  from ...common.batch_sampler import ImageBatchSampler
18
29
  from ...common.reader import ReadImage
@@ -31,6 +42,7 @@ class TextRecPredictor(BasePredictor):
31
42
  def __init__(self, *args, input_shape=None, **kwargs):
32
43
  super().__init__(*args, **kwargs)
33
44
  self.input_shape = input_shape
45
+ self.vis_font = self.get_vis_font()
34
46
  self.pre_tfs, self.infer, self.post_op = self._build()
35
47
 
36
48
  def _build_batch_sampler(self):
@@ -68,6 +80,7 @@ class TextRecPredictor(BasePredictor):
68
80
  "input_img": batch_raw_imgs,
69
81
  "rec_text": texts,
70
82
  "rec_score": scores,
83
+ "vis_font": [self.vis_font] * len(batch_raw_imgs),
71
84
  }
72
85
 
73
86
  @register("DecodeImage")
@@ -76,7 +89,7 @@ class TextRecPredictor(BasePredictor):
76
89
  return "Read", ReadImage(format=img_mode)
77
90
 
78
91
  @register("RecResizeImg")
79
- def build_resize(self, image_shape):
92
+ def build_resize(self, image_shape, **kwargs):
80
93
  return "ReisizeNorm", OCRReisizeNormImg(
81
94
  rec_image_shape=image_shape, input_shape=self.input_shape
82
95
  )
@@ -96,3 +109,40 @@ class TextRecPredictor(BasePredictor):
96
109
  @register("KeepKeys")
97
110
  def foo(self, *args, **kwargs):
98
111
  return None, None
112
+
113
+ def get_vis_font(self):
114
+ if self.model_name.startswith("PP-OCR"):
115
+ return SIMFANG_FONT
116
+
117
+ if self.model_name in (
118
+ "latin_PP-OCRv3_mobile_rec",
119
+ "latin_PP-OCRv5_mobile_rec",
120
+ ):
121
+ return LATIN_FONT
122
+
123
+ if self.model_name in (
124
+ "cyrillic_PP-OCRv3_mobile_rec",
125
+ "eslav_PP-OCRv5_mobile_rec",
126
+ ):
127
+ return CYRILLIC_FONT
128
+
129
+ if self.model_name in (
130
+ "korean_PP-OCRv3_mobile_rec",
131
+ "korean_PP-OCRv5_mobile_rec",
132
+ ):
133
+ return KOREAN_FONT
134
+
135
+ if self.model_name == "arabic_PP-OCRv3_mobile_rec":
136
+ return ARABIC_FONT
137
+
138
+ if self.model_name == "ka_PP-OCRv3_mobile_rec":
139
+ return KANNADA_FONT
140
+
141
+ if self.model_name == "te_PP-OCRv3_mobile_rec":
142
+ return TELUGU_FONT
143
+
144
+ if self.model_name == "ta_PP-OCRv3_mobile_rec":
145
+ return TAMIL_FONT
146
+
147
+ if self.model_name == "devanagari_PP-OCRv3_mobile_rec":
148
+ return DEVANAGARI_FONT
@@ -17,7 +17,7 @@ import copy
17
17
  import PIL
18
18
  from PIL import Image, ImageDraw, ImageFont
19
19
 
20
- from ....utils.fonts import PINGFANG_FONT_FILE_PATH
20
+ from ....utils.fonts import SIMFANG_FONT
21
21
  from ...common.result import BaseCVResult, JsonMixin
22
22
 
23
23
 
@@ -26,11 +26,13 @@ class TextRecResult(BaseCVResult):
26
26
  def _to_str(self, *args, **kwargs):
27
27
  data = copy.deepcopy(self)
28
28
  data.pop("input_img")
29
+ data.pop("vis_font")
29
30
  return JsonMixin._to_str(data, *args, **kwargs)
30
31
 
31
32
  def _to_json(self, *args, **kwargs):
32
33
  data = copy.deepcopy(self)
33
34
  data.pop("input_img")
35
+ data.pop("vis_font")
34
36
  return JsonMixin._to_json(data, *args, **kwargs)
35
37
 
36
38
  def _to_img(self):
@@ -38,10 +40,11 @@ class TextRecResult(BaseCVResult):
38
40
  image = Image.fromarray(self["input_img"][:, :, ::-1])
39
41
  rec_text = self["rec_text"]
40
42
  rec_score = self["rec_score"]
43
+ vis_font = self["vis_font"] if self["vis_font"] is not None else SIMFANG_FONT
41
44
  image = image.convert("RGB")
42
45
  image_width, image_height = image.size
43
46
  text = f"{rec_text} ({rec_score})"
44
- font = self.adjust_font_size(image_width, text, PINGFANG_FONT_FILE_PATH)
47
+ font = self.adjust_font_size(image_width, text, vis_font.path)
45
48
  row_height = font.getbbox(text)[3]
46
49
  new_image_height = image_height + int(row_height * 1.2)
47
50
  new_image = Image.new("RGB", (image_width, new_image_height), (255, 255, 255))
@@ -17,7 +17,7 @@ import PIL
17
17
  from PIL import Image, ImageDraw, ImageFont
18
18
 
19
19
  from ....utils.deps import class_requires_deps, is_dep_available
20
- from ....utils.fonts import PINGFANG_FONT_FILE_PATH
20
+ from ....utils.fonts import PINGFANG_FONT
21
21
  from ...common.result import BaseVideoResult
22
22
  from ...utils.color_map import get_colormap
23
23
  from ...utils.io import VideoReader
@@ -47,7 +47,7 @@ class TopkVideoResult(BaseVideoResult):
47
47
  max_font_size = int(image_size[0] * 0.05)
48
48
  for font_size in range(max_font_size, min_font_size - 1, -1):
49
49
  font = ImageFont.truetype(
50
- PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8"
50
+ PINGFANG_FONT.path, font_size, encoding="utf-8"
51
51
  )
52
52
  if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
53
53
  text_width_tmp, text_height_tmp = draw.textsize(label_str, font)
@@ -57,7 +57,7 @@ class TopkVideoResult(BaseVideoResult):
57
57
  if text_width_tmp <= image_size[0]:
58
58
  break
59
59
  else:
60
- font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, min_font_size)
60
+ font = ImageFont.truetype(PINGFANG_FONT.path, min_font_size)
61
61
  color_list = get_colormap(rgb=True)
62
62
  color = tuple(color_list[0])
63
63
  font_color = tuple(self._get_font_colormap(3))
@@ -19,7 +19,7 @@ import PIL
19
19
  from PIL import Image, ImageDraw, ImageFont
20
20
 
21
21
  from ....utils.deps import class_requires_deps, is_dep_available
22
- from ....utils.fonts import PINGFANG_FONT_FILE_PATH
22
+ from ....utils.fonts import PINGFANG_FONT
23
23
  from ...common.result import BaseVideoResult
24
24
  from ...utils.color_map import get_colormap
25
25
  from ...utils.io import VideoReader
@@ -46,9 +46,7 @@ class DetVideoResult(BaseVideoResult):
46
46
  image = Image.fromarray(video[i].asnumpy())
47
47
  image.size
48
48
  font_size = int(0.018 * int(image.width)) + 2
49
- font = ImageFont.truetype(
50
- PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8"
51
- )
49
+ font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
52
50
  draw_thickness = int(max(image.size) * 0.002)
53
51
  draw = ImageDraw.Draw(image)
54
52
  results = self["result"][i]
@@ -42,6 +42,7 @@ from .ocr import OCRPipeline
42
42
  from .open_vocabulary_detection import OpenVocabularyDetectionPipeline
43
43
  from .open_vocabulary_segmentation import OpenVocabularySegmentationPipeline
44
44
  from .pp_chatocr import PP_ChatOCRv3_Pipeline, PP_ChatOCRv4_Pipeline
45
+ from .pp_doctranslation import PP_DocTranslation_Pipeline
45
46
  from .pp_shitu_v2 import ShiTuV2Pipeline
46
47
  from .rotated_object_detection import RotatedObjectDetectionPipeline
47
48
  from .seal_recognition import SealRecognitionPipeline
@@ -18,7 +18,7 @@ import PIL
18
18
  from PIL import Image, ImageDraw, ImageFont
19
19
 
20
20
  from ....utils.deps import class_requires_deps, is_dep_available
21
- from ....utils.fonts import PINGFANG_FONT_FILE_PATH
21
+ from ....utils.fonts import PINGFANG_FONT
22
22
  from ...common.result import BaseCVResult, JsonMixin
23
23
  from ...utils.color_map import font_colormap, get_colormap
24
24
 
@@ -35,7 +35,7 @@ def draw_attribute_result(img, boxes):
35
35
  img (PIL.Image.Image): visualized image
36
36
  """
37
37
  font_size = int((0.024 * int(img.width) + 2) * 0.7)
38
- font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
38
+ font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
39
39
 
40
40
  draw_thickness = int(max(img.size) * 0.005)
41
41
  draw = ImageDraw.Draw(img)
@@ -14,3 +14,4 @@
14
14
 
15
15
  from .generate_ensemble_prompt import GenerateEnsemblePrompt
16
16
  from .generate_kie_prompt import GenerateKIEPrompt
17
+ from .generate_translate_prompt import GenerateTranslatePrompt
@@ -0,0 +1,179 @@
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict
16
+
17
+ from .base import BaseGeneratePrompt
18
+
19
+
20
+ class GenerateTranslatePrompt(BaseGeneratePrompt):
21
+ """Generate Ensemble Prompt"""
22
+
23
+ entities = ["translate_prompt"]
24
+
25
+ def __init__(self, config: Dict) -> None:
26
+ """Initializes the GenerateTranslatePrompt instance with the given configuration.
27
+
28
+ Args:
29
+ config (Dict): A dictionary containing configuration settings.
30
+ - task_type (str): The type of task to generate a prompt for, in the support entities list.
31
+ - task_description (str, optional): A description of the task. Defaults to an empty string.
32
+ - output_format (str, optional): The desired output format. Defaults to an empty string.
33
+ - rules_str (str, optional): A string representing rules for the task. Defaults to an empty string.
34
+ - few_shot_demo_text_content (str, optional): Text content for few-shot demos. Defaults to an empty string.
35
+ - few_shot_demo_key_value_list (str, optional): A key-value list for few-shot demos. Defaults to an empty string.
36
+
37
+ Raises:
38
+ ValueError: If the task type is not in the allowed entities for GenerateKIEPrompt.
39
+ """
40
+ super().__init__()
41
+
42
+ task_type = config.get("task_type", "")
43
+ task_description = config.get("task_description", "")
44
+ output_format = config.get("output_format", "")
45
+ rules_str = config.get("rules_str", "")
46
+ few_shot_demo_text_content = config.get("few_shot_demo_text_content", "")
47
+ few_shot_demo_key_value_list = config.get("few_shot_demo_key_value_list", "")
48
+
49
+ if task_description is None:
50
+ task_description = ""
51
+
52
+ if output_format is None:
53
+ output_format = ""
54
+
55
+ if rules_str is None:
56
+ rules_str = ""
57
+
58
+ if few_shot_demo_text_content is None:
59
+ few_shot_demo_text_content = ""
60
+
61
+ if few_shot_demo_key_value_list is None:
62
+ few_shot_demo_key_value_list = ""
63
+
64
+ if task_type not in self.entities:
65
+ raise ValueError(
66
+ f"task type must be in {self.entities} of GenerateEnsemblePrompt."
67
+ )
68
+
69
+ self.task_type = task_type
70
+ self.task_description = task_description
71
+ self.output_format = output_format
72
+ self.rules_str = rules_str
73
+ self.few_shot_demo_text_content = few_shot_demo_text_content
74
+ self.few_shot_demo_key_value_list = few_shot_demo_key_value_list
75
+
76
+ def generate_prompt(
77
+ self,
78
+ original_text: str,
79
+ language: str,
80
+ task_description: str = None,
81
+ output_format: str = None,
82
+ rules_str: str = None,
83
+ few_shot_demo_text_content: str = None,
84
+ few_shot_demo_key_value_list: str = None,
85
+ ) -> str:
86
+ """Generates a prompt based on the given parameters.
87
+ Args:
88
+ key (str): the input question.
89
+ result_methodA (str): the result of method A.
90
+ result_methodB (str): the result of method B.
91
+ task_description (str, optional): A description of the task. Defaults to None.
92
+ output_format (str, optional): The desired output format. Defaults to None.
93
+ rules_str (str, optional): A string containing rules or instructions. Defaults to None.
94
+ few_shot_demo_text_content (str, optional): Text content for few-shot demos. Defaults to None.
95
+ few_shot_demo_key_value_list (str, optional): Key-value list for few-shot demos. Defaults to None.
96
+ Returns:
97
+ str: The generated prompt.
98
+
99
+ Raises:
100
+ ValueError: If the task_type is not supported.
101
+ """
102
+ language_map = {
103
+ "chinese": "简体中文",
104
+ "zh": "简体中文",
105
+ "english": "英语",
106
+ "en": "英语",
107
+ "french": "法语",
108
+ "fr": "法语",
109
+ "spanish": "西班牙语",
110
+ "es": "西班牙语",
111
+ "german": "德语",
112
+ "de": "德语",
113
+ "japanese": "日语",
114
+ "ja": "日语",
115
+ "korean": "韩语",
116
+ "ko": "韩语",
117
+ "russian": "俄语",
118
+ "ru": "俄语",
119
+ "italian": "意大利语",
120
+ "it": "意大利语",
121
+ "portuguese": "葡萄牙语",
122
+ "pt": "葡萄牙语",
123
+ "arabic": "阿拉伯语",
124
+ "ar": "阿拉伯语",
125
+ "hindi": "印地语",
126
+ "hi": "印地语",
127
+ "dutch": "荷兰语",
128
+ "nl": "荷兰语",
129
+ "swedish": "瑞典语",
130
+ "sv": "瑞典语",
131
+ "turkish": "土耳其语",
132
+ "tr": "土耳其语",
133
+ "thai": "泰语",
134
+ "th": "泰语",
135
+ "vietnamese": "越南语",
136
+ "vi": "越南语",
137
+ "hebrew": "希伯来语",
138
+ "he": "希伯来语",
139
+ "greek": "希腊语",
140
+ "el": "希腊语",
141
+ "polish": "波兰语",
142
+ "pl": "波兰语",
143
+ }
144
+
145
+ if task_description is None:
146
+ task_description = self.task_description
147
+
148
+ if output_format is None:
149
+ output_format = self.output_format
150
+
151
+ if rules_str is None:
152
+ rules_str = self.rules_str
153
+
154
+ if few_shot_demo_text_content is None:
155
+ few_shot_demo_text_content = self.few_shot_demo_text_content
156
+
157
+ if few_shot_demo_text_content:
158
+ few_shot_demo_text_content = (
159
+ f"这里是一些示例:\n{few_shot_demo_text_content}\n"
160
+ )
161
+
162
+ if few_shot_demo_key_value_list is None:
163
+ few_shot_demo_key_value_list = self.few_shot_demo_key_value_list
164
+
165
+ if few_shot_demo_key_value_list:
166
+ few_shot_demo_key_value_list = f"这里是一些专业术语对照表,对照表中单词要参考对照表翻译:\n{few_shot_demo_key_value_list}\n"
167
+
168
+ prompt = f"""{task_description}{rules_str}{output_format}{few_shot_demo_text_content}{few_shot_demo_key_value_list}"""
169
+
170
+ language_name = language_map.get(language, language)
171
+ task_type = self.task_type
172
+ if task_type == "translate_prompt":
173
+ prompt += f"""下面正式开始:
174
+ \n将以下内容翻译成:{language_name}
175
+ \n原文:{original_text}
176
+ """
177
+ else:
178
+ raise ValueError(f"{self.task_type} is currently not supported.")
179
+ return prompt
@@ -16,7 +16,7 @@ from typing import Dict
16
16
 
17
17
  from PIL import Image, ImageDraw
18
18
 
19
- from ....utils.fonts import PINGFANG_FONT_FILE_PATH, create_font
19
+ from ....utils.fonts import PINGFANG_FONT, create_font
20
20
  from ...common.result import BaseCVResult, JsonMixin
21
21
 
22
22
 
@@ -55,7 +55,7 @@ class DocPreprocessorResult(BaseCVResult):
55
55
  beg_w_list = [0, w1, w1 + w2]
56
56
  for tno in range(len(txt_list)):
57
57
  txt = txt_list[tno]
58
- font = create_font(txt, (region_w_list[tno], 20), PINGFANG_FONT_FILE_PATH)
58
+ font = create_font(txt, (region_w_list[tno], 20), PINGFANG_FONT.path)
59
59
  draw_text.text(
60
60
  [10 + beg_w_list[tno], h + 2], txt, fill=(0, 0, 0), font=font
61
61
  )
@@ -24,7 +24,7 @@ from PIL import Image, ImageDraw
24
24
 
25
25
  from ....utils import logging
26
26
  from ....utils.deps import class_requires_deps, function_requires_deps, is_dep_available
27
- from ....utils.fonts import PINGFANG_FONT_FILE_PATH
27
+ from ....utils.fonts import PINGFANG_FONT
28
28
  from ...common.result import BaseCVResult, JsonMixin
29
29
  from ...models.formula_recognition.result import (
30
30
  crop_white_area,
@@ -277,6 +277,6 @@ def draw_box_formula_fine(
277
277
  )
278
278
  else:
279
279
  img_right_text = draw_box_txt_fine(
280
- img_size, box, "Rendering Failed", PINGFANG_FONT_FILE_PATH
280
+ img_size, box, "Rendering Failed", PINGFANG_FONT.path
281
281
  )
282
282
  return img_right_text
@@ -926,6 +926,8 @@ class _LayoutParsingPipelineV2(BasePipeline):
926
926
  Predicts the layout parsing result for the given input.
927
927
 
928
928
  Args:
929
+ input (Union[str, list[str], np.ndarray, list[np.ndarray]]): Input image path, list of image paths,
930
+ numpy array of an image, or list of numpy arrays.
929
931
  use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
930
932
  use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
931
933
  use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
@@ -21,7 +21,7 @@ from typing import List
21
21
  import numpy as np
22
22
  from PIL import Image, ImageDraw, ImageFont
23
23
 
24
- from ....utils.fonts import PINGFANG_FONT_FILE_PATH
24
+ from ....utils.fonts import PINGFANG_FONT
25
25
  from ...common.result import (
26
26
  BaseCVResult,
27
27
  HtmlMixin,
@@ -194,7 +194,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
194
194
  image = Image.fromarray(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
195
195
  draw = ImageDraw.Draw(image, "RGBA")
196
196
  font_size = int(0.018 * int(image.width)) + 2
197
- font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
197
+ font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
198
198
  parsing_result: List[LayoutBlock] = self["parsing_res_list"]
199
199
  for block in parsing_result:
200
200
  bbox = block.bbox
@@ -475,6 +475,8 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
475
475
  )
476
476
  page_last_element_seg_end_flag = seg_end_flag
477
477
 
478
+ markdown_info["page_index"] = self["page_index"]
479
+ markdown_info["input_path"] = self["input_path"]
478
480
  markdown_info["markdown_texts"] = markdown_content
479
481
  markdown_info["page_continuation_flags"] = (
480
482
  page_first_element_seg_start_flag,
@@ -368,6 +368,7 @@ class _OCRPipeline(BasePipeline):
368
368
  "rec_texts": [],
369
369
  "rec_scores": [],
370
370
  "rec_polys": [],
371
+ "vis_fonts": [],
371
372
  }
372
373
  for input_path, page_index, doc_preprocessor_res, dt_polys in zip(
373
374
  batch_data.input_paths,
@@ -439,6 +440,7 @@ class _OCRPipeline(BasePipeline):
439
440
  if rec_res["rec_score"] >= text_rec_score_thresh:
440
441
  res["rec_texts"].append(rec_res["rec_text"])
441
442
  res["rec_scores"].append(rec_res["rec_score"])
443
+ res["vis_fonts"].append(rec_res["vis_font"])
442
444
  res["rec_polys"].append(dt_polys[sno])
443
445
 
444
446
  for res in results:
@@ -20,7 +20,7 @@ import numpy as np
20
20
  from PIL import Image, ImageDraw
21
21
 
22
22
  from ....utils.deps import class_requires_deps, function_requires_deps, is_dep_available
23
- from ....utils.fonts import SIMFANG_FONT_FILE_PATH, create_font, create_font_vertical
23
+ from ....utils.fonts import SIMFANG_FONT, create_font, create_font_vertical
24
24
  from ...common.result import BaseCVResult, JsonMixin
25
25
 
26
26
  if is_dep_available("opencv-contrib-python"):
@@ -82,6 +82,11 @@ class OCRResult(BaseCVResult):
82
82
  random.seed(0)
83
83
  draw_left = ImageDraw.Draw(img_left)
84
84
  for idx, (box, txt) in enumerate(zip(boxes, txts)):
85
+ vis_font = (
86
+ self["vis_fonts"][idx]
87
+ if self["vis_fonts"][idx] is not None
88
+ else SIMFANG_FONT
89
+ )
85
90
  try:
86
91
  color = (
87
92
  random.randint(0, 255),
@@ -91,17 +96,16 @@ class OCRResult(BaseCVResult):
91
96
  box = np.array(box)
92
97
  if len(box) > 4:
93
98
  pts = [(x, y) for x, y in box.tolist()]
94
- draw_left.polygon(pts, outline=color, width=8)
99
+ draw_left.polygon(pts, outline=color, width=8, fill=color)
95
100
  box = self.get_minarea_rect(box)
96
101
  height = int(0.5 * (max(box[:, 1]) - min(box[:, 1])))
97
102
  box[:2, 1] = np.mean(box[:, 1])
98
103
  box[2:, 1] = np.mean(box[:, 1]) + min(20, height)
99
- box_pts = [(int(x), int(y)) for x, y in box.tolist()]
100
- draw_left.polygon(box_pts, fill=color)
104
+ else:
105
+ box_pts = [(int(x), int(y)) for x, y in box.tolist()]
106
+ draw_left.polygon(box_pts, fill=color)
101
107
 
102
- img_right_text = draw_box_txt_fine(
103
- (w, h), box, txt, SIMFANG_FONT_FILE_PATH
104
- )
108
+ img_right_text = draw_box_txt_fine((w, h), box, txt, vis_font.path)
105
109
  pts = np.array(box, np.int32).reshape((-1, 1, 2))
106
110
  cv2.polylines(img_right_text, [pts], True, color, 1)
107
111
  img_right = cv2.bitwise_and(img_right, img_right_text)
@@ -0,0 +1,15 @@
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .pipeline import PP_DocTranslation_Pipeline