PyPI - paddlex - Versions diffs - 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl - Mend

paddlex 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (134) hide show

paddlex/inference/models/text_recognition/predictor.py CHANGED Viewed

@@ -13,6 +13,17 @@
 # limitations under the License.
 from ....modules.text_recognition.model_list import MODELS
+from ....utils.fonts import (
+    ARABIC_FONT,
+    CYRILLIC_FONT,
+    DEVANAGARI_FONT,
+    KANNADA_FONT,
+    KOREAN_FONT,
+    LATIN_FONT,
+    SIMFANG_FONT,
+    TAMIL_FONT,
+    TELUGU_FONT,
+)
 from ....utils.func_register import FuncRegister
 from ...common.batch_sampler import ImageBatchSampler
 from ...common.reader import ReadImage
@@ -31,6 +42,7 @@ class TextRecPredictor(BasePredictor):
     def __init__(self, *args, input_shape=None, **kwargs):
         super().__init__(*args, **kwargs)
         self.input_shape = input_shape
+        self.vis_font = self.get_vis_font()
         self.pre_tfs, self.infer, self.post_op = self._build()
     def _build_batch_sampler(self):
@@ -68,6 +80,7 @@ class TextRecPredictor(BasePredictor):
             "input_img": batch_raw_imgs,
             "rec_text": texts,
             "rec_score": scores,
+            "vis_font": [self.vis_font] * len(batch_raw_imgs),
         }
     @register("DecodeImage")
@@ -76,7 +89,7 @@ class TextRecPredictor(BasePredictor):
         return "Read", ReadImage(format=img_mode)
     @register("RecResizeImg")
-    def build_resize(self, image_shape):
+    def build_resize(self, image_shape, **kwargs):
         return "ReisizeNorm", OCRReisizeNormImg(
             rec_image_shape=image_shape, input_shape=self.input_shape
         )
@@ -96,3 +109,40 @@ class TextRecPredictor(BasePredictor):
     @register("KeepKeys")
     def foo(self, *args, **kwargs):
         return None, None
+    def get_vis_font(self):
+        """Select the font used to visualize text recognized by this model.
+
+        The choice is driven purely by ``self.model_name``.
+
+        Returns:
+            The font constant matching the model name, or ``None`` when the
+            model name is not one of the names handled below.
+        """
+        # Model names that start with "PP-OCR" carry no language prefix.
+        if self.model_name.startswith("PP-OCR"):
+            return SIMFANG_FONT
+        if self.model_name in (
+            "latin_PP-OCRv3_mobile_rec",
+            "latin_PP-OCRv5_mobile_rec",
+        ):
+            return LATIN_FONT
+        if self.model_name in (
+            "cyrillic_PP-OCRv3_mobile_rec",
+            "eslav_PP-OCRv5_mobile_rec",
+        ):
+            return CYRILLIC_FONT
+        if self.model_name in (
+            "korean_PP-OCRv3_mobile_rec",
+            "korean_PP-OCRv5_mobile_rec",
+        ):
+            return KOREAN_FONT
+        if self.model_name == "arabic_PP-OCRv3_mobile_rec":
+            return ARABIC_FONT
+        if self.model_name == "ka_PP-OCRv3_mobile_rec":
+            return KANNADA_FONT
+        if self.model_name == "te_PP-OCRv3_mobile_rec":
+            return TELUGU_FONT
+        if self.model_name == "ta_PP-OCRv3_mobile_rec":
+            return TAMIL_FONT
+        if self.model_name == "devanagari_PP-OCRv3_mobile_rec":
+            return DEVANAGARI_FONT
+        # No dedicated font for this model: say so explicitly instead of
+        # falling off the end of the function.
+        return None

paddlex/inference/models/text_recognition/result.py CHANGED Viewed

@@ -17,7 +17,7 @@ import copy
 import PIL
 from PIL import Image, ImageDraw, ImageFont
-from ....utils.fonts import PINGFANG_FONT_FILE_PATH
+from ....utils.fonts import SIMFANG_FONT
 from ...common.result import BaseCVResult, JsonMixin
@@ -26,11 +26,13 @@ class TextRecResult(BaseCVResult):
     def _to_str(self, *args, **kwargs):
         data = copy.deepcopy(self)
         data.pop("input_img")
+        data.pop("vis_font")
         return JsonMixin._to_str(data, *args, **kwargs)
     def _to_json(self, *args, **kwargs):
         data = copy.deepcopy(self)
         data.pop("input_img")
+        data.pop("vis_font")
         return JsonMixin._to_json(data, *args, **kwargs)
     def _to_img(self):
@@ -38,10 +40,11 @@ class TextRecResult(BaseCVResult):
         image = Image.fromarray(self["input_img"][:, :, ::-1])
         rec_text = self["rec_text"]
         rec_score = self["rec_score"]
+        vis_font = self["vis_font"] if self["vis_font"] is not None else SIMFANG_FONT
         image = image.convert("RGB")
         image_width, image_height = image.size
         text = f"{rec_text} ({rec_score})"
-        font = self.adjust_font_size(image_width, text, PINGFANG_FONT_FILE_PATH)
+        font = self.adjust_font_size(image_width, text, vis_font.path)
         row_height = font.getbbox(text)[3]
         new_image_height = image_height + int(row_height * 1.2)
         new_image = Image.new("RGB", (image_width, new_image_height), (255, 255, 255))

paddlex/inference/models/video_classification/result.py CHANGED Viewed

@@ -17,7 +17,7 @@ import PIL
 from PIL import Image, ImageDraw, ImageFont
 from ....utils.deps import class_requires_deps, is_dep_available
-from ....utils.fonts import PINGFANG_FONT_FILE_PATH
+from ....utils.fonts import PINGFANG_FONT
 from ...common.result import BaseVideoResult
 from ...utils.color_map import get_colormap
 from ...utils.io import VideoReader
@@ -47,7 +47,7 @@ class TopkVideoResult(BaseVideoResult):
             max_font_size = int(image_size[0] * 0.05)
             for font_size in range(max_font_size, min_font_size - 1, -1):
                 font = ImageFont.truetype(
-                    PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8"
+                    PINGFANG_FONT.path, font_size, encoding="utf-8"
                 )
                 if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
                     text_width_tmp, text_height_tmp = draw.textsize(label_str, font)
@@ -57,7 +57,7 @@ class TopkVideoResult(BaseVideoResult):
                 if text_width_tmp <= image_size[0]:
                     break
                 else:
-                    font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, min_font_size)
+                    font = ImageFont.truetype(PINGFANG_FONT.path, min_font_size)
             color_list = get_colormap(rgb=True)
             color = tuple(color_list[0])
             font_color = tuple(self._get_font_colormap(3))

paddlex/inference/models/video_detection/result.py CHANGED Viewed

@@ -19,7 +19,7 @@ import PIL
 from PIL import Image, ImageDraw, ImageFont
 from ....utils.deps import class_requires_deps, is_dep_available
-from ....utils.fonts import PINGFANG_FONT_FILE_PATH
+from ....utils.fonts import PINGFANG_FONT
 from ...common.result import BaseVideoResult
 from ...utils.color_map import get_colormap
 from ...utils.io import VideoReader
@@ -46,9 +46,7 @@ class DetVideoResult(BaseVideoResult):
             image = Image.fromarray(video[i].asnumpy())
             image.size
             font_size = int(0.018 * int(image.width)) + 2
-            font = ImageFont.truetype(
-                PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8"
-            )
+            font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
             draw_thickness = int(max(image.size) * 0.002)
             draw = ImageDraw.Draw(image)
             results = self["result"][i]

paddlex/inference/pipelines/__init__.py CHANGED Viewed

@@ -42,6 +42,7 @@ from .ocr import OCRPipeline
 from .open_vocabulary_detection import OpenVocabularyDetectionPipeline
 from .open_vocabulary_segmentation import OpenVocabularySegmentationPipeline
 from .pp_chatocr import PP_ChatOCRv3_Pipeline, PP_ChatOCRv4_Pipeline
+from .pp_doctranslation import PP_DocTranslation_Pipeline
 from .pp_shitu_v2 import ShiTuV2Pipeline
 from .rotated_object_detection import RotatedObjectDetectionPipeline
 from .seal_recognition import SealRecognitionPipeline

paddlex/inference/pipelines/attribute_recognition/result.py CHANGED Viewed

@@ -18,7 +18,7 @@ import PIL
 from PIL import Image, ImageDraw, ImageFont
 from ....utils.deps import class_requires_deps, is_dep_available
-from ....utils.fonts import PINGFANG_FONT_FILE_PATH
+from ....utils.fonts import PINGFANG_FONT
 from ...common.result import BaseCVResult, JsonMixin
 from ...utils.color_map import font_colormap, get_colormap
@@ -35,7 +35,7 @@ def draw_attribute_result(img, boxes):
         img (PIL.Image.Image): visualized image
     """
     font_size = int((0.024 * int(img.width) + 2) * 0.7)
-    font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
+    font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
     draw_thickness = int(max(img.size) * 0.005)
     draw = ImageDraw.Draw(img)

paddlex/inference/pipelines/components/prompt_engineering/__init__.py CHANGED Viewed

@@ -14,3 +14,4 @@
 from .generate_ensemble_prompt import GenerateEnsemblePrompt
 from .generate_kie_prompt import GenerateKIEPrompt
+from .generate_translate_prompt import GenerateTranslatePrompt

paddlex/inference/pipelines/components/prompt_engineering/generate_translate_prompt.py ADDED Viewed

@@ -0,0 +1,179 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Dict
+from .base import BaseGeneratePrompt
+class GenerateTranslatePrompt(BaseGeneratePrompt):
+    """Generate Translate Prompt"""
+    entities = ["translate_prompt"]
+    def __init__(self, config: Dict) -> None:
+        """Initializes the GenerateTranslatePrompt instance with the given configuration.
+        Args:
+            config (Dict): A dictionary containing configuration settings.
+                - task_type (str): The type of task to generate a prompt for, in the support entities list.
+                - task_description (str, optional): A description of the task. Defaults to an empty string.
+                - output_format (str, optional): The desired output format. Defaults to an empty string.
+                - rules_str (str, optional): A string representing rules for the task. Defaults to an empty string.
+                - few_shot_demo_text_content (str, optional): Text content for few-shot demos. Defaults to an empty string.
+                - few_shot_demo_key_value_list (str, optional): A key-value list for few-shot demos. Defaults to an empty string.
+        Raises:
+            ValueError: If the task type is not in the allowed entities for GenerateTranslatePrompt.
+        """
+        super().__init__()
+        def _text(key: str) -> str:
+            # A missing key and an explicit None both mean "no text".
+            value = config.get(key, "")
+            return "" if value is None else value
+        task_type = config.get("task_type", "")
+        if task_type not in self.entities:
+            raise ValueError(
+                f"task type must be in {self.entities} of GenerateTranslatePrompt."
+            )
+        self.task_type = task_type
+        self.task_description = _text("task_description")
+        self.output_format = _text("output_format")
+        self.rules_str = _text("rules_str")
+        self.few_shot_demo_text_content = _text("few_shot_demo_text_content")
+        self.few_shot_demo_key_value_list = _text("few_shot_demo_key_value_list")
+    def generate_prompt(
+        self,
+        original_text: str,
+        language: str,
+        task_description: str = None,
+        output_format: str = None,
+        rules_str: str = None,
+        few_shot_demo_text_content: str = None,
+        few_shot_demo_key_value_list: str = None,
+    ) -> str:
+        """Generates a translation prompt based on the given parameters.
+        Args:
+            original_text (str): The text to be translated.
+            language (str): The target language, given as one of the names or
+                codes in the language map below (e.g. "english" or "en"). Any
+                other value is inserted into the prompt unchanged.
+            task_description (str, optional): A description of the task. Defaults to None,
+                in which case the value set at construction is used.
+            output_format (str, optional): The desired output format. Defaults to None,
+                in which case the value set at construction is used.
+            rules_str (str, optional): A string containing rules or instructions. Defaults to None,
+                in which case the value set at construction is used.
+            few_shot_demo_text_content (str, optional): Text content for few-shot demos. Defaults to None,
+                in which case the value set at construction is used.
+            few_shot_demo_key_value_list (str, optional): Key-value list for few-shot demos. Defaults to None,
+                in which case the value set at construction is used.
+        Returns:
+            str: The generated prompt.
+        Raises:
+            ValueError: If the task_type is not supported.
+        """
+        # Guard first: only the "translate_prompt" task is implemented.
+        if self.task_type != "translate_prompt":
+            raise ValueError(f"{self.task_type} is currently not supported.")
+        # Maps accepted language names/codes to the name used inside the prompt.
+        language_map = {
+            "chinese": "简体中文",
+            "zh": "简体中文",
+            "english": "英语",
+            "en": "英语",
+            "french": "法语",
+            "fr": "法语",
+            "spanish": "西班牙语",
+            "es": "西班牙语",
+            "german": "德语",
+            "de": "德语",
+            "japanese": "日语",
+            "ja": "日语",
+            "korean": "韩语",
+            "ko": "韩语",
+            "russian": "俄语",
+            "ru": "俄语",
+            "italian": "意大利语",
+            "it": "意大利语",
+            "portuguese": "葡萄牙语",
+            "pt": "葡萄牙语",
+            "arabic": "阿拉伯语",
+            "ar": "阿拉伯语",
+            "hindi": "印地语",
+            "hi": "印地语",
+            "dutch": "荷兰语",
+            "nl": "荷兰语",
+            "swedish": "瑞典语",
+            "sv": "瑞典语",
+            "turkish": "土耳其语",
+            "tr": "土耳其语",
+            "thai": "泰语",
+            "th": "泰语",
+            "vietnamese": "越南语",
+            "vi": "越南语",
+            "hebrew": "希伯来语",
+            "he": "希伯来语",
+            "greek": "希腊语",
+            "el": "希腊语",
+            "polish": "波兰语",
+            "pl": "波兰语",
+        }
+        # Per-call arguments override the values stored at construction time.
+        if task_description is None:
+            task_description = self.task_description
+        if output_format is None:
+            output_format = self.output_format
+        if rules_str is None:
+            rules_str = self.rules_str
+        if few_shot_demo_text_content is None:
+            few_shot_demo_text_content = self.few_shot_demo_text_content
+        if few_shot_demo_text_content:
+            few_shot_demo_text_content = (
+                f"这里是一些示例：\n{few_shot_demo_text_content}\n"
+            )
+        if few_shot_demo_key_value_list is None:
+            few_shot_demo_key_value_list = self.few_shot_demo_key_value_list
+        if few_shot_demo_key_value_list:
+            few_shot_demo_key_value_list = f"这里是一些专业术语对照表,对照表中单词要参考对照表翻译：\n{few_shot_demo_key_value_list}\n"
+        prompt = f"""{task_description}{rules_str}{output_format}{few_shot_demo_text_content}{few_shot_demo_key_value_list}"""
+        # Unknown languages fall through unchanged so callers may pass a
+        # language name directly.
+        language_name = language_map.get(language, language)
+        prompt += f"""下面正式开始:
+                \n将以下内容翻译成：{language_name}
+                \n原文：{original_text}
+                """
+        return prompt

paddlex/inference/pipelines/doc_preprocessor/result.py CHANGED Viewed

@@ -16,7 +16,7 @@ from typing import Dict
 from PIL import Image, ImageDraw
-from ....utils.fonts import PINGFANG_FONT_FILE_PATH, create_font
+from ....utils.fonts import PINGFANG_FONT, create_font
 from ...common.result import BaseCVResult, JsonMixin
@@ -55,7 +55,7 @@ class DocPreprocessorResult(BaseCVResult):
         beg_w_list = [0, w1, w1 + w2]
         for tno in range(len(txt_list)):
             txt = txt_list[tno]
-            font = create_font(txt, (region_w_list[tno], 20), PINGFANG_FONT_FILE_PATH)
+            font = create_font(txt, (region_w_list[tno], 20), PINGFANG_FONT.path)
             draw_text.text(
                 [10 + beg_w_list[tno], h + 2], txt, fill=(0, 0, 0), font=font
             )

paddlex/inference/pipelines/formula_recognition/result.py CHANGED Viewed

@@ -24,7 +24,7 @@ from PIL import Image, ImageDraw
 from ....utils import logging
 from ....utils.deps import class_requires_deps, function_requires_deps, is_dep_available
-from ....utils.fonts import PINGFANG_FONT_FILE_PATH
+from ....utils.fonts import PINGFANG_FONT
 from ...common.result import BaseCVResult, JsonMixin
 from ...models.formula_recognition.result import (
     crop_white_area,
@@ -277,6 +277,6 @@ def draw_box_formula_fine(
             )
         else:
             img_right_text = draw_box_txt_fine(
-                img_size, box, "Rendering Failed", PINGFANG_FONT_FILE_PATH
+                img_size, box, "Rendering Failed", PINGFANG_FONT.path
             )
         return img_right_text

paddlex/inference/pipelines/layout_parsing/pipeline_v2.py CHANGED Viewed

@@ -926,6 +926,8 @@ class _LayoutParsingPipelineV2(BasePipeline):
         Predicts the layout parsing result for the given input.
         Args:
+            input (Union[str, list[str], np.ndarray, list[np.ndarray]]): Input image path, list of image paths,
+                                                                        numpy array of an image, or list of numpy arrays.
             use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
             use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
             use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.

paddlex/inference/pipelines/layout_parsing/result_v2.py CHANGED Viewed

@@ -21,7 +21,7 @@ from typing import List
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
-from ....utils.fonts import PINGFANG_FONT_FILE_PATH
+from ....utils.fonts import PINGFANG_FONT
 from ...common.result import (
     BaseCVResult,
     HtmlMixin,
@@ -194,7 +194,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
         image = Image.fromarray(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
         draw = ImageDraw.Draw(image, "RGBA")
         font_size = int(0.018 * int(image.width)) + 2
-        font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
+        font = ImageFont.truetype(PINGFANG_FONT.path, font_size, encoding="utf-8")
         parsing_result: List[LayoutBlock] = self["parsing_res_list"]
         for block in parsing_result:
             bbox = block.bbox
@@ -435,8 +435,8 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
         markdown_content = ""
         last_label = None
-        seg_start_flag = None
-        seg_end_flag = None
+        seg_start_flag = True
+        seg_end_flag = True
         prev_block = None
         page_first_element_seg_start_flag = None
         page_last_element_seg_end_flag = None
@@ -468,8 +468,15 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
                         else handle_func(block)
                     )
                 last_label = label
+        page_first_element_seg_start_flag = (
+            True
+            if page_first_element_seg_start_flag is None
+            else page_first_element_seg_start_flag
+        )
         page_last_element_seg_end_flag = seg_end_flag
+        markdown_info["page_index"] = self["page_index"]
+        markdown_info["input_path"] = self["input_path"]
         markdown_info["markdown_texts"] = markdown_content
         markdown_info["page_continuation_flags"] = (
             page_first_element_seg_start_flag,

paddlex/inference/pipelines/ocr/pipeline.py CHANGED Viewed

@@ -368,6 +368,7 @@ class _OCRPipeline(BasePipeline):
                     "rec_texts": [],
                     "rec_scores": [],
                     "rec_polys": [],
+                    "vis_fonts": [],
                 }
                 for input_path, page_index, doc_preprocessor_res, dt_polys in zip(
                     batch_data.input_paths,
@@ -439,6 +440,7 @@ class _OCRPipeline(BasePipeline):
                         if rec_res["rec_score"] >= text_rec_score_thresh:
                             res["rec_texts"].append(rec_res["rec_text"])
                             res["rec_scores"].append(rec_res["rec_score"])
+                            res["vis_fonts"].append(rec_res["vis_font"])
                             res["rec_polys"].append(dt_polys[sno])
             for res in results:

paddlex/inference/pipelines/ocr/result.py CHANGED Viewed

@@ -20,7 +20,7 @@ import numpy as np
 from PIL import Image, ImageDraw
 from ....utils.deps import class_requires_deps, function_requires_deps, is_dep_available
-from ....utils.fonts import SIMFANG_FONT_FILE_PATH, create_font, create_font_vertical
+from ....utils.fonts import SIMFANG_FONT, create_font, create_font_vertical
 from ...common.result import BaseCVResult, JsonMixin
 if is_dep_available("opencv-contrib-python"):
@@ -82,6 +82,11 @@ class OCRResult(BaseCVResult):
         random.seed(0)
         draw_left = ImageDraw.Draw(img_left)
         for idx, (box, txt) in enumerate(zip(boxes, txts)):
+            vis_font = (
+                self["vis_fonts"][idx]
+                if self["vis_fonts"][idx] is not None
+                else SIMFANG_FONT
+            )
             try:
                 color = (
                     random.randint(0, 255),
@@ -91,17 +96,16 @@ class OCRResult(BaseCVResult):
                 box = np.array(box)
                 if len(box) > 4:
                     pts = [(x, y) for x, y in box.tolist()]
-                    draw_left.polygon(pts, outline=color, width=8)
+                    draw_left.polygon(pts, outline=color, width=8, fill=color)
                     box = self.get_minarea_rect(box)
                     height = int(0.5 * (max(box[:, 1]) - min(box[:, 1])))
                     box[:2, 1] = np.mean(box[:, 1])
                     box[2:, 1] = np.mean(box[:, 1]) + min(20, height)
-                box_pts = [(int(x), int(y)) for x, y in box.tolist()]
-                draw_left.polygon(box_pts, fill=color)
+                else:
+                    box_pts = [(int(x), int(y)) for x, y in box.tolist()]
+                    draw_left.polygon(box_pts, fill=color)
-                img_right_text = draw_box_txt_fine(
-                    (w, h), box, txt, SIMFANG_FONT_FILE_PATH
-                )
+                img_right_text = draw_box_txt_fine((w, h), box, txt, vis_font.path)
                 pts = np.array(box, np.int32).reshape((-1, 1, 2))
                 cv2.polylines(img_right_text, [pts], True, color, 1)
                 img_right = cv2.bitwise_and(img_right, img_right_text)

paddlex/inference/pipelines/pp_doctranslation/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .pipeline import PP_DocTranslation_Pipeline

paddlex 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl

paddlex 3.0.2__py3-none-any.whl → 3.1.0__py3-none-any.whl