PyPI - paddlex - Versions diffs - 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl - Mend

paddlex 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (940) hide show

paddlex/inference/pipelines/ocr/result.py ADDED Viewed

@@ -0,0 +1,248 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from pathlib import Path
+from typing import Dict
+import copy
+import math
+import random
+import numpy as np
+import cv2
+import PIL
+from PIL import Image, ImageDraw, ImageFont
+from ....utils.fonts import SIMFANG_FONT_FILE_PATH, create_font
+from ...common.result import BaseCVResult, StrMixin, JsonMixin
+class OCRResult(BaseCVResult):
+    """OCR result"""
+    def _get_input_fn(self):
+        fn = super()._get_input_fn()
+        if (page_idx := self["page_index"]) is not None:
+            fp = Path(fn)
+            stem, suffix = fp.stem, fp.suffix
+            return f"{stem}_{page_idx}{suffix}"
+        else:
+            return fn
+    def get_minarea_rect(self, points: np.ndarray) -> np.ndarray:
+        """
+        Get the minimum area rectangle for the given points using OpenCV.
+        Args:
+            points (np.ndarray): An array of 2D points.
+        Returns:
+            np.ndarray: An array of 2D points representing the corners of the minimum area rectangle
+                     in a specific order (clockwise or counterclockwise starting from the top-left corner).
+        """
+        bounding_box = cv2.minAreaRect(points)
+        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
+        index_a, index_b, index_c, index_d = 0, 1, 2, 3
+        if points[1][1] > points[0][1]:
+            index_a = 0
+            index_d = 1
+        else:
+            index_a = 1
+            index_d = 0
+        if points[3][1] > points[2][1]:
+            index_b = 2
+            index_c = 3
+        else:
+            index_b = 3
+            index_c = 2
+        box = np.array(
+            [points[index_a], points[index_b], points[index_c], points[index_d]]
+        ).astype(np.int32)
+        return box
+    def _to_img(self) -> Dict[str, Image.Image]:
+        """
+        Converts the internal data to a PIL Image with detection and recognition results.
+        Returns:
+            Dict[Image.Image]: A dictionary containing two images: 'doc_preprocessor_res' and 'ocr_res_img'.
+        """
+        boxes = self["rec_polys"]
+        txts = self["rec_texts"]
+        image = self["doc_preprocessor_res"]["output_img"]
+        h, w = image.shape[0:2]
+        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        img_left = Image.fromarray(image_rgb)
+        img_right = np.ones((h, w, 3), dtype=np.uint8) * 255
+        random.seed(0)
+        draw_left = ImageDraw.Draw(img_left)
+        for idx, (box, txt) in enumerate(zip(boxes, txts)):
+            try:
+                color = (
+                    random.randint(0, 255),
+                    random.randint(0, 255),
+                    random.randint(0, 255),
+                )
+                box = np.array(box)
+                if len(box) > 4:
+                    pts = [(x, y) for x, y in box.tolist()]
+                    draw_left.polygon(pts, outline=color, width=8)
+                    box = self.get_minarea_rect(box)
+                    height = int(0.5 * (max(box[:, 1]) - min(box[:, 1])))
+                    box[:2, 1] = np.mean(box[:, 1])
+                    box[2:, 1] = np.mean(box[:, 1]) + min(20, height)
+                draw_left.polygon(box, fill=color)
+                img_right_text = draw_box_txt_fine(
+                    (w, h), box, txt, SIMFANG_FONT_FILE_PATH
+                )
+                pts = np.array(box, np.int32).reshape((-1, 1, 2))
+                cv2.polylines(img_right_text, [pts], True, color, 1)
+                img_right = cv2.bitwise_and(img_right, img_right_text)
+            except:
+                continue
+        img_left = Image.blend(Image.fromarray(image_rgb), img_left, 0.5)
+        img_show = Image.new("RGB", (w * 2, h), (255, 255, 255))
+        img_show.paste(img_left, (0, 0, w, h))
+        img_show.paste(Image.fromarray(img_right), (w, 0, w * 2, h))
+        model_settings = self["model_settings"]
+        res_img_dict = {f"ocr_res_img": img_show}
+        if model_settings["use_doc_preprocessor"]:
+            res_img_dict.update(**self["doc_preprocessor_res"].img)
+        return res_img_dict
+    def _to_str(self, *args, **kwargs) -> Dict[str, str]:
+        """Converts the instance's attributes to a dictionary and then to a string.
+        Args:
+            *args: Additional positional arguments passed to the base class method.
+            **kwargs: Additional keyword arguments passed to the base class method.
+        Returns:
+            Dict[str, str]: A dictionary with the instance's attributes converted to strings.
+        """
+        data = {}
+        data["input_path"] = self["input_path"]
+        data["page_index"] = self["page_index"]
+        data["model_settings"] = self["model_settings"]
+        if self["model_settings"]["use_doc_preprocessor"]:
+            data["doc_preprocessor_res"] = self["doc_preprocessor_res"].str["res"]
+        data["dt_polys"] = (
+            self["dt_polys"]
+            if self["text_type"] == "seal"
+            else np.array(self["dt_polys"])
+        )
+        data["text_det_params"] = self["text_det_params"]
+        data["text_type"] = self["text_type"]
+        if "textline_orientation_angles" in self:
+            data["textline_orientation_angles"] = np.array(
+                self["textline_orientation_angles"]
+            )
+        data["text_rec_score_thresh"] = self["text_rec_score_thresh"]
+        data["rec_texts"] = self["rec_texts"]
+        data["rec_scores"] = np.array(self["rec_scores"])
+        data["rec_polys"] = (
+            self["rec_polys"]
+            if self["text_type"] == "seal"
+            else np.array(self["rec_polys"])
+        )
+        data["rec_boxes"] = np.array(self["rec_boxes"])
+        return JsonMixin._to_str(data, *args, **kwargs)
+    def _to_json(self, *args, **kwargs) -> Dict[str, str]:
+        """
+        Converts the object's data to a JSON dictionary.
+        Args:
+            *args: Positional arguments passed to the JsonMixin._to_json method.
+            **kwargs: Keyword arguments passed to the JsonMixin._to_json method.
+        Returns:
+            Dict[str, str]: A dictionary containing the object's data in JSON format.
+        """
+        data = {}
+        data["input_path"] = self["input_path"]
+        data["page_index"] = self["page_index"]
+        data["model_settings"] = self["model_settings"]
+        if self["model_settings"]["use_doc_preprocessor"]:
+            data["doc_preprocessor_res"] = self["doc_preprocessor_res"].json["res"]
+        data["dt_polys"] = self["dt_polys"]
+        data["text_det_params"] = self["text_det_params"]
+        data["text_type"] = self["text_type"]
+        if "textline_orientation_angles" in self:
+            data["textline_orientation_angles"] = self["textline_orientation_angles"]
+        data["text_rec_score_thresh"] = self["text_rec_score_thresh"]
+        data["rec_texts"] = self["rec_texts"]
+        data["rec_scores"] = self["rec_scores"]
+        data["rec_polys"] = self["rec_polys"]
+        data["rec_boxes"] = self["rec_boxes"]
+        return JsonMixin._to_json(data, *args, **kwargs)
+# Module-level helper: renders one text string into a quadrilateral region of a blank image.
+def draw_box_txt_fine(
+    img_size: tuple, box: np.ndarray, txt: str, font_path: str
+) -> np.ndarray:
+    """
+    Draws text in a box on an image with fine control over size and orientation.
+    Args:
+        img_size (tuple): The size of the output image (width, height).
+        box (np.ndarray): A 4x2 numpy array defining the corners of the box in (x, y) order.
+        txt (str): The text to draw inside the box.
+        font_path (str): The path to the font file to use for drawing the text.
+    Returns:
+        np.ndarray: An image with the text drawn in the specified box.
+    """
+    box_height = int(
+        math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
+    )
+    box_width = int(
+        math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)
+    )
+    if box_height > 2 * box_width and box_height > 30:
+        img_text = Image.new("RGB", (box_height, box_width), (255, 255, 255))
+        draw_text = ImageDraw.Draw(img_text)
+        if txt:
+            font = create_font(txt, (box_height, box_width), font_path)
+            draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
+        img_text = img_text.transpose(Image.ROTATE_270)
+    else:
+        img_text = Image.new("RGB", (box_width, box_height), (255, 255, 255))
+        draw_text = ImageDraw.Draw(img_text)
+        if txt:
+            font = create_font(txt, (box_width, box_height), font_path)
+            draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
+    pts1 = np.float32(
+        [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]]
+    )
+    pts2 = np.array(box, dtype=np.float32)
+    M = cv2.getPerspectiveTransform(pts1, pts2)
+    img_text = np.array(img_text, dtype=np.uint8)
+    img_right_text = cv2.warpPerspective(
+        img_text,
+        M,
+        img_size,
+        flags=cv2.INTER_NEAREST,
+        borderMode=cv2.BORDER_CONSTANT,
+        borderValue=(255, 255, 255),
+    )
+    return img_right_text

paddlex/inference/pipelines/open_vocabulary_detection/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .pipeline import OpenVocabularyDetectionPipeline

paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py ADDED Viewed

@@ -0,0 +1,75 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Dict, Optional, Union, List
+import numpy as np
+from ...utils.pp_option import PaddlePredictorOption
+from ..base import BasePipeline
+from ...models.object_detection.result import DetResult
+class OpenVocabularyDetectionPipeline(BasePipeline):
+    """Open Vocabulary Detection Pipeline"""
+    entities = "open_vocabulary_detection"
+    def __init__(
+        self,
+        config: Dict,
+        device: str = None,
+        pp_option: PaddlePredictorOption = None,
+        use_hpip: bool = False,
+    ) -> None:
+        """
+        Initializes the class with given configurations and options.
+        Args:
+            config (Dict): Configuration dictionary containing model and other parameters.
+            device (str): The device to run the prediction on. Default is None.
+            pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
+            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+        """
+        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        open_vocabulary_detection_model_config = config.get("SubModules", {}).get(
+            "OpenVocabularyDetection",
+            {"model_config_error": "config error for doc_ori_classify_model!"},
+        )
+        self.open_vocabulary_detection_model = self.create_model(
+            open_vocabulary_detection_model_config
+        )
+        self.thresholds = open_vocabulary_detection_model_config["thresholds"]
+    def predict(
+        self,
+        input: Union[str, List[str], np.ndarray, List[np.ndarray]],
+        prompt: str,
+        thresholds: Union[Dict[str, float], None] = None,
+        **kwargs
+    ) -> DetResult:
+        """Predicts open vocabulary detection results for the given input.
+        Args:
+            input (Union[str, list[str], np.ndarray, list[np.ndarray]]): The input image(s) or path(s) to the images.
+            prompt (str): The text prompt used to describe the objects.
+            thresholds (dict | None): Threshold values for different models. If provided, these will override any default threshold values set during initialization. Default is None.
+            **kwargs: Additional keyword arguments that can be passed to the function.
+        Returns:
+            DetResult: The predicted open vocabulary detection results.
+        """
+        yield from self.open_vocabulary_detection_model(
+            input, prompt=prompt, thresholds=thresholds
+        )

paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .pipeline import OpenVocabularySegmentationPipeline

paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py ADDED Viewed

@@ -0,0 +1,89 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Dict, Optional, Union, Tuple, List
+import numpy as np
+from ...utils.pp_option import PaddlePredictorOption
+from ..base import BasePipeline
+from ...models.open_vocabulary_segmentation.results import SAMSegResult
+# Alias for a plain numeric scalar; no use of it is visible in this part of the module.
+Number = Union[int, float]
+class OpenVocabularySegmentationPipeline(BasePipeline):
+    """Open Vocabulary Segmentation pipeline"""
+    entities = "open_vocabulary_segmentation"
+    def __init__(
+        self,
+        config: Dict,
+        device: str = None,
+        pp_option: PaddlePredictorOption = None,
+        use_hpip: bool = False,
+    ) -> None:
+        """
+        Initializes the class with given configurations and options.
+        Args:
+            config (Dict): Configuration dictionary containing model and other parameters.
+            device (str): The device to run the prediction on. Default is None.
+            pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
+            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+        """
+        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        # create box-prompted SAM-H
+        box_prompted_model_cfg = config.get("SubModules", {}).get(
+            "BoxPromptSegmentation",
+            {"model_config_error": "config error for doc_ori_classify_model!"},
+        )
+        self.box_prompted_model = self.create_model(box_prompted_model_cfg)
+        # create point-prompted SAM-H
+        point_prompted_model_cfg = config.get("SubModules", {}).get(
+            "PointPromptSegmentation",
+            {"model_config_error": "config error for doc_ori_classify_model!"},
+        )
+        self.point_prompted_model = self.create_model(point_prompted_model_cfg)
+    def predict(
+        self,
+        input: Union[str, List[str], np.ndarray, List[np.ndarray]],
+        prompt: Union[List[List[float]], np.ndarray],
+        prompt_type: str = "box",
+        **kwargs
+    ) -> SAMSegResult:
+        """Predicts image segmentation results for the given input.
+        Args:
+            input (str | list[str] | np.ndarray | list[np.ndarray]): The input image(s) or path(s) to the images.
+            prompt (list[list[float]] | np.ndarray): The prompt for the input image(s).
+            prompt_type (str): The type of prompt, either 'box' or 'point'. Default is 'box'.
+            **kwargs: Additional keyword arguments that can be passed to the function.
+        Returns:
+            SAMSegResult: The predicted SAM segmentation results.
+        """
+        if prompt_type == "box":
+            yield from self.box_prompted_model(input, prompts={"box_prompt": prompt})
+        elif prompt_type == "point":
+            yield from self.point_prompted_model(
+                input, prompts={"point_prompt": prompt}
+            )
+        else:
+            raise ValueError(
+                "Invalid prompt type. Only 'box' and 'point' are supported"
+            )

paddlex/inference/pipelines/pp_chatocr/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .pipeline_v3 import PP_ChatOCRv3_Pipeline
+from .pipeline_v4 import PP_ChatOCRv4_Pipeline

paddlex/inference/pipelines/pp_chatocr/pipeline_base.py ADDED Viewed

@@ -0,0 +1,102 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Dict, Optional
+from ..base import BasePipeline
+from ....utils import logging
+from ...utils.pp_option import PaddlePredictorOption
+class PP_ChatOCR_Pipeline(BasePipeline):
+    """PP-ChatOCR Pipeline"""
+    def __init__(
+        self,
+        device: str = None,
+        pp_option: PaddlePredictorOption = None,
+        use_hpip: bool = False,
+    ) -> None:
+        """Initializes the pp-chatocrv3-doc pipeline.
+        Args:
+            config (Dict): Configuration dictionary containing various settings.
+            device (str, optional): Device to run the predictions on. Defaults to None.
+            pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
+            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+        """
+        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+    def visual_predict(self):
+        """
+        This function takes an input image or a list of images and performs various visual
+        prediction tasks such as document orientation classification, document unwarping,
+        general OCR, seal recognition, and table recognition based on the provided flags.
+        """
+        raise NotImplementedError(
+            "The method `visual_predict` has not been implemented yet."
+        )
+    def save_visual_info_list(self):
+        """
+        Save the visual info list to the specified file path.
+        """
+        raise NotImplementedError(
+            "The method `save_visual_info_list` has not been implemented yet."
+        )
+    def load_visual_info_list(self):
+        """
+        Loads visual info list from a file.
+        """
+        raise NotImplementedError(
+            "The method `load_visual_info_list` has not been implemented yet."
+        )
+    def build_vector(self):
+        """
+        Build a vector representation from visual information.
+        """
+        raise NotImplementedError(
+            "The method `build_vector` has not been implemented yet."
+        )
+    def save_vector(self):
+        """
+        Save the vector information to a specified path.
+        """
+        raise NotImplementedError(
+            "The method `save_vector` has not been implemented yet."
+        )
+    def load_vector(self):
+        """
+        Loads vector information from a file.
+        """
+        raise NotImplementedError(
+            "The method `load_vector` has not been implemented yet."
+        )
+    def chat(self):
+        """
+        Generates chat results based on the provided key list and visual information.
+        """
+        raise NotImplementedError("The method `chat` has not been implemented yet.")
+    def predict(self, *args, **kwargs) -> None:
+        logging.error(
+            "PP-ChatOCR Pipeline do not support to call `predict()` directly! Please invoke `visual_predict`, `build_vector`, `chat` sequentially to obtain the result."
+        )
+        return

paddlex 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl

paddlex 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl