PyPI - paddlex - Versions diffs - 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl - Mend

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (824) hide show

paddlex/inference/pipelines/object_detection/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

paddlex/inference/pipelines/object_detection/pipeline.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,26 +12,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict, Optional, Union, Tuple, List
+from typing import Any, Dict, List, Optional, Tuple, Union
 import numpy as np
+from ....utils.deps import pipeline_requires_extra
+from ...models.object_detection.result import DetResult
+from ...utils.hpi import HPIConfig
 from ...utils.pp_option import PaddlePredictorOption
+from .._parallel import AutoParallelImageSimpleInferencePipeline
 from ..base import BasePipeline
-from ...models.object_detection.result import DetResult
-class ObjectDetectionPipeline(BasePipeline):
+class _ObjectDetectionPipeline(BasePipeline):
     """Object Detection Pipeline"""
-    entities = "object_detection"
     def __init__(
         self,
         config: Dict,
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -40,9 +42,15 @@ class ObjectDetectionPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
         model_cfg = config["SubModules"]["ObjectDetection"]
         model_kwargs = {}
         if "threshold" in model_cfg:
@@ -93,3 +101,15 @@ class ObjectDetectionPipeline(BasePipeline):
             layout_merge_bboxes_mode=layout_merge_bboxes_mode,
             **kwargs,
         )
+@pipeline_requires_extra("cv")
+class ObjectDetectionPipeline(AutoParallelImageSimpleInferencePipeline):
+    entities = "object_detection"
+    @property
+    def _pipeline_cls(self):
+        return _ObjectDetectionPipeline
+    def _get_batch_size(self, config):
+        return config["SubModules"]["ObjectDetection"].get("batch_size", 1)

paddlex/inference/pipelines/ocr/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

paddlex/inference/pipelines/ocr/pipeline.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,34 +13,37 @@
 # limitations under the License.
 from typing import Any, Dict, List, Optional, Union
 import numpy as np
-from scipy.ndimage import rotate
-from ...common.reader import ReadImage
+from ....utils import logging
+from ....utils.deps import pipeline_requires_extra
 from ...common.batch_sampler import ImageBatchSampler
+from ...common.reader import ReadImage
+from ...utils.hpi import HPIConfig
 from ...utils.pp_option import PaddlePredictorOption
+from .._parallel import AutoParallelImageSimpleInferencePipeline
 from ..base import BasePipeline
 from ..components import (
     CropByPolys,
-    SortQuadBoxes,
     SortPolyBoxes,
+    SortQuadBoxes,
     convert_points_to_boxes,
+    rotate_image,
 )
 from .result import OCRResult
-from ..doc_preprocessor.result import DocPreprocessorResult
-from ....utils import logging
-class OCRPipeline(BasePipeline):
+class _OCRPipeline(BasePipeline):
     """OCR Pipeline"""
-    entities = "OCR"
     def __init__(
         self,
         config: Dict,
         device: Optional[str] = None,
         pp_option: Optional[PaddlePredictorOption] = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -49,9 +52,15 @@ class OCRPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
         self.use_doc_preprocessor = config.get("use_doc_preprocessor", True)
         if self.use_doc_preprocessor:
@@ -82,17 +91,21 @@ class OCRPipeline(BasePipeline):
         if self.text_type == "general":
             self.text_det_limit_side_len = text_det_config.get("limit_side_len", 960)
             self.text_det_limit_type = text_det_config.get("limit_type", "max")
+            self.text_det_max_side_limit = text_det_config.get("max_side_limit", 4000)
             self.text_det_thresh = text_det_config.get("thresh", 0.3)
             self.text_det_box_thresh = text_det_config.get("box_thresh", 0.6)
+            self.input_shape = text_det_config.get("input_shape", None)
             self.text_det_unclip_ratio = text_det_config.get("unclip_ratio", 2.0)
             self._sort_boxes = SortQuadBoxes()
             self._crop_by_polys = CropByPolys(det_box_type="quad")
         elif self.text_type == "seal":
             self.text_det_limit_side_len = text_det_config.get("limit_side_len", 736)
             self.text_det_limit_type = text_det_config.get("limit_type", "min")
+            self.text_det_max_side_limit = text_det_config.get("max_side_limit", 4000)
             self.text_det_thresh = text_det_config.get("thresh", 0.2)
             self.text_det_box_thresh = text_det_config.get("box_thresh", 0.6)
             self.text_det_unclip_ratio = text_det_config.get("unclip_ratio", 0.5)
+            self.input_shape = text_det_config.get("input_shape", None)
             self._sort_boxes = SortPolyBoxes()
             self._crop_by_polys = CropByPolys(det_box_type="poly")
         else:
@@ -102,9 +115,11 @@ class OCRPipeline(BasePipeline):
             text_det_config,
             limit_side_len=self.text_det_limit_side_len,
             limit_type=self.text_det_limit_type,
+            max_side_limit=self.text_det_max_side_limit,
             thresh=self.text_det_thresh,
             box_thresh=self.text_det_box_thresh,
             unclip_ratio=self.text_det_unclip_ratio,
+            input_shape=self.input_shape,
         )
         text_rec_config = config.get("SubModules", {}).get(
@@ -112,9 +127,12 @@ class OCRPipeline(BasePipeline):
             {"model_config_error": "config error for text_rec_model!"},
         )
         self.text_rec_score_thresh = text_rec_config.get("score_thresh", 0)
-        self.text_rec_model = self.create_model(text_rec_config)
+        self.input_shape = text_rec_config.get("input_shape", None)
+        self.text_rec_model = self.create_model(
+            text_rec_config, input_shape=self.input_shape
+        )
-        self.batch_sampler = ImageBatchSampler(batch_size=1)
+        self.batch_sampler = ImageBatchSampler(batch_size=config.get("batch_size", 1))
         self.img_reader = ReadImage(format="BGR")
     def rotate_image(
@@ -148,7 +166,7 @@ class OCRPipeline(BasePipeline):
         for image_array, rotate_indicator in zip(image_array_list, rotate_angle_list):
             # Convert 0/1 indicator to actual rotation angle
             rotate_angle = rotate_indicator * 180
-            rotated_image = rotate(image_array, rotate_angle, reshape=True)
+            rotated_image = rotate_image(image_array, rotate_angle)
             rotated_images.append(rotated_image)
         return rotated_images
@@ -217,6 +235,7 @@ class OCRPipeline(BasePipeline):
         self,
         text_det_limit_side_len: Optional[int] = None,
         text_det_limit_type: Optional[str] = None,
+        text_det_max_side_limit: Optional[int] = None,
         text_det_thresh: Optional[float] = None,
         text_det_box_thresh: Optional[float] = None,
         text_det_unclip_ratio: Optional[float] = None,
@@ -229,6 +248,7 @@ class OCRPipeline(BasePipeline):
         Args:
             text_det_limit_side_len (Optional[int]): The maximum side length of the text box.
             text_det_limit_type (Optional[str]): The type of limit to apply to the text box.
+            text_det_max_side_limit (Optional[int]): The maximum side length of the text box.
             text_det_thresh (Optional[float]): The threshold for text detection.
             text_det_box_thresh (Optional[float]): The threshold for the bounding box.
             text_det_unclip_ratio (Optional[float]): The ratio for unclipping the text box.
@@ -240,6 +260,8 @@ class OCRPipeline(BasePipeline):
             text_det_limit_side_len = self.text_det_limit_side_len
         if text_det_limit_type is None:
             text_det_limit_type = self.text_det_limit_type
+        if text_det_max_side_limit is None:
+            text_det_max_side_limit = self.text_det_max_side_limit
         if text_det_thresh is None:
             text_det_thresh = self.text_det_thresh
         if text_det_box_thresh is None:
@@ -250,6 +272,7 @@ class OCRPipeline(BasePipeline):
             limit_side_len=text_det_limit_side_len,
             limit_type=text_det_limit_type,
             thresh=text_det_thresh,
+            max_side_limit=text_det_max_side_limit,
             box_thresh=text_det_box_thresh,
             unclip_ratio=text_det_unclip_ratio,
         )
@@ -262,6 +285,7 @@ class OCRPipeline(BasePipeline):
         use_textline_orientation: Optional[bool] = None,
         text_det_limit_side_len: Optional[int] = None,
         text_det_limit_type: Optional[str] = None,
+        text_det_max_side_limit: Optional[int] = None,
         text_det_thresh: Optional[float] = None,
         text_det_box_thresh: Optional[float] = None,
         text_det_unclip_ratio: Optional[float] = None,
@@ -277,6 +301,7 @@ class OCRPipeline(BasePipeline):
             use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
             text_det_limit_side_len (Optional[int]): Maximum side length for text detection.
             text_det_limit_type (Optional[str]): Type of limit to apply for text detection.
+            text_det_max_side_limit (Optional[int]): Maximum side length for text detection.
             text_det_thresh (Optional[float]): Threshold for text detection.
             text_det_box_thresh (Optional[float]): Threshold for text detection boxes.
             text_det_unclip_ratio (Optional[float]): Ratio for unclipping text detection boxes.
@@ -295,6 +320,7 @@ class OCRPipeline(BasePipeline):
         text_det_params = self.get_text_det_params(
             text_det_limit_side_len,
             text_det_limit_type,
+            text_det_max_side_limit,
             text_det_thresh,
             text_det_box_thresh,
             text_det_unclip_ratio,
@@ -303,87 +329,135 @@ class OCRPipeline(BasePipeline):
         if text_rec_score_thresh is None:
             text_rec_score_thresh = self.text_rec_score_thresh
-        for img_id, batch_data in enumerate(self.batch_sampler(input)):
-            image_array = self.img_reader(batch_data.instances)[0]
+        for _, batch_data in enumerate(self.batch_sampler(input)):
+            image_arrays = self.img_reader(batch_data.instances)
             if model_settings["use_doc_preprocessor"]:
-                doc_preprocessor_res = next(
+                doc_preprocessor_results = list(
                     self.doc_preprocessor_pipeline(
-                        image_array,
+                        image_arrays,
                         use_doc_orientation_classify=use_doc_orientation_classify,
                         use_doc_unwarping=use_doc_unwarping,
                     )
                 )
             else:
-                doc_preprocessor_res = {"output_img": image_array}
+                doc_preprocessor_results = [{"output_img": arr} for arr in image_arrays]
-            doc_preprocessor_image = doc_preprocessor_res["output_img"]
+            doc_preprocessor_images = [
+                item["output_img"] for item in doc_preprocessor_results
+            ]
-            det_res = next(
-                self.text_det_model(doc_preprocessor_image, **text_det_params)
+            det_results = list(
+                self.text_det_model(doc_preprocessor_images, **text_det_params)
             )
-            dt_polys = det_res["dt_polys"]
-            dt_scores = det_res["dt_scores"]
-            dt_polys = self._sort_boxes(dt_polys)
-            single_img_res = {
-                "input_path": batch_data.input_paths[0],
-                "page_index": batch_data.page_indexes[0],
-                "doc_preprocessor_res": doc_preprocessor_res,
-                "dt_polys": dt_polys,
-                "model_settings": model_settings,
-                "text_det_params": text_det_params,
-                "text_type": self.text_type,
-                "text_rec_score_thresh": text_rec_score_thresh,
-            }
-            single_img_res["rec_texts"] = []
-            single_img_res["rec_scores"] = []
-            single_img_res["rec_polys"] = []
-            if len(dt_polys) > 0:
-                all_subs_of_img = list(
-                    self._crop_by_polys(doc_preprocessor_image, dt_polys)
+            dt_polys_list = [item["dt_polys"] for item in det_results]
+            dt_polys_list = [self._sort_boxes(item) for item in dt_polys_list]
+            results = [
+                {
+                    "input_path": input_path,
+                    "page_index": page_index,
+                    "doc_preprocessor_res": doc_preprocessor_res,
+                    "dt_polys": dt_polys,
+                    "model_settings": model_settings,
+                    "text_det_params": text_det_params,
+                    "text_type": self.text_type,
+                    "text_rec_score_thresh": text_rec_score_thresh,
+                    "rec_texts": [],
+                    "rec_scores": [],
+                    "rec_polys": [],
+                }
+                for input_path, page_index, doc_preprocessor_res, dt_polys in zip(
+                    batch_data.input_paths,
+                    batch_data.page_indexes,
+                    doc_preprocessor_results,
+                    dt_polys_list,
                 )
+            ]
+            indices = list(range(len(doc_preprocessor_images)))
+            indices = [idx for idx in indices if len(dt_polys_list[idx]) > 0]
+            if indices:
+                all_subs_of_imgs = []
+                chunk_indices = [0]
+                for idx in indices:
+                    all_subs_of_img = list(
+                        self._crop_by_polys(
+                            doc_preprocessor_images[idx], dt_polys_list[idx]
+                        )
+                    )
+                    all_subs_of_imgs.extend(all_subs_of_img)
+                    chunk_indices.append(chunk_indices[-1] + len(all_subs_of_img))
                 # use textline orientation model
                 if model_settings["use_textline_orientation"]:
                     angles = [
                         int(textline_angle_info["class_ids"][0])
                         for textline_angle_info in self.textline_orientation_model(
-                            all_subs_of_img
+                            all_subs_of_imgs
                         )
                     ]
-                    all_subs_of_img = self.rotate_image(all_subs_of_img, angles)
+                    all_subs_of_imgs = self.rotate_image(all_subs_of_imgs, angles)
                 else:
-                    angles = [-1] * len(all_subs_of_img)
-                single_img_res["textline_orientation_angles"] = angles
-                sub_img_info_list = [
-                    {
-                        "sub_img_id": img_id,
-                        "sub_img_ratio": sub_img.shape[1] / float(sub_img.shape[0]),
-                    }
-                    for img_id, sub_img in enumerate(all_subs_of_img)
-                ]
-                sorted_subs_info = sorted(
-                    sub_img_info_list, key=lambda x: x["sub_img_ratio"]
-                )
-                sorted_subs_of_img = [
-                    all_subs_of_img[x["sub_img_id"]] for x in sorted_subs_info
-                ]
-                for idx, rec_res in enumerate(self.text_rec_model(sorted_subs_of_img)):
-                    sub_img_id = sorted_subs_info[idx]["sub_img_id"]
-                    sub_img_info_list[sub_img_id]["rec_res"] = rec_res
-                for sno in range(len(sub_img_info_list)):
-                    rec_res = sub_img_info_list[sno]["rec_res"]
-                    if rec_res["rec_score"] >= text_rec_score_thresh:
-                        single_img_res["rec_texts"].append(rec_res["rec_text"])
-                        single_img_res["rec_scores"].append(rec_res["rec_score"])
-                        single_img_res["rec_polys"].append(dt_polys[sno])
-            if self.text_type == "general":
-                rec_boxes = convert_points_to_boxes(single_img_res["rec_polys"])
-                single_img_res["rec_boxes"] = rec_boxes
-            else:
-                single_img_res["rec_boxes"] = np.array([])
-            yield OCRResult(single_img_res)
+                    angles = [-1] * len(all_subs_of_imgs)
+                for i, idx in enumerate(indices):
+                    res = results[idx]
+                    res["textline_orientation_angles"] = angles[
+                        chunk_indices[i] : chunk_indices[i + 1]
+                    ]
+                # TODO: Process all sub-images in the batch together
+                for i, idx in enumerate(indices):
+                    all_subs_of_img = all_subs_of_imgs[
+                        chunk_indices[i] : chunk_indices[i + 1]
+                    ]
+                    res = results[idx]
+                    dt_polys = dt_polys_list[idx]
+                    sub_img_info_list = [
+                        {
+                            "sub_img_id": img_id,
+                            "sub_img_ratio": sub_img.shape[1] / float(sub_img.shape[0]),
+                        }
+                        for img_id, sub_img in enumerate(all_subs_of_img)
+                    ]
+                    sorted_subs_info = sorted(
+                        sub_img_info_list, key=lambda x: x["sub_img_ratio"]
+                    )
+                    sorted_subs_of_img = [
+                        all_subs_of_img[x["sub_img_id"]] for x in sorted_subs_info
+                    ]
+                    for i, rec_res in enumerate(
+                        self.text_rec_model(sorted_subs_of_img)
+                    ):
+                        sub_img_id = sorted_subs_info[i]["sub_img_id"]
+                        sub_img_info_list[sub_img_id]["rec_res"] = rec_res
+                    for sno in range(len(sub_img_info_list)):
+                        rec_res = sub_img_info_list[sno]["rec_res"]
+                        if rec_res["rec_score"] >= text_rec_score_thresh:
+                            res["rec_texts"].append(rec_res["rec_text"])
+                            res["rec_scores"].append(rec_res["rec_score"])
+                            res["rec_polys"].append(dt_polys[sno])
+            for res in results:
+                if self.text_type == "general":
+                    rec_boxes = convert_points_to_boxes(res["rec_polys"])
+                    res["rec_boxes"] = rec_boxes
+                else:
+                    res["rec_boxes"] = np.array([])
+                yield OCRResult(res)
+@pipeline_requires_extra("ocr")
+class OCRPipeline(AutoParallelImageSimpleInferencePipeline):
+    entities = "OCR"
+    @property
+    def _pipeline_cls(self):
+        return _OCRPipeline
+    def _get_batch_size(self, config):
+        return config.get("batch_size", 1)

paddlex/inference/pipelines/ocr/result.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,32 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import os
-from pathlib import Path
-from typing import Dict
-import copy
 import math
 import random
+from typing import Dict
 import numpy as np
-import cv2
-import PIL
-from PIL import Image, ImageDraw, ImageFont
-from ....utils.fonts import SIMFANG_FONT_FILE_PATH, create_font
-from ...common.result import BaseCVResult, StrMixin, JsonMixin
+from PIL import Image, ImageDraw
+from ....utils.deps import class_requires_deps, function_requires_deps, is_dep_available
+from ....utils.fonts import SIMFANG_FONT_FILE_PATH, create_font, create_font_vertical
+from ...common.result import BaseCVResult, JsonMixin
+if is_dep_available("opencv-contrib-python"):
+    import cv2
+@class_requires_deps("opencv-contrib-python")
 class OCRResult(BaseCVResult):
     """OCR result"""
-    def _get_input_fn(self):
-        fn = super()._get_input_fn()
-        if (page_idx := self["page_index"]) is not None:
-            fp = Path(fn)
-            stem, suffix = fp.stem, fp.suffix
-            return f"{stem}_{page_idx}{suffix}"
-        else:
-            return fn
     def get_minarea_rect(self, points: np.ndarray) -> np.ndarray:
         """
         Get the minimum area rectangle for the given points using OpenCV.
@@ -103,7 +96,9 @@ class OCRResult(BaseCVResult):
                     height = int(0.5 * (max(box[:, 1]) - min(box[:, 1])))
                     box[:2, 1] = np.mean(box[:, 1])
                     box[2:, 1] = np.mean(box[:, 1]) + min(20, height)
-                draw_left.polygon(box, fill=color)
+                box_pts = [(int(x), int(y)) for x, y in box.tolist()]
+                draw_left.polygon(box_pts, fill=color)
                 img_right_text = draw_box_txt_fine(
                     (w, h), box, txt, SIMFANG_FONT_FILE_PATH
                 )
@@ -194,6 +189,7 @@ class OCRResult(BaseCVResult):
 # Adds a function comment according to Google Style Guide
+@function_requires_deps("opencv-contrib-python")
 def draw_box_txt_fine(
     img_size: tuple, box: np.ndarray, txt: str, font_path: str
 ) -> np.ndarray:
@@ -217,12 +213,13 @@ def draw_box_txt_fine(
     )
     if box_height > 2 * box_width and box_height > 30:
-        img_text = Image.new("RGB", (box_height, box_width), (255, 255, 255))
+        img_text = Image.new("RGB", (box_width, box_height), (255, 255, 255))
         draw_text = ImageDraw.Draw(img_text)
         if txt:
-            font = create_font(txt, (box_height, box_width), font_path)
-            draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
-        img_text = img_text.transpose(Image.ROTATE_270)
+            font = create_font_vertical(txt, (box_width, box_height), font_path)
+            draw_vertical_text(
+                draw_text, (0, 0), txt, font, fill=(0, 0, 0), line_spacing=2
+            )
     else:
         img_text = Image.new("RGB", (box_width, box_height), (255, 255, 255))
         draw_text = ImageDraw.Draw(img_text)
@@ -246,3 +243,13 @@ def draw_box_txt_fine(
         borderValue=(255, 255, 255),
     )
     return img_right_text
+@function_requires_deps("opencv-contrib-python")
+def draw_vertical_text(draw, position, text, font, fill=(0, 0, 0), line_spacing=2):
+    x, y = position
+    for char in text:
+        draw.text((x, y), char, font=font, fill=fill)
+        bbox = font.getbbox(char)
+        char_height = bbox[3] - bbox[1]
+        y += char_height + line_spacing

paddlex/inference/pipelines/open_vocabulary_detection/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,14 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Dict, Optional, Union, List
+from typing import Any, Dict, List, Optional, Union
 import numpy as np
-from ...utils.pp_option import PaddlePredictorOption
-from ..base import BasePipeline
+from ....utils.deps import pipeline_requires_extra
 from ...models.object_detection.result import DetResult
+from ...utils.hpi import HPIConfig
+from ...utils.pp_option import PaddlePredictorOption
+from ..base import BasePipeline
+@pipeline_requires_extra("multimodal")
 class OpenVocabularyDetectionPipeline(BasePipeline):
     """Open Vocabulary Detection Pipeline"""
@@ -31,6 +35,7 @@ class OpenVocabularyDetectionPipeline(BasePipeline):
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """
         Initializes the class with given configurations and options.
@@ -39,9 +44,15 @@ class OpenVocabularyDetectionPipeline(BasePipeline):
             config (Dict): Configuration dictionary containing model and other parameters.
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
-            use_hpip (bool): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """
-        super().__init__(device=device, pp_option=pp_option, use_hpip=use_hpip)
+        super().__init__(
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
+        )
         open_vocabulary_detection_model_config = config.get("SubModules", {}).get(
             "OpenVocabularyDetection",

paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl