PyPI - paddlex - Versions diffs - 3.0.0rc1__py3-none-any.whl → 3.0.2__py3-none-any.whl - Mend

paddlex 3.0.0rc1__py3-none-any.whl → 3.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (240) hide show

paddlex/inference/pipelines/ocr/pipeline.py CHANGED Viewed

@@ -22,6 +22,7 @@ from ...common.batch_sampler import ImageBatchSampler
 from ...common.reader import ReadImage
 from ...utils.hpi import HPIConfig
 from ...utils.pp_option import PaddlePredictorOption
+from .._parallel import AutoParallelImageSimpleInferencePipeline
 from ..base import BasePipeline
 from ..components import (
     CropByPolys,
@@ -33,12 +34,9 @@ from ..components import (
 from .result import OCRResult
-@pipeline_requires_extra("ocr")
-class OCRPipeline(BasePipeline):
+class _OCRPipeline(BasePipeline):
     """OCR Pipeline"""
-    entities = "OCR"
     def __init__(
         self,
         config: Dict,
@@ -55,9 +53,9 @@ class OCRPipeline(BasePipeline):
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
             use_hpip (bool, optional): Whether to use the high-performance
-                inference plugin (HPIP). Defaults to False.
+                inference plugin (HPIP) by default. Defaults to False.
             hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
-                The high-performance inference configuration dictionary.
+                The default high-performance inference configuration dictionary.
                 Defaults to None.
         """
         super().__init__(
@@ -93,6 +91,7 @@ class OCRPipeline(BasePipeline):
         if self.text_type == "general":
             self.text_det_limit_side_len = text_det_config.get("limit_side_len", 960)
             self.text_det_limit_type = text_det_config.get("limit_type", "max")
+            self.text_det_max_side_limit = text_det_config.get("max_side_limit", 4000)
             self.text_det_thresh = text_det_config.get("thresh", 0.3)
             self.text_det_box_thresh = text_det_config.get("box_thresh", 0.6)
             self.input_shape = text_det_config.get("input_shape", None)
@@ -102,6 +101,7 @@ class OCRPipeline(BasePipeline):
         elif self.text_type == "seal":
             self.text_det_limit_side_len = text_det_config.get("limit_side_len", 736)
             self.text_det_limit_type = text_det_config.get("limit_type", "min")
+            self.text_det_max_side_limit = text_det_config.get("max_side_limit", 4000)
             self.text_det_thresh = text_det_config.get("thresh", 0.2)
             self.text_det_box_thresh = text_det_config.get("box_thresh", 0.6)
             self.text_det_unclip_ratio = text_det_config.get("unclip_ratio", 0.5)
@@ -115,6 +115,7 @@ class OCRPipeline(BasePipeline):
             text_det_config,
             limit_side_len=self.text_det_limit_side_len,
             limit_type=self.text_det_limit_type,
+            max_side_limit=self.text_det_max_side_limit,
             thresh=self.text_det_thresh,
             box_thresh=self.text_det_box_thresh,
             unclip_ratio=self.text_det_unclip_ratio,
@@ -131,7 +132,7 @@ class OCRPipeline(BasePipeline):
             text_rec_config, input_shape=self.input_shape
         )
-        self.batch_sampler = ImageBatchSampler(batch_size=1)
+        self.batch_sampler = ImageBatchSampler(batch_size=config.get("batch_size", 1))
         self.img_reader = ReadImage(format="BGR")
     def rotate_image(
@@ -234,6 +235,7 @@ class OCRPipeline(BasePipeline):
         self,
         text_det_limit_side_len: Optional[int] = None,
         text_det_limit_type: Optional[str] = None,
+        text_det_max_side_limit: Optional[int] = None,
         text_det_thresh: Optional[float] = None,
         text_det_box_thresh: Optional[float] = None,
         text_det_unclip_ratio: Optional[float] = None,
@@ -246,6 +248,7 @@ class OCRPipeline(BasePipeline):
         Args:
             text_det_limit_side_len (Optional[int]): The maximum side length of the text box.
             text_det_limit_type (Optional[str]): The type of limit to apply to the text box.
+            text_det_max_side_limit (Optional[int]): The maximum side length of the text box.
             text_det_thresh (Optional[float]): The threshold for text detection.
             text_det_box_thresh (Optional[float]): The threshold for the bounding box.
             text_det_unclip_ratio (Optional[float]): The ratio for unclipping the text box.
@@ -257,6 +260,8 @@ class OCRPipeline(BasePipeline):
             text_det_limit_side_len = self.text_det_limit_side_len
         if text_det_limit_type is None:
             text_det_limit_type = self.text_det_limit_type
+        if text_det_max_side_limit is None:
+            text_det_max_side_limit = self.text_det_max_side_limit
         if text_det_thresh is None:
             text_det_thresh = self.text_det_thresh
         if text_det_box_thresh is None:
@@ -267,6 +272,7 @@ class OCRPipeline(BasePipeline):
             limit_side_len=text_det_limit_side_len,
             limit_type=text_det_limit_type,
             thresh=text_det_thresh,
+            max_side_limit=text_det_max_side_limit,
             box_thresh=text_det_box_thresh,
             unclip_ratio=text_det_unclip_ratio,
         )
@@ -279,6 +285,7 @@ class OCRPipeline(BasePipeline):
         use_textline_orientation: Optional[bool] = None,
         text_det_limit_side_len: Optional[int] = None,
         text_det_limit_type: Optional[str] = None,
+        text_det_max_side_limit: Optional[int] = None,
         text_det_thresh: Optional[float] = None,
         text_det_box_thresh: Optional[float] = None,
         text_det_unclip_ratio: Optional[float] = None,
@@ -294,6 +301,7 @@ class OCRPipeline(BasePipeline):
             use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
             text_det_limit_side_len (Optional[int]): Maximum side length for text detection.
             text_det_limit_type (Optional[str]): Type of limit to apply for text detection.
+            text_det_max_side_limit (Optional[int]): Maximum side length for text detection.
             text_det_thresh (Optional[float]): Threshold for text detection.
             text_det_box_thresh (Optional[float]): Threshold for text detection boxes.
             text_det_unclip_ratio (Optional[float]): Ratio for unclipping text detection boxes.
@@ -312,6 +320,7 @@ class OCRPipeline(BasePipeline):
         text_det_params = self.get_text_det_params(
             text_det_limit_side_len,
             text_det_limit_type,
+            text_det_max_side_limit,
             text_det_thresh,
             text_det_box_thresh,
             text_det_unclip_ratio,
@@ -320,87 +329,135 @@ class OCRPipeline(BasePipeline):
         if text_rec_score_thresh is None:
             text_rec_score_thresh = self.text_rec_score_thresh
-        for img_id, batch_data in enumerate(self.batch_sampler(input)):
-            image_array = self.img_reader(batch_data.instances)[0]
+        for _, batch_data in enumerate(self.batch_sampler(input)):
+            image_arrays = self.img_reader(batch_data.instances)
             if model_settings["use_doc_preprocessor"]:
-                doc_preprocessor_res = next(
+                doc_preprocessor_results = list(
                     self.doc_preprocessor_pipeline(
-                        image_array,
+                        image_arrays,
                         use_doc_orientation_classify=use_doc_orientation_classify,
                         use_doc_unwarping=use_doc_unwarping,
                     )
                 )
             else:
-                doc_preprocessor_res = {"output_img": image_array}
+                doc_preprocessor_results = [{"output_img": arr} for arr in image_arrays]
-            doc_preprocessor_image = doc_preprocessor_res["output_img"]
+            doc_preprocessor_images = [
+                item["output_img"] for item in doc_preprocessor_results
+            ]
-            det_res = next(
-                self.text_det_model(doc_preprocessor_image, **text_det_params)
+            det_results = list(
+                self.text_det_model(doc_preprocessor_images, **text_det_params)
             )
-            dt_polys = det_res["dt_polys"]
-            det_res["dt_scores"]
-            dt_polys = self._sort_boxes(dt_polys)
-            single_img_res = {
-                "input_path": batch_data.input_paths[0],
-                "page_index": batch_data.page_indexes[0],
-                "doc_preprocessor_res": doc_preprocessor_res,
-                "dt_polys": dt_polys,
-                "model_settings": model_settings,
-                "text_det_params": text_det_params,
-                "text_type": self.text_type,
-                "text_rec_score_thresh": text_rec_score_thresh,
-            }
-            single_img_res["rec_texts"] = []
-            single_img_res["rec_scores"] = []
-            single_img_res["rec_polys"] = []
-            if len(dt_polys) > 0:
-                all_subs_of_img = list(
-                    self._crop_by_polys(doc_preprocessor_image, dt_polys)
+            dt_polys_list = [item["dt_polys"] for item in det_results]
+            dt_polys_list = [self._sort_boxes(item) for item in dt_polys_list]
+            results = [
+                {
+                    "input_path": input_path,
+                    "page_index": page_index,
+                    "doc_preprocessor_res": doc_preprocessor_res,
+                    "dt_polys": dt_polys,
+                    "model_settings": model_settings,
+                    "text_det_params": text_det_params,
+                    "text_type": self.text_type,
+                    "text_rec_score_thresh": text_rec_score_thresh,
+                    "rec_texts": [],
+                    "rec_scores": [],
+                    "rec_polys": [],
+                }
+                for input_path, page_index, doc_preprocessor_res, dt_polys in zip(
+                    batch_data.input_paths,
+                    batch_data.page_indexes,
+                    doc_preprocessor_results,
+                    dt_polys_list,
                 )
+            ]
+            indices = list(range(len(doc_preprocessor_images)))
+            indices = [idx for idx in indices if len(dt_polys_list[idx]) > 0]
+            if indices:
+                all_subs_of_imgs = []
+                chunk_indices = [0]
+                for idx in indices:
+                    all_subs_of_img = list(
+                        self._crop_by_polys(
+                            doc_preprocessor_images[idx], dt_polys_list[idx]
+                        )
+                    )
+                    all_subs_of_imgs.extend(all_subs_of_img)
+                    chunk_indices.append(chunk_indices[-1] + len(all_subs_of_img))
                 # use textline orientation model
                 if model_settings["use_textline_orientation"]:
                     angles = [
                         int(textline_angle_info["class_ids"][0])
                         for textline_angle_info in self.textline_orientation_model(
-                            all_subs_of_img
+                            all_subs_of_imgs
                         )
                     ]
-                    all_subs_of_img = self.rotate_image(all_subs_of_img, angles)
+                    all_subs_of_imgs = self.rotate_image(all_subs_of_imgs, angles)
                 else:
-                    angles = [-1] * len(all_subs_of_img)
-                single_img_res["textline_orientation_angles"] = angles
-                sub_img_info_list = [
-                    {
-                        "sub_img_id": img_id,
-                        "sub_img_ratio": sub_img.shape[1] / float(sub_img.shape[0]),
-                    }
-                    for img_id, sub_img in enumerate(all_subs_of_img)
-                ]
-                sorted_subs_info = sorted(
-                    sub_img_info_list, key=lambda x: x["sub_img_ratio"]
-                )
-                sorted_subs_of_img = [
-                    all_subs_of_img[x["sub_img_id"]] for x in sorted_subs_info
-                ]
-                for idx, rec_res in enumerate(self.text_rec_model(sorted_subs_of_img)):
-                    sub_img_id = sorted_subs_info[idx]["sub_img_id"]
-                    sub_img_info_list[sub_img_id]["rec_res"] = rec_res
-                for sno in range(len(sub_img_info_list)):
-                    rec_res = sub_img_info_list[sno]["rec_res"]
-                    if rec_res["rec_score"] >= text_rec_score_thresh:
-                        single_img_res["rec_texts"].append(rec_res["rec_text"])
-                        single_img_res["rec_scores"].append(rec_res["rec_score"])
-                        single_img_res["rec_polys"].append(dt_polys[sno])
-            if self.text_type == "general":
-                rec_boxes = convert_points_to_boxes(single_img_res["rec_polys"])
-                single_img_res["rec_boxes"] = rec_boxes
-            else:
-                single_img_res["rec_boxes"] = np.array([])
-            yield OCRResult(single_img_res)
+                    angles = [-1] * len(all_subs_of_imgs)
+                for i, idx in enumerate(indices):
+                    res = results[idx]
+                    res["textline_orientation_angles"] = angles[
+                        chunk_indices[i] : chunk_indices[i + 1]
+                    ]
+                # TODO: Process all sub-images in the batch together
+                for i, idx in enumerate(indices):
+                    all_subs_of_img = all_subs_of_imgs[
+                        chunk_indices[i] : chunk_indices[i + 1]
+                    ]
+                    res = results[idx]
+                    dt_polys = dt_polys_list[idx]
+                    sub_img_info_list = [
+                        {
+                            "sub_img_id": img_id,
+                            "sub_img_ratio": sub_img.shape[1] / float(sub_img.shape[0]),
+                        }
+                        for img_id, sub_img in enumerate(all_subs_of_img)
+                    ]
+                    sorted_subs_info = sorted(
+                        sub_img_info_list, key=lambda x: x["sub_img_ratio"]
+                    )
+                    sorted_subs_of_img = [
+                        all_subs_of_img[x["sub_img_id"]] for x in sorted_subs_info
+                    ]
+                    for i, rec_res in enumerate(
+                        self.text_rec_model(sorted_subs_of_img)
+                    ):
+                        sub_img_id = sorted_subs_info[i]["sub_img_id"]
+                        sub_img_info_list[sub_img_id]["rec_res"] = rec_res
+                    for sno in range(len(sub_img_info_list)):
+                        rec_res = sub_img_info_list[sno]["rec_res"]
+                        if rec_res["rec_score"] >= text_rec_score_thresh:
+                            res["rec_texts"].append(rec_res["rec_text"])
+                            res["rec_scores"].append(rec_res["rec_score"])
+                            res["rec_polys"].append(dt_polys[sno])
+            for res in results:
+                if self.text_type == "general":
+                    rec_boxes = convert_points_to_boxes(res["rec_polys"])
+                    res["rec_boxes"] = rec_boxes
+                else:
+                    res["rec_boxes"] = np.array([])
+                yield OCRResult(res)
+@pipeline_requires_extra("ocr")
+class OCRPipeline(AutoParallelImageSimpleInferencePipeline):
+    entities = "OCR"
+    @property
+    def _pipeline_cls(self):
+        return _OCRPipeline
+    def _get_batch_size(self, config):
+        return config.get("batch_size", 1)

paddlex/inference/pipelines/ocr/result.py CHANGED Viewed

@@ -14,14 +14,13 @@
 import math
 import random
-from pathlib import Path
 from typing import Dict
 import numpy as np
 from PIL import Image, ImageDraw
 from ....utils.deps import class_requires_deps, function_requires_deps, is_dep_available
-from ....utils.fonts import SIMFANG_FONT_FILE_PATH, create_font
+from ....utils.fonts import SIMFANG_FONT_FILE_PATH, create_font, create_font_vertical
 from ...common.result import BaseCVResult, JsonMixin
 if is_dep_available("opencv-contrib-python"):
@@ -32,15 +31,6 @@ if is_dep_available("opencv-contrib-python"):
 class OCRResult(BaseCVResult):
     """OCR result"""
-    def _get_input_fn(self):
-        fn = super()._get_input_fn()
-        if (page_idx := self["page_index"]) is not None:
-            fp = Path(fn)
-            stem, suffix = fp.stem, fp.suffix
-            return f"{stem}_{page_idx}{suffix}"
-        else:
-            return fn
     def get_minarea_rect(self, points: np.ndarray) -> np.ndarray:
         """
         Get the minimum area rectangle for the given points using OpenCV.
@@ -106,7 +96,9 @@ class OCRResult(BaseCVResult):
                     height = int(0.5 * (max(box[:, 1]) - min(box[:, 1])))
                     box[:2, 1] = np.mean(box[:, 1])
                     box[2:, 1] = np.mean(box[:, 1]) + min(20, height)
-                draw_left.polygon(box, fill=color)
+                box_pts = [(int(x), int(y)) for x, y in box.tolist()]
+                draw_left.polygon(box_pts, fill=color)
                 img_right_text = draw_box_txt_fine(
                     (w, h), box, txt, SIMFANG_FONT_FILE_PATH
                 )
@@ -214,19 +206,20 @@ def draw_box_txt_fine(
         np.ndarray: An image with the text drawn in the specified box.
     """
     box_height = int(
-        math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
+        math.sqrt(float(box[0][0] - box[3][0]) ** 2 + float(box[0][1] - box[3][1]) ** 2)
     )
     box_width = int(
-        math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)
+        math.sqrt(float(box[0][0] - box[1][0]) ** 2 + float(box[0][1] - box[1][1]) ** 2)
     )
     if box_height > 2 * box_width and box_height > 30:
-        img_text = Image.new("RGB", (box_height, box_width), (255, 255, 255))
+        img_text = Image.new("RGB", (box_width, box_height), (255, 255, 255))
         draw_text = ImageDraw.Draw(img_text)
         if txt:
-            font = create_font(txt, (box_height, box_width), font_path)
-            draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
-        img_text = img_text.transpose(Image.ROTATE_270)
+            font = create_font_vertical(txt, (box_width, box_height), font_path)
+            draw_vertical_text(
+                draw_text, (0, 0), txt, font, fill=(0, 0, 0), line_spacing=2
+            )
     else:
         img_text = Image.new("RGB", (box_width, box_height), (255, 255, 255))
         draw_text = ImageDraw.Draw(img_text)
@@ -250,3 +243,13 @@ def draw_box_txt_fine(
         borderValue=(255, 255, 255),
     )
     return img_right_text
+@function_requires_deps("opencv-contrib-python")
+def draw_vertical_text(draw, position, text, font, fill=(0, 0, 0), line_spacing=2):
+    x, y = position
+    for char in text:
+        draw.text((x, y), char, font=font, fill=fill)
+        bbox = font.getbbox(char)
+        char_height = bbox[3] - bbox[1]
+        y += char_height + line_spacing

paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py CHANGED Viewed

@@ -45,9 +45,9 @@ class OpenVocabularyDetectionPipeline(BasePipeline):
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
             use_hpip (bool, optional): Whether to use the high-performance
-                inference plugin (HPIP). Defaults to False.
+                inference plugin (HPIP) by default. Defaults to False.
             hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
-                The high-performance inference configuration dictionary.
+                The default high-performance inference configuration dictionary.
                 Defaults to None.
         """
         super().__init__(

paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py CHANGED Viewed

@@ -47,9 +47,9 @@ class OpenVocabularySegmentationPipeline(BasePipeline):
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
             use_hpip (bool, optional): Whether to use the high-performance
-                inference plugin (HPIP). Defaults to False.
+                inference plugin (HPIP) by default. Defaults to False.
             hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
-                The high-performance inference configuration dictionary.
+                The default high-performance inference configuration dictionary.
                 Defaults to None.
         """
         super().__init__(

paddlex/inference/pipelines/pp_chatocr/pipeline_base.py CHANGED Viewed

@@ -37,9 +37,9 @@ class PP_ChatOCR_Pipeline(BasePipeline):
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
             use_hpip (bool, optional): Whether to use the high-performance
-                inference plugin (HPIP). Defaults to False.
+                inference plugin (HPIP) by default. Defaults to False.
             hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
-                The high-performance inference configuration dictionary.
+                The default high-performance inference configuration dictionary.
                 Defaults to None.
         """

paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py CHANGED Viewed

@@ -54,9 +54,9 @@ class PP_ChatOCRv3_Pipeline(PP_ChatOCR_Pipeline):
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
             use_hpip (bool, optional): Whether to use the high-performance
-                inference plugin (HPIP). Defaults to False.
+                inference plugin (HPIP) by default. Defaults to False.
             hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
-                The high-performance inference configuration dictionary.
+                The default high-performance inference configuration dictionary.
                 Defaults to None.
             initial_predictor (bool, optional): Whether to initialize the predictor. Defaults to True.
         """
@@ -206,7 +206,6 @@ class PP_ChatOCRv3_Pipeline(PP_ChatOCR_Pipeline):
         input: Union[str, List[str], np.ndarray, List[np.ndarray]],
         use_doc_orientation_classify: Optional[bool] = None,
         use_doc_unwarping: Optional[bool] = None,
-        use_general_ocr: Optional[bool] = None,
         use_seal_recognition: Optional[bool] = None,
         use_table_recognition: Optional[bool] = None,
         layout_threshold: Optional[Union[float, dict]] = None,
@@ -237,7 +236,6 @@ class PP_ChatOCRv3_Pipeline(PP_ChatOCR_Pipeline):
                                                                         numpy array of an image, or list of numpy arrays.
             use_doc_orientation_classify (bool): Flag to use document orientation classification.
             use_doc_unwarping (bool): Flag to use document unwarping.
-            use_general_ocr (bool): Flag to use general OCR.
             use_seal_recognition (bool): Flag to use seal recognition.
             use_table_recognition (bool): Flag to use table recognition.
             layout_threshold (Optional[float]): The threshold value to filter out low-confidence predictions. Default is None.
@@ -280,7 +278,6 @@ class PP_ChatOCRv3_Pipeline(PP_ChatOCR_Pipeline):
             input,
             use_doc_orientation_classify=use_doc_orientation_classify,
             use_doc_unwarping=use_doc_unwarping,
-            use_general_ocr=use_general_ocr,
             use_seal_recognition=use_seal_recognition,
             use_table_recognition=use_table_recognition,
             layout_threshold=layout_threshold,

paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py CHANGED Viewed

@@ -62,9 +62,9 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
             use_hpip (bool, optional): Whether to use the high-performance
-                inference plugin (HPIP). Defaults to False.
+                inference plugin (HPIP) by default. Defaults to False.
             hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
-                The high-performance inference configuration dictionary.
+                The default high-performance inference configuration dictionary.
                 Defaults to None.
             initial_predictor (bool, optional): Whether to initialize the predictor. Defaults to True.
         """
@@ -249,7 +249,7 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
         input: Union[str, List[str], np.ndarray, List[np.ndarray]],
         use_doc_orientation_classify: Optional[bool] = None,
         use_doc_unwarping: Optional[bool] = None,
-        use_general_ocr: Optional[bool] = None,
+        use_textline_orientation: Optional[bool] = None,
         use_seal_recognition: Optional[bool] = None,
         use_table_recognition: Optional[bool] = None,
         layout_threshold: Optional[Union[float, dict]] = None,
@@ -280,7 +280,7 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
                                                                         numpy array of an image, or list of numpy arrays.
             use_doc_orientation_classify (bool): Flag to use document orientation classification.
             use_doc_unwarping (bool): Flag to use document unwarping.
-            use_general_ocr (bool): Flag to use general OCR.
+            use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
             use_seal_recognition (bool): Flag to use seal recognition.
             use_table_recognition (bool): Flag to use table recognition.
             layout_threshold (Optional[float]): The threshold value to filter out low-confidence predictions. Default is None.
@@ -322,7 +322,7 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
             input,
             use_doc_orientation_classify=use_doc_orientation_classify,
             use_doc_unwarping=use_doc_unwarping,
-            use_general_ocr=use_general_ocr,
+            use_textline_orientation=use_textline_orientation,
             use_seal_recognition=use_seal_recognition,
             use_table_recognition=use_table_recognition,
             layout_threshold=layout_threshold,
@@ -638,7 +638,7 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
         for image_array in self.img_reader([input]):
-            image_string = cv2.imencode(".jpg", image_array)[1].tostring()
+            image_string = cv2.imencode(".jpg", image_array)[1].tobytes()
             image_base64 = base64.b64encode(image_string).decode("utf-8")
             result = {}
             for key in key_list:

paddlex/inference/pipelines/rotated_object_detection/pipeline.py CHANGED Viewed

@@ -20,15 +20,13 @@ from ....utils.deps import pipeline_requires_extra
 from ...models.object_detection.result import DetResult
 from ...utils.hpi import HPIConfig
 from ...utils.pp_option import PaddlePredictorOption
+from .._parallel import AutoParallelImageSimpleInferencePipeline
 from ..base import BasePipeline
-@pipeline_requires_extra("cv")
-class RotatedObjectDetectionPipeline(BasePipeline):
+class _RotatedObjectDetectionPipeline(BasePipeline):
     """Rotated Object Detection Pipeline"""
-    entities = "rotated_object_detection"
     def __init__(
         self,
         config: Dict,
@@ -45,9 +43,9 @@ class RotatedObjectDetectionPipeline(BasePipeline):
             device (str): The device to run the prediction on. Default is None.
             pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
             use_hpip (bool, optional): Whether to use the high-performance
-                inference plugin (HPIP). Defaults to False.
+                inference plugin (HPIP) by default. Defaults to False.
             hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
-                The high-performance inference configuration dictionary.
+                The default high-performance inference configuration dictionary.
                 Defaults to None.
         """
         super().__init__(
@@ -83,3 +81,15 @@ class RotatedObjectDetectionPipeline(BasePipeline):
             DetResult: The predicted rotated object detection results.
         """
         yield from self.rotated_object_detection_model(input, threshold=threshold)
+@pipeline_requires_extra("cv")
+class RotatedObjectDetectionPipeline(AutoParallelImageSimpleInferencePipeline):
+    entities = "rotated_object_detection"
+    @property
+    def _pipeline_cls(self):
+        return _RotatedObjectDetectionPipeline
+    def _get_batch_size(self, config):
+        return config["SubModules"]["RotatedObjectDetection"].get("batch_size", 1)

paddlex 3.0.0rc1__py3-none-any.whl → 3.0.2__py3-none-any.whl

paddlex 3.0.0rc1__py3-none-any.whl → 3.0.2__py3-none-any.whl