PyPI - paddlex - Versions diffs - 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl - Mend

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (824) hide show

paddlex/inference/models/text_detection/processors.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,30 +12,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List, Tuple, Union
-import os
-import sys
-import cv2
-import copy
 import math
-import pyclipper
+from typing import Union
 import numpy as np
-from numpy.linalg import norm
-from PIL import Image
-from shapely.geometry import Polygon
-from ...utils.io import ImageReader
 from ....utils import logging
+from ....utils.deps import class_requires_deps, is_dep_available
+from ...utils.benchmark import benchmark
+if is_dep_available("opencv-contrib-python"):
+    import cv2
+if is_dep_available("pyclipper"):
+    import pyclipper
+@benchmark.timeit
+@class_requires_deps("opencv-contrib-python")
 class DetResizeForTest:
     """DetResizeForTest"""
-    def __init__(self, **kwargs):
-        super().__init__()
+    def __init__(self, input_shape=None, max_side_limit=4000, **kwargs):
         self.resize_type = 0
         self.keep_ratio = False
-        if "image_shape" in kwargs:
+        if input_shape is not None:
+            self.input_shape = input_shape
+            self.resize_type = 3
+        elif "image_shape" in kwargs:
             self.image_shape = kwargs["image_shape"]
             self.resize_type = 1
             if "keep_ratio" in kwargs:
@@ -50,22 +53,34 @@ class DetResizeForTest:
             self.limit_side_len = 736
             self.limit_type = "min"
+        self.max_side_limit = max_side_limit
     def __call__(
         self,
         imgs,
         limit_side_len: Union[int, None] = None,
         limit_type: Union[str, None] = None,
+        max_side_limit: Union[int, None] = None,
     ):
         """apply"""
+        max_side_limit = (
+            max_side_limit if max_side_limit is not None else self.max_side_limit
+        )
         resize_imgs, img_shapes = [], []
         for ori_img in imgs:
-            img, shape = self.resize(ori_img, limit_side_len, limit_type)
+            img, shape = self.resize(
+                ori_img, limit_side_len, limit_type, max_side_limit
+            )
             resize_imgs.append(img)
             img_shapes.append(shape)
         return resize_imgs, img_shapes
     def resize(
-        self, img, limit_side_len: Union[int, None], limit_type: Union[str, None]
+        self,
+        img,
+        limit_side_len: Union[int, None],
+        limit_type: Union[str, None],
+        max_side_limit: Union[int, None] = None,
     ):
         src_h, src_w, _ = img.shape
         if sum([src_h, src_w]) < 64:
@@ -74,10 +89,12 @@ class DetResizeForTest:
         if self.resize_type == 0:
             # img, shape = self.resize_image_type0(img)
             img, [ratio_h, ratio_w] = self.resize_image_type0(
-                img, limit_side_len, limit_type
+                img, limit_side_len, limit_type, max_side_limit
             )
         elif self.resize_type == 2:
             img, [ratio_h, ratio_w] = self.resize_image_type2(img)
+        elif self.resize_type == 3:
+            img, [ratio_h, ratio_w] = self.resize_image_type3(img)
         else:
             # img, shape = self.resize_image_type1(img)
             img, [ratio_h, ratio_w] = self.resize_image_type1(img)
@@ -98,6 +115,8 @@ class DetResizeForTest:
             resize_w = ori_w * resize_h / ori_h
             N = math.ceil(resize_w / 32)
             resize_w = N * 32
+        if resize_h == ori_h and resize_w == ori_w:
+            return img, [1.0, 1.0]
         ratio_h = float(resize_h) / ori_h
         ratio_w = float(resize_w) / ori_w
         img = cv2.resize(img, (int(resize_w), int(resize_h)))
@@ -105,7 +124,11 @@ class DetResizeForTest:
         return img, [ratio_h, ratio_w]
     def resize_image_type0(
-        self, img, limit_side_len: Union[int, None], limit_type: Union[str, None]
+        self,
+        img,
+        limit_side_len: Union[int, None],
+        limit_type: Union[str, None],
+        max_side_limit: Union[int, None] = None,
     ):
         """
         resize image to a size multiple of 32 which is required by the network
@@ -142,16 +165,28 @@ class DetResizeForTest:
         resize_h = int(h * ratio)
         resize_w = int(w * ratio)
+        if max(resize_h, resize_w) > max_side_limit:
+            logging.warning(
+                f"Resized image size ({resize_h}x{resize_w}) exceeds max_side_limit of {max_side_limit}. "
+                f"Resizing to fit within limit."
+            )
+            ratio = float(max_side_limit) / max(resize_h, resize_w)
+            resize_h, resize_w = int(resize_h * ratio), int(resize_w * ratio)
         resize_h = max(int(round(resize_h / 32) * 32), 32)
         resize_w = max(int(round(resize_w / 32) * 32), 32)
+        if resize_h == h and resize_w == w:
+            return img, [1.0, 1.0]
         try:
             if int(resize_w) <= 0 or int(resize_h) <= 0:
                 return None, (None, None)
             img = cv2.resize(img, (int(resize_w), int(resize_h)))
         except:
             logging.info(img.shape, resize_w, resize_h)
-            sys.exit(0)
+            raise
         ratio_h = resize_h / float(h)
         ratio_w = resize_w / float(w)
         return img, [ratio_h, ratio_w]
@@ -174,37 +209,70 @@ class DetResizeForTest:
         max_stride = 128
         resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
         resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
+        if resize_h == h and resize_w == w:
+            return img, [1.0, 1.0]
         img = cv2.resize(img, (int(resize_w), int(resize_h)))
         ratio_h = resize_h / float(h)
         ratio_w = resize_w / float(w)
         return img, [ratio_h, ratio_w]
+    def resize_image_type3(self, img):
+        """resize the image"""
+        resize_c, resize_h, resize_w = self.input_shape  # (c, h, w)
+        ori_h, ori_w = img.shape[:2]  # (h, w, c)
+        if resize_h == ori_h and resize_w == ori_w:
+            return img, [1.0, 1.0]
+        ratio_h = float(resize_h) / ori_h
+        ratio_w = float(resize_w) / ori_w
+        img = cv2.resize(img, (int(resize_w), int(resize_h)))
+        return img, [ratio_h, ratio_w]
+@benchmark.timeit
+@class_requires_deps("opencv-contrib-python")
 class NormalizeImage:
-    """normalize image such as substract mean, divide std"""
+    """normalize image such as subtract mean, divide std"""
-    def __init__(self, scale=None, mean=None, std=None, order="chw", **kwargs):
+    def __init__(self, scale=None, mean=None, std=None, order="chw"):
         super().__init__()
         if isinstance(scale, str):
             scale = eval(scale)
-        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
+        self.order = order
+        scale = scale if scale is not None else 1.0 / 255.0
         mean = mean if mean is not None else [0.485, 0.456, 0.406]
         std = std if std is not None else [0.229, 0.224, 0.225]
-        shape = (3, 1, 1) if order == "chw" else (1, 1, 3)
-        self.mean = np.array(mean).reshape(shape).astype("float32")
-        self.std = np.array(std).reshape(shape).astype("float32")
+        self.alpha = [scale / std[i] for i in range(len(std))]
+        self.beta = [-mean[i] / std[i] for i in range(len(std))]
     def __call__(self, imgs):
         """apply"""
-        def norm(img):
-            return (img.astype("float32") * self.scale - self.mean) / self.std
+        def _norm(img):
+            if self.order == "chw":
+                img = np.transpose(img, (2, 0, 1))
+            split_im = list(cv2.split(img))
+            for c in range(img.shape[2]):
+                split_im[c] = split_im[c].astype(np.float32)
+                split_im[c] *= self.alpha[c]
+                split_im[c] += self.beta[c]
-        return [norm(img) for img in imgs]
+            res = cv2.merge(split_im)
+            if self.order == "chw":
+                res = np.transpose(res, (1, 2, 0))
+            return res
+        return [_norm(img) for img in imgs]
+@benchmark.timeit
+@class_requires_deps("opencv-contrib-python", "pyclipper")
 class DBPostProcess:
     """
     The post process for Differentiable Binarization (DB).
@@ -219,7 +287,7 @@ class DBPostProcess:
         use_dilation=False,
         score_mode="fast",
         box_type="quad",
-        **kwargs
+        **kwargs,
     ):
         super().__init__()
         self.thresh = thresh
@@ -248,7 +316,8 @@ class DBPostProcess:
         bitmap = _bitmap
         height, width = bitmap.shape
+        width_scale = dest_width / width
+        height_scale = dest_height / height
         boxes = []
         scores = []
@@ -283,10 +352,10 @@ class DBPostProcess:
                 continue
             box = np.array(box)
-            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
-            box[:, 1] = np.clip(
-                np.round(box[:, 1] / height * dest_height), 0, dest_height
-            )
+            for i in range(box.shape[0]):
+                box[i, 0] = max(0, min(round(box[i, 0] * width_scale), dest_width))
+                box[i, 1] = max(0, min(round(box[i, 1] * height_scale), dest_height))
             boxes.append(box)
             scores.append(score)
         return boxes, scores
@@ -304,6 +373,8 @@ class DBPostProcess:
         bitmap = _bitmap
         height, width = bitmap.shape
+        width_scale = dest_width / width
+        height_scale = dest_height / height
         outs = cv2.findContours(
             (bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
@@ -334,20 +405,21 @@ class DBPostProcess:
             box, sside = self.get_mini_boxes(box)
             if sside < self.min_size + 2:
                 continue
             box = np.array(box)
+            for i in range(box.shape[0]):
+                box[i, 0] = max(0, min(round(box[i, 0] * width_scale), dest_width))
+                box[i, 1] = max(0, min(round(box[i, 1] * height_scale), dest_height))
-            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
-            box[:, 1] = np.clip(
-                np.round(box[:, 1] / height * dest_height), 0, dest_height
-            )
             boxes.append(box.astype(np.int16))
             scores.append(score)
         return np.array(boxes, dtype=np.int16), scores
     def unclip(self, box, unclip_ratio):
         """unclip"""
-        poly = Polygon(box)
-        distance = poly.area * unclip_ratio / poly.length
+        area = cv2.contourArea(box)
+        length = cv2.arcLength(box, True)
+        distance = area * unclip_ratio / length
         offset = pyclipper.PyclipperOffset()
         offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
         try:
@@ -382,10 +454,10 @@ class DBPostProcess:
         """box_score_fast: use bbox mean score as the mean score"""
         h, w = bitmap.shape[:2]
         box = _box.copy()
-        xmin = np.clip(np.floor(box[:, 0].min()).astype("int"), 0, w - 1)
-        xmax = np.clip(np.ceil(box[:, 0].max()).astype("int"), 0, w - 1)
-        ymin = np.clip(np.floor(box[:, 1].min()).astype("int"), 0, h - 1)
-        ymax = np.clip(np.ceil(box[:, 1].max()).astype("int"), 0, h - 1)
+        xmin = max(0, min(math.floor(box[:, 0].min()), w - 1))
+        xmax = max(0, min(math.ceil(box[:, 0].max()), w - 1))
+        ymin = max(0, min(math.floor(box[:, 1].min()), h - 1))
+        ymax = max(0, min(math.ceil(box[:, 1].max()), h - 1))
         mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
         box[:, 0] = box[:, 0] - xmin

paddlex/inference/models/text_detection/result.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,23 +13,18 @@
 # limitations under the License.
 import copy
 import numpy as np
-import cv2
-from pathlib import Path
-from ...common.result import BaseCVResult, StrMixin, JsonMixin
+from ....utils.deps import class_requires_deps, is_dep_available
+from ...common.result import BaseCVResult, JsonMixin
+if is_dep_available("opencv-contrib-python"):
+    import cv2
-class TextDetResult(BaseCVResult):
-    def _get_input_fn(self):
-        fn = super()._get_input_fn()
-        if (page_idx := self["page_index"]) is not None:
-            fp = Path(fn)
-            stem, suffix = fp.stem, fp.suffix
-            return f"{stem}_{page_idx}{suffix}"
-        else:
-            return fn
+@class_requires_deps("opencv-contrib-python")
+class TextDetResult(BaseCVResult):
     def _to_img(self):
         """draw rectangle"""

paddlex/inference/models/text_recognition/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

paddlex/inference/models/text_recognition/predictor.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,31 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from ....utils.func_register import FuncRegister
 from ....modules.text_recognition.model_list import MODELS
+from ....utils.func_register import FuncRegister
 from ...common.batch_sampler import ImageBatchSampler
 from ...common.reader import ReadImage
-from ..common import (
-    Resize,
-    ResizeByShort,
-    Normalize,
-    ToCHWImage,
-    StaticInfer,
-)
-from ..base import BasicPredictor
-from .processors import OCRReisizeNormImg, CTCLabelDecode, ToBatch
+from ..base import BasePredictor
+from .processors import CTCLabelDecode, OCRReisizeNormImg, ToBatch
 from .result import TextRecResult
-class TextRecPredictor(BasicPredictor):
+class TextRecPredictor(BasePredictor):
     entities = MODELS
     _FUNC_MAP = {}
     register = FuncRegister(_FUNC_MAP)
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, input_shape=None, **kwargs):
         super().__init__(*args, **kwargs)
+        self.input_shape = input_shape
         self.pre_tfs, self.infer, self.post_op = self._build()
     def _build_batch_sampler(self):
@@ -57,11 +51,7 @@ class TextRecPredictor(BasicPredictor):
                 pre_tfs[name] = op
         pre_tfs["ToBatch"] = ToBatch()
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
         post_op = self.build_postprocess(**self.config["PostProcess"])
         return pre_tfs, infer, post_op
@@ -87,7 +77,9 @@ class TextRecPredictor(BasicPredictor):
     @register("RecResizeImg")
     def build_resize(self, image_shape):
-        return "ReisizeNorm", OCRReisizeNormImg(rec_image_shape=image_shape)
+        return "ReisizeNorm", OCRReisizeNormImg(
+            rec_image_shape=image_shape, input_shape=self.input_shape
+        )
     def build_postprocess(self, **kwargs):
         if kwargs.get("name") == "CTCLabelDecode":

paddlex/inference/models/text_recognition/processors.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,28 +13,28 @@
 # limitations under the License.
-import os
-import os.path as osp
+import math
+import re
 from typing import List
-import re
 import numpy as np
-from PIL import Image
-import cv2
-import math
-import json
-import tempfile
-from tokenizers import Tokenizer as TokenizerFast
-from ....utils import logging
+from ....utils.deps import class_requires_deps, is_dep_available
+from ...utils.benchmark import benchmark
+if is_dep_available("opencv-contrib-python"):
+    import cv2
+@benchmark.timeit
+@class_requires_deps("opencv-contrib-python")
 class OCRReisizeNormImg:
     """for ocr image resize and normalization"""
-    def __init__(self, rec_image_shape=[3, 48, 320]):
+    def __init__(self, rec_image_shape=[3, 48, 320], input_shape=None):
         super().__init__()
         self.rec_image_shape = rec_image_shape
+        self.input_shape = input_shape
         self.max_imgW = 3200
     def resize_norm_img(self, img, max_wh_ratio):
@@ -64,7 +64,10 @@ class OCRReisizeNormImg:
     def __call__(self, imgs):
         """apply"""
-        return [self.resize(img) for img in imgs]
+        if self.input_shape is None:
+            return [self.resize(img) for img in imgs]
+        else:
+            return [self.staticResize(img) for img in imgs]
     def resize(self, img):
         imgC, imgH, imgW = self.rec_image_shape
@@ -75,7 +78,16 @@ class OCRReisizeNormImg:
         img = self.resize_norm_img(img, max_wh_ratio)
         return img
+    def staticResize(self, img):
+        imgC, imgH, imgW = self.input_shape
+        resized_image = cv2.resize(img, (int(imgW), int(imgH)))
+        resized_image = resized_image.transpose((2, 0, 1)) / 255
+        resized_image -= 0.5
+        resized_image /= 0.5
+        return resized_image
+@benchmark.timeit
 class BaseRecLabelDecode:
     """Convert between text-label and text-index"""
@@ -167,6 +179,7 @@ class BaseRecLabelDecode:
         return texts, scores
+@benchmark.timeit
 class CTCLabelDecode(BaseRecLabelDecode):
     """Convert between text-label and text-index"""
@@ -192,6 +205,7 @@ class CTCLabelDecode(BaseRecLabelDecode):
         return character_list
+@benchmark.timeit
 class ToBatch:
     """A class for batching and padding images to a uniform width."""

paddlex/inference/models/text_recognition/result.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,11 +13,12 @@
 # limitations under the License.
 import copy
 import PIL
 from PIL import Image, ImageDraw, ImageFont
 from ....utils.fonts import PINGFANG_FONT_FILE_PATH
-from ...common.result import BaseCVResult, StrMixin, JsonMixin
+from ...common.result import BaseCVResult, JsonMixin
 class TextRecResult(BaseCVResult):

paddlex/inference/models/ts_anomaly_detection/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

paddlex/inference/models/ts_anomaly_detection/predictor.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,30 +12,29 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Union, Dict, List, Tuple
-import numpy as np
-import pandas as pd
 import os
+from typing import Any, Dict, List, Tuple, Union
+import pandas as pd
 from ....modules.ts_anomaly_detection.model_list import MODELS
 from ...common.batch_sampler import TSBatchSampler
 from ...common.reader import ReadTS
+from ..base import BasePredictor
 from ..common import (
-    TSCutOff,
     BuildTSDataset,
-    TSNormalize,
     TimeFeature,
+    TSCutOff,
+    TSNormalize,
     TStoArray,
     TStoBatch,
-    StaticInfer,
 )
 from .processors import GetAnomaly
-from ..base import BasicPredictor
 from .result import TSAdResult
-class TSAdPredictor(BasicPredictor):
-    """TSAdPredictor that inherits from BasicPredictor."""
+class TSAdPredictor(BasePredictor):
+    """TSAdPredictor that inherits from BasePredictor."""
     entities = MODELS
@@ -94,11 +93,7 @@ class TSAdPredictor(BasicPredictor):
             )
         preprocessors["TStoArray"] = TStoArray(self.config["input_data"])
         preprocessors["TStoBatch"] = TStoBatch()
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
         postprocessors = {}
         postprocessors["GetAnomaly"] = GetAnomaly(
             self.config["model_threshold"], self.config["info_params"]
@@ -116,7 +111,7 @@ class TSAdPredictor(BasicPredictor):
             dict: A dictionary containing the input path, raw image, class IDs, scores, and label names for every instance of the batch. Keys include 'input_path', 'input_img', 'class_ids', 'scores', and 'label_names'.
         """
-        batch_raw_ts = self.preprocessors["ReadTS"](ts_list=batch_data)
+        batch_raw_ts = self.preprocessors["ReadTS"](ts_list=batch_data.instances)
         batch_cutoff_ts = self.preprocessors["TSCutOff"](ts_list=batch_raw_ts)
         if "TSNormalize" in self.preprocessors:
@@ -140,7 +135,7 @@ class TSAdPredictor(BasicPredictor):
             ori_ts_list=batch_input_ts, pred_list=batch_preds
         )
         return {
-            "input_path": batch_data,
+            "input_path": batch_data.input_paths,
             "input_ts": batch_raw_ts,
             "anomaly": batch_ts_preds,
         }

paddlex/inference/models/ts_anomaly_detection/processors.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,11 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List, Dict, Any
+from typing import Any, Dict, List
 import numpy as np
 import pandas as pd
+from ...utils.benchmark import benchmark
+@benchmark.timeit
 class GetAnomaly:
     """A class to detect anomalies in time series data based on a model threshold."""

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl