PyPI - paddlex - Versions diffs - 3.0.0rc0__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl - Mend

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (785) hide show

paddlex/inference/models/object_detection/processors.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,20 +12,26 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List, Sequence, Tuple, Union, Optional
+from typing import List, Optional, Sequence, Tuple, Union
-import cv2
 import numpy as np
 from numpy import ndarray
-from ..common import Resize as CommonResize
-from ..common import Normalize as CommonNormalize
+from ....utils.deps import class_requires_deps, function_requires_deps, is_dep_available
 from ...common.reader import ReadImage as CommonReadImage
+from ...utils.benchmark import benchmark
+from ..common import Normalize as CommonNormalize
+from ..common import Resize as CommonResize
+if is_dep_available("opencv-contrib-python"):
+    import cv2
 Boxes = List[dict]
 Number = Union[int, float]
+@benchmark.timeit_with_options(name=None, is_read_operation=True)
+@class_requires_deps("opencv-contrib-python")
 class ReadImage(CommonReadImage):
     """Reads images from a list of raw image data or file paths."""
@@ -69,7 +75,7 @@ class ReadImage(CommonReadImage):
         if isinstance(img, np.ndarray):
             ori_img = img
             if self.format == "RGB":
-                img = img[:, :, ::-1]
+                img = cv2.cvtColor(ori_img, cv2.COLOR_BGR2RGB)
             return img, ori_img
         elif isinstance(img, str):
             blob = self._img_reader.read(img)
@@ -81,7 +87,7 @@ class ReadImage(CommonReadImage):
                 if blob.ndim != 3:
                     raise RuntimeError("Array is not 3-dimensional.")
                 # BGR to RGB
-                blob = blob[..., ::-1]
+                blob = cv2.cvtColor(blob, cv2.COLOR_BGR2RGB)
             return blob, ori_img
         else:
             raise TypeError(
@@ -92,6 +98,7 @@ class ReadImage(CommonReadImage):
             )
+@benchmark.timeit
 class Resize(CommonResize):
     def __call__(self, datas: List[dict]) -> List[dict]:
         """
@@ -122,31 +129,18 @@ class Resize(CommonResize):
         return datas
+@benchmark.timeit
 class Normalize(CommonNormalize):
-    """Normalizes images in a list of dictionaries containing image data"""
-    def apply(self, img: ndarray) -> ndarray:
-        """Applies normalization to a single image."""
-        old_type = img.dtype
-        # XXX: If `old_type` has higher precision than float32,
-        # we will lose some precision.
-        img = img.astype("float32", copy=False)
-        img *= self.scale
-        img -= self.mean
-        img /= self.std
-        if self.preserve_dtype:
-            img = img.astype(old_type, copy=False)
-        return img
     def __call__(self, datas: List[dict]) -> List[dict]:
         """Normalizes images in a list of dictionaries. Iterates over each dictionary,
         applies normalization to the 'img' key, and returns the modified list.
         """
         for data in datas:
-            data["img"] = self.apply(data["img"])
+            data["img"] = self.norm(data["img"])
         return datas
+@benchmark.timeit
 class ToCHWImage:
     """Converts images in a list of dictionaries from HWC to CHW format."""
@@ -164,6 +158,7 @@ class ToCHWImage:
         return datas
+@benchmark.timeit
 class ToBatch:
     """
     Class for batch processing of data dictionaries.
@@ -211,6 +206,7 @@ class ToBatch:
         return [self.apply(datas, key) for key in self.ordered_required_keys]
+@benchmark.timeit
 class DetPad:
     """
     Pad image to a specified size.
@@ -248,6 +244,7 @@ class DetPad:
         return datas
+@benchmark.timeit
 class PadStride:
     """padding image for model with FPN , instead PadBatch(pad_to_stride, pad_gt) in original config
     Args:
@@ -318,6 +315,7 @@ def _get_3rd_point(a: ndarray, b: ndarray) -> ndarray:
     return third_pt
+@function_requires_deps("opencv-contrib-python")
 def get_affine_transform(
     center: ndarray,
     input_size: Union[Number, Tuple[Number, Number], ndarray],
@@ -374,6 +372,8 @@ def get_affine_transform(
     return trans
+@benchmark.timeit
+@class_requires_deps("opencv-contrib-python")
 class WarpAffine:
     """Apply warp affine transformation to the image based on the given parameters.
@@ -434,7 +434,7 @@ class WarpAffine:
         if not self.keep_res:
             out_h = input_h // self.down_ratio
             out_w = input_w // self.down_ratio
-            trans_output = get_affine_transform(c, s, 0, [out_w, out_h])
+            get_affine_transform(c, s, 0, [out_w, out_h])
         return inp
@@ -621,7 +621,7 @@ def nms(boxes, iou_same=0.6, iou_diff=0.95):
         current = indices[0]
         current_box = boxes[current]
         current_class = current_box[0]
-        current_score = current_box[1]
+        current_box[1]
         current_coords = current_box[2:]
         selected_boxes.append(current)
@@ -675,7 +675,7 @@ def check_containment(boxes, formula_index=None, category_index=None, mode=None)
                 if mode == "large" and boxes[j][0] == category_index:
                     if is_contained(boxes[i], boxes[j]):
                         contained_by_other[i] = 1
-                        contains_other[j] = 1
+                        contains_other[j] = 1
                 if mode == "small" and boxes[i][0] == category_index:
                     if is_contained(boxes[i], boxes[j]):
                         contained_by_other[i] = 1
@@ -687,6 +687,7 @@ def check_containment(boxes, formula_index=None, category_index=None, mode=None)
     return contains_other, contained_by_other
+@benchmark.timeit
 class DetPostProcess:
     """Save Result Transform
@@ -743,14 +744,16 @@ class DetPostProcess:
             )
         if layout_nms:
-            filtered_boxes = []
+            pass
             ### Layout postprocess for NMS
             selected_indices = nms(boxes, iou_same=0.6, iou_diff=0.98)
             boxes = np.array(boxes[selected_indices])
         if layout_merge_bboxes_mode:
-            formula_index = (self.labels.index("formula") if "formula" in self.labels else None)
-            if isinstance(layout_merge_bboxes_mode, str):
+            formula_index = (
+                self.labels.index("formula") if "formula" in self.labels else None
+            )
+            if isinstance(layout_merge_bboxes_mode, str):
                 assert layout_merge_bboxes_mode in [
                     "union",
                     "large",
@@ -783,13 +786,15 @@ class DetPostProcess:
                                 boxes, formula_index, category_index, mode=layout_mode
                             )
                             # Remove boxes that are contained by other boxes
-                            keep_mask &= (contained_by_other == 0)
+                            keep_mask &= contained_by_other == 0
                         elif layout_mode == "small":
                             contains_other, contained_by_other = check_containment(
                                 boxes, formula_index, category_index, mode=layout_mode
                             )
                             # Keep boxes that do not contain others or are contained by others
-                            keep_mask &= (contains_other == 0) | (contained_by_other == 1)
+                            keep_mask &= (contains_other == 0) | (
+                                contained_by_other == 1
+                            )
                 boxes = boxes[keep_mask]
         if boxes.size == 0:

paddlex/inference/models/object_detection/result.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,14 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List
 import copy
+from typing import List
 import PIL
 from PIL import Image, ImageDraw, ImageFont
 from ....utils.fonts import PINGFANG_FONT_FILE_PATH
-from ...utils.color_map import get_colormap, font_colormap
-from ...common.result import BaseCVResult, StrMixin, JsonMixin
+from ...common.result import BaseCVResult, JsonMixin
+from ...utils.color_map import font_colormap, get_colormap
 def draw_box(img: Image.Image, boxes: List[dict]) -> Image.Image:

paddlex/inference/models/object_detection/utils.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

paddlex/inference/models/open_vocabulary_detection/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

paddlex/inference/models/open_vocabulary_detection/predictor.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,21 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Union, Dict, List, Tuple, Optional, Callable
-import numpy as np
 import inspect
+from typing import Any, Callable, Dict, List, Optional, Union
-from ....utils.func_register import FuncRegister
 from ....modules.open_vocabulary_detection.model_list import MODELS
+from ....utils.func_register import FuncRegister
 from ...common.batch_sampler import ImageBatchSampler
 from ...common.reader import ReadImage
-from .processors import GroundingDINOProcessor, GroundingDINOPostProcessor
-from ..common import StaticInfer
-from ..base import BasicPredictor
+from ..base import BasePredictor
 from ..object_detection.result import DetResult
+from .processors import (
+    GroundingDINOPostProcessor,
+    GroundingDINOProcessor,
+    YOLOWorldPostProcessor,
+    YOLOWorldProcessor,
+)
-class OVDetPredictor(BasicPredictor):
+class OVDetPredictor(BasePredictor):
     entities = MODELS
@@ -68,11 +71,7 @@ class OVDetPredictor(BasicPredictor):
                 pre_ops.append(op)
         # build infer
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
         # build postprocess op
         post_op = self.build_postprocess(pre_ops=pre_ops)
@@ -97,7 +96,7 @@ class OVDetPredictor(BasicPredictor):
         image_paths = batch_data.input_paths
         src_images = self.pre_ops[0](batch_data.instances)
         datas = src_images
-        # preprocess
+        # preprocess for image only
         for pre_op in self.pre_ops[1:-1]:
             datas = pre_op(datas)
@@ -141,6 +140,10 @@ class OVDetPredictor(BasicPredictor):
                 box_threshold=self.config["box_threshold"],
                 text_threshold=self.config["text_threshold"],
             )
+        elif "YOLO-World" in self.model_name:
+            return YOLOWorldPostProcessor(
+                threshold=self.config["threshold"],
+            )
         else:
             raise NotImplementedError
@@ -153,3 +156,17 @@ class OVDetPredictor(BasicPredictor):
             text_max_words=text_max_words,
             target_size=target_size,
         )
+    @register("YOLOWorldProcessor")
+    def build_yoloworld_preprocessor(
+        self,
+        image_target_size=(640, 640),
+        image_mean=[0.0, 0.0, 0.0],
+        image_std=[1.0, 1.0, 1.0],
+    ):
+        return YOLOWorldProcessor(
+            model_dir=self.model_dir,
+            image_target_size=image_target_size,
+            image_mean=image_mean,
+            image_std=image_std,
+        )

paddlex/inference/models/open_vocabulary_detection/processors/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,4 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .groundingdino_processors import GroundingDINOProcessor, GroundingDINOPostProcessor
+from .groundingdino_processors import GroundingDINOPostProcessor, GroundingDINOProcessor
+from .yoloworld_processors import YOLOWorldPostProcessor, YOLOWorldProcessor

paddlex/inference/models/open_vocabulary_detection/processors/common.py ADDED Viewed

@@ -0,0 +1,114 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Dict, List, Tuple
+import numpy as np
+from .....utils.deps import class_requires_deps, is_dep_available
+if is_dep_available("opencv-contrib-python"):
+    import cv2
+@class_requires_deps("opencv-contrib-python")
+class LetterResize(object):
+    def __init__(
+        self,
+        scale=[640, 640],
+        pad_val=144,
+        use_mini_pad=False,
+        stretch_only=False,
+        allow_scale_up=False,
+    ):
+        super(LetterResize, self).__init__()
+        self.scale = scale
+        self.pad_val = pad_val
+        self.use_mini_pad = use_mini_pad
+        self.stretch_only = stretch_only
+        self.allow_scale_up = allow_scale_up
+    def _resize_img(self, image: np.ndarray) -> Dict:
+        scale = self.scale
+        image_shape = image.shape[:2]
+        ratio = min(scale[0] / image_shape[0], scale[1] / image_shape[1])
+        if not self.allow_scale_up:
+            ratio = min(ratio, 1.0)
+        ratio = [ratio, ratio]
+        no_pad_shape = (
+            int(round(image_shape[0] * ratio[0])),
+            int(round(image_shape[1] * ratio[1])),
+        )
+        padding_h, padding_w = [scale[0] - no_pad_shape[0], scale[1] - no_pad_shape[1]]
+        if self.use_mini_pad:
+            padding_w, padding_h = np.mod(padding_w, 32), np.mod(padding_h, 32)
+        elif self.stretch_only:
+            padding_h, padding_w = 0.0, 0.0
+            no_pad_shape = (scale[0], scale[1])
+            ratio = [scale[0] / image_shape[0], scale[1] / image_shape[1]]
+        if image_shape != no_pad_shape:
+            image = cv2.resize(
+                image,
+                (no_pad_shape[1], no_pad_shape[0]),
+                interpolation=cv2.INTER_LINEAR,
+            )
+        scale_factor = (
+            no_pad_shape[1] / image_shape[1],
+            no_pad_shape[0] / image_shape[0],
+        )
+        top_padding, left_padding = int(round(padding_h // 2 - 0.1)), int(
+            round(padding_w // 2 - 0.1)
+        )
+        bottom_padding = padding_h - top_padding
+        right_padding = padding_w - left_padding
+        padding_list = [top_padding, bottom_padding, left_padding, right_padding]
+        if (
+            top_padding != 0
+            or bottom_padding != 0
+            or left_padding != 0
+            or right_padding != 0
+        ):
+            pad_val = self.pad_val
+            if isinstance(pad_val, int) and image.ndim == 3:
+                pad_val = tuple(pad_val for _ in range(image.shape[2]))
+            top, bottom, left, right = padding_list
+            image = cv2.copyMakeBorder(
+                image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=pad_val
+            )
+        result = dict()
+        result["image"] = image
+        result["scale_factor"] = np.array(scale_factor, dtype=np.float32)
+        result["pad_param"] = np.array(padding_list, dtype=np.float32)
+        return result
+    def __call__(self, images: List[np.ndarray]) -> List[Dict]:
+        if not isinstance(images, (List, Tuple)):
+            images = [images]
+        rst_images = [self._resize_img(image) for image in images]
+        return rst_images

paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,18 +13,13 @@
 # limitations under the License.
 import os
-from typing import Dict, List, Optional, Union, Tuple
+from typing import Dict, List, Optional, Tuple, Union
 import numpy as np
 import PIL
+from ....utils.benchmark import benchmark
 from ...common.tokenizer.bert_tokenizer import BertTokenizer
-from .....utils.lazy_loader import LazyLoader
-# NOTE: LazyLoader is used to avoid conflicts between ultra-infer and Paddle
-paddle = LazyLoader("lazy_paddle", globals(), "paddle")
-T = LazyLoader("T", globals(), "paddle.vision.transforms")
-F = LazyLoader("F", globals(), "paddle.nn.functional")
 def _max_by_axis(the_list):
@@ -97,6 +92,7 @@ def _text_pad_batch_data(
     return return_list if len(return_list) > 1 else return_list[0]
+@benchmark.timeit
 class GroundingDINOPostProcessor(object):
     """PostProcessors for GroundingDINO"""
@@ -127,6 +123,7 @@ class GroundingDINOPostProcessor(object):
         text_threshold=None,
         **kwargs,
     ):
+        import paddle
         box_threshold = self.box_threshold if box_threshold is None else box_threshold
         text_threshold = (
@@ -166,6 +163,8 @@ class GroundingDINOPostProcessor(object):
         text_threshold,
     ):
         """Post Process for prediction result of single image."""
+        import paddle
+        import paddle.nn.functional as F
         logits = F.sigmoid(pred_logits)
         boxes = pred_boxes
@@ -206,6 +205,7 @@ class GroundingDINOPostProcessor(object):
             raise NotImplementedError("posmap must be 1-dim")
+@benchmark.timeit
 class GroundingDINOProcessor(object):
     """Image and Text Processors for GroundingDINO"""
@@ -261,6 +261,7 @@ class GroundingDINOProcessor(object):
         return [arr.numpy() for arr in paddle_rst]
+@benchmark.timeit
 class GroundingDinoTextProcessor(object):
     """Constructs a GroundingDino text processor."""
@@ -276,6 +277,8 @@ class GroundingDinoTextProcessor(object):
         special_tokens_list,
     ):
         """Preprocess the text with tokenization."""
+        import paddle
         tokenized_out = {}
         input_ids = _text_pad_batch_data(input_ids)
         input_ids = paddle.to_tensor(input_ids, dtype=paddle.int64).squeeze(-1)
@@ -321,6 +324,8 @@ class GroundingDinoTextProcessor(object):
         Returns:
             torch.Tensor: attention mask between each special tokens.
         """
+        import paddle
         input_ids = tokenized["input_ids"]
         bs, num_token = input_ids.shape
         special_tokens_mask = paddle.zeros((bs, num_token), dtype=paddle.bool)
@@ -363,6 +368,7 @@ class GroundingDinoTextProcessor(object):
         return attention_mask, position_ids.cast(paddle.int64), cate_to_token_mask_list
+@benchmark.timeit
 class GroundingDinoImageProcessor(object):
     """Constructs a GroundingDino image processor."""
@@ -393,6 +399,7 @@ class GroundingDinoImageProcessor(object):
     def resize(self, image, size=None, max_size=1333):
         """Officially aligned Image resize."""
+        import paddle.vision.transforms as T
         def get_size_with_aspect_ratio(image_size, size, max_size=None):
             w, h = image_size
@@ -426,6 +433,8 @@ class GroundingDinoImageProcessor(object):
         return rescaled_image
     def nested_tensor_from_tensor_list(self, tensor_list):
+        import paddle
         if tensor_list[0].ndim == 3:
             max_size = _max_by_axis([list(img.shape) for img in tensor_list])
             batch_shape = [len(tensor_list)] + max_size
@@ -455,6 +464,8 @@ class GroundingDinoImageProcessor(object):
         **kwargs,
     ):
         """Preprocess an image or batch of images."""
+        import paddle.vision.transforms as T
         do_resize = do_resize if do_resize is not None else self.do_resize
         do_normalize = do_normalize if do_normalize is not None else self.do_normalize
         do_nested = do_nested if do_nested is not None else self.do_nested

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl