PyPI - paddlex - Versions diffs - 3.0.0rc0__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl - Mend

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (785) hide show

paddlex/inference/models/open_vocabulary_detection/processors/yoloworld_processors.py ADDED Viewed

@@ -0,0 +1,209 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from typing import List, Tuple, Union
+import numpy as np
+from ...common.tokenizer.clip_tokenizer import CLIPTokenizer
+from .common import LetterResize
+class YOLOWorldProcessor(object):
+    """Image and Text Processors for YOLO-World"""
+    # Builds the model inputs: letterbox-resized, normalized images plus the
+    # tokenized, comma-separated text prompt.
+    def __init__(
+        self,
+        model_dir,
+        image_target_size: Union[Tuple[int], int] = (640, 640),
+        image_mean: Union[float, List[float]] = [0.0, 0.0, 0.0],
+        image_std: Union[float, List[float]] = [1.0, 1.0, 1.0],
+        **kwargs,
+    ):
+        # model_dir must contain a "tokenizer" sub-directory (asserted below).
+        # image_target_size: a single int is expanded to a square (size, size).
+        # image_mean / image_std: a single float is repeated three times.
+        # **kwargs is accepted but not used in this method.
+        # NOTE(review): an int mean/std (e.g. 0 or 1) is neither float, list
+        # nor tuple, so it is stored unchanged and the `.ndim` access further
+        # down would raise AttributeError - confirm callers never pass ints.
+        if isinstance(image_target_size, int):
+            image_target_size = (image_target_size, image_target_size)
+        if isinstance(image_mean, float):
+            image_mean = [image_mean, image_mean, image_mean]
+        if isinstance(image_std, float):
+            image_std = [image_std, image_std, image_std]
+        self.image_target_size = image_target_size
+        self.image_mean = image_mean
+        self.image_std = image_std
+        # The tokenizer files are loaded from <model_dir>/tokenizer.
+        tokenizer_dir = os.path.join(model_dir, "tokenizer")
+        assert os.path.isdir(tokenizer_dir), f"{tokenizer_dir} not exists."
+        self.tokenizer = CLIPTokenizer.from_pretrained(tokenizer_dir)
+        self.resize_op = LetterResize(self.image_target_size, allow_scale_up=True)
+        # Convert mean/std to arrays shaped (1, C, 1, 1) so they broadcast over
+        # the batched, channel-first array produced in process_image.
+        if isinstance(image_mean, (tuple, list)):
+            self.image_mean = np.array(image_mean)
+        if self.image_mean.ndim < 4:
+            self.image_mean = self.image_mean.reshape(1, -1, 1, 1)
+        if isinstance(image_std, (tuple, list)):
+            self.image_std = np.array(image_std)
+        if self.image_std.ndim < 4:
+            self.image_std = self.image_std.reshape(1, -1, 1, 1)
+    # Runs image and text preprocessing and returns the results as a list
+    # ordered by `static_input_orders` (not as a dict).
+    def __call__(
+        self,
+        images: List[np.ndarray],
+        text: str,
+        **kwargs,
+    ):
+        preprocess_results = self.process_image(images)
+        preprocess_results.update(self.process_text(text))
+        # Presumably this matches the input order expected by the exported
+        # static model - TODO confirm against the inference config.
+        static_input_orders = [
+            "attention_mask",
+            "image",
+            "input_ids",
+            "pad_param",
+            "scale_factor",
+        ]
+        result = [preprocess_results[k] for k in static_input_orders]
+        return result
+    def process_image(self, images):
+        """Image preprocess for YOLO-World"""
+        # resize_op yields one dict per image with the keys "image",
+        # "scale_factor" and "pad_param"; each field is stacked along a new
+        # leading batch axis.
+        rescaled_images = self.resize_op(images)
+        images = np.stack(
+            [rescaled_image["image"] for rescaled_image in rescaled_images], axis=0
+        )
+        scale_factors = np.stack(
+            [rescaled_image["scale_factor"] for rescaled_image in rescaled_images],
+            axis=0,
+        )
+        pad_params = np.stack(
+            [rescaled_image["pad_param"] for rescaled_image in rescaled_images], axis=0
+        )
+        # Move the last axis to position 1 (channel-first), cast to float32 and
+        # scale by 1/255, then normalize in place with the stored mean and std.
+        images = np.transpose(images, (0, 3, 1, 2)).astype(np.float32) / 255.0
+        images -= self.image_mean
+        images /= self.image_std
+        image_results = {
+            "image": images,
+            "scale_factor": scale_factors,
+            # Only columns 3 and 0 of pad_param are kept, in that order.
+            # NOTE(review): the meaning of these columns is defined by
+            # LetterResize, which is not visible here - verify.
+            "pad_param": pad_params[:, [3, 0]],
+        }
+        return image_results
+    # Tokenizes a comma-separated prompt. Returns a dict with the numpy arrays
+    # "input_ids" and "attention_mask".
+    def process_text(self, text):
+        text = text.strip().lower()
+        words = [word.strip() for word in text.split(",")]
+        # A single-space entry is always appended after the user's words;
+        # YOLOWorldPostProcessor.prompt_to_classnames builds the same list.
+        words += [" "]
+        tokenized_text = self.tokenizer(text=words, return_tensors="pd", padding=True)
+        text_results = {
+            "input_ids": tokenized_text["input_ids"].numpy(),
+            "attention_mask": tokenized_text["attention_mask"].numpy(),
+        }
+        return text_results
+class YOLOWorldPostProcessor(object):
+    """PostProcessors for YOLO-World"""
+    # Turns the flat prediction array of a batch into per-image lists of
+    # {"coordinate", "label", "score"} dicts.
+    def __init__(
+        self,
+        threshold: float = 0.05,
+        **kwargs,
+    ):
+        """Init Function for YOLO-World PostProcessor
+        Args:
+            threshold (float): threshold for low confidence bbox filtering.
+        """
+        self.threshold = threshold
+    # Splits the batched predictions per image and post-processes each one.
+    # `threshold` overrides the instance default when it is not None.
+    # Returns one list of box dicts per source image.
+    def __call__(
+        self,
+        pred_boxes,
+        pred_nums,
+        prompt,
+        src_images,
+        threshold=None,
+        **kwargs,
+    ):
+        threshold = self.threshold if threshold is None else threshold
+        # pred_nums holds the number of rows belonging to each image; the
+        # cumulative sums (without the last one) are the split positions.
+        split_index = np.cumsum(pred_nums)[:-1]
+        pred_boxes = np.split(pred_boxes, split_index, axis=0)
+        assert len(pred_boxes) == len(src_images)
+        classnames = self.prompt_to_classnames(prompt)
+        rst_boxes = []
+        for pred_box, src_image in zip(pred_boxes, src_images):
+            rst_boxes.append(
+                self.postprocess(
+                    pred_box,
+                    classnames,
+                    src_image,
+                    threshold,
+                )
+            )
+        return rst_boxes
+    def postprocess(
+        self,
+        pred_boxes,
+        classnames,
+        src_image,
+        threshold,
+    ):
+        """Post Process for prediction result of single image."""
+        # Row layout used below: column 0 = label index, column 1 = score,
+        # columns 2 onwards = box coordinates.
+        # Keep only rows whose score is strictly greater than the threshold.
+        pred_boxes = pred_boxes[pred_boxes[:, 1] > threshold]
+        H, W, *_ = src_image.shape
+        pred_labels = pred_boxes[:, 0].astype(np.int32)
+        pred_scores = pred_boxes[:, 1]
+        pred_bboxes = pred_boxes[:, 2:]
+        # Clip even-offset coordinates to [0, W] and odd-offset ones to [0, H].
+        pred_bboxes[:, ::2] = np.clip(pred_bboxes[:, ::2], a_min=0, a_max=W)
+        pred_bboxes[:, 1::2] = np.clip(pred_bboxes[:, 1::2], a_min=0, a_max=H)
+        rst_bboxes = []
+        for pred_label, pred_score, pred_bbox in zip(
+            pred_labels, pred_scores, pred_bboxes
+        ):
+            rst_bboxes.append(
+                {
+                    "coordinate": pred_bbox.tolist(),
+                    "label": classnames[pred_label],
+                    # NOTE(review): unlike "coordinate", the score is not
+                    # converted with .tolist() and so stays a NumPy scalar -
+                    # confirm downstream serialization accepts that.
+                    "score": pred_score,
+                }
+            )
+        return rst_bboxes
+    # Rebuilds the class-name list from the prompt with the same steps as
+    # YOLOWorldProcessor.process_text (strip, lower-case, split on ",", append
+    # a single-space entry), so that label indices map onto the prompt words.
+    def prompt_to_classnames(self, text):
+        text = text.strip().lower()
+        words = [word.strip() for word in text.split(",")]
+        words += [" "]
+        return words

paddlex/inference/models/open_vocabulary_segmentation/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

paddlex/inference/models/open_vocabulary_segmentation/predictor.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,21 +13,18 @@
 # limitations under the License.
-from typing import Any, Union, Dict, List, Tuple, Optional, Callable
-import numpy as np
-import inspect
+from typing import Any, Dict, List
-from ....utils.func_register import FuncRegister
 from ....modules.open_vocabulary_segmentation.model_list import MODELS
+from ....utils.func_register import FuncRegister
 from ...common.batch_sampler import ImageBatchSampler
 from ...common.reader import ReadImage
+from ..base import BasePredictor
 from .processors import SAMProcessor
-from ..common import StaticInfer
-from ..base import BasicPredictor
 from .results import SAMSegResult
-class OVSegPredictor(BasicPredictor):
+class OVSegPredictor(BasePredictor):
     entities = MODELS
@@ -62,11 +59,7 @@ class OVSegPredictor(BasicPredictor):
                 pre_ops.append(op)
         # build infer
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
         # build model specific processor, it's required for a OV model.
         processor_cfg = self.config["Processor"]

paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,19 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import os
-from typing import Dict, List, Optional, Union, Tuple
+from copy import deepcopy
+from typing import List, Optional, Tuple, Union
 import numpy as np
 import PIL
-from copy import deepcopy
-from .....utils.lazy_loader import LazyLoader
-# NOTE: LazyLoader is used to avoid conflicts between ultra-infer and Paddle
-paddle = LazyLoader("lazy_paddle", globals(), "paddle")
-T = LazyLoader("T", globals(), "paddle.vision.transforms")
-F = LazyLoader("F", globals(), "paddle.nn.functional")
+from ....utils.benchmark import benchmark
 def _get_preprocess_shape(
@@ -106,6 +100,8 @@ class SAMProcessor(object):
         return image_seg, prompt
     def postprocess(self, low_res_masks, mask_threshold: float = 0.0):
+        import paddle
+        import paddle.nn.functional as F
         if isinstance(low_res_masks, list):
             assert len(low_res_masks) == 1
@@ -126,6 +122,7 @@ class SAMProcessor(object):
         return [masks]
+@benchmark.timeit
 class SamPromptProcessor(object):
     """Constructs a Sam prompt processor."""
@@ -180,6 +177,7 @@ class SamPromptProcessor(object):
             return box.astype(np.float32)
+@benchmark.timeit
 class SamImageProcessor(object):
     """Constructs a Sam image processor."""
@@ -207,6 +205,8 @@ class SamImageProcessor(object):
     def apply_image(self, image: np.ndarray) -> np.ndarray:
         """Expects a numpy array with shape HxWxC in uint8 format."""
+        import paddle.vision.transforms as T
         target_size = _get_preprocess_shape(image.shape[0], image.shape[1], self.size)
         if isinstance(image, np.ndarray):
             image = PIL.Image.fromarray(image)
@@ -223,8 +223,8 @@ class SamImageProcessor(object):
         images,
     ):
         """Preprocess an image or a batch of images with a same shape."""
-        size = self.size
+        import paddle
+        import paddle.nn.functional as F
         input_image = [self.apply_image(image) for image in images]

paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,23 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import os
-import cv2
+import copy
+import random
 import numpy as np
-import copy, random
-import PIL
-from PIL import Image, ImageDraw, ImageFont
-from ....common.result import BaseCVResult, StrMixin, JsonMixin
+from PIL import Image
+from .....utils.deps import function_requires_deps, is_dep_available
+from ....common.result import BaseCVResult, JsonMixin
 from ....utils.color_map import get_colormap
-from ....common.result import BaseCVResult
+if is_dep_available("opencv-contrib-python"):
+    import cv2
+@function_requires_deps("opencv-contrib-python")
 def draw_segm(im, masks, mask_info, alpha=0.7):
     """
     Draw segmentation on image
     """
-    mask_color_id = 0
     w_ratio = 0.4
     color_list = get_colormap(rgb=True)
     im = np.array(im).astype("float32")

paddlex/inference/models/semantic_segmentation/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

paddlex/inference/models/semantic_segmentation/predictor.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,27 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Union, Dict, List, Tuple
+from typing import Any, Dict, List, Tuple, Union
 import numpy as np
-from ....utils.func_register import FuncRegister
 from ....modules.semantic_segmentation.model_list import MODELS
+from ....utils.func_register import FuncRegister
 from ...common.batch_sampler import ImageBatchSampler
 from ...common.reader import ReadImage
-from ..common import (
-    ResizeByShort,
-    Normalize,
-    ToCHWImage,
-    ToBatch,
-    StaticInfer,
-)
+from ..base import BasePredictor
+from ..common import Normalize, ToBatch, ToCHWImage
 from .processors import Resize, SegPostProcess
-from ..base import BasicPredictor
 from .result import SegResult
-class SegPredictor(BasicPredictor):
-    """SegPredictor that inherits from BasicPredictor."""
+class SegPredictor(BasePredictor):
+    """SegPredictor that inherits from BasePredictor."""
     entities = MODELS
@@ -95,11 +90,7 @@ class SegPredictor(BasicPredictor):
             _, op = self._FUNC_MAP["Resize"](self, target_size=self.target_size)
             preprocessors["Resize"] = op
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
         postprocessers = SegPostProcess()

paddlex/inference/models/semantic_segmentation/processors.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,19 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List, Tuple, Union
-import os
-import sys
-import cv2
-import copy
 import math
-import pyclipper
 import numpy as np
-from ..common.vision.processors import _BaseResize
+from ....utils.deps import class_requires_deps, is_dep_available
+from ...utils.benchmark import benchmark
 from ..common.vision import funcs as F
+from ..common.vision.processors import _BaseResize
+if is_dep_available("opencv-contrib-python"):
+    import cv2
+@benchmark.timeit
 class Resize(_BaseResize):
     """Resize the image."""
@@ -81,6 +82,8 @@ class Resize(_BaseResize):
         return img
+@benchmark.timeit
+@class_requires_deps("opencv-contrib-python")
 class SegPostProcess:
     """Semantic Segmentation PostProcess

paddlex/inference/models/semantic_segmentation/result.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,11 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import copy
 import numpy as np
 from PIL import Image
-import copy
-from ...common.result import BaseCVResult, StrMixin, JsonMixin
+from ...common.result import BaseCVResult, JsonMixin
 class SegResult(BaseCVResult):

paddlex/inference/models/table_structure_recognition/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

paddlex/inference/models/table_structure_recognition/predictor.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,27 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Union, Dict, List, Tuple
+from typing import Any, Dict, List, Tuple, Union
 import numpy as np
-from ....utils.func_register import FuncRegister
 from ....modules.table_recognition.model_list import MODELS
+from ....utils.func_register import FuncRegister
 from ...common.batch_sampler import ImageBatchSampler
 from ...common.reader import ReadImage
-from ..common import (
-    Resize,
-    ResizeByLong,
-    Normalize,
-    ToCHWImage,
-    ToBatch,
-    StaticInfer,
-)
-from ..base import BasicPredictor
+from ..base import BasePredictor
+from ..common import Normalize, ResizeByLong, ToBatch, ToCHWImage
 from .processors import Pad, TableLabelDecode
 from .result import TableRecResult
-class TablePredictor(BasicPredictor):
+class TablePredictor(BasePredictor):
     entities = MODELS
     _FUNC_MAP = {}
@@ -59,11 +53,7 @@ class TablePredictor(BasicPredictor):
                 preprocessors.append(op)
         preprocessors.append(ToBatch())
-        infer = StaticInfer(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
+        infer = self.create_static_infer()
         postprocessors = TableLabelDecode(
             model_name=self.config["Global"]["model_name"],

paddlex/inference/models/table_structure_recognition/processors.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,12 +13,13 @@
 # limitations under the License.
-import cv2
 import numpy as np
-from numpy import ndarray
+from ...utils.benchmark import benchmark
 from ..common.vision import funcs as F
+@benchmark.timeit
 class Pad:
     """Pad the image."""
@@ -60,6 +61,7 @@ class Pad:
         return [self.apply(img) for img in imgs]
+@benchmark.timeit
 class TableLabelDecode:
     """decode the table model outputs(probs) to character str"""
@@ -121,16 +123,8 @@ class TableLabelDecode:
     def __call__(self, pred, img_size, ori_img_size):
         """apply"""
-        bbox_preds, structure_probs = [], []
-        for i in range(len(pred[0][0])):
-            bbox_preds.append(pred[0][0][i])
-            structure_probs.append(pred[1][0][i])
-        bbox_preds = [bbox_preds]
-        structure_probs = [structure_probs]
-        bbox_preds = np.array(bbox_preds)
-        structure_probs = np.array(structure_probs)
+        bbox_preds = np.array([list(pred[0][0])])
+        structure_probs = np.array([list(pred[1][0])])
         bbox_list, structure_str_list, structure_score = self.decode(
             structure_probs, bbox_preds, img_size, ori_img_size
@@ -159,9 +153,11 @@ class TableLabelDecode:
         structure_batch_list = []
         bbox_batch_list = []
         batch_size = len(structure_idx)
+        bbox_list = []
+        scale_list = []
+        scales = [0] * 8
         for batch_idx in range(batch_size):
             structure_list = []
-            bbox_list = []
             score_list = []
             for idx in range(len(structure_idx[batch_idx])):
                 char_idx = int(structure_idx[batch_idx][idx])
@@ -172,15 +168,21 @@ class TableLabelDecode:
                 text = self.character[char_idx]
                 if text in self.td_token:
                     bbox = bbox_preds[batch_idx, idx]
-                    bbox = self._bbox_decode(
-                        bbox, padding_size[batch_idx], ori_img_size[batch_idx]
+                    h_scale, w_scale = self._get_bbox_scales(
+                        padding_size[batch_idx], ori_img_size[batch_idx]
                     )
-                    bbox_list.append(bbox.astype(int))
+                    scales[0::2] = [h_scale] * 4
+                    scales[1::2] = [w_scale] * 4
+                    bbox_list.append(bbox)
+                    scale_list.append(scales)
                 structure_list.append(text)
                 score_list.append(structure_probs[batch_idx, idx])
             structure_batch_list.append(structure_list)
             structure_score = np.mean(score_list)
-            bbox_batch_list.append(bbox_list)
+        bbox_batch_array = np.multiply(np.array(bbox_list), np.array(scale_list))
+        bbox_batch_list = [bbox_batch_array.astype(int).tolist()]
         return bbox_batch_list, structure_batch_list, structure_score
@@ -214,22 +216,14 @@ class TableLabelDecode:
             bbox_batch_list.append(bbox_list)
         return bbox_batch_list, structure_batch_list
-    def _bbox_decode(self, bbox, padding_shape, ori_shape):
+    def _get_bbox_scales(self, padding_shape, ori_shape):
         if self.model_name == "SLANet":
             w, h = ori_shape
-            bbox[0::2] *= w
-            bbox[1::2] *= h
+            return w, h
         else:
             w, h = padding_shape
             ori_w, ori_h = ori_shape
             ratio_w = w / ori_w
             ratio_h = h / ori_h
             ratio = min(ratio_w, ratio_h)
-            bbox[0::2] *= w
-            bbox[1::2] *= h
-            bbox[0::2] /= ratio
-            bbox[1::2] /= ratio
-        return bbox
+            return w / ratio, h / ratio

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl

paddlex 3.0.0rc0__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl