PyPI - paddlex - Versions diffs - 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl - Mend

paddlex 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (940) hide show

paddlex/inference/models/video_classification/processors.py ADDED Viewed

@@ -0,0 +1,409 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import os.path as osp
+from typing import List, Sequence, Union, Optional, Tuple
+import re
+import numpy as np
+import cv2
+import math
+import json
+import tempfile
+import lazy_paddle
+class Scale:
+    """Scale images."""
+    def __init__(
+        self,
+        short_size: int,
+        fixed_ratio: bool = True,
+        keep_ratio: Union[bool, None] = None,
+        do_round: bool = False,
+    ) -> None:
+        """
+        Initializes the Scale class.
+        Args:
+            short_size (int): The target size for the shorter side of the image.
+            fixed_ratio (bool): Whether to maintain a fixed aspect ratio of 4:3.
+            keep_ratio (Union[bool, None]): Whether to keep the aspect ratio. Cannot be True if fixed_ratio is True.
+            do_round (bool): Whether to round the scaling factor.
+        """
+        super().__init__()
+        self.short_size = short_size
+        assert (fixed_ratio and not keep_ratio) or (
+            not fixed_ratio
+        ), f"fixed_ratio and keep_ratio cannot be true at the same time"
+        self.fixed_ratio = fixed_ratio
+        self.keep_ratio = keep_ratio
+        self.do_round = do_round
+    def scale(self, video: List[np.ndarray]) -> List[np.ndarray]:
+        """
+        Performs resize operations on a sequence of images.
+        Args:
+            video (List[np.ndarray]): List where each item is an image,  as a numpy array.
+             For example, [np.ndarray0, np.ndarray1, np.ndarray2, ...]
+        Returns:
+            List[np.ndarray]: List where each item is a np.ndarray after scaling.
+        """
+        imgs = video
+        resized_imgs = []
+        for i in range(len(imgs)):
+            img = imgs[i]
+            if isinstance(img, np.ndarray):
+                h, w, _ = img.shape
+            else:
+                raise NotImplementedError
+            if (w <= h and w == self.short_size) or (h <= w and h == self.short_size):
+                resized_imgs.append(img)
+                continue
+            if w <= h:
+                ow = self.short_size
+                if self.fixed_ratio:
+                    oh = int(self.short_size * 4.0 / 3.0)
+                elif self.keep_ratio is False:
+                    oh = self.short_size
+                else:
+                    scale_factor = self.short_size / w
+                    oh = (
+                        int(h * float(scale_factor) + 0.5)
+                        if self.do_round
+                        else int(h * self.short_size / w)
+                    )
+                    ow = (
+                        int(w * float(scale_factor) + 0.5)
+                        if self.do_round
+                        else self.short_size
+                    )
+            else:
+                oh = self.short_size
+                if self.fixed_ratio:
+                    ow = int(self.short_size * 4.0 / 3.0)
+                elif self.keep_ratio is False:
+                    ow = self.short_size
+                else:
+                    scale_factor = self.short_size / h
+                    oh = (
+                        int(h * float(scale_factor) + 0.5)
+                        if self.do_round
+                        else self.short_size
+                    )
+                    ow = (
+                        int(w * float(scale_factor) + 0.5)
+                        if self.do_round
+                        else int(w * self.short_size / h)
+                    )
+            resized_imgs.append(
+                cv2.resize(img, (ow, oh), interpolation=cv2.INTER_LINEAR)
+            )
+        imgs = resized_imgs
+        return imgs
+    def __call__(self, videos: List[np.ndarray]) -> List[np.ndarray]:
+        """
+        Apply the scaling operation to a list of videos.
+        Args:
+            videos (List[np.ndarray]): A list of videos, where each video is a sequence
+            of images.
+        Returns:
+            List[np.ndarray]: A list of videos after scaling, where each video is a list of images.
+        """
+        return [self.scale(video) for video in videos]
+class CenterCrop:
+    """Center crop images."""
+    def __init__(self, target_size: int, do_round: bool = True) -> None:
+        """
+        Initializes the CenterCrop class.
+        Args:
+            target_size (int): The size of the cropped area.
+            do_round (bool): Whether to round the crop coordinates.
+        """
+        super().__init__()
+        self.target_size = target_size
+        self.do_round = do_round
+    def center_crop(self, imgs: List[np.ndarray]) -> List[np.ndarray]:
+        """
+        Performs center crop operations on images.
+        Args:
+            imgs (List[np.ndarray]): A sequence of images (a numpy array).
+        Returns:
+            List[np.ndarray]: A list of images after center cropping or a cropped numpy array.
+        """
+        crop_imgs = []
+        th, tw = self.target_size, self.target_size
+        if isinstance(imgs, lazy_paddle.Tensor):
+            h, w = imgs.shape[-2:]
+            x1 = int(round((w - tw) / 2.0)) if self.do_round else (w - tw) // 2
+            y1 = int(round((h - th) / 2.0)) if self.do_round else (h - th) // 2
+            crop_imgs = imgs[:, :, y1 : y1 + th, x1 : x1 + tw]
+        else:
+            for img in imgs:
+                h, w, _ = img.shape
+                assert (w >= self.target_size) and (
+                    h >= self.target_size
+                ), "image width({}) and height({}) should be larger than crop size".format(
+                    w, h, self.target_size
+                )
+                x1 = int(round((w - tw) / 2.0)) if self.do_round else (w - tw) // 2
+                y1 = int(round((h - th) / 2.0)) if self.do_round else (h - th) // 2
+                crop_imgs.append(img[y1 : y1 + th, x1 : x1 + tw])
+        return crop_imgs
+    def __call__(self, videos: List[np.ndarray]) -> List[np.ndarray]:
+        """
+        Apply the center crop operation to a list of videos.
+        Args:
+            videos (List[np.ndarray]): A list of videos, where each video is a sequence of images.
+        Returns:
+            List[np.ndarray]: A list of videos after center cropping.
+        """
+        return [self.center_crop(video) for video in videos]
+class Image2Array:
+    """Convert a sequence of images to a numpy array with optional transposition."""
+    def __init__(self, transpose: bool = True, data_format: str = "tchw") -> None:
+        """
+        Initializes the Image2Array class.
+        Args:
+            transpose (bool): Whether to transpose the resulting numpy array.
+            data_format (str): The format to transpose to, either 'tchw' or 'cthw'.
+        Raises:
+            AssertionError: If data_format is not one of the allowed values.
+        """
+        super().__init__()
+        assert data_format in [
+            "tchw",
+            "cthw",
+        ], f"Target format must in ['tchw', 'cthw'], but got {data_format}"
+        self.transpose = transpose
+        self.data_format = data_format
+    def img2array(self, imgs: List[np.ndarray]) -> np.ndarray:
+        """
+        Converts a sequence of images to a numpy array and optionally transposes it.
+        Args:
+            imgs (List[np.ndarray]): A list of images to be converted to a numpy array.
+        Returns:
+            np.ndarray: A numpy array representation of the images.
+        """
+        t_imgs = np.stack(imgs).astype("float32")
+        if self.transpose:
+            if self.data_format == "tchw":
+                t_imgs = t_imgs.transpose([0, 3, 1, 2])  # tchw
+            else:
+                t_imgs = t_imgs.transpose([3, 0, 1, 2])  # cthw
+        return t_imgs
+    def __call__(self, videos: List[np.ndarray]) -> List[np.ndarray]:
+        """
+        Apply the image to array conversion to a list of videos.
+        Args:
+            videos (List[Sequence[np.ndarray]]): A list of videos, where each video is a sequence of images.
+        Returns:
+            List[np.ndarray]: A list of numpy arrays, one for each video.
+        """
+        return [self.img2array(video) for video in videos]
+class NormalizeVideo:
+    """
+    Normalize video frames by subtracting the mean and dividing by the standard deviation.
+    """
+    def __init__(
+        self,
+        mean: Sequence[float],
+        std: Sequence[float],
+        tensor_shape: Sequence[int] = [3, 1, 1],
+        inplace: bool = False,
+    ) -> None:
+        """
+        Initializes the NormalizeVideo class.
+        Args:
+            mean (Sequence[float]): The mean values for each channel.
+            std (Sequence[float]): The standard deviation values for each channel.
+            tensor_shape (Sequence[int]): The shape of the mean and std tensors.
+            inplace (bool): Whether to perform normalization in place.
+        """
+        super().__init__()
+        self.inplace = inplace
+        if not inplace:
+            self.mean = np.array(mean).reshape(tensor_shape).astype(np.float32)
+            self.std = np.array(std).reshape(tensor_shape).astype(np.float32)
+        else:
+            self.mean = np.array(mean, dtype=np.float32)
+            self.std = np.array(std, dtype=np.float32)
+    def normalize_video(self, imgs: np.ndarray) -> np.ndarray:
+        """
+        Normalizes a sequence of images.
+        Args:
+            imgs (np.ndarray): A numpy array of images to be normalized.
+        Returns:
+            np.ndarray: The normalized images as a numpy array.
+        """
+        if self.inplace:
+            n = len(imgs)
+            h, w, c = imgs[0].shape
+            norm_imgs = np.empty((n, h, w, c), dtype=np.float32)
+            for i, img in enumerate(imgs):
+                norm_imgs[i] = img
+            for img in norm_imgs:  # [n,h,w,c]
+                mean = np.float64(self.mean.reshape(1, -1))  # [1, 3]
+                stdinv = 1 / np.float64(self.std.reshape(1, -1))  # [1, 3]
+                cv2.subtract(img, mean, img)
+                cv2.multiply(img, stdinv, img)
+        else:
+            imgs = imgs
+            norm_imgs = imgs / 255.0
+            norm_imgs -= self.mean
+            norm_imgs /= self.std
+        imgs = norm_imgs
+        imgs = np.expand_dims(imgs, axis=0).copy()
+        return imgs
+    def __call__(self, videos: List[np.ndarray]) -> List[np.ndarray]:
+        """
+        Apply normalization to a list of videos.
+        Args:
+            videos (List[np.ndarray]): A list of videos, where each video is a numpy array of images.
+        Returns:
+            List[np.ndarray]: A list of normalized videos as numpy arrays.
+        """
+        return [self.normalize_video(video) for video in videos]
+class VideoClasTopk:
+    """Applies a top-k transformation on video classification predictions."""
+    def __init__(self, class_ids: Optional[Sequence[Union[str, int]]] = None) -> None:
+        """
+        Initializes the VideoClasTopk class.
+        Args:
+            class_ids (Optional[Sequence[Union[str, int]]]): A list of class labels corresponding to class indices.
+        """
+        super().__init__()
+        self.class_id_map = self._parse_class_id_map(class_ids)
+    def softmax(self, data: np.ndarray) -> np.ndarray:
+        """
+        Applies the softmax function to an array of data.
+        Args:
+            data (np.ndarray): An array of data for which to compute softmax.
+        Returns:
+            np.ndarray: The softmax-transformed data.
+        """
+        x_max = np.max(data, axis=-1, keepdims=True)
+        e_x = np.exp(data - x_max)
+        return e_x / np.sum(e_x, axis=-1, keepdims=True)
+    def _parse_class_id_map(
+        self, class_ids: Optional[Sequence[Union[str, int]]]
+    ) -> Optional[dict]:
+        """
+        Parses a list of class IDs into a mapping from class index to class label.
+        Args:
+            class_ids (Optional[Sequence[Union[str, int]]]): A list of class labels.
+        Returns:
+            Optional[dict]: A dictionary mapping class indices to labels, or None if no class_ids are provided.
+        """
+        if class_ids is None:
+            return None
+        class_id_map = {id: str(lb) for id, lb in enumerate(class_ids)}
+        return class_id_map
+    def __call__(
+        self, preds: np.ndarray, topk: int = 5
+    ) -> Tuple[np.ndarray, List[np.ndarray], List[List[str]]]:
+        """
+        Selects the top-k predictions from the classification output.
+        Args:
+            preds (np.ndarray): A 2D array of prediction scores.
+            topk (int): The number of top predictions to return.
+        Returns:
+            Tuple[np.ndarray, List[np.ndarray], List[List[str]]]: A tuple containing:
+                - An array of indices of the top-k predictions.
+                - A list of arrays of scores for the top-k predictions.
+                - A list of lists of label names for the top-k predictions.
+        """
+        preds[0] = self.softmax(preds[0])
+        indexes = preds[0].argsort(axis=1)[:, -topk:][:, ::-1].astype("int32")
+        scores = [
+            list(np.around(pred[index], decimals=5))
+            for pred, index in zip(preds[0], indexes)
+        ]
+        label_names = [[self.class_id_map[i] for i in index] for index in indexes]
+        return indexes, scores, label_names
+class ToBatch:
+    """A class for batching videos."""
+    def __call__(self, videos: List[np.ndarray]) -> List[np.ndarray]:
+        """Call method to stack videos into a batch.
+        Args:
+            videos (list of np.ndarrays): List of videos to process.
+        Returns:
+            list of np.ndarrays: List containing a stacked tensor of the videos.
+        """
+        return [np.concatenate(videos, axis=0).astype(dtype=np.float32, copy=False)]

paddlex/inference/models/video_classification/result.py ADDED Viewed

@@ -0,0 +1,92 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import cv2
+import numpy as np
+import PIL
+from PIL import Image, ImageDraw, ImageFont
+from ....utils.fonts import PINGFANG_FONT_FILE_PATH
+from ...utils.color_map import get_colormap
+from ...utils.io import VideoReader
+from ...common.result import BaseVideoResult
+class TopkVideoResult(BaseVideoResult):
+    def _to_video(self):
+        """Draw label on image"""
+        labels = self.get("label_names", self["class_ids"])
+        label_str = f"{labels[0]} {self['scores'][0]:.2f}"
+        video_reader = VideoReader(backend="decord")
+        video = video_reader.read(self["input_path"])
+        video = list(video)
+        write_fps = video_reader.get_fps()
+        video_list = []
+        for i in range(len(video)):
+            image = Image.fromarray(video[i].asnumpy())
+            image_size = image.size
+            draw = ImageDraw.Draw(image)
+            min_font_size = int(image_size[0] * 0.02)
+            max_font_size = int(image_size[0] * 0.05)
+            for font_size in range(max_font_size, min_font_size - 1, -1):
+                font = ImageFont.truetype(
+                    PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8"
+                )
+                if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
+                    text_width_tmp, text_height_tmp = draw.textsize(label_str, font)
+                else:
+                    left, top, right, bottom = draw.textbbox((0, 0), label_str, font)
+                    text_width_tmp, text_height_tmp = right - left, bottom - top
+                if text_width_tmp <= image_size[0]:
+                    break
+                else:
+                    font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, min_font_size)
+            color_list = get_colormap(rgb=True)
+            color = tuple(color_list[0])
+            font_color = tuple(self._get_font_colormap(3))
+            if tuple(map(int, PIL.__version__.split("."))) <= (10, 0, 0):
+                text_width, text_height = draw.textsize(label_str, font)
+            else:
+                left, top, right, bottom = draw.textbbox((0, 0), label_str, font)
+                text_width, text_height = right - left, bottom - top
+            rect_left = 3
+            rect_top = 3
+            rect_right = rect_left + text_width + 3
+            rect_bottom = rect_top + text_height + 6
+            draw.rectangle(
+                [(rect_left, rect_top), (rect_right, rect_bottom)], fill=color
+            )
+            text_x = rect_left + 3
+            text_y = rect_top
+            draw.text((text_x, text_y), label_str, fill=font_color, font=font)
+            image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+            video_list.append(image)
+        return {"res": (np.array(video_list), write_fps)}
+    def _get_font_colormap(self, color_index):
+        """
+        Get font colormap
+        """
+        dark = np.array([0x14, 0x0E, 0x35])
+        light = np.array([0xFF, 0xFF, 0xFF])
+        light_indexs = [0, 3, 4, 8, 9, 13, 14, 18, 19]
+        if color_index in light_indexs:
+            return light.astype("int32")
+        else:
+            return dark.astype("int32")

paddlex/inference/models/video_detection/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .predictor import VideoDetPredictor

paddlex/inference/models/video_detection/predictor.py ADDED Viewed

@@ -0,0 +1,136 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Union, Dict, List, Tuple
+from ....utils.func_register import FuncRegister
+from ....modules.video_detection.model_list import MODELS
+from ...common.batch_sampler import VideoBatchSampler
+from ...common.reader import ReadVideo
+from ..common import (
+    ToBatch,
+    StaticInfer,
+)
+from ..base import BasicPredictor
+from .processors import ResizeVideo, Image2Array, NormalizeVideo, DetVideoPostProcess
+from .result import DetVideoResult
+class VideoDetPredictor(BasicPredictor):
+    entities = MODELS
+    _FUNC_MAP = {}
+    register = FuncRegister(_FUNC_MAP)
+    def __init__(
+        self,
+        nms_thresh: Union[float, None] = None,
+        score_thresh: Union[float, None] = None,
+        *args,
+        **kwargs
+    ):
+        super().__init__(*args, **kwargs)
+        self.nms_thresh = nms_thresh
+        self.score_thresh = score_thresh
+        self.pre_tfs, self.infer, self.post_op = self._build()
+    def _build_batch_sampler(self):
+        return VideoBatchSampler()
+    def _get_result_class(self):
+        return DetVideoResult
+    def _build(self):
+        pre_tfs = {}
+        for cfg in self.config["PreProcess"]["transform_ops"]:
+            tf_key = list(cfg.keys())[0]
+            assert tf_key in self._FUNC_MAP
+            func = self._FUNC_MAP[tf_key]
+            args = cfg.get(tf_key, {})
+            name, op = func(self, **args) if args else func(self)
+            if op:
+                pre_tfs[name] = op
+        infer = StaticInfer(
+            model_dir=self.model_dir,
+            model_prefix=self.MODEL_FILE_PREFIX,
+            option=self.pp_option,
+        )
+        post_op = {}
+        for cfg in self.config["PostProcess"]["transform_ops"]:
+            tf_key = list(cfg.keys())[0]
+            assert tf_key in self._FUNC_MAP
+            func = self._FUNC_MAP[tf_key]
+            args = cfg.get(tf_key, {})
+            if tf_key == "DetVideoPostProcess":
+                args["label_list"] = self.config["label_list"]
+            name, op = func(self, **args) if args else func(self)
+            if op:
+                post_op[name] = op
+        return pre_tfs, infer, post_op
+    def process(
+        self,
+        batch_data,
+        nms_thresh: Union[float, None] = None,
+        score_thresh: Union[float, None] = None,
+    ):
+        batch_raw_videos = self.pre_tfs["ReadVideo"](videos=batch_data)
+        batch_videos = self.pre_tfs["ResizeVideo"](videos=batch_raw_videos)
+        batch_videos = self.pre_tfs["Image2Array"](videos=batch_videos)
+        x = self.pre_tfs["NormalizeVideo"](videos=batch_videos)
+        num_seg = len(x[0])
+        pred_seg = []
+        for i in range(num_seg):
+            batch_preds = self.infer(x=[x[0][i]])
+            pred_seg.append(batch_preds)
+        batch_bboxes = self.post_op["DetVideoPostProcess"](
+            preds=[pred_seg],
+            nms_thresh=nms_thresh or self.nms_thresh,
+            score_thresh=score_thresh or self.score_thresh,
+        )
+        return {
+            "input_path": batch_data,
+            "result": batch_bboxes,
+        }
+    @register("ReadVideo")
+    def build_readvideo(self, num_seg=8):
+        return "ReadVideo", ReadVideo(backend="opencv", num_seg=num_seg)
+    @register("ResizeVideo")
+    def build_resize(self, target_size=224):
+        return "ResizeVideo", ResizeVideo(
+            target_size=target_size,
+        )
+    @register("Image2Array")
+    def build_image2array(self, data_format="tchw"):
+        return "Image2Array", Image2Array(data_format="tchw")
+    @register("NormalizeVideo")
+    def build_normalize(
+        self,
+        scale=255.0,
+    ):
+        return "NormalizeVideo", NormalizeVideo(scale=scale)
+    @register("DetVideoPostProcess")
+    def build_postprocess(self, nms_thresh, score_thresh, label_list=[]):
+        if not self.nms_thresh:
+            self.nms_thresh = nms_thresh
+        if not self.score_thresh:
+            self.score_thresh = score_thresh
+        return "DetVideoPostProcess", DetVideoPostProcess(label_list=label_list)

paddlex 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl

paddlex 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl