PyPI - paddlex - Versions diffs - 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl - Mend

paddlex 3.0.0b2py3-none-any.whl → 3.0.0rc0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (940) hide show

paddlex/inference/models/3d_bev_detection/processors.py ADDED Viewed

@@ -0,0 +1,978 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numbers
+import cv2
+import numpy as np
+from typing import Generic, List, Optional
+import lazy_paddle as paddle
+from ...utils.io import ImageReader
+from ....utils import logging
+from ...common.reader.det_3d_reader import Sample
+cv2_interp_codes = {
+    "nearest": cv2.INTER_NEAREST,
+    "bilinear": cv2.INTER_LINEAR,
+    "bicubic": cv2.INTER_CUBIC,
+    "area": cv2.INTER_AREA,
+    "lanczos": cv2.INTER_LANCZOS4,
+}
+class LoadPointsFromFile:
+    """Load points from a file and process them according to specified parameters."""
+    def __init__(
+        self, load_dim=6, use_dim=[0, 1, 2], shift_height=False, use_color=False
+    ):
+        """Initializes the LoadPointsFromFile object.
+        Args:
+            load_dim (int): Dimensions loaded in points.
+            use_dim (list or int): Dimensions used in points. If int, will use a range from 0 to use_dim (exclusive).
+            shift_height (bool): Whether to shift height values.
+            use_color (bool): Whether to include color attributes in the loaded points.
+        """
+        self.shift_height = shift_height
+        self.use_color = use_color
+        if isinstance(use_dim, int):
+            use_dim = list(range(use_dim))
+        assert (
+            max(use_dim) < load_dim
+        ), f"Expect all used dimensions < {load_dim}, got {use_dim}"
+        self.load_dim = load_dim
+        self.use_dim = use_dim
+    def _load_points(self, pts_filename):
+        """Private function to load point clouds data from a file.
+        Args:
+            pts_filename (str): Path to the point cloud file.
+        Returns:
+            numpy.ndarray: Loaded point cloud data.
+        """
+        points = np.fromfile(pts_filename, dtype=np.float32)
+        return points
+    def __call__(self, results):
+        """Call function to load points data from file and process it.
+        Args:
+            results (dict): Dictionary containing the 'pts_filename' key with the path to the point cloud file.
+        Returns:
+            dict: Updated results dictionary with 'points' key added.
+        """
+        pts_filename = results["pts_filename"]
+        points = self._load_points(pts_filename)
+        points = points.reshape(-1, self.load_dim)
+        points = points[:, self.use_dim]
+        attribute_dims = None
+        if self.shift_height:
+            floor_height = np.percentile(points[:, 2], 0.99)
+            height = points[:, 2] - floor_height
+            points = np.concatenate(
+                [points[:, :3], np.expand_dims(height, 1), points[:, 3:]], 1
+            )
+            attribute_dims = dict(height=3)
+        if self.use_color:
+            assert len(self.use_dim) >= 6
+            if attribute_dims is None:
+                attribute_dims = dict()
+            attribute_dims.update(
+                dict(
+                    color=[
+                        points.shape[1] - 3,
+                        points.shape[1] - 2,
+                        points.shape[1] - 1,
+                    ]
+                )
+            )
+        results["points"] = points
+        return results
+class LoadPointsFromMultiSweeps(object):
+    """Load points from multiple sweeps.This is usually used for nuScenes dataset to utilize previous sweeps."""
+    def __init__(
+        self,
+        sweeps_num=10,
+        load_dim=5,
+        use_dim=[0, 1, 2, 4],
+        pad_empty_sweeps=False,
+        remove_close=False,
+        test_mode=False,
+        point_cloud_angle_range=None,
+    ):
+        """Initializes the LoadPointsFromMultiSweeps object
+        Args:
+            sweeps_num (int): Number of sweeps. Defaults to 10.
+            load_dim (int): Dimension number of the loaded points. Defaults to 5.
+            use_dim (list[int]): Which dimension to use. Defaults to [0, 1, 2, 4].
+                for more details. Defaults to dict(backend='disk').
+            pad_empty_sweeps (bool): Whether to repeat keyframe when
+                sweeps is empty. Defaults to False.
+            remove_close (bool): Whether to remove close points.
+                Defaults to False.
+            test_mode (bool): If test_model=True used for testing, it will not
+                randomly sample sweeps but select the nearest N frames.
+                Defaults to False.
+        """
+        self.load_dim = load_dim
+        self.sweeps_num = sweeps_num
+        self.use_dim = use_dim
+        self.pad_empty_sweeps = pad_empty_sweeps
+        self.remove_close = remove_close
+        self.test_mode = test_mode
+        if point_cloud_angle_range is not None:
+            self.filter_by_angle = True
+            self.point_cloud_angle_range = point_cloud_angle_range
+            print(point_cloud_angle_range)
+        else:
+            self.filter_by_angle = False
+            # self.point_cloud_angle_range = point_cloud_angle_range
+    def _load_points(self, pts_filename):
+        """Private function to load point clouds data.
+        Args:
+            pts_filename (str): Filename of point clouds data.
+        Returns:
+            np.ndarray: An array containing point clouds data.
+        """
+        points = np.fromfile(pts_filename, dtype=np.float32)
+        return points
+    def _remove_close(self, points, radius=1.0):
+        """Removes point too close within a certain radius from origin.
+        Args:
+            points (np.ndarray): Sweep points.
+            radius (float): Radius below which points are removed.
+                Defaults to 1.0.
+        Returns:
+            np.ndarray: Points after removing.
+        """
+        if isinstance(points, np.ndarray):
+            points_numpy = points
+        else:
+            raise NotImplementedError
+        x_filt = np.abs(points_numpy[:, 0]) < radius
+        y_filt = np.abs(points_numpy[:, 1]) < radius
+        not_close = np.logical_not(np.logical_and(x_filt, y_filt))
+        return points[not_close]
+    def filter_point_by_angle(self, points):
+        """
+        Filters points based on their angle in relation to the origin.
+        Args:
+            points (np.ndarray): An array of points with shape (N, 2), where each row
+                is a point in 2D space.
+        Returns:
+            np.ndarray: A filtered array of points that fall within the specified
+                angle range.
+        """
+        if isinstance(points, np.ndarray):
+            points_numpy = points
+        else:
+            raise NotImplementedError
+        pts_phi = (
+            np.arctan(points_numpy[:, 0] / points_numpy[:, 1])
+            + (points_numpy[:, 1] < 0) * np.pi
+            + np.pi * 2
+        ) % (np.pi * 2)
+        pts_phi[pts_phi > np.pi] -= np.pi * 2
+        pts_phi = pts_phi / np.pi * 180
+        assert np.all(-180 <= pts_phi) and np.all(pts_phi <= 180)
+        filt = np.logical_and(
+            pts_phi >= self.point_cloud_angle_range[0],
+            pts_phi <= self.point_cloud_angle_range[1],
+        )
+        return points[filt]
+    def __call__(self, results):
+        """Call function to load multi-sweep point clouds from files.
+        Args:
+            results (dict): Result dict containing multi-sweep point cloud \
+                filenames.
+        Returns:
+            dict: The result dict containing the multi-sweep points data. \
+                Added key and value are described below.
+                - points (np.ndarray): Multi-sweep point cloud arrays.
+        """
+        points = results["points"]
+        points[:, 4] = 0
+        sweep_points_list = [points]
+        ts = results["timestamp"]
+        if self.pad_empty_sweeps and len(results["sweeps"]) == 0:
+            for i in range(self.sweeps_num):
+                if self.remove_close:
+                    sweep_points_list.append(self._remove_close(points))
+                else:
+                    sweep_points_list.append(points)
+        else:
+            if len(results["sweeps"]) <= self.sweeps_num:
+                choices = np.arange(len(results["sweeps"]))
+            elif self.test_mode:
+                choices = np.arange(self.sweeps_num)
+            else:
+                choices = np.random.choice(
+                    len(results["sweeps"]), self.sweeps_num, replace=False
+                )
+            for idx in choices:
+                sweep = results["sweeps"][idx]
+                points_sweep = self._load_points(sweep["data_path"])
+                points_sweep = np.copy(points_sweep).reshape(-1, self.load_dim)
+                if self.remove_close:
+                    points_sweep = self._remove_close(points_sweep)
+                sweep_ts = sweep["timestamp"] / 1e6
+                points_sweep[:, :3] = (
+                    points_sweep[:, :3] @ sweep["sensor2lidar_rotation"].T
+                )
+                points_sweep[:, :3] += sweep["sensor2lidar_translation"]
+                points_sweep[:, 4] = ts - sweep_ts
+                # points_sweep = points.new_point(points_sweep)
+                sweep_points_list.append(points_sweep)
+        points = np.concatenate(sweep_points_list, axis=0)
+        if self.filter_by_angle:
+            points = self.filter_point_by_angle(points)
+        points = points[:, self.use_dim]
+        results["points"] = points
+        return results
+class LoadMultiViewImageFromFiles:
+    """Load multi-view images from files."""
+    def __init__(
+        self,
+        to_float32=False,
+        project_pts_to_img_depth=False,
+        cam_depth_range=[4.0, 45.0, 1.0],
+        constant_std=0.5,
+        imread_flag=-1,
+    ):
+        """
+        Initializes the LoadMultiViewImageFromFiles object.
+        Args:
+            to_float32 (bool): Whether to convert the loaded images to float32. Default: False.
+            project_pts_to_img_depth (bool): Whether to project points to image depth. Default: False.
+            cam_depth_range (list): Camera depth range in the format [min, max, focal]. Default: [4.0, 45.0, 1.0].
+            constant_std (float): Constant standard deviation for normalization. Default: 0.5.
+            imread_flag (int): Flag determining the color type of the loaded image.
+                - -1: cv2.IMREAD_UNCHANGED
+                -  0: cv2.IMREAD_GRAYSCALE
+                -  1: cv2.IMREAD_COLOR
+                Default: -1.
+        """
+        self.to_float32 = to_float32
+        self.project_pts_to_img_depth = project_pts_to_img_depth
+        self.cam_depth_range = cam_depth_range
+        self.constant_std = constant_std
+        self.imread_flag = imread_flag
+    def __call__(self, sample):
+        """
+        Call method to load multi-view image from files and update the sample dictionary.
+        Args:
+            sample (dict): Dictionary containing the image filename key.
+        Returns:
+            dict: Updated sample dictionary with loaded images and additional information.
+        """
+        filename = sample["img_filename"]
+        img = np.stack(
+            [cv2.imread(name, self.imread_flag) for name in filename], axis=-1
+        )
+        if self.to_float32:
+            img = img.astype(np.float32)
+        sample["filename"] = filename
+        sample["img"] = [img[..., i] for i in range(img.shape[-1])]
+        sample["img_shape"] = img.shape
+        sample["ori_shape"] = img.shape
+        sample["pad_shape"] = img.shape
+        # sample['scale_factor'] = 1.0
+        num_channels = 1 if len(img.shape) < 3 else img.shape[2]
+        sample["img_norm_cfg"] = dict(
+            mean=np.zeros(num_channels, dtype=np.float32),
+            std=np.ones(num_channels, dtype=np.float32),
+            to_rgb=False,
+        )
+        sample["img_fields"] = ["img"]
+        return sample
+class ResizeImage:
+    """Resize images & bbox & mask."""
+    def __init__(
+        self,
+        img_scale=None,
+        multiscale_mode="range",
+        ratio_range=None,
+        keep_ratio=True,
+        bbox_clip_border=True,
+        backend="cv2",
+        override=False,
+    ):
+        """Initializes the ResizeImage object.
+        Args:
+            img_scale (list or int, optional): The scale of the image. If a single integer is provided, it will be converted to a list. Defaults to None.
+            multiscale_mode (str): The mode for multiscale resizing. Can be "value" or "range". Defaults to "range".
+            ratio_range (list, optional): The range of image aspect ratios. Only used when img_scale is a single value. Defaults to None.
+            keep_ratio (bool): Whether to keep the aspect ratio when resizing. Defaults to True.
+            bbox_clip_border (bool): Whether to clip the bounding box to the image border. Defaults to True.
+            backend (str): The backend to use for image resizing. Can be "cv2". Defaults to "cv2".
+            override (bool): Whether to override certain resize parameters. Note: This option needs refactoring. Defaults to False.
+        """
+        if img_scale is None:
+            self.img_scale = None
+        else:
+            if isinstance(img_scale, list):
+                self.img_scale = img_scale
+            else:
+                self.img_scale = [img_scale]
+        if ratio_range is not None:
+            # mode 1: given a scale and a range of image ratio
+            assert len(self.img_scale) == 1
+        else:
+            # mode 2: given multiple scales or a range of scales
+            assert multiscale_mode in ["value", "range"]
+        self.backend = backend
+        self.multiscale_mode = multiscale_mode
+        self.ratio_range = ratio_range
+        self.keep_ratio = keep_ratio
+        # TODO: refactor the override option in Resize
+        self.override = override
+        self.bbox_clip_border = bbox_clip_border
+    @staticmethod
+    def random_select(img_scales):
+        """Randomly select an img_scale from the given list of candidates.
+        Args:
+            img_scales (list): A list of image scales to choose from.
+        Returns:
+            tuple: A tuple containing the selected image scale and its index in the list.
+        """
+        scale_idx = np.random.randint(len(img_scales))
+        img_scale = img_scales[scale_idx]
+        return img_scale, scale_idx
+    @staticmethod
+    def random_sample(img_scales):
+        """
+        Randomly sample an img_scale when `multiscale_mode` is set to 'range'.
+        Args:
+            img_scales (list of tuples): A list of tuples, where each tuple contains
+                the minimum and maximum scale dimensions for an image.
+        Returns:
+            tuple: A tuple containing the randomly sampled img_scale (long_edge, short_edge)
+                and None (to maintain function signature compatibility).
+        """
+        img_scale_long = [max(s) for s in img_scales]
+        img_scale_short = [min(s) for s in img_scales]
+        long_edge = np.random.randint(min(img_scale_long), max(img_scale_long) + 1)
+        short_edge = np.random.randint(min(img_scale_short), max(img_scale_short) + 1)
+        img_scale = (long_edge, short_edge)
+        return img_scale, None
+    @staticmethod
+    def random_sample_ratio(img_scale, ratio_range):
+        """
+        Randomly sample an img_scale based on the specified ratio_range.
+        Args:
+            img_scale (list): A list of two integers representing the minimum and maximum
+                scale for the image.
+            ratio_range (tuple): A tuple of two floats representing the minimum and maximum
+                ratio for sampling the img_scale.
+        Returns:
+            tuple: A tuple containing the sampled scale (as a tuple of two integers)
+                and None.
+        """
+        assert isinstance(img_scale, list) and len(img_scale) == 2
+        min_ratio, max_ratio = ratio_range
+        assert min_ratio <= max_ratio
+        ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
+        scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
+        return scale, None
+    def _random_scale(self, results):
+        """Randomly sample an img_scale according to `ratio_range` and `multiscale_mode`.
+        Args:
+            results (dict): A dictionary to store the sampled scale and its index.
+        Returns:
+            None. The sampled scale and its index are stored in `results` dictionary.
+        """
+        if self.ratio_range is not None:
+            scale, scale_idx = self.random_sample_ratio(
+                self.img_scale[0], self.ratio_range
+            )
+        elif len(self.img_scale) == 1:
+            scale, scale_idx = self.img_scale[0], 0
+        elif self.multiscale_mode == "range":
+            scale, scale_idx = self.random_sample(self.img_scale)
+        elif self.multiscale_mode == "value":
+            scale, scale_idx = self.random_select(self.img_scale)
+        else:
+            raise NotImplementedError
+        results["scale"] = scale
+        results["scale_idx"] = scale_idx
+    def _resize_img(self, results):
+        """Resize images based on the scale factor provided in ``results['scale']`` while maintaining the aspect ratio if ``self.keep_ratio`` is True.
+        Args:
+            results (dict): A dictionary containing image fields and their corresponding scales.
+        Returns:
+            None. The ``results`` dictionary is modified in place with resized images and additional fields like `img_shape`, `pad_shape`, `scale_factor`, and `keep_ratio`.
+        """
+        for key in results.get("img_fields", ["img"]):
+            for idx in range(len(results["img"])):
+                if self.keep_ratio:
+                    img, scale_factor = self.imrescale(
+                        results[key][idx],
+                        results["scale"],
+                        interpolation="bilinear" if key == "img" else "nearest",
+                        return_scale=True,
+                        backend=self.backend,
+                    )
+                    new_h, new_w = img.shape[:2]
+                    h, w = results[key][idx].shape[:2]
+                    w_scale = new_w / w
+                    h_scale = new_h / h
+                else:
+                    raise NotImplementedError
+                results[key][idx] = img
+            scale_factor = np.array(
+                [w_scale, h_scale, w_scale, h_scale], dtype=np.float32
+            )
+            results["img_shape"] = img.shape
+            # in case that there is no padding
+            results["pad_shape"] = img.shape
+            results["scale_factor"] = scale_factor
+            results["keep_ratio"] = self.keep_ratio
+    def rescale_size(self, old_size, scale, return_scale=False):
+        """
+        Calculate the new size to be rescaled to based on the given scale.
+        Args:
+            old_size (tuple): A tuple containing the width and height of the original size.
+            scale (float, int, or list of int): The scale factor or a list of integers representing the maximum and minimum allowed size.
+            return_scale (bool): Whether to return the scale factor along with the new size.
+        Returns:
+            tuple: A tuple containing the new size and optionally the scale factor if return_scale is True.
+        """
+        w, h = old_size
+        if isinstance(scale, (float, int)):
+            if scale <= 0:
+                raise ValueError(f"Invalid scale {scale}, must be positive.")
+            scale_factor = scale
+        elif isinstance(scale, list):
+            max_long_edge = max(scale)
+            max_short_edge = min(scale)
+            scale_factor = min(max_long_edge / max(h, w), max_short_edge / min(h, w))
+        else:
+            raise TypeError(
+                f"Scale must be a number or list of int, but got {type(scale)}"
+            )
+        def _scale_size(size, scale):
+            if isinstance(scale, (float, int)):
+                scale = (scale, scale)
+            w, h = size
+            return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5)
+        new_size = _scale_size((w, h), scale_factor)
+        if return_scale:
+            return new_size, scale_factor
+        else:
+            return new_size
+    def imrescale(
+        self, img, scale, return_scale=False, interpolation="bilinear", backend=None
+    ):
+        """Resize image while keeping the aspect ratio.
+        Args:
+            img (numpy.ndarray): The input image.
+            scale (float): The scaling factor.
+            return_scale (bool): Whether to return the scaling factor along with the resized image.
+            interpolation (str): The interpolation method to use. Defaults to 'bilinear'.
+            backend (str): The backend to use for resizing. Defaults to None.
+        Returns:
+            tuple or numpy.ndarray: The resized image, and optionally the scaling factor.
+        """
+        h, w = img.shape[:2]
+        new_size, scale_factor = self.rescale_size((w, h), scale, return_scale=True)
+        rescaled_img = self.imresize(
+            img, new_size, interpolation=interpolation, backend=backend
+        )
+        if return_scale:
+            return rescaled_img, scale_factor
+        else:
+            return rescaled_img
+    def imresize(
+        self,
+        img,
+        size,
+        return_scale=False,
+        interpolation="bilinear",
+        out=None,
+        backend=None,
+    ):
+        """Resize an image to a given size.
+        Args:
+            img (numpy.ndarray): The input image to be resized.
+            size (tuple): The new size for the image as (height, width).
+            return_scale (bool): Whether to return the scaling factors along with the resized image.
+            interpolation (str): The interpolation method to use. Default is 'bilinear'.
+            out (numpy.ndarray, optional): Output array. If provided, it must have the same shape and dtype as the output array.
+            backend (str, optional): The backend to use for resizing. Supported backends are 'cv2' and 'pillow'.
+        Returns:
+            numpy.ndarray or tuple: The resized image. If return_scale is True, returns a tuple containing the resized image and the scaling factors (w_scale, h_scale).
+        """
+        h, w = img.shape[:2]
+        if backend not in ["cv2", "pillow"]:
+            raise ValueError(
+                f"backend: {backend} is not supported for resize."
+                f"Supported backends are 'cv2', 'pillow'"
+            )
+        if backend == "pillow":
+            raise NotImplementedError
+        else:
+            resized_img = cv2.resize(
+                img, size, dst=out, interpolation=cv2_interp_codes[interpolation]
+            )
+        if not return_scale:
+            return resized_img
+        else:
+            w_scale = size[0] / w
+            h_scale = size[1] / h
+            return resized_img, w_scale, h_scale
+    def _resize_bboxes(self, results):
+        """Resize bounding boxes with `results['scale_factor']`.
+        Args:
+            results (dict): A dictionary containing the bounding boxes and other related information.
+        """
+        for key in results.get("bbox_fields", []):
+            bboxes = results[key] * results["scale_factor"]
+            if self.bbox_clip_border:
+                img_shape = results["img_shape"]
+                bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1])
+                bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0])
+            results[key] = bboxes
+    def _resize_masks(self, results):
+        """Resize masks with ``results['scale']``"""
+        raise NotImplementedError
+    def _resize_seg(self, results):
+        """Resize semantic segmentation map with ``results['scale']``."""
+        raise NotImplementedError
+    def __call__(self, results):
+        """Call function to resize images, bounding boxes, masks, and semantic segmentation maps according to the provided scale or scale factor.
+        Args:
+            results (dict): A dictionary containing the input data, including 'img', 'scale', and optionally 'scale_factor'.
+        Returns:
+            dict: A dictionary with the resized data.
+        """
+        if "scale" not in results:
+            if "scale_factor" in results:
+                img_shape = results["img"][0].shape[:2]
+                scale_factor = results["scale_factor"]
+                assert isinstance(scale_factor, float)
+                results["scale"] = list(
+                    [int(x * scale_factor) for x in img_shape][::-1]
+                )
+            else:
+                self._random_scale(results)
+        else:
+            if not self.override:
+                assert (
+                    "scale_factor" not in results
+                ), "scale and scale_factor cannot be both set."
+            else:
+                results.pop("scale")
+                if "scale_factor" in results:
+                    results.pop("scale_factor")
+                self._random_scale(results)
+        self._resize_img(results)
+        self._resize_bboxes(results)
+        return results
+class NormalizeImage:
+    """Normalize the image."""
+    """Normalize an image by subtracting the mean and dividing by the standard deviation.
+    Args:
+        mean (list or tuple): Mean values for each channel.
+        std (list or tuple): Standard deviation values for each channel.
+        to_rgb (bool): Whether to convert the image from BGR to RGB.
+    """
+    def __init__(self, mean, std, to_rgb=True):
+        """Initializes the NormalizeImage class with mean, std, and to_rgb parameters."""
+        self.mean = np.array(mean, dtype=np.float32)
+        self.std = np.array(std, dtype=np.float32)
+        self.to_rgb = to_rgb
+    def _imnormalize(self, img, mean, std, to_rgb=True):
+        """Normalize the given image inplace.
+        Args:
+            img (numpy.ndarray): The image to normalize.
+            mean (numpy.ndarray): Mean values for normalization.
+            std (numpy.ndarray): Standard deviation values for normalization.
+            to_rgb (bool): Whether to convert the image from BGR to RGB.
+        Returns:
+            numpy.ndarray: The normalized image.
+        """
+        img = img.copy().astype(np.float32)
+        mean = np.float64(mean.reshape(1, -1))
+        stdinv = 1 / np.float64(std.reshape(1, -1))
+        if to_rgb:
+            cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img)  # inplace
+        cv2.subtract(img, mean, img)  # inplace
+        cv2.multiply(img, stdinv, img)  # inplace
+        return img
+    def __call__(self, results):
+        """Call method to normalize images in the results dictionary.
+        Args:
+            results (dict): A dictionary containing image fields to normalize.
+        Returns:
+            dict: The results dictionary with normalized images.
+        """
+        for key in results.get("img_fields", ["img"]):
+            if key == "img_depth":
+                continue
+            for idx in range(len(results["img"])):
+                results[key][idx] = self._imnormalize(
+                    results[key][idx], self.mean, self.std, self.to_rgb
+                )
+        results["img_norm_cfg"] = dict(mean=self.mean, std=self.std, to_rgb=self.to_rgb)
+        return results
+class PadImage(object):
+    """Pad the image & mask."""
+    def __init__(self, size=None, size_divisor=None, pad_val=0):
+        self.size = size
+        self.size_divisor = size_divisor
+        self.pad_val = pad_val
+        # only one of size and size_divisor should be valid
+        assert size is not None or size_divisor is not None
+        assert size is None or size_divisor is None
+    def impad(
+        self, img, *, shape=None, padding=None, pad_val=0, padding_mode="constant"
+    ):
+        """Pad the given image to a certain shape or pad on all sides
+        Args:
+            img (numpy.ndarray): The input image to be padded.
+            shape (tuple, optional): Desired output shape in the form (height, width). One of shape or padding must be specified.
+            padding (int, tuple, optional): Number of pixels to pad on each side of the image. If a single int is provided this
+                is used to pad all sides with this value. If a tuple of length 2 is provided this is interpreted as (top_bottom, left_right).
+                If a tuple of length 4 is provided this is interpreted as (top, right, bottom, left).
+            pad_val (int, list, optional): Pixel value used for padding. If a list is provided, it must have the same length as the
+                last dimension of the input image. Defaults to 0.
+            padding_mode (str, optional): Padding mode to use. One of 'constant', 'edge', 'reflect', 'symmetric'.
+                Defaults to 'constant'.
+        Returns:
+            numpy.ndarray: The padded image.
+        """
+        assert (shape is not None) ^ (padding is not None)
+        if shape is not None:
+            padding = [0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0]]
+        # check pad_val
+        if isinstance(pad_val, list):
+            assert len(pad_val) == img.shape[-1]
+        elif not isinstance(pad_val, numbers.Number):
+            raise TypeError(
+                "pad_val must be a int or a list. " f"But received {type(pad_val)}"
+            )
+        # check padding
+        if isinstance(padding, list) and len(padding) in [2, 4]:
+            if len(padding) == 2:
+                padding = [padding[0], padding[1], padding[0], padding[1]]
+        elif isinstance(padding, numbers.Number):
+            padding = [padding, padding, padding, padding]
+        else:
+            raise ValueError(
+                "Padding must be a int or a 2, or 4 element list."
+                f"But received {padding}"
+            )
+        # check padding mode
+        assert padding_mode in ["constant", "edge", "reflect", "symmetric"]
+        border_type = {
+            "constant": cv2.BORDER_CONSTANT,
+            "edge": cv2.BORDER_REPLICATE,
+            "reflect": cv2.BORDER_REFLECT_101,
+            "symmetric": cv2.BORDER_REFLECT,
+        }
+        img = cv2.copyMakeBorder(
+            img,
+            padding[1],
+            padding[3],
+            padding[0],
+            padding[2],
+            border_type[padding_mode],
+            value=pad_val,
+        )
+        return img
+    def impad_to_multiple(self, img, divisor, pad_val=0):
+        """
+        Pad an image to ensure each edge length is a multiple of a given number.
+        Args:
+            img (numpy.ndarray): The input image.
+            divisor (int): The number to which each edge length should be a multiple.
+            pad_val (int, optional): The value to pad the image with. Defaults to 0.
+        Returns:
+            numpy.ndarray: The padded image.
+        """
+        pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor
+        pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor
+        return self.impad(img, shape=(pad_h, pad_w), pad_val=pad_val)
+    def _pad_img(self, results):
+        """
+        Pad images according to ``self.size`` or adjust their shapes to be multiples of ``self.size_divisor``.
+        Args:
+            results (dict): A dictionary containing image data, with 'img_fields' as an optional key
+                pointing to a list of image field names.
+        """
+        for key in results.get("img_fields", ["img"]):
+            if self.size is not None:
+                padded_img = self.impad(
+                    results[key], shape=self.size, pad_val=self.pad_val
+                )
+            elif self.size_divisor is not None:
+                for idx in range(len(results[key])):
+                    padded_img = self.impad_to_multiple(
+                        results[key][idx], self.size_divisor, pad_val=self.pad_val
+                    )
+                    results[key][idx] = padded_img
+        results["pad_shape"] = padded_img.shape
+        results["pad_fixed_size"] = self.size
+        results["pad_size_divisor"] = self.size_divisor
+    def _pad_masks(self, results):
+        """Pad masks according to ``results['pad_shape']``.
+        Placeholder: mask padding is not implemented, so every call raises.
+        Args:
+            results (dict): Result dict; not read by this placeholder.
+        Raises:
+            NotImplementedError: Always.
+        """
+        raise NotImplementedError
+    def _pad_seg(self, results):
+        """Pad semantic segmentation map according to ``results['pad_shape']``.
+        Placeholder: segmentation-map padding is not implemented, so every call raises.
+        Args:
+            results (dict): Result dict; not read by this placeholder.
+        Raises:
+            NotImplementedError: Always.
+        """
+        raise NotImplementedError
+    def __call__(self, results):
+        """Pad the images held in ``results`` and return the same object.
+        Only ``self._pad_img`` is invoked; ``_pad_masks`` and ``_pad_seg`` are not
+        called here, so masks and segmentation maps are left untouched.
+        Args:
+            results (dict): Result dict updated in place by ``self._pad_img``.
+        Returns:
+            dict: The ``results`` object that was passed in.
+        """
+        self._pad_img(results)
+        return results
+class SampleFilterByKey:
+    """Collect data from the loader relevant to the specific task."""
+    def __init__(
+        self,
+        keys,
+        meta_keys=(
+            "filename",
+            "ori_shape",
+            "img_shape",
+            "lidar2img",
+            "depth2img",
+            "cam2img",
+            "pad_shape",
+            "scale_factor",
+            "flip",
+            "pcd_horizontal_flip",
+            "pcd_vertical_flip",
+            "box_type_3d",
+            "img_norm_cfg",
+            "pcd_trans",
+            "sample_idx",
+            "pcd_scale_factor",
+            "pcd_rotation",
+            "pts_filename",
+            "transformation_3d_flow",
+        ),
+    ):
+        self.keys = keys
+        self.meta_keys = meta_keys
+    def __call__(self, sample):
+        """Call function to filter sample by keys. The keys in `meta_keys` are used to filter metadata from the input sample.
+        Args:
+            sample (Sample): The input sample to be filtered.
+        Returns:
+            Sample: A new Sample object containing only the filtered metadata and specified keys.
+        """
+        filtered_sample = Sample(path=sample.path, modality=sample.modality)
+        filtered_sample.meta.id = sample.meta.id
+        img_metas = {}
+        for key in self.meta_keys:
+            if key in sample:
+                img_metas[key] = sample[key]
+        filtered_sample["img_metas"] = img_metas
+        for key in self.keys:
+            filtered_sample[key] = sample[key]
+        return filtered_sample
+class GetInferInput:
+    """Collect infer input data from transformed sample"""
+    def collate_fn(self, batch):
+        sample = batch[0]
+        collated_batch = {}
+        collated_fields = [
+            "img",
+            "points",
+            "img_metas",
+            "gt_bboxes_3d",
+            "gt_labels_3d",
+            "modality",
+            "meta",
+            "idx",
+            "img_depth",
+        ]
+        for k in list(sample.keys()):
+            if k not in collated_fields:
+                continue
+            if k == "img":
+                collated_batch[k] = np.stack([elem[k] for elem in batch], axis=0)
+            elif k == "img_depth":
+                collated_batch[k] = np.stack(
+                    [np.stack(elem[k], axis=0) for elem in batch], axis=0
+                )
+            else:
+                collated_batch[k] = [elem[k] for elem in batch]
+        return collated_batch
+    def __call__(self, sample):
+        """Call function to infer input data from transformed sample
+        Args:
+            sample (Sample): The input sample data.
+        Returns:
+            infer_input (list): A list containing all the input data for inference.
+            sample_id (str): token id of the input sample.
+        """
+        if sample.modality == "multimodal" or sample.modality == "multiview":
+            if "img" in sample.keys():
+                sample.img = np.stack(
+                    [img.transpose(2, 0, 1) for img in sample.img], axis=0
+                )
+        sample = self.collate_fn([sample])
+        infer_input = []
+        img = sample.get("img", None)[0]
+        infer_input.append(img.astype(np.float32))
+        lidar2img = np.stack(sample["img_metas"][0]["lidar2img"]).astype(np.float32)
+        infer_input.append(lidar2img)
+        points = sample.get("points", None)[0]
+        infer_input.append(points.astype(np.float32))
+        img_metas = {
+            "input_lidar_path": sample["img_metas"][0]["pts_filename"],
+            "input_img_paths": sample["img_metas"][0]["filename"],
+            "sample_id": sample["img_metas"][0]["sample_idx"],
+        }
+        return infer_input, img_metas

paddlex 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl

paddlex 3.0.0b2py3-none-any.whl → 3.0.0rc0py3-none-any.whl