PyPI - paddlex - Versions diffs - 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl - Mend

paddlex 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (940) hide show

paddlex/inference/models/image_unwarping/predictor.py ADDED Viewed

@@ -0,0 +1,105 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Union, Dict, List, Tuple
+import numpy as np
+from ....modules.image_unwarping.model_list import MODELS
+from ...common.batch_sampler import ImageBatchSampler
+from ...common.reader import ReadImage
+from ..common import (
+    Normalize,
+    ToCHWImage,
+    ToBatch,
+    StaticInfer,
+)
+from ..base import BasicPredictor
+from .processors import DocTrPostProcess
+from .result import DocTrResult
+class WarpPredictor(BasicPredictor):
+    """WarpPredictor that inherits from BasicPredictor."""
+    entities = MODELS
+    def __init__(self, *args: List, **kwargs: Dict) -> None:
+        """Initializes WarpPredictor.
+        Args:
+            *args: Arbitrary positional arguments passed to the superclass.
+            **kwargs: Arbitrary keyword arguments passed to the superclass.
+        """
+        super().__init__(*args, **kwargs)
+        self.preprocessors, self.infer, self.postprocessors = self._build()
+    def _build_batch_sampler(self) -> ImageBatchSampler:
+        """Builds and returns an ImageBatchSampler instance.
+        Returns:
+            ImageBatchSampler: An instance of ImageBatchSampler.
+        """
+        return ImageBatchSampler()
+    def _get_result_class(self) -> type:
+        """Returns the warpping result, DocTrResult.
+        Returns:
+            type: The DocTrResult.
+        """
+        return DocTrResult
+    def _build(self) -> Tuple:
+        """Build the preprocessors, inference engine, and postprocessors based on the configuration.
+        Returns:
+            tuple: A tuple containing the preprocessors, inference engine, and postprocessors.
+        """
+        preprocessors = {"Read": ReadImage(format="BGR")}
+        preprocessors["Normalize"] = Normalize(mean=0.0, std=1.0, scale=1.0 / 255)
+        preprocessors["ToCHW"] = ToCHWImage()
+        preprocessors["ToBatch"] = ToBatch()
+        infer = StaticInfer(
+            model_dir=self.model_dir,
+            model_prefix=self.MODEL_FILE_PREFIX,
+            option=self.pp_option,
+        )
+        postprocessors = {"DocTrPostProcess": DocTrPostProcess()}
+        return preprocessors, infer, postprocessors
+    def process(self, batch_data: List[Union[str, np.ndarray]]) -> Dict[str, Any]:
+        """
+        Process a batch of data through the preprocessing, inference, and postprocessing.
+        Args:
+            batch_data (List[Union[str, np.ndarray], ...]): A batch of input data (e.g., image file paths).
+        Returns:
+            dict: A dictionary containing the input path, raw image, class IDs, scores, and label names for every instance of the batch. Keys include 'input_path', 'input_img', 'class_ids', 'scores', and 'label_names'.
+        """
+        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data.instances)
+        batch_imgs = self.preprocessors["Normalize"](imgs=batch_raw_imgs)
+        batch_imgs = self.preprocessors["ToCHW"](imgs=batch_imgs)
+        x = self.preprocessors["ToBatch"](imgs=batch_imgs)
+        batch_preds = self.infer(x=x)
+        batch_warp_preds = self.postprocessors["DocTrPostProcess"](batch_preds)
+        return {
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
+            "input_img": batch_raw_imgs,
+            "doctr_img": batch_warp_preds,
+        }

paddlex/inference/models/image_unwarping/processors.py ADDED Viewed

@@ -0,0 +1,88 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+from typing import List, Union, Tuple
+class DocTrPostProcess:
+    """
+    Post-processing class for cropping regions from images (though currently only performs scaling and color channel adjustments).
+    Attributes:
+        scale (np.float32): A scaling factor to be applied to the image pixel values.
+            Defaults to 255.0 if not provided.
+    Methods:
+        __call__(imgs: List[Union[np.ndarray, Tuple[np.ndarray, ...]]]) -> List[np.ndarray]:
+            Call method to process a list of images.
+        doctr(pred: Union[np.ndarray, Tuple[np.ndarray, ...]]) -> np.ndarray:
+            Method to process a single image or a tuple/list containing an image.
+    """
+    def __init__(self, scale: Union[str, float, None] = None):
+        """
+        Initializes the DocTrPostProcess class with a scaling factor.
+        Args:
+            scale (Union[str, float, None]): A scaling factor for the image pixel values.
+                If a string is provided, it will be converted to a float. Defaults to 255.0.
+        """
+        super().__init__()
+        self.scale = (
+            np.float32(scale) if isinstance(scale, (str, float)) else np.float32(255.0)
+        )
+    def __call__(
+        self, imgs: List[Union[np.ndarray, Tuple[np.ndarray, ...]]]
+    ) -> List[np.ndarray]:
+        """
+        Processes a list of images using the `doctr` method.
+        Args:
+            imgs (List[Union[np.ndarray, Tuple[np.ndarray, ...]]]): A list of images to process.
+                Each image can be a numpy array or a tuple containing a numpy array.
+        Returns:
+            List[np.ndarray]: A list of processed images.
+        """
+        return [self.doctr(img) for img in imgs]
+    def doctr(self, pred: Union[np.ndarray, Tuple[np.ndarray, ...]]) -> np.ndarray:
+        """
+        Processes a single image.
+        Args:
+            pred (Union[np.ndarray, Tuple[np.ndarray, ...]]): The image to process, which can be
+                a numpy array or a tuple containing a numpy array. Only the first element is used if it's a tuple.
+        Returns:
+            np.ndarray: The processed image.
+        Raises:
+            AssertionError: If the input is not a numpy array.
+        """
+        if isinstance(pred, tuple):
+            im = pred[0]
+        else:
+            im = pred
+        assert isinstance(
+            im, np.ndarray
+        ), "Invalid input 'im' in DocTrPostProcess. Expected a numpy array."
+        im = im.squeeze()
+        im = im.transpose(1, 2, 0)
+        im *= self.scale
+        im = im[:, :, ::-1]
+        im = im.astype("uint8", copy=False)
+        return im

paddlex/inference/models/image_unwarping/result.py ADDED Viewed

@@ -0,0 +1,45 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import copy
+import numpy as np
+from ...common.result import BaseCVResult, StrMixin, JsonMixin
+class DocTrResult(BaseCVResult):
+    """
+    Result class for DocTr, encapsulating the output of a document image processing task.
+    Attributes:
+        (inherited from BaseCVResult): Any attributes defined in the base class.
+    Methods:
+        _to_img(self) -> np.ndarray:
+            Converts the stored image result to a numpy array.
+    """
+    def _to_img(self) -> np.ndarray:
+        result = np.array(self["doctr_img"])
+        return {"res": result}
+    def _to_str(self, *args, **kwargs):
+        data = copy.deepcopy(self)
+        data.pop("input_img")
+        data["doctr_img"] = "..."
+        return JsonMixin._to_str(data, *args, **kwargs)
+    def _to_json(self, *args, **kwargs):
+        data = copy.deepcopy(self)
+        data.pop("input_img")
+        return JsonMixin._to_json(data, *args, **kwargs)

paddlex/inference/models/instance_segmentation/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .predictor import InstanceSegPredictor

paddlex/inference/models/instance_segmentation/predictor.py ADDED Viewed

@@ -0,0 +1,210 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Union, Dict, List, Tuple, Sequence, Optional
+import numpy as np
+from ....modules.instance_segmentation.model_list import MODELS
+from ...common.batch_sampler import ImageBatchSampler
+from ..common import StaticInfer
+from ..object_detection.processors import (
+    ReadImage,
+    ToBatch,
+)
+from .processors import InstanceSegPostProcess
+from ..object_detection import DetPredictor
+from .result import InstanceSegResult
+from ....utils import logging
+class InstanceSegPredictor(DetPredictor):
+    """InstanceSegPredictor that inherits from DetPredictor."""
+    entities = MODELS
+    def __init__(self, *args, threshold: Optional[float] = None, **kwargs):
+        """Initializes InstanceSegPredictor.
+        Args:
+            *args: Arbitrary positional arguments passed to the superclass.
+            threshold (Optional[float], optional): The threshold for filtering out low-confidence predictions.
+                Defaults to None, in which case will use default from the config file.
+            **kwargs: Arbitrary keyword arguments passed to the superclass.
+        """
+        super().__init__(*args, **kwargs)
+        self.model_names_only_supports_batchsize_of_one = {
+            "SOLOv2",
+            "PP-YOLOE_seg-S",
+            "Cascade-MaskRCNN-ResNet50-vd-SSLDv2-FPN",
+            "Cascade-MaskRCNN-ResNet50-FPN",
+        }
+        if self.model_name in self.model_names_only_supports_batchsize_of_one:
+            logging.warning(
+                f"Instance Segmentation Models: \"{', '.join(list(self.model_names_only_supports_batchsize_of_one))}\" only supports prediction with a batch_size of one, "
+                "if you set the predictor with a batch_size larger than one, no error will occur, however, it will actually inference with a batch_size of one, "
+                f"which will lead to a slower inference speed. You are now using {self.config['Global']['model_name']}."
+            )
+        self.threshold = threshold
+    def _get_result_class(self) -> type:
+        """Returns the result class, InstanceSegResult.
+        Returns:
+            type: The InstanceSegResult class.
+        """
+        return InstanceSegResult
+    def _build(self) -> Tuple:
+        """Build the preprocessors, inference engine, and postprocessors based on the configuration.
+        Returns:
+            tuple: A tuple containing the preprocessors, inference engine, and postprocessors.
+        """
+        # build preprocess ops
+        pre_ops = [ReadImage(format="RGB")]
+        for cfg in self.config["Preprocess"]:
+            tf_key = cfg["type"]
+            func = self._FUNC_MAP[tf_key]
+            cfg.pop("type")
+            args = cfg
+            op = func(self, **args) if args else func(self)
+            if op:
+                pre_ops.append(op)
+        pre_ops.append(self.build_to_batch())
+        # build infer
+        infer = StaticInfer(
+            model_dir=self.model_dir,
+            model_prefix=self.MODEL_FILE_PREFIX,
+            option=self.pp_option,
+        )
+        # build postprocess op
+        post_op = self.build_postprocess()
+        return pre_ops, infer, post_op
+    def build_to_batch(self):
+        ordered_required_keys = (
+            "img_size",
+            "img",
+            "scale_factors",
+        )
+        return ToBatch(ordered_required_keys=ordered_required_keys)
+    def process(self, batch_data: List[Any], threshold: Optional[float] = None):
+        """
+        Process a batch of data through the preprocessing, inference, and postprocessing.
+        Args:
+            batch_data (List[Union[str, np.ndarray], ...]): A batch of input data (e.g., image file paths).
+        Returns:
+            dict: A dictionary containing the input path, raw image, box and mask
+                for every instance of the batch. Keys include 'input_path', 'input_img', 'boxes' and 'masks'.
+        """
+        datas = batch_data.instances
+        # preprocess
+        for pre_op in self.pre_ops[:-1]:
+            datas = pre_op(datas)
+        # use `ToBatch` format batch inputs
+        batch_inputs = self.pre_ops[-1](datas)
+        # do infer
+        if self.model_name in self.model_names_only_supports_batchsize_of_one:
+            batch_preds = []
+            for i in range(batch_inputs[0].shape[0]):
+                batch_inputs_ = [
+                    batch_input_[i][None, ...] for batch_input_ in batch_inputs
+                ]
+                batch_pred_ = self.infer(batch_inputs_)
+                batch_preds.append(batch_pred_)
+        else:
+            batch_preds = self.infer(batch_inputs)
+        # process a batch of predictions into a list of single image result
+        preds_list = self._format_output(batch_preds)
+        # postprocess
+        boxes_masks = self.post_op(
+            preds_list, datas, threshold if threshold is not None else self.threshold
+        )
+        return {
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
+            "input_img": [data["ori_img"] for data in datas],
+            "boxes": [result["boxes"] for result in boxes_masks],
+            "masks": [result["masks"] for result in boxes_masks],
+        }
+    def _format_output(self, pred: Sequence[Any]) -> List[dict]:
+        """
+        Transform batch outputs into a list of single image output.
+        Args:
+            pred (Sequence[Any]): The input predictions, which can be either a list of 3 or 4 elements.
+                - When len(pred) == 4, it is expected to be in the format [boxes, class_ids, scores, masks],
+                  compatible with SOLOv2 output.
+                - When len(pred) == 3, it is expected to be in the format [boxes, box_nums, masks],
+                  compatible with Instance Segmentation output.
+        Returns:
+            List[dict]: A list of dictionaries, each containing either 'class_id' and 'masks' (for SOLOv2),
+                or 'boxes' and 'masks' (for Instance Segmentation), or just 'boxes' if no masks are provided.
+        """
+        box_idx_start = 0
+        pred_box = []
+        if isinstance(pred[0], list) and len(pred[0]) == 4:
+            # Adapt to SOLOv2, which only support prediction with a batch_size of 1.
+            pred_class_id = [[pred_[1], pred_[2]] for pred_ in pred]
+            pred_mask = [pred_[3] for pred_ in pred]
+            return [
+                {
+                    "class_id": np.array(pred_class_id[i]),
+                    "masks": np.array(pred_mask[i]),
+                }
+                for i in range(len(pred_class_id))
+            ]
+        if isinstance(pred[0], list) and len(pred[0]) == 3:
+            # Adapt to PP-YOLOE_seg-S, which only support prediction with a batch_size of 1.
+            return [
+                {"boxes": np.array(pred[i][0]), "masks": np.array(pred[i][2])}
+                for i in range(len(pred))
+            ]
+        pred_mask = []
+        for idx in range(len(pred[1])):
+            np_boxes_num = pred[1][idx]
+            box_idx_end = box_idx_start + np_boxes_num
+            np_boxes = pred[0][box_idx_start:box_idx_end]
+            pred_box.append(np_boxes)
+            np_masks = pred[2][box_idx_start:box_idx_end]
+            pred_mask.append(np_masks)
+            box_idx_start = box_idx_end
+        return [
+            {"boxes": np.array(pred_box[i]), "masks": np.array(pred_mask[i])}
+            for i in range(len(pred_box))
+        ]
+    def build_postprocess(self):
+        return InstanceSegPostProcess(
+            threshold=self.config["draw_threshold"], labels=self.config["label_list"]
+        )

paddlex/inference/models/instance_segmentation/processors.py ADDED Viewed

@@ -0,0 +1,105 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from typing import List, Sequence, Tuple, Union, Optional
+import numpy as np
+from ....utils import logging
+from ..object_detection.processors import restructured_boxes
+import cv2
+def extract_masks_from_boxes(boxes, masks):
+    """
+    Extracts the portion of each mask that is within the corresponding box.
+    """
+    new_masks = []
+    for i, box in enumerate(boxes):
+        x_min, y_min, x_max, y_max = box["coordinate"]
+        x_min, y_min, x_max, y_max = map(
+            lambda x: int(round(x)), [x_min, y_min, x_max, y_max]
+        )
+        cropped_mask = masks[i][y_min:y_max, x_min:x_max]
+        new_masks.append(cropped_mask)
+    return new_masks
+class InstanceSegPostProcess(object):
+    """Save Result Transform"""
+    def __init__(self, threshold=0.5, labels=None):
+        super().__init__()
+        self.threshold = threshold
+        self.labels = labels
+    def apply(self, masks, img_size, boxes=None, class_id=None, threshold=None):
+        """apply"""
+        if boxes is not None:
+            expect_boxes = (boxes[:, 1] > threshold) & (boxes[:, 0] > -1)
+            boxes = boxes[expect_boxes, :]
+            boxes = restructured_boxes(boxes, self.labels, img_size)
+            masks = masks[expect_boxes, :, :]
+            masks = extract_masks_from_boxes(boxes, masks)
+            result = {"boxes": boxes, "masks": masks}
+        else:
+            mask_info = []
+            class_id = [list(item) for item in zip(class_id[0], class_id[1])]
+            selected_masks = []
+            for i, info in enumerate(class_id):
+                label_id = int(info[0])
+                if info[1] < threshold:
+                    continue
+                mask_info.append(
+                    {
+                        "label": self.labels[label_id],
+                        "score": info[1],
+                        "class_id": label_id,
+                    }
+                )
+                selected_masks.append(masks[i])
+            result = {"boxes": mask_info, "masks": selected_masks}
+        return result
+    def __call__(
+        self,
+        batch_outputs: List[dict],
+        datas: List[dict],
+        threshold: Optional[float] = None,
+    ):
+        """Apply the post-processing to a batch of outputs.
+        Args:
+            batch_outputs (List[dict]): The list of detection outputs.
+            datas (List[dict]): The list of input data.
+            threshold: Optional[float]: object score threshold for postprocess.
+        Returns:
+            List[Boxes]: The list of post-processed detection boxes.
+        """
+        outputs = []
+        for data, output in zip(datas, batch_outputs):
+            boxes_masks = self.apply(
+                img_size=data["ori_img_size"],
+                **output,
+                threshold=threshold if threshold is not None else self.threshold
+            )
+            outputs.append(boxes_masks)
+        return outputs

paddlex 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl

paddlex 3.0.0b2__py3-none-any.whl → 3.0.0rc0__py3-none-any.whl