PyPI - python-doctr - Versions diffs - 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl - Mend

python-doctr 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82) hide show

doctr/__init__.py +1 -1
doctr/contrib/__init__.py +0 -0
doctr/contrib/artefacts.py +131 -0
doctr/contrib/base.py +105 -0
doctr/datasets/datasets/pytorch.py +2 -2
doctr/datasets/generator/base.py +6 -5
doctr/datasets/imgur5k.py +1 -1
doctr/datasets/loader.py +1 -6
doctr/datasets/utils.py +2 -1
doctr/datasets/vocabs.py +9 -2
doctr/file_utils.py +26 -12
doctr/io/elements.py +40 -6
doctr/io/html.py +2 -2
doctr/io/image/pytorch.py +6 -8
doctr/io/image/tensorflow.py +1 -1
doctr/io/pdf.py +5 -2
doctr/io/reader.py +6 -0
doctr/models/__init__.py +0 -1
doctr/models/_utils.py +57 -20
doctr/models/builder.py +71 -13
doctr/models/classification/mobilenet/pytorch.py +45 -9
doctr/models/classification/mobilenet/tensorflow.py +38 -7
doctr/models/classification/predictor/pytorch.py +18 -11
doctr/models/classification/predictor/tensorflow.py +16 -10
doctr/models/classification/textnet/pytorch.py +3 -3
doctr/models/classification/textnet/tensorflow.py +3 -3
doctr/models/classification/zoo.py +39 -15
doctr/models/detection/_utils/__init__.py +1 -0
doctr/models/detection/_utils/base.py +66 -0
doctr/models/detection/differentiable_binarization/base.py +4 -3
doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
doctr/models/detection/fast/base.py +6 -5
doctr/models/detection/fast/pytorch.py +4 -4
doctr/models/detection/fast/tensorflow.py +4 -4
doctr/models/detection/linknet/base.py +4 -3
doctr/models/detection/predictor/pytorch.py +15 -1
doctr/models/detection/predictor/tensorflow.py +15 -1
doctr/models/detection/zoo.py +7 -2
doctr/models/factory/hub.py +3 -12
doctr/models/kie_predictor/base.py +9 -3
doctr/models/kie_predictor/pytorch.py +41 -20
doctr/models/kie_predictor/tensorflow.py +36 -16
doctr/models/modules/layers/pytorch.py +2 -3
doctr/models/modules/layers/tensorflow.py +6 -8
doctr/models/modules/transformer/pytorch.py +2 -2
doctr/models/predictor/base.py +77 -50
doctr/models/predictor/pytorch.py +31 -20
doctr/models/predictor/tensorflow.py +27 -17
doctr/models/preprocessor/pytorch.py +4 -4
doctr/models/preprocessor/tensorflow.py +3 -2
doctr/models/recognition/master/pytorch.py +2 -2
doctr/models/recognition/parseq/pytorch.py +4 -3
doctr/models/recognition/parseq/tensorflow.py +4 -3
doctr/models/recognition/sar/pytorch.py +7 -6
doctr/models/recognition/sar/tensorflow.py +3 -9
doctr/models/recognition/vitstr/pytorch.py +1 -1
doctr/models/recognition/zoo.py +1 -1
doctr/models/zoo.py +2 -2
doctr/py.typed +0 -0
doctr/transforms/functional/base.py +1 -1
doctr/transforms/functional/pytorch.py +4 -4
doctr/transforms/modules/base.py +37 -15
doctr/transforms/modules/pytorch.py +66 -8
doctr/transforms/modules/tensorflow.py +63 -7
doctr/utils/fonts.py +7 -5
doctr/utils/geometry.py +35 -12
doctr/utils/metrics.py +33 -174
doctr/utils/reconstitution.py +126 -0
doctr/utils/visualization.py +5 -118
doctr/version.py +1 -1
{python_doctr-0.8.1.dist-info → python_doctr-0.9.0.dist-info}/METADATA +84 -80
{python_doctr-0.8.1.dist-info → python_doctr-0.9.0.dist-info}/RECORD +76 -76
{python_doctr-0.8.1.dist-info → python_doctr-0.9.0.dist-info}/WHEEL +1 -1
doctr/models/artefacts/__init__.py +0 -2
doctr/models/artefacts/barcode.py +0 -74
doctr/models/artefacts/face.py +0 -63
doctr/models/obj_detection/__init__.py +0 -1
doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
{python_doctr-0.8.1.dist-info → python_doctr-0.9.0.dist-info}/LICENSE +0 -0
{python_doctr-0.8.1.dist-info → python_doctr-0.9.0.dist-info}/top_level.txt +0 -0
{python_doctr-0.8.1.dist-info → python_doctr-0.9.0.dist-info}/zip-safe +0 -0

doctr/transforms/modules/tensorflow.py CHANGED Viewed

@@ -4,7 +4,7 @@
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 import random
-from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
+from typing import Any, Callable, Iterable, List, Optional, Tuple, Union
 import numpy as np
 import tensorflow as tf
@@ -30,6 +30,7 @@ __all__ = [
     "GaussianNoise",
     "RandomHorizontalFlip",
     "RandomShadow",
+    "RandomResize",
 ]
@@ -457,10 +458,7 @@ class RandomHorizontalFlip(NestedObject):
     >>> from doctr.transforms import RandomHorizontalFlip
     >>> transfo = RandomHorizontalFlip(p=0.5)
     >>> image = tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)
-    >>> target = {
-    >>> "boxes": np.array([[0.1, 0.1, 0.4, 0.5] ], dtype= np.float32),
-    >>> "labels": np.ones(1, dtype= np.int64)
-    >>> }
+    >>> target = np.array([[0.1, 0.1, 0.4, 0.5] ], dtype= np.float32)
     >>> out = transfo(image, target)
     Args:
@@ -472,12 +470,15 @@ class RandomHorizontalFlip(NestedObject):
         super().__init__()
         self.p = p
-    def __call__(self, img: Union[tf.Tensor, np.ndarray], target: Dict[str, Any]) -> Tuple[tf.Tensor, Dict[str, Any]]:
+    def __call__(self, img: Union[tf.Tensor, np.ndarray], target: np.ndarray) -> Tuple[tf.Tensor, np.ndarray]:
         if np.random.rand(1) <= self.p:
             _img = tf.image.flip_left_right(img)
             _target = target.copy()
             # Changing the relative bbox coordinates
-            _target["boxes"][:, ::2] = 1 - target["boxes"][:, [2, 0]]
+            if target.shape[1:] == (4,):
+                _target[:, ::2] = 1 - target[:, [2, 0]]
+            else:
+                _target[..., 0] = 1 - target[..., 0]
             return _img, _target
         return img, target
@@ -515,3 +516,58 @@ class RandomShadow(NestedObject):
     def extra_repr(self) -> str:
         return f"opacity_range={self.opacity_range}"
+class RandomResize(NestedObject):
+    """Randomly resize the input image and align corresponding targets
+    >>> import tensorflow as tf
+    >>> from doctr.transforms import RandomResize
+    >>> transfo = RandomResize((0.3, 0.9), preserve_aspect_ratio=True, symmetric_pad=True, p=0.5)
+    >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
+    Args:
+    ----
+        scale_range: range of the resizing factor for width and height (independently)
+        preserve_aspect_ratio: whether to preserve the aspect ratio of the image,
+            given a float value, the aspect ratio will be preserved with this probability
+        symmetric_pad: whether to symmetrically pad the image,
+            given a float value, the symmetric padding will be applied with this probability
+        p: probability to apply the transformation
+    """
+    def __init__(
+        self,
+        scale_range: Tuple[float, float] = (0.3, 0.9),
+        preserve_aspect_ratio: Union[bool, float] = False,
+        symmetric_pad: Union[bool, float] = False,
+        p: float = 0.5,
+    ):
+        super().__init__()
+        self.scale_range = scale_range
+        self.preserve_aspect_ratio = preserve_aspect_ratio
+        self.symmetric_pad = symmetric_pad
+        self.p = p
+        self._resize = Resize
+    def __call__(self, img: tf.Tensor, target: np.ndarray) -> Tuple[tf.Tensor, np.ndarray]:
+        if np.random.rand(1) <= self.p:
+            scale_h = random.uniform(*self.scale_range)
+            scale_w = random.uniform(*self.scale_range)
+            new_size = (int(img.shape[-3] * scale_h), int(img.shape[-2] * scale_w))
+            _img, _target = self._resize(
+                new_size,
+                preserve_aspect_ratio=self.preserve_aspect_ratio
+                if isinstance(self.preserve_aspect_ratio, bool)
+                else bool(np.random.rand(1) <= self.symmetric_pad),
+                symmetric_pad=self.symmetric_pad
+                if isinstance(self.symmetric_pad, bool)
+                else bool(np.random.rand(1) <= self.symmetric_pad),
+            )(img, target)
+            return _img, _target
+        return img, target
+    def extra_repr(self) -> str:
+        return f"scale_range={self.scale_range}, preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}, p={self.p}"  # noqa: E501

doctr/utils/fonts.py CHANGED Viewed

@@ -5,14 +5,16 @@
 import logging
 import platform
-from typing import Optional
+from typing import Optional, Union
 from PIL import ImageFont
 __all__ = ["get_font"]
-def get_font(font_family: Optional[str] = None, font_size: int = 13) -> ImageFont.ImageFont:
+def get_font(
+    font_family: Optional[str] = None, font_size: int = 13
+) -> Union[ImageFont.FreeTypeFont, ImageFont.ImageFont]:
     """Resolves a compatible ImageFont for the system
     Args:
@@ -28,14 +30,14 @@ def get_font(font_family: Optional[str] = None, font_size: int = 13) -> ImageFon
     if font_family is None:
         try:
             font = ImageFont.truetype("FreeMono.ttf" if platform.system() == "Linux" else "Arial.ttf", font_size)
-        except OSError:
-            font = ImageFont.load_default()
+        except OSError:  # pragma: no cover
+            font = ImageFont.load_default()  # type: ignore[assignment]
             logging.warning(
                 "unable to load recommended font family. Loading default PIL font,"
                 "font size issues may be expected."
                 "To prevent this, it is recommended to specify the value of 'font_family'."
             )
-    else:
+    else:  # pragma: no cover
         font = ImageFont.truetype(font_family, font_size)
     return font

doctr/utils/geometry.py CHANGED Viewed

@@ -25,6 +25,7 @@ __all__ = [
     "rotate_abs_geoms",
     "extract_crops",
     "extract_rcrops",
+    "detach_scores",
 ]
@@ -57,6 +58,28 @@ def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox:
     return (min(x), min(y)), (max(x), max(y))
+def detach_scores(boxes: List[np.ndarray]) -> Tuple[List[np.ndarray], List[np.ndarray]]:
+    """Detach the objectness scores from box predictions
+    Args:
+    ----
+        boxes: list of arrays with boxes of shape (N, 5) or (N, 5, 2)
+    Returns:
+    -------
+        a tuple of two lists: the first one contains the boxes without the objectness scores,
+        the second one contains the objectness scores
+    """
+    def _detach(boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+        if boxes.ndim == 2:
+            return boxes[:, :-1], boxes[:, -1]
+        return boxes[:, :-1], boxes[:, -1, -1]
+    loc_preds, obj_scores = zip(*(_detach(box) for box in boxes))
+    return list(loc_preds), list(obj_scores)
 def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Union[BoundingBox, np.ndarray]:
     """Compute enclosing bbox either from:
@@ -64,18 +87,18 @@ def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Unio
     ----
         bboxes: boxes in one of the following formats:
-            - an array of boxes: (*, 5), where boxes have this shape:
-            (xmin, ymin, xmax, ymax, score)
+            - an array of boxes: (*, 4), where boxes have this shape:
+            (xmin, ymin, xmax, ymax)
             - a list of BoundingBox
     Returns:
     -------
-        a (1, 5) array (enclosing boxarray), or a BoundingBox
+        a (1, 4) array (enclosing boxarray), or a BoundingBox
     """
     if isinstance(bboxes, np.ndarray):
-        xmin, ymin, xmax, ymax, score = np.split(bboxes, 5, axis=1)
-        return np.array([xmin.min(), ymin.min(), xmax.max(), ymax.max(), score.mean()])
+        xmin, ymin, xmax, ymax = np.split(bboxes, 4, axis=1)
+        return np.array([xmin.min(), ymin.min(), xmax.max(), ymax.max()])
     else:
         x, y = zip(*[point for box in bboxes for point in box])
         return (min(x), min(y)), (max(x), max(y))
@@ -88,21 +111,21 @@ def resolve_enclosing_rbbox(rbboxes: List[np.ndarray], intermed_size: int = 1024
     ----
         rbboxes: boxes in one of the following formats:
-            - an array of boxes: (*, 5), where boxes have this shape:
-            (xmin, ymin, xmax, ymax, score)
+            - an array of boxes: (*, 4, 2), where boxes have this shape:
+            (x1, y1), (x2, y2), (x3, y3), (x4, y4)
             - a list of BoundingBox
         intermed_size: size of the intermediate image
     Returns:
     -------
-        a (1, 5) array (enclosing boxarray), or a BoundingBox
+        a (4, 2) array (enclosing rotated box)
     """
     cloud: np.ndarray = np.concatenate(rbboxes, axis=0)
     # Convert to absolute for minAreaRect
     cloud *= intermed_size
     rect = cv2.minAreaRect(cloud.astype(np.int32))
-    return cv2.boxPoints(rect) / intermed_size  # type: ignore[operator]
+    return cv2.boxPoints(rect) / intermed_size  # type: ignore[return-value]
 def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
@@ -232,7 +255,7 @@ def rotate_boxes(
     Args:
     ----
-        loc_preds: (N, 5) or (N, 4, 2) array of RELATIVE boxes
+        loc_preds: (N, 4) or (N, 4, 2) array of RELATIVE boxes
         angle: angle between -90 and +90 degrees
         orig_shape: shape of the origin image
         min_angle: minimum angle to rotate boxes
@@ -320,7 +343,7 @@ def rotate_image(
             # Pad height
             else:
                 h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0
-            rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
+            rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))  # type: ignore[assignment]
         if preserve_origin_shape:
             # rescale
             rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR)
@@ -453,4 +476,4 @@ def extract_rcrops(
         )
         for idx in range(_boxes.shape[0])
     ]
-    return crops
+    return crops  # type: ignore[return-value]

doctr/utils/metrics.py CHANGED Viewed

@@ -5,16 +5,14 @@
 from typing import Dict, List, Optional, Tuple
-import cv2
 import numpy as np
+from anyascii import anyascii
 from scipy.optimize import linear_sum_assignment
-from unidecode import unidecode
+from shapely.geometry import Polygon
 __all__ = [
     "TextMatch",
     "box_iou",
-    "box_ioa",
-    "mask_iou",
     "polygon_iou",
     "nms",
     "LocalizationConfusion",
@@ -34,16 +32,16 @@ def string_match(word1: str, word2: str) -> Tuple[bool, bool, bool, bool]:
     Returns:
     -------
         a tuple with booleans specifying respectively whether the raw strings, their lower-case counterparts, their
-            unidecode counterparts and their lower-case unidecode counterparts match
+            anyascii counterparts and their lower-case anyascii counterparts match
     """
     raw_match = word1 == word2
     caseless_match = word1.lower() == word2.lower()
-    unidecode_match = unidecode(word1) == unidecode(word2)
+    anyascii_match = anyascii(word1) == anyascii(word2)
     # Warning: the order is important here otherwise the pair ("EUR", "€") cannot be matched
-    unicase_match = unidecode(word1).lower() == unidecode(word2).lower()
+    unicase_match = anyascii(word1).lower() == anyascii(word2).lower()
-    return raw_match, caseless_match, unidecode_match, unicase_match
+    return raw_match, caseless_match, anyascii_match, unicase_match
 class TextMatch:
@@ -94,10 +92,10 @@ class TextMatch:
             raise AssertionError("prediction size does not match with ground-truth labels size")
         for gt_word, pred_word in zip(gt, pred):
-            _raw, _caseless, _unidecode, _unicase = string_match(gt_word, pred_word)
+            _raw, _caseless, _anyascii, _unicase = string_match(gt_word, pred_word)
             self.raw += int(_raw)
             self.caseless += int(_caseless)
-            self.unidecode += int(_unidecode)
+            self.anyascii += int(_anyascii)
             self.unicase += int(_unicase)
         self.total += len(gt)
@@ -107,8 +105,8 @@ class TextMatch:
         Returns
         -------
-            a dictionary with the exact match score for the raw data, its lower-case counterpart, its unidecode
-            counterpart and its lower-case unidecode counterpart
+            a dictionary with the exact match score for the raw data, its lower-case counterpart, its anyascii
+            counterpart and its lower-case anyascii counterpart
         """
         if self.total == 0:
             raise AssertionError("you need to update the metric before getting the summary")
@@ -116,14 +114,14 @@ class TextMatch:
         return dict(
             raw=self.raw / self.total,
             caseless=self.caseless / self.total,
-            unidecode=self.unidecode / self.total,
+            anyascii=self.anyascii / self.total,
             unicase=self.unicase / self.total,
         )
     def reset(self) -> None:
         self.raw = 0
         self.caseless = 0
-        self.unidecode = 0
+        self.anyascii = 0
         self.unicase = 0
         self.total = 0
@@ -158,66 +156,7 @@ def box_iou(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
     return iou_mat
-def box_ioa(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
-    """Computes the IoA (intersection over area) between two sets of bounding boxes:
-    ioa(i, j) = inter(i, j) / area(i)
-    Args:
-    ----
-        boxes_1: bounding boxes of shape (N, 4) in format (xmin, ymin, xmax, ymax)
-        boxes_2: bounding boxes of shape (M, 4) in format (xmin, ymin, xmax, ymax)
-    Returns:
-    -------
-        the IoA matrix of shape (N, M)
-    """
-    ioa_mat: np.ndarray = np.zeros((boxes_1.shape[0], boxes_2.shape[0]), dtype=np.float32)
-    if boxes_1.shape[0] > 0 and boxes_2.shape[0] > 0:
-        l1, t1, r1, b1 = np.split(boxes_1, 4, axis=1)
-        l2, t2, r2, b2 = np.split(boxes_2, 4, axis=1)
-        left = np.maximum(l1, l2.T)
-        top = np.maximum(t1, t2.T)
-        right = np.minimum(r1, r2.T)
-        bot = np.minimum(b1, b2.T)
-        intersection = np.clip(right - left, 0, np.Inf) * np.clip(bot - top, 0, np.Inf)
-        area = (r1 - l1) * (b1 - t1)
-        ioa_mat = intersection / area
-    return ioa_mat
-def mask_iou(masks_1: np.ndarray, masks_2: np.ndarray) -> np.ndarray:
-    """Computes the IoU between two sets of boolean masks
-    Args:
-    ----
-        masks_1: boolean masks of shape (N, H, W)
-        masks_2: boolean masks of shape (M, H, W)
-    Returns:
-    -------
-        the IoU matrix of shape (N, M)
-    """
-    if masks_1.shape[1:] != masks_2.shape[1:]:
-        raise AssertionError("both boolean masks should have the same spatial shape")
-    iou_mat: np.ndarray = np.zeros((masks_1.shape[0], masks_2.shape[0]), dtype=np.float32)
-    if masks_1.shape[0] > 0 and masks_2.shape[0] > 0:
-        axes = tuple(range(2, masks_1.ndim + 1))
-        intersection = np.logical_and(masks_1[:, None, ...], masks_2[None, ...]).sum(axis=axes)
-        union = np.logical_or(masks_1[:, None, ...], masks_2[None, ...]).sum(axis=axes)
-        iou_mat = intersection / union
-    return iou_mat
-def polygon_iou(
-    polys_1: np.ndarray, polys_2: np.ndarray, mask_shape: Tuple[int, int], use_broadcasting: bool = False
-) -> np.ndarray:
+def polygon_iou(polys_1: np.ndarray, polys_2: np.ndarray) -> np.ndarray:
     """Computes the IoU between two sets of rotated bounding boxes
     Args:
@@ -234,80 +173,18 @@ def polygon_iou(
     if polys_1.ndim != 3 or polys_2.ndim != 3:
         raise AssertionError("expects boxes to be in format (N, 4, 2)")
-    iou_mat: np.ndarray = np.zeros((polys_1.shape[0], polys_2.shape[0]), dtype=np.float32)
-    if polys_1.shape[0] > 0 and polys_2.shape[0] > 0:
-        if use_broadcasting:
-            masks_1 = rbox_to_mask(polys_1, shape=mask_shape)
-            masks_2 = rbox_to_mask(polys_2, shape=mask_shape)
-            iou_mat = mask_iou(masks_1, masks_2)
-        else:
-            # Save memory by doing the computation for each pair
-            for idx, b1 in enumerate(polys_1):
-                m1 = _rbox_to_mask(b1, mask_shape)
-                for _idx, b2 in enumerate(polys_2):
-                    m2 = _rbox_to_mask(b2, mask_shape)
-                    iou_mat[idx, _idx] = np.logical_and(m1, m2).sum() / np.logical_or(m1, m2).sum()
-    return iou_mat
+    iou_mat = np.zeros((polys_1.shape[0], polys_2.shape[0]), dtype=np.float32)
+    shapely_polys_1 = [Polygon(poly) for poly in polys_1]
+    shapely_polys_2 = [Polygon(poly) for poly in polys_2]
-def _rbox_to_mask(box: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
-    """Converts a rotated bounding box to a boolean mask
+    for i, poly1 in enumerate(shapely_polys_1):
+        for j, poly2 in enumerate(shapely_polys_2):
+            intersection_area = poly1.intersection(poly2).area
+            union_area = poly1.area + poly2.area - intersection_area
+            iou_mat[i, j] = intersection_area / union_area
-    Args:
-    ----
-        box: rotated bounding box of shape (4, 2)
-        shape: spatial shapes of the output masks
-    Returns:
-    -------
-        the boolean mask of the specified shape
-    """
-    mask: np.ndarray = np.zeros(shape, dtype=np.uint8)
-    # Get absolute coords
-    if not np.issubdtype(box.dtype, np.integer):
-        abs_box = box.copy()
-        abs_box[:, 0] = abs_box[:, 0] * shape[1]
-        abs_box[:, 1] = abs_box[:, 1] * shape[0]
-        abs_box = abs_box.round().astype(int)
-    else:
-        abs_box = box
-        abs_box[2:] = abs_box[2:] + 1
-    cv2.fillPoly(mask, [abs_box - 1], 1.0)  # type: ignore[call-overload]
-    return mask.astype(bool)
-def rbox_to_mask(boxes: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
-    """Converts rotated bounding boxes to boolean masks
-    Args:
-    ----
-        boxes: rotated bounding boxes of shape (N, 4, 2)
-        shape: spatial shapes of the output masks
-    Returns:
-    -------
-        the boolean masks of shape (N, H, W)
-    """
-    masks: np.ndarray = np.zeros((boxes.shape[0], *shape), dtype=np.uint8)
-    if boxes.shape[0] > 0:
-        # Get absolute coordinates
-        if not np.issubdtype(boxes.dtype, np.integer):
-            abs_boxes = boxes.copy()
-            abs_boxes[:, :, 0] = abs_boxes[:, :, 0] * shape[1]
-            abs_boxes[:, :, 1] = abs_boxes[:, :, 1] * shape[0]
-            abs_boxes = abs_boxes.round().astype(int)
-        else:
-            abs_boxes = boxes
-            abs_boxes[:, 2:] = abs_boxes[:, 2:] + 1
-        # TODO: optimize slicing to improve vectorization
-        for idx, _box in enumerate(abs_boxes):
-            cv2.fillPoly(masks[idx], [_box - 1], 1.0)  # type: ignore[call-overload]
-    return masks.astype(bool)
+    return iou_mat
 def nms(boxes: np.ndarray, thresh: float = 0.5) -> List[int]:
@@ -386,21 +263,15 @@ class LocalizationConfusion:
     ----
         iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
         use_polygons: if set to True, predictions and targets will be expected to have rotated format
-        mask_shape: if use_polygons is True, describes the spatial shape of the image used
-        use_broadcasting: if use_polygons is True, use broadcasting for IoU computation by consuming more memory
     """
     def __init__(
         self,
         iou_thresh: float = 0.5,
         use_polygons: bool = False,
-        mask_shape: Tuple[int, int] = (1024, 1024),
-        use_broadcasting: bool = True,
     ) -> None:
         self.iou_thresh = iou_thresh
         self.use_polygons = use_polygons
-        self.mask_shape = mask_shape
-        self.use_broadcasting = use_broadcasting
         self.reset()
     def update(self, gts: np.ndarray, preds: np.ndarray) -> None:
@@ -414,7 +285,7 @@ class LocalizationConfusion:
         if preds.shape[0] > 0:
             # Compute IoU
             if self.use_polygons:
-                iou_mat = polygon_iou(gts, preds, self.mask_shape, self.use_broadcasting)
+                iou_mat = polygon_iou(gts, preds)
             else:
                 iou_mat = box_iou(gts, preds)
             self.tot_iou += float(iou_mat.max(axis=0).sum())
@@ -441,7 +312,7 @@ class LocalizationConfusion:
         precision = self.matches / self.num_preds if self.num_preds > 0 else None
         # mean IoU
-        mean_iou = self.tot_iou / self.num_preds if self.num_preds > 0 else None
+        mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None
         return recall, precision, mean_iou
@@ -492,21 +363,15 @@ class OCRMetric:
     ----
         iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
         use_polygons: if set to True, predictions and targets will be expected to have rotated format
-        mask_shape: if use_polygons is True, describes the spatial shape of the image used
-        use_broadcasting: if use_polygons is True, use broadcasting for IoU computation by consuming more memory
     """
     def __init__(
         self,
         iou_thresh: float = 0.5,
         use_polygons: bool = False,
-        mask_shape: Tuple[int, int] = (1024, 1024),
-        use_broadcasting: bool = True,
     ) -> None:
         self.iou_thresh = iou_thresh
         self.use_polygons = use_polygons
-        self.mask_shape = mask_shape
-        self.use_broadcasting = use_broadcasting
         self.reset()
     def update(
@@ -533,7 +398,7 @@ class OCRMetric:
         # Compute IoU
         if pred_boxes.shape[0] > 0:
             if self.use_polygons:
-                iou_mat = polygon_iou(gt_boxes, pred_boxes, self.mask_shape, self.use_broadcasting)
+                iou_mat = polygon_iou(gt_boxes, pred_boxes)
             else:
                 iou_mat = box_iou(gt_boxes, pred_boxes)
@@ -544,10 +409,10 @@ class OCRMetric:
             is_kept = iou_mat[gt_indices, pred_indices] >= self.iou_thresh
             # String comparison
             for gt_idx, pred_idx in zip(gt_indices[is_kept], pred_indices[is_kept]):
-                _raw, _caseless, _unidecode, _unicase = string_match(gt_labels[gt_idx], pred_labels[pred_idx])
+                _raw, _caseless, _anyascii, _unicase = string_match(gt_labels[gt_idx], pred_labels[pred_idx])
                 self.raw_matches += int(_raw)
                 self.caseless_matches += int(_caseless)
-                self.unidecode_matches += int(_unidecode)
+                self.anyascii_matches += int(_anyascii)
                 self.unicase_matches += int(_unicase)
         self.num_gts += gt_boxes.shape[0]
@@ -564,7 +429,7 @@ class OCRMetric:
         recall = dict(
             raw=self.raw_matches / self.num_gts if self.num_gts > 0 else None,
             caseless=self.caseless_matches / self.num_gts if self.num_gts > 0 else None,
-            unidecode=self.unidecode_matches / self.num_gts if self.num_gts > 0 else None,
+            anyascii=self.anyascii_matches / self.num_gts if self.num_gts > 0 else None,
             unicase=self.unicase_matches / self.num_gts if self.num_gts > 0 else None,
         )
@@ -572,12 +437,12 @@ class OCRMetric:
         precision = dict(
             raw=self.raw_matches / self.num_preds if self.num_preds > 0 else None,
             caseless=self.caseless_matches / self.num_preds if self.num_preds > 0 else None,
-            unidecode=self.unidecode_matches / self.num_preds if self.num_preds > 0 else None,
+            anyascii=self.anyascii_matches / self.num_preds if self.num_preds > 0 else None,
             unicase=self.unicase_matches / self.num_preds if self.num_preds > 0 else None,
         )
         # mean IoU (overall detected boxes)
-        mean_iou = self.tot_iou / self.num_preds if self.num_preds > 0 else None
+        mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None
         return recall, precision, mean_iou
@@ -587,7 +452,7 @@ class OCRMetric:
         self.tot_iou = 0.0
         self.raw_matches = 0
         self.caseless_matches = 0
-        self.unidecode_matches = 0
+        self.anyascii_matches = 0
         self.unicase_matches = 0
@@ -631,21 +496,15 @@ class DetectionMetric:
     ----
         iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
         use_polygons: if set to True, predictions and targets will be expected to have rotated format
-        mask_shape: if use_polygons is True, describes the spatial shape of the image used
-        use_broadcasting: if use_polygons is True, use broadcasting for IoU computation by consuming more memory
     """
     def __init__(
         self,
         iou_thresh: float = 0.5,
         use_polygons: bool = False,
-        mask_shape: Tuple[int, int] = (1024, 1024),
-        use_broadcasting: bool = True,
     ) -> None:
         self.iou_thresh = iou_thresh
         self.use_polygons = use_polygons
-        self.mask_shape = mask_shape
-        self.use_broadcasting = use_broadcasting
         self.reset()
     def update(
@@ -672,7 +531,7 @@ class DetectionMetric:
         # Compute IoU
         if pred_boxes.shape[0] > 0:
             if self.use_polygons:
-                iou_mat = polygon_iou(gt_boxes, pred_boxes, self.mask_shape, self.use_broadcasting)
+                iou_mat = polygon_iou(gt_boxes, pred_boxes)
             else:
                 iou_mat = box_iou(gt_boxes, pred_boxes)
@@ -701,7 +560,7 @@ class DetectionMetric:
         precision = self.num_matches / self.num_preds if self.num_preds > 0 else None
         # mean IoU (overall detected boxes)
-        mean_iou = self.tot_iou / self.num_preds if self.num_preds > 0 else None
+        mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None
         return recall, precision, mean_iou

python-doctr 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl

python-doctr 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl