python-doctr 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. doctr/__init__.py +1 -1
  2. doctr/contrib/__init__.py +0 -0
  3. doctr/contrib/artefacts.py +131 -0
  4. doctr/contrib/base.py +105 -0
  5. doctr/datasets/cord.py +10 -1
  6. doctr/datasets/datasets/pytorch.py +2 -2
  7. doctr/datasets/funsd.py +11 -1
  8. doctr/datasets/generator/base.py +6 -5
  9. doctr/datasets/ic03.py +11 -1
  10. doctr/datasets/ic13.py +10 -1
  11. doctr/datasets/iiit5k.py +26 -16
  12. doctr/datasets/imgur5k.py +11 -2
  13. doctr/datasets/loader.py +1 -6
  14. doctr/datasets/sroie.py +11 -1
  15. doctr/datasets/svhn.py +11 -1
  16. doctr/datasets/svt.py +11 -1
  17. doctr/datasets/synthtext.py +11 -1
  18. doctr/datasets/utils.py +9 -3
  19. doctr/datasets/vocabs.py +15 -4
  20. doctr/datasets/wildreceipt.py +12 -1
  21. doctr/file_utils.py +45 -12
  22. doctr/io/elements.py +52 -10
  23. doctr/io/html.py +2 -2
  24. doctr/io/image/pytorch.py +6 -8
  25. doctr/io/image/tensorflow.py +1 -1
  26. doctr/io/pdf.py +5 -2
  27. doctr/io/reader.py +6 -0
  28. doctr/models/__init__.py +0 -1
  29. doctr/models/_utils.py +57 -20
  30. doctr/models/builder.py +73 -15
  31. doctr/models/classification/magc_resnet/tensorflow.py +13 -6
  32. doctr/models/classification/mobilenet/pytorch.py +47 -9
  33. doctr/models/classification/mobilenet/tensorflow.py +51 -14
  34. doctr/models/classification/predictor/pytorch.py +28 -17
  35. doctr/models/classification/predictor/tensorflow.py +26 -16
  36. doctr/models/classification/resnet/tensorflow.py +21 -8
  37. doctr/models/classification/textnet/pytorch.py +3 -3
  38. doctr/models/classification/textnet/tensorflow.py +11 -5
  39. doctr/models/classification/vgg/tensorflow.py +9 -3
  40. doctr/models/classification/vit/tensorflow.py +10 -4
  41. doctr/models/classification/zoo.py +55 -19
  42. doctr/models/detection/_utils/__init__.py +1 -0
  43. doctr/models/detection/_utils/base.py +66 -0
  44. doctr/models/detection/differentiable_binarization/base.py +4 -3
  45. doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
  46. doctr/models/detection/differentiable_binarization/tensorflow.py +34 -12
  47. doctr/models/detection/fast/base.py +6 -5
  48. doctr/models/detection/fast/pytorch.py +4 -4
  49. doctr/models/detection/fast/tensorflow.py +15 -12
  50. doctr/models/detection/linknet/base.py +4 -3
  51. doctr/models/detection/linknet/tensorflow.py +23 -11
  52. doctr/models/detection/predictor/pytorch.py +15 -1
  53. doctr/models/detection/predictor/tensorflow.py +17 -3
  54. doctr/models/detection/zoo.py +7 -2
  55. doctr/models/factory/hub.py +8 -18
  56. doctr/models/kie_predictor/base.py +13 -3
  57. doctr/models/kie_predictor/pytorch.py +45 -20
  58. doctr/models/kie_predictor/tensorflow.py +44 -17
  59. doctr/models/modules/layers/pytorch.py +2 -3
  60. doctr/models/modules/layers/tensorflow.py +6 -8
  61. doctr/models/modules/transformer/pytorch.py +2 -2
  62. doctr/models/modules/transformer/tensorflow.py +0 -2
  63. doctr/models/modules/vision_transformer/pytorch.py +1 -1
  64. doctr/models/modules/vision_transformer/tensorflow.py +1 -1
  65. doctr/models/predictor/base.py +97 -58
  66. doctr/models/predictor/pytorch.py +35 -20
  67. doctr/models/predictor/tensorflow.py +35 -18
  68. doctr/models/preprocessor/pytorch.py +4 -4
  69. doctr/models/preprocessor/tensorflow.py +3 -2
  70. doctr/models/recognition/crnn/tensorflow.py +8 -6
  71. doctr/models/recognition/master/pytorch.py +2 -2
  72. doctr/models/recognition/master/tensorflow.py +9 -4
  73. doctr/models/recognition/parseq/pytorch.py +4 -3
  74. doctr/models/recognition/parseq/tensorflow.py +14 -11
  75. doctr/models/recognition/sar/pytorch.py +7 -6
  76. doctr/models/recognition/sar/tensorflow.py +10 -12
  77. doctr/models/recognition/vitstr/pytorch.py +1 -1
  78. doctr/models/recognition/vitstr/tensorflow.py +9 -4
  79. doctr/models/recognition/zoo.py +1 -1
  80. doctr/models/utils/pytorch.py +1 -1
  81. doctr/models/utils/tensorflow.py +15 -15
  82. doctr/models/zoo.py +2 -2
  83. doctr/py.typed +0 -0
  84. doctr/transforms/functional/base.py +1 -1
  85. doctr/transforms/functional/pytorch.py +5 -5
  86. doctr/transforms/modules/base.py +37 -15
  87. doctr/transforms/modules/pytorch.py +73 -14
  88. doctr/transforms/modules/tensorflow.py +78 -19
  89. doctr/utils/fonts.py +7 -5
  90. doctr/utils/geometry.py +141 -31
  91. doctr/utils/metrics.py +34 -175
  92. doctr/utils/reconstitution.py +212 -0
  93. doctr/utils/visualization.py +5 -118
  94. doctr/version.py +1 -1
  95. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/METADATA +85 -81
  96. python_doctr-0.10.0.dist-info/RECORD +173 -0
  97. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/WHEEL +1 -1
  98. doctr/models/artefacts/__init__.py +0 -2
  99. doctr/models/artefacts/barcode.py +0 -74
  100. doctr/models/artefacts/face.py +0 -63
  101. doctr/models/obj_detection/__init__.py +0 -1
  102. doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
  103. doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
  104. python_doctr-0.8.1.dist-info/RECORD +0 -173
  105. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/LICENSE +0 -0
  106. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/top_level.txt +0 -0
  107. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/zip-safe +0 -0
doctr/utils/geometry.py CHANGED
@@ -20,11 +20,13 @@ __all__ = [
20
20
  "rotate_boxes",
21
21
  "compute_expanded_shape",
22
22
  "rotate_image",
23
+ "remove_image_padding",
23
24
  "estimate_page_angle",
24
25
  "convert_to_relative_coords",
25
26
  "rotate_abs_geoms",
26
27
  "extract_crops",
27
28
  "extract_rcrops",
29
+ "detach_scores",
28
30
  ]
29
31
 
30
32
 
@@ -57,6 +59,28 @@ def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox:
57
59
  return (min(x), min(y)), (max(x), max(y))
58
60
 
59
61
 
62
+ def detach_scores(boxes: List[np.ndarray]) -> Tuple[List[np.ndarray], List[np.ndarray]]:
63
+ """Detach the objectness scores from box predictions
64
+
65
+ Args:
66
+ ----
67
+ boxes: list of arrays with boxes of shape (N, 5) or (N, 5, 2)
68
+
69
+ Returns:
70
+ -------
71
+ a tuple of two lists: the first one contains the boxes without the objectness scores,
72
+ the second one contains the objectness scores
73
+ """
74
+
75
+ def _detach(boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
76
+ if boxes.ndim == 2:
77
+ return boxes[:, :-1], boxes[:, -1]
78
+ return boxes[:, :-1], boxes[:, -1, -1]
79
+
80
+ loc_preds, obj_scores = zip(*(_detach(box) for box in boxes))
81
+ return list(loc_preds), list(obj_scores)
82
+
83
+
60
84
  def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Union[BoundingBox, np.ndarray]:
61
85
  """Compute enclosing bbox either from:
62
86
 
@@ -64,18 +88,18 @@ def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Unio
64
88
  ----
65
89
  bboxes: boxes in one of the following formats:
66
90
 
67
- - an array of boxes: (*, 5), where boxes have this shape:
68
- (xmin, ymin, xmax, ymax, score)
91
+ - an array of boxes: (*, 4), where boxes have this shape:
92
+ (xmin, ymin, xmax, ymax)
69
93
 
70
94
  - a list of BoundingBox
71
95
 
72
96
  Returns:
73
97
  -------
74
- a (1, 5) array (enclosing boxarray), or a BoundingBox
98
+ a (1, 4) array (enclosing boxarray), or a BoundingBox
75
99
  """
76
100
  if isinstance(bboxes, np.ndarray):
77
- xmin, ymin, xmax, ymax, score = np.split(bboxes, 5, axis=1)
78
- return np.array([xmin.min(), ymin.min(), xmax.max(), ymax.max(), score.mean()])
101
+ xmin, ymin, xmax, ymax = np.split(bboxes, 4, axis=1)
102
+ return np.array([xmin.min(), ymin.min(), xmax.max(), ymax.max()])
79
103
  else:
80
104
  x, y = zip(*[point for box in bboxes for point in box])
81
105
  return (min(x), min(y)), (max(x), max(y))
@@ -88,21 +112,21 @@ def resolve_enclosing_rbbox(rbboxes: List[np.ndarray], intermed_size: int = 1024
88
112
  ----
89
113
  rbboxes: boxes in one of the following formats:
90
114
 
91
- - an array of boxes: (*, 5), where boxes have this shape:
92
- (xmin, ymin, xmax, ymax, score)
115
+ - an array of boxes: (*, 4, 2), where boxes have this shape:
116
+ (x1, y1), (x2, y2), (x3, y3), (x4, y4)
93
117
 
94
118
  - a list of BoundingBox
95
119
  intermed_size: size of the intermediate image
96
120
 
97
121
  Returns:
98
122
  -------
99
- a (1, 5) array (enclosing boxarray), or a BoundingBox
123
+ a (4, 2) array (enclosing rotated box)
100
124
  """
101
125
  cloud: np.ndarray = np.concatenate(rbboxes, axis=0)
102
126
  # Convert to absolute for minAreaRect
103
127
  cloud *= intermed_size
104
128
  rect = cv2.minAreaRect(cloud.astype(np.int32))
105
- return cv2.boxPoints(rect) / intermed_size # type: ignore[operator]
129
+ return cv2.boxPoints(rect) / intermed_size # type: ignore[return-value]
106
130
 
107
131
 
108
132
  def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
@@ -232,7 +256,7 @@ def rotate_boxes(
232
256
 
233
257
  Args:
234
258
  ----
235
- loc_preds: (N, 5) or (N, 4, 2) array of RELATIVE boxes
259
+ loc_preds: (N, 4) or (N, 4, 2) array of RELATIVE boxes
236
260
  angle: angle between -90 and +90 degrees
237
261
  orig_shape: shape of the origin image
238
262
  min_angle: minimum angle to rotate boxes
@@ -320,7 +344,7 @@ def rotate_image(
320
344
  # Pad height
321
345
  else:
322
346
  h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0
323
- rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
347
+ rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) # type: ignore[assignment]
324
348
  if preserve_origin_shape:
325
349
  # rescale
326
350
  rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR)
@@ -328,6 +352,26 @@ def rotate_image(
328
352
  return rot_img
329
353
 
330
354
 
355
+ def remove_image_padding(image: np.ndarray) -> np.ndarray:
356
+ """Remove black border padding from an image
357
+
358
+ Args:
359
+ ----
360
+ image: numpy tensor to remove padding from
361
+
362
+ Returns:
363
+ -------
364
+ Image with padding removed
365
+ """
366
+ # Find the bounding box of the non-black region
367
+ rows = np.any(image, axis=1)
368
+ cols = np.any(image, axis=0)
369
+ rmin, rmax = np.where(rows)[0][[0, -1]]
370
+ cmin, cmax = np.where(cols)[0][[0, -1]]
371
+
372
+ return image[rmin : rmax + 1, cmin : cmax + 1]
373
+
374
+
331
375
  def estimate_page_angle(polys: np.ndarray) -> float:
332
376
  """Takes a batch of rotated previously ORIENTED polys (N, 4, 2) (rectified by the classifier) and return the
333
377
  estimated angle ccw in degrees
@@ -408,7 +452,7 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True
408
452
 
409
453
 
410
454
  def extract_rcrops(
411
- img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True
455
+ img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True, assume_horizontal: bool = False
412
456
  ) -> List[np.ndarray]:
413
457
  """Created cropped images from list of rotated bounding boxes
414
458
 
@@ -418,6 +462,7 @@ def extract_rcrops(
418
462
  polys: bounding boxes of shape (N, 4, 2)
419
463
  dtype: target data type of bounding boxes
420
464
  channels_last: whether the channel dimensions is the last one instead of the last one
465
+ assume_horizontal: whether the boxes are assumed to be only horizontally oriented
421
466
 
422
467
  Returns:
423
468
  -------
@@ -435,22 +480,87 @@ def extract_rcrops(
435
480
  _boxes[:, :, 0] *= width
436
481
  _boxes[:, :, 1] *= height
437
482
 
438
- src_pts = _boxes[:, :3].astype(np.float32)
439
- # Preserve size
440
- d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
441
- d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1)
442
- # (N, 3, 2)
443
- dst_pts = np.zeros((_boxes.shape[0], 3, 2), dtype=dtype)
444
- dst_pts[:, 1, 0] = dst_pts[:, 2, 0] = d1 - 1
445
- dst_pts[:, 2, 1] = d2 - 1
446
- # Use a warp transformation to extract the crop
447
- crops = [
448
- cv2.warpAffine(
449
- img if channels_last else img.transpose(1, 2, 0),
450
- # Transformation matrix
451
- cv2.getAffineTransform(src_pts[idx], dst_pts[idx]),
452
- (int(d1[idx]), int(d2[idx])),
453
- )
454
- for idx in range(_boxes.shape[0])
455
- ]
456
- return crops
483
+ src_img = img if channels_last else img.transpose(1, 2, 0)
484
+
485
+ # Handle only horizontal oriented boxes
486
+ if assume_horizontal:
487
+ crops = []
488
+
489
+ for box in _boxes:
490
+ # Calculate the centroid of the quadrilateral
491
+ centroid = np.mean(box, axis=0)
492
+
493
+ # Divide the points into left and right
494
+ left_points = box[box[:, 0] < centroid[0]]
495
+ right_points = box[box[:, 0] >= centroid[0]]
496
+
497
+ # Sort the left points according to the y-axis
498
+ left_points = left_points[np.argsort(left_points[:, 1])]
499
+ top_left_pt = left_points[0]
500
+ bottom_left_pt = left_points[-1]
501
+ # Sort the right points according to the y-axis
502
+ right_points = right_points[np.argsort(right_points[:, 1])]
503
+ top_right_pt = right_points[0]
504
+ bottom_right_pt = right_points[-1]
505
+ box_points = np.array(
506
+ [top_left_pt, bottom_left_pt, top_right_pt, bottom_right_pt],
507
+ dtype=dtype,
508
+ )
509
+
510
+ # Get the width and height of the rectangle that will contain the warped quadrilateral
511
+ width_upper = np.linalg.norm(top_right_pt - top_left_pt)
512
+ width_lower = np.linalg.norm(bottom_right_pt - bottom_left_pt)
513
+ height_left = np.linalg.norm(bottom_left_pt - top_left_pt)
514
+ height_right = np.linalg.norm(bottom_right_pt - top_right_pt)
515
+
516
+ # Get the maximum width and height
517
+ rect_width = max(int(width_upper), int(width_lower))
518
+ rect_height = max(int(height_left), int(height_right))
519
+
520
+ dst_pts = np.array(
521
+ [
522
+ [0, 0], # top-left
523
+ # bottom-left
524
+ [0, rect_height - 1],
525
+ # top-right
526
+ [rect_width - 1, 0],
527
+ # bottom-right
528
+ [rect_width - 1, rect_height - 1],
529
+ ],
530
+ dtype=dtype,
531
+ )
532
+
533
+ # Get the perspective transform matrix using the box points
534
+ affine_mat = cv2.getPerspectiveTransform(box_points, dst_pts)
535
+
536
+ # Perform the perspective warp to get the rectified crop
537
+ crop = cv2.warpPerspective(
538
+ src_img,
539
+ affine_mat,
540
+ (rect_width, rect_height),
541
+ )
542
+
543
+ # Add the crop to the list of crops
544
+ crops.append(crop)
545
+
546
+ # Handle any oriented boxes
547
+ else:
548
+ src_pts = _boxes[:, :3].astype(np.float32)
549
+ # Preserve size
550
+ d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
551
+ d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1)
552
+ # (N, 3, 2)
553
+ dst_pts = np.zeros((_boxes.shape[0], 3, 2), dtype=dtype)
554
+ dst_pts[:, 1, 0] = dst_pts[:, 2, 0] = d1 - 1
555
+ dst_pts[:, 2, 1] = d2 - 1
556
+ # Use a warp transformation to extract the crop
557
+ crops = [
558
+ cv2.warpAffine(
559
+ src_img,
560
+ # Transformation matrix
561
+ cv2.getAffineTransform(src_pts[idx], dst_pts[idx]),
562
+ (int(d1[idx]), int(d2[idx])),
563
+ )
564
+ for idx in range(_boxes.shape[0])
565
+ ]
566
+ return crops # type: ignore[return-value]
doctr/utils/metrics.py CHANGED
@@ -5,16 +5,14 @@
5
5
 
6
6
  from typing import Dict, List, Optional, Tuple
7
7
 
8
- import cv2
9
8
  import numpy as np
9
+ from anyascii import anyascii
10
10
  from scipy.optimize import linear_sum_assignment
11
- from unidecode import unidecode
11
+ from shapely.geometry import Polygon
12
12
 
13
13
  __all__ = [
14
14
  "TextMatch",
15
15
  "box_iou",
16
- "box_ioa",
17
- "mask_iou",
18
16
  "polygon_iou",
19
17
  "nms",
20
18
  "LocalizationConfusion",
@@ -34,16 +32,16 @@ def string_match(word1: str, word2: str) -> Tuple[bool, bool, bool, bool]:
34
32
  Returns:
35
33
  -------
36
34
  a tuple with booleans specifying respectively whether the raw strings, their lower-case counterparts, their
37
- unidecode counterparts and their lower-case unidecode counterparts match
35
+ anyascii counterparts and their lower-case anyascii counterparts match
38
36
  """
39
37
  raw_match = word1 == word2
40
38
  caseless_match = word1.lower() == word2.lower()
41
- unidecode_match = unidecode(word1) == unidecode(word2)
39
+ anyascii_match = anyascii(word1) == anyascii(word2)
42
40
 
43
41
  # Warning: the order is important here otherwise the pair ("EUR", "€") cannot be matched
44
- unicase_match = unidecode(word1).lower() == unidecode(word2).lower()
42
+ unicase_match = anyascii(word1).lower() == anyascii(word2).lower()
45
43
 
46
- return raw_match, caseless_match, unidecode_match, unicase_match
44
+ return raw_match, caseless_match, anyascii_match, unicase_match
47
45
 
48
46
 
49
47
  class TextMatch:
@@ -94,10 +92,10 @@ class TextMatch:
94
92
  raise AssertionError("prediction size does not match with ground-truth labels size")
95
93
 
96
94
  for gt_word, pred_word in zip(gt, pred):
97
- _raw, _caseless, _unidecode, _unicase = string_match(gt_word, pred_word)
95
+ _raw, _caseless, _anyascii, _unicase = string_match(gt_word, pred_word)
98
96
  self.raw += int(_raw)
99
97
  self.caseless += int(_caseless)
100
- self.unidecode += int(_unidecode)
98
+ self.anyascii += int(_anyascii)
101
99
  self.unicase += int(_unicase)
102
100
 
103
101
  self.total += len(gt)
@@ -107,8 +105,8 @@ class TextMatch:
107
105
 
108
106
  Returns
109
107
  -------
110
- a dictionary with the exact match score for the raw data, its lower-case counterpart, its unidecode
111
- counterpart and its lower-case unidecode counterpart
108
+ a dictionary with the exact match score for the raw data, its lower-case counterpart, its anyascii
109
+ counterpart and its lower-case anyascii counterpart
112
110
  """
113
111
  if self.total == 0:
114
112
  raise AssertionError("you need to update the metric before getting the summary")
@@ -116,14 +114,14 @@ class TextMatch:
116
114
  return dict(
117
115
  raw=self.raw / self.total,
118
116
  caseless=self.caseless / self.total,
119
- unidecode=self.unidecode / self.total,
117
+ anyascii=self.anyascii / self.total,
120
118
  unicase=self.unicase / self.total,
121
119
  )
122
120
 
123
121
  def reset(self) -> None:
124
122
  self.raw = 0
125
123
  self.caseless = 0
126
- self.unidecode = 0
124
+ self.anyascii = 0
127
125
  self.unicase = 0
128
126
  self.total = 0
129
127
 
@@ -151,73 +149,14 @@ def box_iou(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
151
149
  right = np.minimum(r1, r2.T)
152
150
  bot = np.minimum(b1, b2.T)
153
151
 
154
- intersection = np.clip(right - left, 0, np.Inf) * np.clip(bot - top, 0, np.Inf)
152
+ intersection = np.clip(right - left, 0, np.inf) * np.clip(bot - top, 0, np.inf)
155
153
  union = (r1 - l1) * (b1 - t1) + ((r2 - l2) * (b2 - t2)).T - intersection
156
154
  iou_mat = intersection / union
157
155
 
158
156
  return iou_mat
159
157
 
160
158
 
161
- def box_ioa(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
162
- """Computes the IoA (intersection over area) between two sets of bounding boxes:
163
- ioa(i, j) = inter(i, j) / area(i)
164
-
165
- Args:
166
- ----
167
- boxes_1: bounding boxes of shape (N, 4) in format (xmin, ymin, xmax, ymax)
168
- boxes_2: bounding boxes of shape (M, 4) in format (xmin, ymin, xmax, ymax)
169
-
170
- Returns:
171
- -------
172
- the IoA matrix of shape (N, M)
173
- """
174
- ioa_mat: np.ndarray = np.zeros((boxes_1.shape[0], boxes_2.shape[0]), dtype=np.float32)
175
-
176
- if boxes_1.shape[0] > 0 and boxes_2.shape[0] > 0:
177
- l1, t1, r1, b1 = np.split(boxes_1, 4, axis=1)
178
- l2, t2, r2, b2 = np.split(boxes_2, 4, axis=1)
179
-
180
- left = np.maximum(l1, l2.T)
181
- top = np.maximum(t1, t2.T)
182
- right = np.minimum(r1, r2.T)
183
- bot = np.minimum(b1, b2.T)
184
-
185
- intersection = np.clip(right - left, 0, np.Inf) * np.clip(bot - top, 0, np.Inf)
186
- area = (r1 - l1) * (b1 - t1)
187
- ioa_mat = intersection / area
188
-
189
- return ioa_mat
190
-
191
-
192
- def mask_iou(masks_1: np.ndarray, masks_2: np.ndarray) -> np.ndarray:
193
- """Computes the IoU between two sets of boolean masks
194
-
195
- Args:
196
- ----
197
- masks_1: boolean masks of shape (N, H, W)
198
- masks_2: boolean masks of shape (M, H, W)
199
-
200
- Returns:
201
- -------
202
- the IoU matrix of shape (N, M)
203
- """
204
- if masks_1.shape[1:] != masks_2.shape[1:]:
205
- raise AssertionError("both boolean masks should have the same spatial shape")
206
-
207
- iou_mat: np.ndarray = np.zeros((masks_1.shape[0], masks_2.shape[0]), dtype=np.float32)
208
-
209
- if masks_1.shape[0] > 0 and masks_2.shape[0] > 0:
210
- axes = tuple(range(2, masks_1.ndim + 1))
211
- intersection = np.logical_and(masks_1[:, None, ...], masks_2[None, ...]).sum(axis=axes)
212
- union = np.logical_or(masks_1[:, None, ...], masks_2[None, ...]).sum(axis=axes)
213
- iou_mat = intersection / union
214
-
215
- return iou_mat
216
-
217
-
218
- def polygon_iou(
219
- polys_1: np.ndarray, polys_2: np.ndarray, mask_shape: Tuple[int, int], use_broadcasting: bool = False
220
- ) -> np.ndarray:
159
+ def polygon_iou(polys_1: np.ndarray, polys_2: np.ndarray) -> np.ndarray:
221
160
  """Computes the IoU between two sets of rotated bounding boxes
222
161
 
223
162
  Args:
@@ -234,80 +173,18 @@ def polygon_iou(
234
173
  if polys_1.ndim != 3 or polys_2.ndim != 3:
235
174
  raise AssertionError("expects boxes to be in format (N, 4, 2)")
236
175
 
237
- iou_mat: np.ndarray = np.zeros((polys_1.shape[0], polys_2.shape[0]), dtype=np.float32)
238
-
239
- if polys_1.shape[0] > 0 and polys_2.shape[0] > 0:
240
- if use_broadcasting:
241
- masks_1 = rbox_to_mask(polys_1, shape=mask_shape)
242
- masks_2 = rbox_to_mask(polys_2, shape=mask_shape)
243
- iou_mat = mask_iou(masks_1, masks_2)
244
- else:
245
- # Save memory by doing the computation for each pair
246
- for idx, b1 in enumerate(polys_1):
247
- m1 = _rbox_to_mask(b1, mask_shape)
248
- for _idx, b2 in enumerate(polys_2):
249
- m2 = _rbox_to_mask(b2, mask_shape)
250
- iou_mat[idx, _idx] = np.logical_and(m1, m2).sum() / np.logical_or(m1, m2).sum()
251
-
252
- return iou_mat
176
+ iou_mat = np.zeros((polys_1.shape[0], polys_2.shape[0]), dtype=np.float32)
253
177
 
178
+ shapely_polys_1 = [Polygon(poly) for poly in polys_1]
179
+ shapely_polys_2 = [Polygon(poly) for poly in polys_2]
254
180
 
255
- def _rbox_to_mask(box: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
256
- """Converts a rotated bounding box to a boolean mask
181
+ for i, poly1 in enumerate(shapely_polys_1):
182
+ for j, poly2 in enumerate(shapely_polys_2):
183
+ intersection_area = poly1.intersection(poly2).area
184
+ union_area = poly1.area + poly2.area - intersection_area
185
+ iou_mat[i, j] = intersection_area / union_area
257
186
 
258
- Args:
259
- ----
260
- box: rotated bounding box of shape (4, 2)
261
- shape: spatial shapes of the output masks
262
-
263
- Returns:
264
- -------
265
- the boolean mask of the specified shape
266
- """
267
- mask: np.ndarray = np.zeros(shape, dtype=np.uint8)
268
- # Get absolute coords
269
- if not np.issubdtype(box.dtype, np.integer):
270
- abs_box = box.copy()
271
- abs_box[:, 0] = abs_box[:, 0] * shape[1]
272
- abs_box[:, 1] = abs_box[:, 1] * shape[0]
273
- abs_box = abs_box.round().astype(int)
274
- else:
275
- abs_box = box
276
- abs_box[2:] = abs_box[2:] + 1
277
- cv2.fillPoly(mask, [abs_box - 1], 1.0) # type: ignore[call-overload]
278
-
279
- return mask.astype(bool)
280
-
281
-
282
- def rbox_to_mask(boxes: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
283
- """Converts rotated bounding boxes to boolean masks
284
-
285
- Args:
286
- ----
287
- boxes: rotated bounding boxes of shape (N, 4, 2)
288
- shape: spatial shapes of the output masks
289
-
290
- Returns:
291
- -------
292
- the boolean masks of shape (N, H, W)
293
- """
294
- masks: np.ndarray = np.zeros((boxes.shape[0], *shape), dtype=np.uint8)
295
-
296
- if boxes.shape[0] > 0:
297
- # Get absolute coordinates
298
- if not np.issubdtype(boxes.dtype, np.integer):
299
- abs_boxes = boxes.copy()
300
- abs_boxes[:, :, 0] = abs_boxes[:, :, 0] * shape[1]
301
- abs_boxes[:, :, 1] = abs_boxes[:, :, 1] * shape[0]
302
- abs_boxes = abs_boxes.round().astype(int)
303
- else:
304
- abs_boxes = boxes
305
- abs_boxes[:, 2:] = abs_boxes[:, 2:] + 1
306
-
307
- # TODO: optimize slicing to improve vectorization
308
- for idx, _box in enumerate(abs_boxes):
309
- cv2.fillPoly(masks[idx], [_box - 1], 1.0) # type: ignore[call-overload]
310
- return masks.astype(bool)
187
+ return iou_mat
311
188
 
312
189
 
313
190
  def nms(boxes: np.ndarray, thresh: float = 0.5) -> List[int]:
@@ -386,21 +263,15 @@ class LocalizationConfusion:
386
263
  ----
387
264
  iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
388
265
  use_polygons: if set to True, predictions and targets will be expected to have rotated format
389
- mask_shape: if use_polygons is True, describes the spatial shape of the image used
390
- use_broadcasting: if use_polygons is True, use broadcasting for IoU computation by consuming more memory
391
266
  """
392
267
 
393
268
  def __init__(
394
269
  self,
395
270
  iou_thresh: float = 0.5,
396
271
  use_polygons: bool = False,
397
- mask_shape: Tuple[int, int] = (1024, 1024),
398
- use_broadcasting: bool = True,
399
272
  ) -> None:
400
273
  self.iou_thresh = iou_thresh
401
274
  self.use_polygons = use_polygons
402
- self.mask_shape = mask_shape
403
- self.use_broadcasting = use_broadcasting
404
275
  self.reset()
405
276
 
406
277
  def update(self, gts: np.ndarray, preds: np.ndarray) -> None:
@@ -414,7 +285,7 @@ class LocalizationConfusion:
414
285
  if preds.shape[0] > 0:
415
286
  # Compute IoU
416
287
  if self.use_polygons:
417
- iou_mat = polygon_iou(gts, preds, self.mask_shape, self.use_broadcasting)
288
+ iou_mat = polygon_iou(gts, preds)
418
289
  else:
419
290
  iou_mat = box_iou(gts, preds)
420
291
  self.tot_iou += float(iou_mat.max(axis=0).sum())
@@ -441,7 +312,7 @@ class LocalizationConfusion:
441
312
  precision = self.matches / self.num_preds if self.num_preds > 0 else None
442
313
 
443
314
  # mean IoU
444
- mean_iou = self.tot_iou / self.num_preds if self.num_preds > 0 else None
315
+ mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None
445
316
 
446
317
  return recall, precision, mean_iou
447
318
 
@@ -492,21 +363,15 @@ class OCRMetric:
492
363
  ----
493
364
  iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
494
365
  use_polygons: if set to True, predictions and targets will be expected to have rotated format
495
- mask_shape: if use_polygons is True, describes the spatial shape of the image used
496
- use_broadcasting: if use_polygons is True, use broadcasting for IoU computation by consuming more memory
497
366
  """
498
367
 
499
368
  def __init__(
500
369
  self,
501
370
  iou_thresh: float = 0.5,
502
371
  use_polygons: bool = False,
503
- mask_shape: Tuple[int, int] = (1024, 1024),
504
- use_broadcasting: bool = True,
505
372
  ) -> None:
506
373
  self.iou_thresh = iou_thresh
507
374
  self.use_polygons = use_polygons
508
- self.mask_shape = mask_shape
509
- self.use_broadcasting = use_broadcasting
510
375
  self.reset()
511
376
 
512
377
  def update(
@@ -533,7 +398,7 @@ class OCRMetric:
533
398
  # Compute IoU
534
399
  if pred_boxes.shape[0] > 0:
535
400
  if self.use_polygons:
536
- iou_mat = polygon_iou(gt_boxes, pred_boxes, self.mask_shape, self.use_broadcasting)
401
+ iou_mat = polygon_iou(gt_boxes, pred_boxes)
537
402
  else:
538
403
  iou_mat = box_iou(gt_boxes, pred_boxes)
539
404
 
@@ -544,10 +409,10 @@ class OCRMetric:
544
409
  is_kept = iou_mat[gt_indices, pred_indices] >= self.iou_thresh
545
410
  # String comparison
546
411
  for gt_idx, pred_idx in zip(gt_indices[is_kept], pred_indices[is_kept]):
547
- _raw, _caseless, _unidecode, _unicase = string_match(gt_labels[gt_idx], pred_labels[pred_idx])
412
+ _raw, _caseless, _anyascii, _unicase = string_match(gt_labels[gt_idx], pred_labels[pred_idx])
548
413
  self.raw_matches += int(_raw)
549
414
  self.caseless_matches += int(_caseless)
550
- self.unidecode_matches += int(_unidecode)
415
+ self.anyascii_matches += int(_anyascii)
551
416
  self.unicase_matches += int(_unicase)
552
417
 
553
418
  self.num_gts += gt_boxes.shape[0]
@@ -564,7 +429,7 @@ class OCRMetric:
564
429
  recall = dict(
565
430
  raw=self.raw_matches / self.num_gts if self.num_gts > 0 else None,
566
431
  caseless=self.caseless_matches / self.num_gts if self.num_gts > 0 else None,
567
- unidecode=self.unidecode_matches / self.num_gts if self.num_gts > 0 else None,
432
+ anyascii=self.anyascii_matches / self.num_gts if self.num_gts > 0 else None,
568
433
  unicase=self.unicase_matches / self.num_gts if self.num_gts > 0 else None,
569
434
  )
570
435
 
@@ -572,12 +437,12 @@ class OCRMetric:
572
437
  precision = dict(
573
438
  raw=self.raw_matches / self.num_preds if self.num_preds > 0 else None,
574
439
  caseless=self.caseless_matches / self.num_preds if self.num_preds > 0 else None,
575
- unidecode=self.unidecode_matches / self.num_preds if self.num_preds > 0 else None,
440
+ anyascii=self.anyascii_matches / self.num_preds if self.num_preds > 0 else None,
576
441
  unicase=self.unicase_matches / self.num_preds if self.num_preds > 0 else None,
577
442
  )
578
443
 
579
444
  # mean IoU (overall detected boxes)
580
- mean_iou = self.tot_iou / self.num_preds if self.num_preds > 0 else None
445
+ mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None
581
446
 
582
447
  return recall, precision, mean_iou
583
448
 
@@ -587,7 +452,7 @@ class OCRMetric:
587
452
  self.tot_iou = 0.0
588
453
  self.raw_matches = 0
589
454
  self.caseless_matches = 0
590
- self.unidecode_matches = 0
455
+ self.anyascii_matches = 0
591
456
  self.unicase_matches = 0
592
457
 
593
458
 
@@ -631,21 +496,15 @@ class DetectionMetric:
631
496
  ----
632
497
  iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
633
498
  use_polygons: if set to True, predictions and targets will be expected to have rotated format
634
- mask_shape: if use_polygons is True, describes the spatial shape of the image used
635
- use_broadcasting: if use_polygons is True, use broadcasting for IoU computation by consuming more memory
636
499
  """
637
500
 
638
501
  def __init__(
639
502
  self,
640
503
  iou_thresh: float = 0.5,
641
504
  use_polygons: bool = False,
642
- mask_shape: Tuple[int, int] = (1024, 1024),
643
- use_broadcasting: bool = True,
644
505
  ) -> None:
645
506
  self.iou_thresh = iou_thresh
646
507
  self.use_polygons = use_polygons
647
- self.mask_shape = mask_shape
648
- self.use_broadcasting = use_broadcasting
649
508
  self.reset()
650
509
 
651
510
  def update(
@@ -672,7 +531,7 @@ class DetectionMetric:
672
531
  # Compute IoU
673
532
  if pred_boxes.shape[0] > 0:
674
533
  if self.use_polygons:
675
- iou_mat = polygon_iou(gt_boxes, pred_boxes, self.mask_shape, self.use_broadcasting)
534
+ iou_mat = polygon_iou(gt_boxes, pred_boxes)
676
535
  else:
677
536
  iou_mat = box_iou(gt_boxes, pred_boxes)
678
537
 
@@ -701,7 +560,7 @@ class DetectionMetric:
701
560
  precision = self.num_matches / self.num_preds if self.num_preds > 0 else None
702
561
 
703
562
  # mean IoU (overall detected boxes)
704
- mean_iou = self.tot_iou / self.num_preds if self.num_preds > 0 else None
563
+ mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None
705
564
 
706
565
  return recall, precision, mean_iou
707
566