PyPI - python-doctr - Versions diffs - 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl - Mend

python-doctr 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85) hide show

doctr/__init__.py +1 -1
doctr/contrib/__init__.py +0 -0
doctr/contrib/artefacts.py +131 -0
doctr/contrib/base.py +105 -0
doctr/datasets/datasets/pytorch.py +2 -2
doctr/datasets/generator/base.py +6 -5
doctr/datasets/imgur5k.py +1 -1
doctr/datasets/loader.py +1 -6
doctr/datasets/utils.py +2 -1
doctr/datasets/vocabs.py +9 -2
doctr/file_utils.py +26 -12
doctr/io/elements.py +40 -6
doctr/io/html.py +2 -2
doctr/io/image/pytorch.py +6 -8
doctr/io/image/tensorflow.py +1 -1
doctr/io/pdf.py +5 -2
doctr/io/reader.py +6 -0
doctr/models/__init__.py +0 -1
doctr/models/_utils.py +57 -20
doctr/models/builder.py +71 -13
doctr/models/classification/mobilenet/pytorch.py +45 -9
doctr/models/classification/mobilenet/tensorflow.py +38 -7
doctr/models/classification/predictor/pytorch.py +18 -11
doctr/models/classification/predictor/tensorflow.py +16 -10
doctr/models/classification/textnet/pytorch.py +3 -3
doctr/models/classification/textnet/tensorflow.py +3 -3
doctr/models/classification/zoo.py +39 -15
doctr/models/detection/__init__.py +1 -0
doctr/models/detection/_utils/__init__.py +1 -0
doctr/models/detection/_utils/base.py +66 -0
doctr/models/detection/differentiable_binarization/base.py +4 -3
doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
doctr/models/detection/differentiable_binarization/tensorflow.py +14 -18
doctr/models/detection/fast/__init__.py +6 -0
doctr/models/detection/fast/base.py +257 -0
doctr/models/detection/fast/pytorch.py +442 -0
doctr/models/detection/fast/tensorflow.py +428 -0
doctr/models/detection/linknet/base.py +4 -3
doctr/models/detection/predictor/pytorch.py +15 -1
doctr/models/detection/predictor/tensorflow.py +15 -1
doctr/models/detection/zoo.py +21 -4
doctr/models/factory/hub.py +3 -12
doctr/models/kie_predictor/base.py +9 -3
doctr/models/kie_predictor/pytorch.py +41 -20
doctr/models/kie_predictor/tensorflow.py +36 -16
doctr/models/modules/layers/pytorch.py +89 -10
doctr/models/modules/layers/tensorflow.py +88 -10
doctr/models/modules/transformer/pytorch.py +2 -2
doctr/models/predictor/base.py +77 -50
doctr/models/predictor/pytorch.py +31 -20
doctr/models/predictor/tensorflow.py +27 -17
doctr/models/preprocessor/pytorch.py +4 -4
doctr/models/preprocessor/tensorflow.py +3 -2
doctr/models/recognition/master/pytorch.py +2 -2
doctr/models/recognition/parseq/pytorch.py +4 -3
doctr/models/recognition/parseq/tensorflow.py +4 -3
doctr/models/recognition/sar/pytorch.py +7 -6
doctr/models/recognition/sar/tensorflow.py +3 -9
doctr/models/recognition/vitstr/pytorch.py +1 -1
doctr/models/recognition/zoo.py +1 -1
doctr/models/zoo.py +2 -2
doctr/py.typed +0 -0
doctr/transforms/functional/base.py +1 -1
doctr/transforms/functional/pytorch.py +4 -4
doctr/transforms/modules/base.py +37 -15
doctr/transforms/modules/pytorch.py +66 -8
doctr/transforms/modules/tensorflow.py +63 -7
doctr/utils/fonts.py +7 -5
doctr/utils/geometry.py +35 -12
doctr/utils/metrics.py +33 -174
doctr/utils/reconstitution.py +126 -0
doctr/utils/visualization.py +5 -118
doctr/version.py +1 -1
{python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/METADATA +96 -91
{python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/RECORD +79 -75
{python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/WHEEL +1 -1
doctr/models/artefacts/__init__.py +0 -2
doctr/models/artefacts/barcode.py +0 -74
doctr/models/artefacts/face.py +0 -63
doctr/models/obj_detection/__init__.py +0 -1
doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
{python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/LICENSE +0 -0
{python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/top_level.txt +0 -0
{python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/zip-safe +0 -0

doctr/utils/metrics.py CHANGED Viewed

@@ -5,16 +5,14 @@
 from typing import Dict, List, Optional, Tuple
-import cv2
 import numpy as np
+from anyascii import anyascii
 from scipy.optimize import linear_sum_assignment
-from unidecode import unidecode
+from shapely.geometry import Polygon
 __all__ = [
     "TextMatch",
     "box_iou",
-    "box_ioa",
-    "mask_iou",
     "polygon_iou",
     "nms",
     "LocalizationConfusion",
@@ -34,16 +32,16 @@ def string_match(word1: str, word2: str) -> Tuple[bool, bool, bool, bool]:
     Returns:
     -------
         a tuple with booleans specifying respectively whether the raw strings, their lower-case counterparts, their
-            unidecode counterparts and their lower-case unidecode counterparts match
+            anyascii counterparts and their lower-case anyascii counterparts match
     """
     raw_match = word1 == word2
     caseless_match = word1.lower() == word2.lower()
-    unidecode_match = unidecode(word1) == unidecode(word2)
+    anyascii_match = anyascii(word1) == anyascii(word2)
     # Warning: the order is important here otherwise the pair ("EUR", "€") cannot be matched
-    unicase_match = unidecode(word1).lower() == unidecode(word2).lower()
+    unicase_match = anyascii(word1).lower() == anyascii(word2).lower()
-    return raw_match, caseless_match, unidecode_match, unicase_match
+    return raw_match, caseless_match, anyascii_match, unicase_match
 class TextMatch:
@@ -94,10 +92,10 @@ class TextMatch:
             raise AssertionError("prediction size does not match with ground-truth labels size")
         for gt_word, pred_word in zip(gt, pred):
-            _raw, _caseless, _unidecode, _unicase = string_match(gt_word, pred_word)
+            _raw, _caseless, _anyascii, _unicase = string_match(gt_word, pred_word)
             self.raw += int(_raw)
             self.caseless += int(_caseless)
-            self.unidecode += int(_unidecode)
+            self.anyascii += int(_anyascii)
             self.unicase += int(_unicase)
         self.total += len(gt)
@@ -107,8 +105,8 @@ class TextMatch:
         Returns
         -------
-            a dictionary with the exact match score for the raw data, its lower-case counterpart, its unidecode
-            counterpart and its lower-case unidecode counterpart
+            a dictionary with the exact match score for the raw data, its lower-case counterpart, its anyascii
+            counterpart and its lower-case anyascii counterpart
         """
         if self.total == 0:
             raise AssertionError("you need to update the metric before getting the summary")
@@ -116,14 +114,14 @@ class TextMatch:
         return dict(
             raw=self.raw / self.total,
             caseless=self.caseless / self.total,
-            unidecode=self.unidecode / self.total,
+            anyascii=self.anyascii / self.total,
             unicase=self.unicase / self.total,
         )
     def reset(self) -> None:
         self.raw = 0
         self.caseless = 0
-        self.unidecode = 0
+        self.anyascii = 0
         self.unicase = 0
         self.total = 0
@@ -158,66 +156,7 @@ def box_iou(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
     return iou_mat
-def box_ioa(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
-    """Computes the IoA (intersection over area) between two sets of bounding boxes:
-    ioa(i, j) = inter(i, j) / area(i)
-    Args:
-    ----
-        boxes_1: bounding boxes of shape (N, 4) in format (xmin, ymin, xmax, ymax)
-        boxes_2: bounding boxes of shape (M, 4) in format (xmin, ymin, xmax, ymax)
-    Returns:
-    -------
-        the IoA matrix of shape (N, M)
-    """
-    ioa_mat: np.ndarray = np.zeros((boxes_1.shape[0], boxes_2.shape[0]), dtype=np.float32)
-    if boxes_1.shape[0] > 0 and boxes_2.shape[0] > 0:
-        l1, t1, r1, b1 = np.split(boxes_1, 4, axis=1)
-        l2, t2, r2, b2 = np.split(boxes_2, 4, axis=1)
-        left = np.maximum(l1, l2.T)
-        top = np.maximum(t1, t2.T)
-        right = np.minimum(r1, r2.T)
-        bot = np.minimum(b1, b2.T)
-        intersection = np.clip(right - left, 0, np.Inf) * np.clip(bot - top, 0, np.Inf)
-        area = (r1 - l1) * (b1 - t1)
-        ioa_mat = intersection / area
-    return ioa_mat
-def mask_iou(masks_1: np.ndarray, masks_2: np.ndarray) -> np.ndarray:
-    """Computes the IoU between two sets of boolean masks
-    Args:
-    ----
-        masks_1: boolean masks of shape (N, H, W)
-        masks_2: boolean masks of shape (M, H, W)
-    Returns:
-    -------
-        the IoU matrix of shape (N, M)
-    """
-    if masks_1.shape[1:] != masks_2.shape[1:]:
-        raise AssertionError("both boolean masks should have the same spatial shape")
-    iou_mat: np.ndarray = np.zeros((masks_1.shape[0], masks_2.shape[0]), dtype=np.float32)
-    if masks_1.shape[0] > 0 and masks_2.shape[0] > 0:
-        axes = tuple(range(2, masks_1.ndim + 1))
-        intersection = np.logical_and(masks_1[:, None, ...], masks_2[None, ...]).sum(axis=axes)
-        union = np.logical_or(masks_1[:, None, ...], masks_2[None, ...]).sum(axis=axes)
-        iou_mat = intersection / union
-    return iou_mat
-def polygon_iou(
-    polys_1: np.ndarray, polys_2: np.ndarray, mask_shape: Tuple[int, int], use_broadcasting: bool = False
-) -> np.ndarray:
+def polygon_iou(polys_1: np.ndarray, polys_2: np.ndarray) -> np.ndarray:
     """Computes the IoU between two sets of rotated bounding boxes
     Args:
@@ -234,80 +173,18 @@ def polygon_iou(
     if polys_1.ndim != 3 or polys_2.ndim != 3:
         raise AssertionError("expects boxes to be in format (N, 4, 2)")
-    iou_mat: np.ndarray = np.zeros((polys_1.shape[0], polys_2.shape[0]), dtype=np.float32)
-    if polys_1.shape[0] > 0 and polys_2.shape[0] > 0:
-        if use_broadcasting:
-            masks_1 = rbox_to_mask(polys_1, shape=mask_shape)
-            masks_2 = rbox_to_mask(polys_2, shape=mask_shape)
-            iou_mat = mask_iou(masks_1, masks_2)
-        else:
-            # Save memory by doing the computation for each pair
-            for idx, b1 in enumerate(polys_1):
-                m1 = _rbox_to_mask(b1, mask_shape)
-                for _idx, b2 in enumerate(polys_2):
-                    m2 = _rbox_to_mask(b2, mask_shape)
-                    iou_mat[idx, _idx] = np.logical_and(m1, m2).sum() / np.logical_or(m1, m2).sum()
-    return iou_mat
+    iou_mat = np.zeros((polys_1.shape[0], polys_2.shape[0]), dtype=np.float32)
+    shapely_polys_1 = [Polygon(poly) for poly in polys_1]
+    shapely_polys_2 = [Polygon(poly) for poly in polys_2]
-def _rbox_to_mask(box: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
-    """Converts a rotated bounding box to a boolean mask
+    for i, poly1 in enumerate(shapely_polys_1):
+        for j, poly2 in enumerate(shapely_polys_2):
+            intersection_area = poly1.intersection(poly2).area
+            union_area = poly1.area + poly2.area - intersection_area
+            iou_mat[i, j] = intersection_area / union_area
-    Args:
-    ----
-        box: rotated bounding box of shape (4, 2)
-        shape: spatial shapes of the output masks
-    Returns:
-    -------
-        the boolean mask of the specified shape
-    """
-    mask: np.ndarray = np.zeros(shape, dtype=np.uint8)
-    # Get absolute coords
-    if not np.issubdtype(box.dtype, np.integer):
-        abs_box = box.copy()
-        abs_box[:, 0] = abs_box[:, 0] * shape[1]
-        abs_box[:, 1] = abs_box[:, 1] * shape[0]
-        abs_box = abs_box.round().astype(int)
-    else:
-        abs_box = box
-        abs_box[2:] = abs_box[2:] + 1
-    cv2.fillPoly(mask, [abs_box - 1], 1.0)  # type: ignore[call-overload]
-    return mask.astype(bool)
-def rbox_to_mask(boxes: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
-    """Converts rotated bounding boxes to boolean masks
-    Args:
-    ----
-        boxes: rotated bounding boxes of shape (N, 4, 2)
-        shape: spatial shapes of the output masks
-    Returns:
-    -------
-        the boolean masks of shape (N, H, W)
-    """
-    masks: np.ndarray = np.zeros((boxes.shape[0], *shape), dtype=np.uint8)
-    if boxes.shape[0] > 0:
-        # Get absolute coordinates
-        if not np.issubdtype(boxes.dtype, np.integer):
-            abs_boxes = boxes.copy()
-            abs_boxes[:, :, 0] = abs_boxes[:, :, 0] * shape[1]
-            abs_boxes[:, :, 1] = abs_boxes[:, :, 1] * shape[0]
-            abs_boxes = abs_boxes.round().astype(int)
-        else:
-            abs_boxes = boxes
-            abs_boxes[:, 2:] = abs_boxes[:, 2:] + 1
-        # TODO: optimize slicing to improve vectorization
-        for idx, _box in enumerate(abs_boxes):
-            cv2.fillPoly(masks[idx], [_box - 1], 1.0)  # type: ignore[call-overload]
-    return masks.astype(bool)
+    return iou_mat
 def nms(boxes: np.ndarray, thresh: float = 0.5) -> List[int]:
@@ -386,21 +263,15 @@ class LocalizationConfusion:
     ----
         iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
         use_polygons: if set to True, predictions and targets will be expected to have rotated format
-        mask_shape: if use_polygons is True, describes the spatial shape of the image used
-        use_broadcasting: if use_polygons is True, use broadcasting for IoU computation by consuming more memory
     """
     def __init__(
         self,
         iou_thresh: float = 0.5,
         use_polygons: bool = False,
-        mask_shape: Tuple[int, int] = (1024, 1024),
-        use_broadcasting: bool = True,
     ) -> None:
         self.iou_thresh = iou_thresh
         self.use_polygons = use_polygons
-        self.mask_shape = mask_shape
-        self.use_broadcasting = use_broadcasting
         self.reset()
     def update(self, gts: np.ndarray, preds: np.ndarray) -> None:
@@ -414,7 +285,7 @@ class LocalizationConfusion:
         if preds.shape[0] > 0:
             # Compute IoU
             if self.use_polygons:
-                iou_mat = polygon_iou(gts, preds, self.mask_shape, self.use_broadcasting)
+                iou_mat = polygon_iou(gts, preds)
             else:
                 iou_mat = box_iou(gts, preds)
             self.tot_iou += float(iou_mat.max(axis=0).sum())
@@ -441,7 +312,7 @@ class LocalizationConfusion:
         precision = self.matches / self.num_preds if self.num_preds > 0 else None
         # mean IoU
-        mean_iou = self.tot_iou / self.num_preds if self.num_preds > 0 else None
+        mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None
         return recall, precision, mean_iou
@@ -492,21 +363,15 @@ class OCRMetric:
     ----
         iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
         use_polygons: if set to True, predictions and targets will be expected to have rotated format
-        mask_shape: if use_polygons is True, describes the spatial shape of the image used
-        use_broadcasting: if use_polygons is True, use broadcasting for IoU computation by consuming more memory
     """
     def __init__(
         self,
         iou_thresh: float = 0.5,
         use_polygons: bool = False,
-        mask_shape: Tuple[int, int] = (1024, 1024),
-        use_broadcasting: bool = True,
     ) -> None:
         self.iou_thresh = iou_thresh
         self.use_polygons = use_polygons
-        self.mask_shape = mask_shape
-        self.use_broadcasting = use_broadcasting
         self.reset()
     def update(
@@ -533,7 +398,7 @@ class OCRMetric:
         # Compute IoU
         if pred_boxes.shape[0] > 0:
             if self.use_polygons:
-                iou_mat = polygon_iou(gt_boxes, pred_boxes, self.mask_shape, self.use_broadcasting)
+                iou_mat = polygon_iou(gt_boxes, pred_boxes)
             else:
                 iou_mat = box_iou(gt_boxes, pred_boxes)
@@ -544,10 +409,10 @@ class OCRMetric:
             is_kept = iou_mat[gt_indices, pred_indices] >= self.iou_thresh
             # String comparison
             for gt_idx, pred_idx in zip(gt_indices[is_kept], pred_indices[is_kept]):
-                _raw, _caseless, _unidecode, _unicase = string_match(gt_labels[gt_idx], pred_labels[pred_idx])
+                _raw, _caseless, _anyascii, _unicase = string_match(gt_labels[gt_idx], pred_labels[pred_idx])
                 self.raw_matches += int(_raw)
                 self.caseless_matches += int(_caseless)
-                self.unidecode_matches += int(_unidecode)
+                self.anyascii_matches += int(_anyascii)
                 self.unicase_matches += int(_unicase)
         self.num_gts += gt_boxes.shape[0]
@@ -564,7 +429,7 @@ class OCRMetric:
         recall = dict(
             raw=self.raw_matches / self.num_gts if self.num_gts > 0 else None,
             caseless=self.caseless_matches / self.num_gts if self.num_gts > 0 else None,
-            unidecode=self.unidecode_matches / self.num_gts if self.num_gts > 0 else None,
+            anyascii=self.anyascii_matches / self.num_gts if self.num_gts > 0 else None,
             unicase=self.unicase_matches / self.num_gts if self.num_gts > 0 else None,
         )
@@ -572,12 +437,12 @@ class OCRMetric:
         precision = dict(
             raw=self.raw_matches / self.num_preds if self.num_preds > 0 else None,
             caseless=self.caseless_matches / self.num_preds if self.num_preds > 0 else None,
-            unidecode=self.unidecode_matches / self.num_preds if self.num_preds > 0 else None,
+            anyascii=self.anyascii_matches / self.num_preds if self.num_preds > 0 else None,
             unicase=self.unicase_matches / self.num_preds if self.num_preds > 0 else None,
         )
         # mean IoU (overall detected boxes)
-        mean_iou = self.tot_iou / self.num_preds if self.num_preds > 0 else None
+        mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None
         return recall, precision, mean_iou
@@ -587,7 +452,7 @@ class OCRMetric:
         self.tot_iou = 0.0
         self.raw_matches = 0
         self.caseless_matches = 0
-        self.unidecode_matches = 0
+        self.anyascii_matches = 0
         self.unicase_matches = 0
@@ -631,21 +496,15 @@ class DetectionMetric:
     ----
         iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
         use_polygons: if set to True, predictions and targets will be expected to have rotated format
-        mask_shape: if use_polygons is True, describes the spatial shape of the image used
-        use_broadcasting: if use_polygons is True, use broadcasting for IoU computation by consuming more memory
     """
     def __init__(
         self,
         iou_thresh: float = 0.5,
         use_polygons: bool = False,
-        mask_shape: Tuple[int, int] = (1024, 1024),
-        use_broadcasting: bool = True,
     ) -> None:
         self.iou_thresh = iou_thresh
         self.use_polygons = use_polygons
-        self.mask_shape = mask_shape
-        self.use_broadcasting = use_broadcasting
         self.reset()
     def update(
@@ -672,7 +531,7 @@ class DetectionMetric:
         # Compute IoU
         if pred_boxes.shape[0] > 0:
             if self.use_polygons:
-                iou_mat = polygon_iou(gt_boxes, pred_boxes, self.mask_shape, self.use_broadcasting)
+                iou_mat = polygon_iou(gt_boxes, pred_boxes)
             else:
                 iou_mat = box_iou(gt_boxes, pred_boxes)
@@ -701,7 +560,7 @@ class DetectionMetric:
         precision = self.num_matches / self.num_preds if self.num_preds > 0 else None
         # mean IoU (overall detected boxes)
-        mean_iou = self.tot_iou / self.num_preds if self.num_preds > 0 else None
+        mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None
         return recall, precision, mean_iou

doctr/utils/reconstitution.py ADDED Viewed

@@ -0,0 +1,126 @@
+# Copyright (C) 2021-2024, Mindee.
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+from typing import Any, Dict, Optional
+import numpy as np
+from anyascii import anyascii
+from PIL import Image, ImageDraw
+from .fonts import get_font
+__all__ = ["synthesize_page", "synthesize_kie_page"]
+def synthesize_page(
+    page: Dict[str, Any],
+    draw_proba: bool = False,
+    font_family: Optional[str] = None,
+) -> np.ndarray:
+    """Draw a the content of the element page (OCR response) on a blank page.
+    Args:
+    ----
+        page: exported Page object to represent
+        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
+        font_size: size of the font, default font = 13
+        font_family: family of the font
+    Returns:
+    -------
+        the synthesized page
+    """
+    # Draw template
+    h, w = page["dimensions"]
+    response = 255 * np.ones((h, w, 3), dtype=np.int32)
+    # Draw each word
+    for block in page["blocks"]:
+        for line in block["lines"]:
+            for word in line["words"]:
+                # Get absolute word geometry
+                (xmin, ymin), (xmax, ymax) = word["geometry"]
+                xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
+                ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
+                # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
+                font = get_font(font_family, int(0.75 * (ymax - ymin)))
+                img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
+                d = ImageDraw.Draw(img)
+                # Draw in black the value of the word
+                try:
+                    d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
+                except UnicodeEncodeError:
+                    # When character cannot be encoded, use its anyascii version
+                    d.text((0, 0), anyascii(word["value"]), font=font, fill=(0, 0, 0))
+                # Colorize if draw_proba
+                if draw_proba:
+                    p = int(255 * word["confidence"])
+                    mask = np.where(np.array(img) == 0, 1, 0)
+                    proba: np.ndarray = np.array([255 - p, 0, p])
+                    color = mask * proba[np.newaxis, np.newaxis, :]
+                    white_mask = 255 * (1 - mask)
+                    img = color + white_mask
+                # Write to response page
+                response[ymin:ymax, xmin:xmax, :] = np.array(img)
+    return response
+def synthesize_kie_page(
+    page: Dict[str, Any],
+    draw_proba: bool = False,
+    font_family: Optional[str] = None,
+) -> np.ndarray:
+    """Draw a the content of the element page (OCR response) on a blank page.
+    Args:
+    ----
+        page: exported Page object to represent
+        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
+        font_size: size of the font, default font = 13
+        font_family: family of the font
+    Returns:
+    -------
+        the synthesized page
+    """
+    # Draw template
+    h, w = page["dimensions"]
+    response = 255 * np.ones((h, w, 3), dtype=np.int32)
+    # Draw each word
+    for predictions in page["predictions"].values():
+        for prediction in predictions:
+            # Get aboslute word geometry
+            (xmin, ymin), (xmax, ymax) = prediction["geometry"]
+            xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
+            ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
+            # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
+            font = get_font(font_family, int(0.75 * (ymax - ymin)))
+            img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
+            d = ImageDraw.Draw(img)
+            # Draw in black the value of the word
+            try:
+                d.text((0, 0), prediction["value"], font=font, fill=(0, 0, 0))
+            except UnicodeEncodeError:
+                # When character cannot be encoded, use its anyascii version
+                d.text((0, 0), anyascii(prediction["value"]), font=font, fill=(0, 0, 0))
+            # Colorize if draw_proba
+            if draw_proba:
+                p = int(255 * prediction["confidence"])
+                mask = np.where(np.array(img) == 0, 1, 0)
+                proba: np.ndarray = np.array([255 - p, 0, p])
+                color = mask * proba[np.newaxis, np.newaxis, :]
+                white_mask = 255 * (1 - mask)
+                img = color + white_mask
+            # Write to response page
+            response[ymin:ymax, xmin:xmax, :] = np.array(img)
+    return response

doctr/utils/visualization.py CHANGED Viewed

@@ -9,16 +9,12 @@ from typing import Any, Dict, List, Optional, Tuple, Union
 import cv2
 import matplotlib.patches as patches
 import matplotlib.pyplot as plt
-import mplcursors
 import numpy as np
 from matplotlib.figure import Figure
-from PIL import Image, ImageDraw
-from unidecode import unidecode
 from .common_types import BoundingBox, Polygon4P
-from .fonts import get_font
-__all__ = ["visualize_page", "synthesize_page", "visualize_kie_page", "synthesize_kie_page", "draw_boxes"]
+__all__ = ["visualize_page", "visualize_kie_page", "draw_boxes"]
 def rect_patch(
@@ -281,6 +277,8 @@ def visualize_page(
                     artists.append(rect)
     if interactive:
+        import mplcursors
         # Create mlp Cursor to hover patches in artists
         mplcursors.Cursor(artists, hover=2).connect("add", lambda sel: sel.annotation.set_text(sel.artist.get_label()))
     fig.tight_layout(pad=0.0)
@@ -288,63 +286,6 @@ def visualize_page(
     return fig
-def synthesize_page(
-    page: Dict[str, Any],
-    draw_proba: bool = False,
-    font_family: Optional[str] = None,
-) -> np.ndarray:
-    """Draw a the content of the element page (OCR response) on a blank page.
-    Args:
-    ----
-        page: exported Page object to represent
-        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
-        font_size: size of the font, default font = 13
-        font_family: family of the font
-    Returns:
-    -------
-        the synthesized page
-    """
-    # Draw template
-    h, w = page["dimensions"]
-    response = 255 * np.ones((h, w, 3), dtype=np.int32)
-    # Draw each word
-    for block in page["blocks"]:
-        for line in block["lines"]:
-            for word in line["words"]:
-                # Get aboslute word geometry
-                (xmin, ymin), (xmax, ymax) = word["geometry"]
-                xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
-                ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
-                # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
-                font = get_font(font_family, int(0.75 * (ymax - ymin)))
-                img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
-                d = ImageDraw.Draw(img)
-                # Draw in black the value of the word
-                try:
-                    d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
-                except UnicodeEncodeError:
-                    # When character cannot be encoded, use its unidecode version
-                    d.text((0, 0), unidecode(word["value"]), font=font, fill=(0, 0, 0))
-                # Colorize if draw_proba
-                if draw_proba:
-                    p = int(255 * word["confidence"])
-                    mask = np.where(np.array(img) == 0, 1, 0)
-                    proba: np.ndarray = np.array([255 - p, 0, p])
-                    color = mask * proba[np.newaxis, np.newaxis, :]
-                    white_mask = 255 * (1 - mask)
-                    img = color + white_mask
-                # Write to response page
-                response[ymin:ymax, xmin:xmax, :] = np.array(img)
-    return response
 def visualize_kie_page(
     page: Dict[str, Any],
     image: np.ndarray,
@@ -413,6 +354,8 @@ def visualize_kie_page(
                     artists.append(rect)
     if interactive:
+        import mplcursors
         # Create mlp Cursor to hover patches in artists
         mplcursors.Cursor(artists, hover=2).connect("add", lambda sel: sel.annotation.set_text(sel.artist.get_label()))
     fig.tight_layout(pad=0.0)
@@ -420,62 +363,6 @@ def visualize_kie_page(
     return fig
-def synthesize_kie_page(
-    page: Dict[str, Any],
-    draw_proba: bool = False,
-    font_family: Optional[str] = None,
-) -> np.ndarray:
-    """Draw a the content of the element page (OCR response) on a blank page.
-    Args:
-    ----
-        page: exported Page object to represent
-        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
-        font_size: size of the font, default font = 13
-        font_family: family of the font
-    Returns:
-    -------
-        the synthesized page
-    """
-    # Draw template
-    h, w = page["dimensions"]
-    response = 255 * np.ones((h, w, 3), dtype=np.int32)
-    # Draw each word
-    for predictions in page["predictions"].values():
-        for prediction in predictions:
-            # Get aboslute word geometry
-            (xmin, ymin), (xmax, ymax) = prediction["geometry"]
-            xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
-            ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
-            # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
-            font = get_font(font_family, int(0.75 * (ymax - ymin)))
-            img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
-            d = ImageDraw.Draw(img)
-            # Draw in black the value of the word
-            try:
-                d.text((0, 0), prediction["value"], font=font, fill=(0, 0, 0))
-            except UnicodeEncodeError:
-                # When character cannot be encoded, use its unidecode version
-                d.text((0, 0), unidecode(prediction["value"]), font=font, fill=(0, 0, 0))
-            # Colorize if draw_proba
-            if draw_proba:
-                p = int(255 * prediction["confidence"])
-                mask = np.where(np.array(img) == 0, 1, 0)
-                proba: np.ndarray = np.array([255 - p, 0, p])
-                color = mask * proba[np.newaxis, np.newaxis, :]
-                white_mask = 255 * (1 - mask)
-                img = color + white_mask
-            # Write to response page
-            response[ymin:ymax, xmin:xmax, :] = np.array(img)
-    return response
 def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: Optional[Tuple[int, int, int]] = None, **kwargs) -> None:
     """Draw an array of relative straight boxes on an image

doctr/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = 'v0.8.0'
1	+ __version__ = 'v0.9.0'

python-doctr 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

python-doctr 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl