PyPI - python-doctr - Versions diffs - 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl - Mend

python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (162) hide show

doctr/contrib/__init__.py +1 -0
doctr/contrib/artefacts.py +7 -9
doctr/contrib/base.py +8 -17
doctr/datasets/cord.py +17 -7
doctr/datasets/datasets/__init__.py +4 -4
doctr/datasets/datasets/base.py +16 -16
doctr/datasets/datasets/pytorch.py +12 -12
doctr/datasets/datasets/tensorflow.py +10 -10
doctr/datasets/detection.py +6 -9
doctr/datasets/doc_artefacts.py +3 -4
doctr/datasets/funsd.py +17 -6
doctr/datasets/generator/__init__.py +4 -4
doctr/datasets/generator/base.py +16 -17
doctr/datasets/generator/pytorch.py +1 -3
doctr/datasets/generator/tensorflow.py +1 -3
doctr/datasets/ic03.py +14 -5
doctr/datasets/ic13.py +13 -5
doctr/datasets/iiit5k.py +31 -20
doctr/datasets/iiithws.py +4 -5
doctr/datasets/imgur5k.py +15 -5
doctr/datasets/loader.py +4 -7
doctr/datasets/mjsynth.py +6 -5
doctr/datasets/ocr.py +3 -4
doctr/datasets/orientation.py +3 -4
doctr/datasets/recognition.py +3 -4
doctr/datasets/sroie.py +16 -5
doctr/datasets/svhn.py +16 -5
doctr/datasets/svt.py +14 -5
doctr/datasets/synthtext.py +14 -5
doctr/datasets/utils.py +37 -27
doctr/datasets/vocabs.py +21 -7
doctr/datasets/wildreceipt.py +25 -10
doctr/file_utils.py +18 -4
doctr/io/elements.py +69 -81
doctr/io/html.py +1 -3
doctr/io/image/__init__.py +3 -3
doctr/io/image/base.py +2 -5
doctr/io/image/pytorch.py +3 -12
doctr/io/image/tensorflow.py +2 -11
doctr/io/pdf.py +5 -7
doctr/io/reader.py +5 -11
doctr/models/_utils.py +14 -22
doctr/models/builder.py +32 -50
doctr/models/classification/magc_resnet/__init__.py +3 -3
doctr/models/classification/magc_resnet/pytorch.py +10 -13
doctr/models/classification/magc_resnet/tensorflow.py +21 -17
doctr/models/classification/mobilenet/__init__.py +3 -3
doctr/models/classification/mobilenet/pytorch.py +7 -17
doctr/models/classification/mobilenet/tensorflow.py +22 -29
doctr/models/classification/predictor/__init__.py +4 -4
doctr/models/classification/predictor/pytorch.py +13 -11
doctr/models/classification/predictor/tensorflow.py +13 -11
doctr/models/classification/resnet/__init__.py +4 -4
doctr/models/classification/resnet/pytorch.py +21 -31
doctr/models/classification/resnet/tensorflow.py +41 -39
doctr/models/classification/textnet/__init__.py +3 -3
doctr/models/classification/textnet/pytorch.py +10 -17
doctr/models/classification/textnet/tensorflow.py +19 -20
doctr/models/classification/vgg/__init__.py +3 -3
doctr/models/classification/vgg/pytorch.py +5 -7
doctr/models/classification/vgg/tensorflow.py +18 -15
doctr/models/classification/vit/__init__.py +3 -3
doctr/models/classification/vit/pytorch.py +8 -14
doctr/models/classification/vit/tensorflow.py +16 -16
doctr/models/classification/zoo.py +36 -19
doctr/models/core.py +3 -3
doctr/models/detection/_utils/__init__.py +4 -4
doctr/models/detection/_utils/base.py +4 -7
doctr/models/detection/_utils/pytorch.py +1 -5
doctr/models/detection/_utils/tensorflow.py +1 -5
doctr/models/detection/core.py +2 -8
doctr/models/detection/differentiable_binarization/__init__.py +4 -4
doctr/models/detection/differentiable_binarization/base.py +7 -17
doctr/models/detection/differentiable_binarization/pytorch.py +27 -30
doctr/models/detection/differentiable_binarization/tensorflow.py +49 -37
doctr/models/detection/fast/__init__.py +4 -4
doctr/models/detection/fast/base.py +6 -14
doctr/models/detection/fast/pytorch.py +24 -31
doctr/models/detection/fast/tensorflow.py +28 -37
doctr/models/detection/linknet/__init__.py +4 -4
doctr/models/detection/linknet/base.py +6 -15
doctr/models/detection/linknet/pytorch.py +24 -27
doctr/models/detection/linknet/tensorflow.py +36 -33
doctr/models/detection/predictor/__init__.py +5 -5
doctr/models/detection/predictor/pytorch.py +6 -7
doctr/models/detection/predictor/tensorflow.py +7 -8
doctr/models/detection/zoo.py +27 -7
doctr/models/factory/hub.py +8 -13
doctr/models/kie_predictor/__init__.py +5 -5
doctr/models/kie_predictor/base.py +8 -5
doctr/models/kie_predictor/pytorch.py +22 -19
doctr/models/kie_predictor/tensorflow.py +21 -15
doctr/models/modules/layers/__init__.py +3 -3
doctr/models/modules/layers/pytorch.py +6 -9
doctr/models/modules/layers/tensorflow.py +5 -7
doctr/models/modules/transformer/__init__.py +3 -3
doctr/models/modules/transformer/pytorch.py +12 -13
doctr/models/modules/transformer/tensorflow.py +9 -12
doctr/models/modules/vision_transformer/__init__.py +3 -3
doctr/models/modules/vision_transformer/pytorch.py +3 -4
doctr/models/modules/vision_transformer/tensorflow.py +4 -4
doctr/models/predictor/__init__.py +5 -5
doctr/models/predictor/base.py +52 -41
doctr/models/predictor/pytorch.py +16 -13
doctr/models/predictor/tensorflow.py +16 -10
doctr/models/preprocessor/__init__.py +4 -4
doctr/models/preprocessor/pytorch.py +13 -17
doctr/models/preprocessor/tensorflow.py +11 -15
doctr/models/recognition/core.py +3 -7
doctr/models/recognition/crnn/__init__.py +4 -4
doctr/models/recognition/crnn/pytorch.py +20 -28
doctr/models/recognition/crnn/tensorflow.py +19 -29
doctr/models/recognition/master/__init__.py +3 -3
doctr/models/recognition/master/base.py +3 -7
doctr/models/recognition/master/pytorch.py +22 -24
doctr/models/recognition/master/tensorflow.py +21 -26
doctr/models/recognition/parseq/__init__.py +3 -3
doctr/models/recognition/parseq/base.py +3 -7
doctr/models/recognition/parseq/pytorch.py +26 -26
doctr/models/recognition/parseq/tensorflow.py +26 -30
doctr/models/recognition/predictor/__init__.py +5 -5
doctr/models/recognition/predictor/_utils.py +7 -10
doctr/models/recognition/predictor/pytorch.py +6 -6
doctr/models/recognition/predictor/tensorflow.py +5 -6
doctr/models/recognition/sar/__init__.py +4 -4
doctr/models/recognition/sar/pytorch.py +20 -21
doctr/models/recognition/sar/tensorflow.py +19 -24
doctr/models/recognition/utils.py +5 -10
doctr/models/recognition/vitstr/__init__.py +4 -4
doctr/models/recognition/vitstr/base.py +3 -7
doctr/models/recognition/vitstr/pytorch.py +18 -20
doctr/models/recognition/vitstr/tensorflow.py +21 -24
doctr/models/recognition/zoo.py +22 -11
doctr/models/utils/__init__.py +4 -4
doctr/models/utils/pytorch.py +13 -16
doctr/models/utils/tensorflow.py +31 -30
doctr/models/zoo.py +1 -5
doctr/transforms/functional/__init__.py +3 -3
doctr/transforms/functional/base.py +4 -11
doctr/transforms/functional/pytorch.py +21 -29
doctr/transforms/functional/tensorflow.py +10 -22
doctr/transforms/modules/__init__.py +4 -4
doctr/transforms/modules/base.py +48 -55
doctr/transforms/modules/pytorch.py +65 -28
doctr/transforms/modules/tensorflow.py +33 -44
doctr/utils/common_types.py +8 -9
doctr/utils/data.py +8 -12
doctr/utils/fonts.py +2 -7
doctr/utils/geometry.py +120 -64
doctr/utils/metrics.py +18 -38
doctr/utils/multithreading.py +4 -6
doctr/utils/reconstitution.py +157 -75
doctr/utils/repr.py +2 -3
doctr/utils/visualization.py +16 -29
doctr/version.py +1 -1
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/METADATA +59 -57
python_doctr-0.11.0.dist-info/RECORD +173 -0
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/WHEEL +1 -1
python_doctr-0.9.0.dist-info/RECORD +0 -173
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/LICENSE +0 -0
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/top_level.txt +0 -0
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/zip-safe +0 -0

doctr/io/reader.py CHANGED Viewed

@@ -1,10 +1,10 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+from collections.abc import Sequence
 from pathlib import Path
-from typing import List, Sequence, Union
 import numpy as np
@@ -22,37 +22,33 @@ class DocumentFile:
     """Read a document from multiple extensions"""
     @classmethod
-    def from_pdf(cls, file: AbstractFile, **kwargs) -> List[np.ndarray]:
+    def from_pdf(cls, file: AbstractFile, **kwargs) -> list[np.ndarray]:
         """Read a PDF file
         >>> from doctr.io import DocumentFile
         >>> doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
         Args:
-        ----
             file: the path to the PDF file or a binary stream
             **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
         Returns:
-        -------
             the list of pages decoded as numpy ndarray of shape H x W x 3
         """
         return read_pdf(file, **kwargs)
     @classmethod
-    def from_url(cls, url: str, **kwargs) -> List[np.ndarray]:
+    def from_url(cls, url: str, **kwargs) -> list[np.ndarray]:
         """Interpret a web page as a PDF document
         >>> from doctr.io import DocumentFile
         >>> doc = DocumentFile.from_url("https://www.yoursite.com")
         Args:
-        ----
             url: the URL of the target web page
             **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
         Returns:
-        -------
             the list of pages decoded as numpy ndarray of shape H x W x 3
         """
         requires_package(
@@ -64,19 +60,17 @@ class DocumentFile:
         return cls.from_pdf(pdf_stream, **kwargs)
     @classmethod
-    def from_images(cls, files: Union[Sequence[AbstractFile], AbstractFile], **kwargs) -> List[np.ndarray]:
+    def from_images(cls, files: Sequence[AbstractFile] | AbstractFile, **kwargs) -> list[np.ndarray]:
         """Read an image file (or a collection of image files) and convert it into an image in numpy format
         >>> from doctr.io import DocumentFile
         >>> pages = DocumentFile.from_images(["path/to/your/page1.png", "path/to/your/page2.png"])
         Args:
-        ----
             files: the path to the image file or a binary stream, or a collection of those
             **kwargs: additional parameters to :meth:`doctr.io.image.read_img_as_numpy`
         Returns:
-        -------
             the list of pages decoded as numpy ndarray of shape H x W x 3
         """
         if isinstance(files, (str, Path, bytes)):

doctr/models/_utils.py CHANGED Viewed

@@ -1,11 +1,11 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 from math import floor
 from statistics import median_low
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any
 import cv2
 import numpy as np
@@ -20,11 +20,9 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float:
     """Get the maximum shape ratio of a contour.
     Args:
-    ----
         contour: the contour from cv2.findContour
     Returns:
-    -------
         the maximum shape ratio
     """
     _, (w, h), _ = cv2.minAreaRect(contour)
@@ -33,7 +31,7 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float:
 def estimate_orientation(
     img: np.ndarray,
-    general_page_orientation: Optional[Tuple[int, float]] = None,
+    general_page_orientation: tuple[int, float] | None = None,
     n_ct: int = 70,
     ratio_threshold_for_lines: float = 3,
     min_confidence: float = 0.2,
@@ -43,7 +41,6 @@ def estimate_orientation(
      lines of the document and the assumption that they should be horizontal.
     Args:
-    ----
         img: the img or bitmap to analyze (H, W, C)
         general_page_orientation: the general orientation of the page (angle [0, 90, 180, 270 (-90)], confidence)
             estimated by a model
@@ -53,7 +50,6 @@ def estimate_orientation(
         lower_area: the minimum area of a contour to be considered
     Returns:
-    -------
         the estimated angle of the page (clockwise, negative for left side rotation, positive for right side rotation)
     """
     assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
@@ -64,13 +60,13 @@ def estimate_orientation(
         gray_img = cv2.medianBlur(gray_img, 5)
         thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
     else:
-        thresh = img.astype(np.uint8)  # type: ignore[assignment]
+        thresh = img.astype(np.uint8)
     page_orientation, orientation_confidence = general_page_orientation or (None, 0.0)
     if page_orientation and orientation_confidence >= min_confidence:
         # We rotate the image to the general orientation which improves the detection
         # No expand needed bitmap is already padded
-        thresh = rotate_image(thresh, -page_orientation)  # type: ignore
+        thresh = rotate_image(thresh, -page_orientation)
     else:  # That's only required if we do not work on the detection models bin map
         # try to merge words in lines
         (h, w) = img.shape[:2]
@@ -119,9 +115,9 @@ def estimate_orientation(
 def rectify_crops(
-    crops: List[np.ndarray],
-    orientations: List[int],
-) -> List[np.ndarray]:
+    crops: list[np.ndarray],
+    orientations: list[int],
+) -> list[np.ndarray]:
     """Rotate each crop of the list according to the predicted orientation:
     0: already straight, no rotation
     1: 90 ccw, rotate 3 times ccw
@@ -139,8 +135,8 @@ def rectify_crops(
 def rectify_loc_preds(
     page_loc_preds: np.ndarray,
-    orientations: List[int],
-) -> Optional[np.ndarray]:
+    orientations: list[int],
+) -> np.ndarray | None:
     """Orient the quadrangle (Polygon4P) according to the predicted orientation,
     so that the points are in this order: top L, top R, bot R, bot L if the crop is readable
     """
@@ -157,16 +153,14 @@ def rectify_loc_preds(
     )
-def get_language(text: str) -> Tuple[str, float]:
+def get_language(text: str) -> tuple[str, float]:
     """Get languages of a text using langdetect model.
     Get the language with the highest probability or no language if only a few words or a low probability
     Args:
-    ----
         text (str): text
     Returns:
-    -------
         The detected language in ISO 639 code and confidence score
     """
     try:
@@ -179,16 +173,14 @@ def get_language(text: str) -> Tuple[str, float]:
 def invert_data_structure(
-    x: Union[List[Dict[str, Any]], Dict[str, List[Any]]],
-) -> Union[List[Dict[str, Any]], Dict[str, List[Any]]]:
-    """Invert a List of Dict of elements to a Dict of list of elements and the other way around
+    x: list[dict[str, Any]] | dict[str, list[Any]],
+) -> list[dict[str, Any]] | dict[str, list[Any]]:
+    """Invert a list of dict of elements to a dict of list of elements and the other way around
     Args:
-    ----
         x: a list of dictionaries with the same keys or a dictionary of lists of the same length
     Returns:
-    -------
         dictionary of list when x is a list of dictionaries or a list of dictionaries when x is dictionary of lists
     """
     if isinstance(x, dict):

doctr/models/builder.py CHANGED Viewed

@@ -1,10 +1,10 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any
 import numpy as np
 from scipy.cluster.hierarchy import fclusterdata
@@ -20,7 +20,6 @@ class DocumentBuilder(NestedObject):
     """Implements a document builder
     Args:
-    ----
         resolve_lines: whether words should be automatically grouped into lines
         resolve_blocks: whether lines should be automatically grouped into blocks
         paragraph_break: relative length of the minimum space separating paragraphs
@@ -41,15 +40,13 @@ class DocumentBuilder(NestedObject):
         self.export_as_straight_boxes = export_as_straight_boxes
     @staticmethod
-    def _sort_boxes(boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+    def _sort_boxes(boxes: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
         """Sort bounding boxes from top to bottom, left to right
         Args:
-        ----
             boxes: bounding boxes of shape (N, 4) or (N, 4, 2) (in case of rotated bbox)
         Returns:
-        -------
             tuple: indices of ordered boxes of shape (N,), boxes
                 If straight boxes are passed to the function, boxes are unchanged
                 else: boxes returned are straight boxes fitted to the straightened rotated boxes
@@ -65,16 +62,14 @@ class DocumentBuilder(NestedObject):
             boxes = np.concatenate((boxes.min(1), boxes.max(1)), -1)
         return (boxes[:, 0] + 2 * boxes[:, 3] / np.median(boxes[:, 3] - boxes[:, 1])).argsort(), boxes
-    def _resolve_sub_lines(self, boxes: np.ndarray, word_idcs: List[int]) -> List[List[int]]:
+    def _resolve_sub_lines(self, boxes: np.ndarray, word_idcs: list[int]) -> list[list[int]]:
         """Split a line in sub_lines
         Args:
-        ----
             boxes: bounding boxes of shape (N, 4)
             word_idcs: list of indexes for the words of the line
         Returns:
-        -------
             A list of (sub-)lines computed from the original line (words)
         """
         lines = []
@@ -105,15 +100,13 @@ class DocumentBuilder(NestedObject):
         return lines
-    def _resolve_lines(self, boxes: np.ndarray) -> List[List[int]]:
+    def _resolve_lines(self, boxes: np.ndarray) -> list[list[int]]:
         """Order boxes to group them in lines
         Args:
-        ----
             boxes: bounding boxes of shape (N, 4) or (N, 4, 2) in case of rotated bbox
         Returns:
-        -------
             nested list of box indices
         """
         # Sort boxes, and straighten the boxes if they are rotated
@@ -153,16 +146,14 @@ class DocumentBuilder(NestedObject):
         return lines
     @staticmethod
-    def _resolve_blocks(boxes: np.ndarray, lines: List[List[int]]) -> List[List[List[int]]]:
+    def _resolve_blocks(boxes: np.ndarray, lines: list[list[int]]) -> list[list[list[int]]]:
         """Order lines to group them in blocks
         Args:
-        ----
             boxes: bounding boxes of shape (N, 4) or (N, 4, 2)
             lines: list of lines, each line is a list of idx
         Returns:
-        -------
             nested list of box indices
         """
         # Resolve enclosing boxes of lines
@@ -207,7 +198,7 @@ class DocumentBuilder(NestedObject):
         # Compute clusters
         clusters = fclusterdata(box_features, t=0.1, depth=4, criterion="distance", metric="euclidean")
-        _blocks: Dict[int, List[int]] = {}
+        _blocks: dict[int, list[int]] = {}
         # Form clusters
         for line_idx, cluster_idx in enumerate(clusters):
             if cluster_idx in _blocks.keys():
@@ -224,13 +215,12 @@ class DocumentBuilder(NestedObject):
         self,
         boxes: np.ndarray,
         objectness_scores: np.ndarray,
-        word_preds: List[Tuple[str, float]],
-        crop_orientations: List[Dict[str, Any]],
-    ) -> List[Block]:
+        word_preds: list[tuple[str, float]],
+        crop_orientations: list[dict[str, Any]],
+    ) -> list[Block]:
         """Gather independent words in structured blocks
         Args:
-        ----
             boxes: bounding boxes of all detected words of the page, of shape (N, 4) or (N, 4, 2)
             objectness_scores: objectness scores of all detected words of the page, of shape N
             word_preds: list of all detected words of the page, of shape N
@@ -238,7 +228,6 @@ class DocumentBuilder(NestedObject):
                 the general orientation (orientations + confidences) of the crops
         Returns:
-        -------
             list of block elements
         """
         if boxes.shape[0] != len(word_preds):
@@ -266,7 +255,7 @@ class DocumentBuilder(NestedObject):
                 Line([
                     Word(
                         *word_preds[idx],
-                        tuple([tuple(pt) for pt in boxes[idx].tolist()]),  # type: ignore[arg-type]
+                        tuple(tuple(pt) for pt in boxes[idx].tolist()),  # type: ignore[arg-type]
                         float(objectness_scores[idx]),
                         crop_orientations[idx],
                     )
@@ -295,19 +284,18 @@ class DocumentBuilder(NestedObject):
     def __call__(
         self,
-        pages: List[np.ndarray],
-        boxes: List[np.ndarray],
-        objectness_scores: List[np.ndarray],
-        text_preds: List[List[Tuple[str, float]]],
-        page_shapes: List[Tuple[int, int]],
-        crop_orientations: List[Dict[str, Any]],
-        orientations: Optional[List[Dict[str, Any]]] = None,
-        languages: Optional[List[Dict[str, Any]]] = None,
+        pages: list[np.ndarray],
+        boxes: list[np.ndarray],
+        objectness_scores: list[np.ndarray],
+        text_preds: list[list[tuple[str, float]]],
+        page_shapes: list[tuple[int, int]],
+        crop_orientations: list[dict[str, Any]],
+        orientations: list[dict[str, Any]] | None = None,
+        languages: list[dict[str, Any]] | None = None,
     ) -> Document:
         """Re-arrange detected words into structured blocks
         Args:
-        ----
             pages: list of N elements, where each element represents the page image
             boxes: list of N elements, where each element represents the localization predictions, of shape (*, 4)
                 or (*, 4, 2) for all words for a given page
@@ -322,7 +310,6 @@ class DocumentBuilder(NestedObject):
                 where each element is a dictionary containing the language (language + confidence)
         Returns:
-        -------
             document object
         """
         if len(boxes) != len(text_preds) != len(crop_orientations) != len(objectness_scores) or len(boxes) != len(
@@ -374,7 +361,6 @@ class KIEDocumentBuilder(DocumentBuilder):
     """Implements a KIE document builder
     Args:
-    ----
         resolve_lines: whether words should be automatically grouped into lines
         resolve_blocks: whether lines should be automatically grouped into blocks
         paragraph_break: relative length of the minimum space separating paragraphs
@@ -384,19 +370,18 @@ class KIEDocumentBuilder(DocumentBuilder):
     def __call__(  # type: ignore[override]
         self,
-        pages: List[np.ndarray],
-        boxes: List[Dict[str, np.ndarray]],
-        objectness_scores: List[Dict[str, np.ndarray]],
-        text_preds: List[Dict[str, List[Tuple[str, float]]]],
-        page_shapes: List[Tuple[int, int]],
-        crop_orientations: List[Dict[str, List[Dict[str, Any]]]],
-        orientations: Optional[List[Dict[str, Any]]] = None,
-        languages: Optional[List[Dict[str, Any]]] = None,
+        pages: list[np.ndarray],
+        boxes: list[dict[str, np.ndarray]],
+        objectness_scores: list[dict[str, np.ndarray]],
+        text_preds: list[dict[str, list[tuple[str, float]]]],
+        page_shapes: list[tuple[int, int]],
+        crop_orientations: list[dict[str, list[dict[str, Any]]]],
+        orientations: list[dict[str, Any]] | None = None,
+        languages: list[dict[str, Any]] | None = None,
     ) -> KIEDocument:
         """Re-arrange detected words into structured predictions
         Args:
-        ----
             pages: list of N elements, where each element represents the page image
             boxes: list of N dictionaries, where each element represents the localization predictions for a class,
                 of shape (*, 5) or (*, 6) for all predictions
@@ -411,7 +396,6 @@ class KIEDocumentBuilder(DocumentBuilder):
                 where each element is a dictionary containing the language (language + confidence)
         Returns:
-        -------
             document object
         """
         if len(boxes) != len(text_preds) != len(crop_orientations) != len(objectness_scores) or len(boxes) != len(
@@ -425,7 +409,7 @@ class KIEDocumentBuilder(DocumentBuilder):
         if self.export_as_straight_boxes and len(boxes) > 0:
             # If boxes are already straight OK, else fit a bounding rect
             if next(iter(boxes[0].values())).ndim == 3:
-                straight_boxes: List[Dict[str, np.ndarray]] = []
+                straight_boxes: list[dict[str, np.ndarray]] = []
                 # Iterate over pages
                 for p_boxes in boxes:
                     # Iterate over boxes of the pages
@@ -471,20 +455,18 @@ class KIEDocumentBuilder(DocumentBuilder):
         self,
         boxes: np.ndarray,
         objectness_scores: np.ndarray,
-        word_preds: List[Tuple[str, float]],
-        crop_orientations: List[Dict[str, Any]],
-    ) -> List[Prediction]:
+        word_preds: list[tuple[str, float]],
+        crop_orientations: list[dict[str, Any]],
+    ) -> list[Prediction]:
         """Gather independent words in structured blocks
         Args:
-        ----
             boxes: bounding boxes of all detected words of the page, of shape (N, 4) or (N, 4, 2)
             objectness_scores: objectness scores of all detected words of the page
             word_preds: list of all detected words of the page, of shape N
             crop_orientations: list of orientations for each word crop
         Returns:
-        -------
             list of block elements
         """
         if boxes.shape[0] != len(word_preds):
@@ -500,7 +482,7 @@ class KIEDocumentBuilder(DocumentBuilder):
             Prediction(
                 value=word_preds[idx][0],
                 confidence=word_preds[idx][1],
-                geometry=tuple([tuple(pt) for pt in boxes[idx].tolist()]),  # type: ignore[arg-type]
+                geometry=tuple(tuple(pt) for pt in boxes[idx].tolist()),  # type: ignore[arg-type]
                 objectness_score=float(objectness_scores[idx]),
                 crop_orientation=crop_orientations[idx],
             )

doctr/models/classification/magc_resnet/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
-if is_tf_available():
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
     from .tensorflow import *
-elif is_torch_available():
-    from .pytorch import *  # type: ignore[assignment]

doctr/models/classification/magc_resnet/pytorch.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -7,7 +7,7 @@
 import math
 from copy import deepcopy
 from functools import partial
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any
 import torch
 from torch import nn
@@ -20,7 +20,7 @@ from ..resnet.pytorch import ResNet
 __all__ = ["magc_resnet31"]
-default_cfgs: Dict[str, Dict[str, Any]] = {
+default_cfgs: dict[str, dict[str, Any]] = {
     "magc_resnet31": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
@@ -36,7 +36,6 @@ class MAGC(nn.Module):
     <https://arxiv.org/pdf/1910.02562.pdf>`_.
     Args:
-    ----
         inplanes: input channels
         headers: number of headers to split channels
         attn_scale: if True, re-scale attention to counteract the variance distributions
@@ -50,7 +49,7 @@ class MAGC(nn.Module):
         headers: int = 8,
         attn_scale: bool = False,
         ratio: float = 0.0625,  # bottleneck ratio of 1/16 as described in paper
-        cfg: Optional[Dict[str, Any]] = None,
+        cfg: dict[str, Any] | None = None,
     ) -> None:
         super().__init__()
@@ -105,12 +104,12 @@ class MAGC(nn.Module):
 def _magc_resnet(
     arch: str,
     pretrained: bool,
-    num_blocks: List[int],
-    output_channels: List[int],
-    stage_stride: List[int],
-    stage_conv: List[bool],
-    stage_pooling: List[Optional[Tuple[int, int]]],
-    ignore_keys: Optional[List[str]] = None,
+    num_blocks: list[int],
+    output_channels: list[int],
+    stage_stride: list[int],
+    stage_conv: list[bool],
+    stage_pooling: list[tuple[int, int] | None],
+    ignore_keys: list[str] | None = None,
     **kwargs: Any,
 ) -> ResNet:
     kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
@@ -154,12 +153,10 @@ def magc_resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet:
     >>> out = model(input_tensor)
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the ResNet architecture
     Returns:
-    -------
         A feature extractor model
     """
     return _magc_resnet(

doctr/models/classification/magc_resnet/tensorflow.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,27 +6,27 @@
 import math
 from copy import deepcopy
 from functools import partial
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any
 import tensorflow as tf
-from tensorflow.keras import layers
+from tensorflow.keras import activations, layers
 from tensorflow.keras.models import Sequential
 from doctr.datasets import VOCABS
-from ...utils import load_pretrained_params
+from ...utils import _build_model, load_pretrained_params
 from ..resnet.tensorflow import ResNet
 __all__ = ["magc_resnet31"]
-default_cfgs: Dict[str, Dict[str, Any]] = {
+default_cfgs: dict[str, dict[str, Any]] = {
     "magc_resnet31": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
         "input_shape": (32, 32, 3),
         "classes": list(VOCABS["french"]),
-        "url": "https://doctr-static.mindee.com/models?id=v0.6.0/magc_resnet31-addbb705.zip&src=0",
+        "url": "https://doctr-static.mindee.com/models?id=v0.9.0/magc_resnet31-16aa7d71.weights.h5&src=0",
     },
 }
@@ -36,7 +36,6 @@ class MAGC(layers.Layer):
     <https://arxiv.org/pdf/1910.02562.pdf>`_.
     Args:
-    ----
         inplanes: input channels
         headers: number of headers to split channels
         attn_scale: if True, re-scale attention to counteract the variance distributions
@@ -57,6 +56,7 @@ class MAGC(layers.Layer):
         self.headers = headers  # h
         self.inplanes = inplanes  # C
         self.attn_scale = attn_scale
+        self.ratio = ratio
         self.planes = int(inplanes * ratio)
         self.single_header_inplanes = int(inplanes / headers)  # C / h
@@ -97,7 +97,7 @@ class MAGC(layers.Layer):
         if self.attn_scale and self.headers > 1:
             context_mask = context_mask / math.sqrt(self.single_header_inplanes)
         # B*h, 1, H*W, 1
-        context_mask = tf.keras.activations.softmax(context_mask, axis=2)
+        context_mask = activations.softmax(context_mask, axis=2)
         # Compute context
         # B*h, 1, C/h, 1
@@ -114,18 +114,18 @@ class MAGC(layers.Layer):
         # Context modeling: B, H, W, C  ->  B, 1, 1, C
         context = self.context_modeling(inputs)
         # Transform: B, 1, 1, C  ->  B, 1, 1, C
-        transformed = self.transform(context)
+        transformed = self.transform(context, **kwargs)
         return inputs + transformed
 def _magc_resnet(
     arch: str,
     pretrained: bool,
-    num_blocks: List[int],
-    output_channels: List[int],
-    stage_downsample: List[bool],
-    stage_conv: List[bool],
-    stage_pooling: List[Optional[Tuple[int, int]]],
+    num_blocks: list[int],
+    output_channels: list[int],
+    stage_downsample: list[bool],
+    stage_conv: list[bool],
+    stage_pooling: list[tuple[int, int] | None],
     origin_stem: bool = True,
     **kwargs: Any,
 ) -> ResNet:
@@ -151,9 +151,15 @@ def _magc_resnet(
         cfg=_cfg,
         **kwargs,
     )
+    _build_model(model)
     # Load pretrained parameters
     if pretrained:
-        load_pretrained_params(model, default_cfgs[arch]["url"])
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # skip the mismatching layers for fine tuning
+        load_pretrained_params(
+            model, default_cfgs[arch]["url"], skip_mismatch=kwargs["num_classes"] != len(default_cfgs[arch]["classes"])
+        )
     return model
@@ -170,12 +176,10 @@ def magc_resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet:
     >>> out = model(input_tensor)
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the ResNet architecture
     Returns:
-    -------
         A feature extractor model
     """
     return _magc_resnet(

doctr/models/classification/mobilenet/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
-if is_tf_available():
-    from .tensorflow import *
-elif is_torch_available():
+if is_torch_available():
     from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *

python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl