python-doctr 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/contrib/__init__.py +1 -0
- doctr/contrib/artefacts.py +7 -9
- doctr/contrib/base.py +8 -17
- doctr/datasets/__init__.py +1 -0
- doctr/datasets/coco_text.py +139 -0
- doctr/datasets/cord.py +10 -8
- doctr/datasets/datasets/__init__.py +4 -4
- doctr/datasets/datasets/base.py +16 -16
- doctr/datasets/datasets/pytorch.py +12 -12
- doctr/datasets/datasets/tensorflow.py +10 -10
- doctr/datasets/detection.py +6 -9
- doctr/datasets/doc_artefacts.py +3 -4
- doctr/datasets/funsd.py +9 -8
- doctr/datasets/generator/__init__.py +4 -4
- doctr/datasets/generator/base.py +16 -17
- doctr/datasets/generator/pytorch.py +1 -3
- doctr/datasets/generator/tensorflow.py +1 -3
- doctr/datasets/ic03.py +5 -6
- doctr/datasets/ic13.py +6 -6
- doctr/datasets/iiit5k.py +10 -6
- doctr/datasets/iiithws.py +4 -5
- doctr/datasets/imgur5k.py +15 -7
- doctr/datasets/loader.py +4 -7
- doctr/datasets/mjsynth.py +6 -5
- doctr/datasets/ocr.py +3 -4
- doctr/datasets/orientation.py +3 -4
- doctr/datasets/recognition.py +4 -5
- doctr/datasets/sroie.py +6 -5
- doctr/datasets/svhn.py +7 -6
- doctr/datasets/svt.py +6 -7
- doctr/datasets/synthtext.py +19 -7
- doctr/datasets/utils.py +41 -35
- doctr/datasets/vocabs.py +1107 -49
- doctr/datasets/wildreceipt.py +14 -10
- doctr/file_utils.py +11 -7
- doctr/io/elements.py +96 -82
- doctr/io/html.py +1 -3
- doctr/io/image/__init__.py +3 -3
- doctr/io/image/base.py +2 -5
- doctr/io/image/pytorch.py +3 -12
- doctr/io/image/tensorflow.py +2 -11
- doctr/io/pdf.py +5 -7
- doctr/io/reader.py +5 -11
- doctr/models/_utils.py +15 -23
- doctr/models/builder.py +30 -48
- doctr/models/classification/__init__.py +1 -0
- doctr/models/classification/magc_resnet/__init__.py +3 -3
- doctr/models/classification/magc_resnet/pytorch.py +11 -15
- doctr/models/classification/magc_resnet/tensorflow.py +11 -14
- doctr/models/classification/mobilenet/__init__.py +3 -3
- doctr/models/classification/mobilenet/pytorch.py +20 -18
- doctr/models/classification/mobilenet/tensorflow.py +19 -23
- doctr/models/classification/predictor/__init__.py +4 -4
- doctr/models/classification/predictor/pytorch.py +7 -9
- doctr/models/classification/predictor/tensorflow.py +6 -8
- doctr/models/classification/resnet/__init__.py +4 -4
- doctr/models/classification/resnet/pytorch.py +47 -34
- doctr/models/classification/resnet/tensorflow.py +45 -35
- doctr/models/classification/textnet/__init__.py +3 -3
- doctr/models/classification/textnet/pytorch.py +20 -18
- doctr/models/classification/textnet/tensorflow.py +19 -17
- doctr/models/classification/vgg/__init__.py +3 -3
- doctr/models/classification/vgg/pytorch.py +21 -8
- doctr/models/classification/vgg/tensorflow.py +20 -14
- doctr/models/classification/vip/__init__.py +4 -0
- doctr/models/classification/vip/layers/__init__.py +4 -0
- doctr/models/classification/vip/layers/pytorch.py +615 -0
- doctr/models/classification/vip/pytorch.py +505 -0
- doctr/models/classification/vit/__init__.py +3 -3
- doctr/models/classification/vit/pytorch.py +18 -15
- doctr/models/classification/vit/tensorflow.py +15 -12
- doctr/models/classification/zoo.py +23 -14
- doctr/models/core.py +3 -3
- doctr/models/detection/_utils/__init__.py +4 -4
- doctr/models/detection/_utils/base.py +4 -7
- doctr/models/detection/_utils/pytorch.py +1 -5
- doctr/models/detection/_utils/tensorflow.py +1 -5
- doctr/models/detection/core.py +2 -8
- doctr/models/detection/differentiable_binarization/__init__.py +4 -4
- doctr/models/detection/differentiable_binarization/base.py +10 -21
- doctr/models/detection/differentiable_binarization/pytorch.py +37 -31
- doctr/models/detection/differentiable_binarization/tensorflow.py +26 -29
- doctr/models/detection/fast/__init__.py +4 -4
- doctr/models/detection/fast/base.py +8 -17
- doctr/models/detection/fast/pytorch.py +37 -35
- doctr/models/detection/fast/tensorflow.py +24 -28
- doctr/models/detection/linknet/__init__.py +4 -4
- doctr/models/detection/linknet/base.py +8 -18
- doctr/models/detection/linknet/pytorch.py +34 -28
- doctr/models/detection/linknet/tensorflow.py +24 -25
- doctr/models/detection/predictor/__init__.py +5 -5
- doctr/models/detection/predictor/pytorch.py +6 -7
- doctr/models/detection/predictor/tensorflow.py +5 -6
- doctr/models/detection/zoo.py +27 -7
- doctr/models/factory/hub.py +6 -10
- doctr/models/kie_predictor/__init__.py +5 -5
- doctr/models/kie_predictor/base.py +4 -5
- doctr/models/kie_predictor/pytorch.py +19 -20
- doctr/models/kie_predictor/tensorflow.py +14 -15
- doctr/models/modules/layers/__init__.py +3 -3
- doctr/models/modules/layers/pytorch.py +55 -10
- doctr/models/modules/layers/tensorflow.py +5 -7
- doctr/models/modules/transformer/__init__.py +3 -3
- doctr/models/modules/transformer/pytorch.py +12 -13
- doctr/models/modules/transformer/tensorflow.py +9 -10
- doctr/models/modules/vision_transformer/__init__.py +3 -3
- doctr/models/modules/vision_transformer/pytorch.py +2 -3
- doctr/models/modules/vision_transformer/tensorflow.py +3 -3
- doctr/models/predictor/__init__.py +5 -5
- doctr/models/predictor/base.py +28 -29
- doctr/models/predictor/pytorch.py +13 -14
- doctr/models/predictor/tensorflow.py +9 -10
- doctr/models/preprocessor/__init__.py +4 -4
- doctr/models/preprocessor/pytorch.py +13 -17
- doctr/models/preprocessor/tensorflow.py +10 -14
- doctr/models/recognition/__init__.py +1 -0
- doctr/models/recognition/core.py +3 -7
- doctr/models/recognition/crnn/__init__.py +4 -4
- doctr/models/recognition/crnn/pytorch.py +30 -29
- doctr/models/recognition/crnn/tensorflow.py +21 -24
- doctr/models/recognition/master/__init__.py +3 -3
- doctr/models/recognition/master/base.py +3 -7
- doctr/models/recognition/master/pytorch.py +32 -25
- doctr/models/recognition/master/tensorflow.py +22 -25
- doctr/models/recognition/parseq/__init__.py +3 -3
- doctr/models/recognition/parseq/base.py +3 -7
- doctr/models/recognition/parseq/pytorch.py +47 -29
- doctr/models/recognition/parseq/tensorflow.py +29 -27
- doctr/models/recognition/predictor/__init__.py +5 -5
- doctr/models/recognition/predictor/_utils.py +111 -52
- doctr/models/recognition/predictor/pytorch.py +9 -9
- doctr/models/recognition/predictor/tensorflow.py +8 -9
- doctr/models/recognition/sar/__init__.py +4 -4
- doctr/models/recognition/sar/pytorch.py +30 -22
- doctr/models/recognition/sar/tensorflow.py +22 -24
- doctr/models/recognition/utils.py +57 -53
- doctr/models/recognition/viptr/__init__.py +4 -0
- doctr/models/recognition/viptr/pytorch.py +277 -0
- doctr/models/recognition/vitstr/__init__.py +4 -4
- doctr/models/recognition/vitstr/base.py +3 -7
- doctr/models/recognition/vitstr/pytorch.py +28 -21
- doctr/models/recognition/vitstr/tensorflow.py +22 -23
- doctr/models/recognition/zoo.py +27 -11
- doctr/models/utils/__init__.py +4 -4
- doctr/models/utils/pytorch.py +41 -34
- doctr/models/utils/tensorflow.py +31 -23
- doctr/models/zoo.py +1 -5
- doctr/transforms/functional/__init__.py +3 -3
- doctr/transforms/functional/base.py +4 -11
- doctr/transforms/functional/pytorch.py +20 -28
- doctr/transforms/functional/tensorflow.py +10 -22
- doctr/transforms/modules/__init__.py +4 -4
- doctr/transforms/modules/base.py +48 -55
- doctr/transforms/modules/pytorch.py +58 -22
- doctr/transforms/modules/tensorflow.py +18 -32
- doctr/utils/common_types.py +8 -9
- doctr/utils/data.py +9 -13
- doctr/utils/fonts.py +2 -7
- doctr/utils/geometry.py +17 -48
- doctr/utils/metrics.py +17 -37
- doctr/utils/multithreading.py +4 -6
- doctr/utils/reconstitution.py +9 -13
- doctr/utils/repr.py +2 -3
- doctr/utils/visualization.py +16 -29
- doctr/version.py +1 -1
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/METADATA +70 -52
- python_doctr-0.12.0.dist-info/RECORD +180 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/WHEEL +1 -1
- python_doctr-0.10.0.dist-info/RECORD +0 -173
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info/licenses}/LICENSE +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/zip-safe +0 -0
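Most of the files above carry the same mechanical change, visible in the representative diffs below: deprecated `typing` aliases are swapped for built-in generics (PEP 585) and `X | None` unions (PEP 604), copyright headers are bumped to 2025, and the numpydoc-style `----`/`-------` underlines under `Args:`/`Returns:` are dropped from docstrings. An illustrative before/after sketch of the annotation change (function names here are made up, not from the package):

# Style up to 0.10.0: typing aliases
from typing import Dict, List, Optional, Tuple

def legacy(boxes: List[Tuple[int, int]], meta: Optional[Dict[str, float]] = None) -> Optional[List[int]]:
    return None

# Style from 0.12.0: built-in generics and unions, which require Python >= 3.10
# (or postponed evaluation of annotations) since they are evaluated at def time
def modern(boxes: list[tuple[int, int]], meta: dict[str, float] | None = None) -> list[int] | None:
    return None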
doctr/io/image/pytorch.py
CHANGED
@@ -1,10 +1,9 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 from io import BytesIO
-from typing import Tuple
 
 import numpy as np
 import torch
@@ -20,12 +19,10 @@ def tensor_from_pil(pil_img: Image.Image, dtype: torch.dtype = torch.float32) -> torch.Tensor:
     """Convert a PIL Image to a PyTorch tensor
 
     Args:
-    ----
         pil_img: a PIL image
         dtype: the output tensor data type
 
     Returns:
-    -------
         decoded image as tensor
     """
     if dtype == torch.float32:
@@ -40,12 +37,10 @@ def read_img_as_tensor(img_path: AbstractPath, dtype: torch.dtype = torch.float32) -> torch.Tensor:
     """Read an image file as a PyTorch tensor
 
     Args:
-    ----
         img_path: location of the image file
         dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
 
     Returns:
-    -------
         decoded image as a tensor
     """
     if dtype not in (torch.uint8, torch.float16, torch.float32):
@@ -59,12 +54,10 @@ def decode_img_as_tensor(img_content: bytes, dtype: torch.dtype = torch.float32) -> torch.Tensor:
     """Read a byte stream as a PyTorch tensor
 
     Args:
-    ----
         img_content: bytes of a decoded image
         dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
 
     Returns:
-    -------
         decoded image as a tensor
     """
     if dtype not in (torch.uint8, torch.float16, torch.float32):
@@ -78,12 +71,10 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -> torch.Tensor:
     """Read an image file as a PyTorch tensor
 
     Args:
-    ----
         npy_img: image encoded as a numpy array of shape (H, W, C) in np.uint8
         dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
 
     Returns:
-    -------
         same image as a tensor of shape (C, H, W)
     """
     if dtype not in (torch.uint8, torch.float16, torch.float32):
@@ -102,6 +93,6 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -> torch.Tensor:
     return img
 
 
-def get_img_shape(img: torch.Tensor) -> Tuple[int, int]:
+def get_img_shape(img: torch.Tensor) -> tuple[int, int]:
     """Get the shape of an image"""
-    return img.shape[-2:]
+    return img.shape[-2:]
doctr/io/image/tensorflow.py
CHANGED
@@ -1,9 +1,8 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Tuple
 
 import numpy as np
 import tensorflow as tf
@@ -19,12 +18,10 @@ def tensor_from_pil(pil_img: Image.Image, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
     """Convert a PIL Image to a TensorFlow tensor
 
     Args:
-    ----
         pil_img: a PIL image
         dtype: the output tensor data type
 
     Returns:
-    -------
         decoded image as tensor
     """
     npy_img = img_to_array(pil_img)
@@ -36,12 +33,10 @@ def read_img_as_tensor(img_path: AbstractPath, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
     """Read an image file as a TensorFlow tensor
 
     Args:
-    ----
         img_path: location of the image file
         dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
 
     Returns:
-    -------
         decoded image as a tensor
     """
     if dtype not in (tf.uint8, tf.float16, tf.float32):
@@ -61,12 +56,10 @@ def decode_img_as_tensor(img_content: bytes, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
     """Read a byte stream as a TensorFlow tensor
 
     Args:
-    ----
         img_content: bytes of a decoded image
         dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
 
     Returns:
-    -------
         decoded image as a tensor
     """
     if dtype not in (tf.uint8, tf.float16, tf.float32):
@@ -85,12 +78,10 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
     """Read an image file as a TensorFlow tensor
 
     Args:
-    ----
         npy_img: image encoded as a numpy array of shape (H, W, C) in np.uint8
         dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
 
     Returns:
-    -------
         same image as a tensor of shape (H, W, C)
     """
     if dtype not in (tf.uint8, tf.float16, tf.float32):
@@ -105,6 +96,6 @@ def tensor_from_numpy(npy_img: np.ndarray, dtype: tf.dtypes.DType = tf.float32) -> tf.Tensor:
     return img
 
 
-def get_img_shape(img: tf.Tensor) -> Tuple[int, int]:
+def get_img_shape(img: tf.Tensor) -> tuple[int, int]:
     """Get the shape of an image"""
     return img.shape[:2]
doctr/io/pdf.py
CHANGED
@@ -1,9 +1,9 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any, List, Optional
+from typing import Any
 
 import numpy as np
 import pypdfium2 as pdfium
@@ -15,18 +15,17 @@ __all__ = ["read_pdf"]
 
 def read_pdf(
     file: AbstractFile,
-    scale: float = 2,
+    scale: int = 2,
     rgb_mode: bool = True,
-    password: Optional[str] = None,
+    password: str | None = None,
     **kwargs: Any,
-) -> List[np.ndarray]:
+) -> list[np.ndarray]:
     """Read a PDF file and convert it into an image in numpy format
 
     >>> from doctr.io import read_pdf
     >>> doc = read_pdf("path/to/your/doc.pdf")
 
     Args:
-    ----
         file: the path to the PDF file
         scale: rendering scale (1 corresponds to 72dpi)
         rgb_mode: if True, the output will be RGB, otherwise BGR
@@ -34,7 +33,6 @@ def read_pdf(
         **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
 
     Returns:
-    -------
         the list of pages decoded as numpy ndarray of shape H x W x C
     """
     # Rasterise pages to numpy ndarrays with pypdfium2
doctr/io/reader.py
CHANGED
@@ -1,10 +1,10 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
+from collections.abc import Sequence
 from pathlib import Path
-from typing import List, Sequence, Union
 
 import numpy as np
 
@@ -22,37 +22,33 @@ class DocumentFile:
     """Read a document from multiple extensions"""
 
     @classmethod
-    def from_pdf(cls, file: AbstractFile, **kwargs) -> List[np.ndarray]:
+    def from_pdf(cls, file: AbstractFile, **kwargs) -> list[np.ndarray]:
         """Read a PDF file
 
         >>> from doctr.io import DocumentFile
         >>> doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
 
         Args:
-        ----
             file: the path to the PDF file or a binary stream
             **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
 
         Returns:
-        -------
             the list of pages decoded as numpy ndarray of shape H x W x 3
         """
         return read_pdf(file, **kwargs)
 
     @classmethod
-    def from_url(cls, url: str, **kwargs) -> List[np.ndarray]:
+    def from_url(cls, url: str, **kwargs) -> list[np.ndarray]:
         """Interpret a web page as a PDF document
 
         >>> from doctr.io import DocumentFile
         >>> doc = DocumentFile.from_url("https://www.yoursite.com")
 
         Args:
-        ----
             url: the URL of the target web page
             **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
 
         Returns:
-        -------
             the list of pages decoded as numpy ndarray of shape H x W x 3
         """
         requires_package(
@@ -64,19 +60,17 @@ class DocumentFile:
         return cls.from_pdf(pdf_stream, **kwargs)
 
     @classmethod
-    def from_images(cls, files: Union[Sequence[AbstractFile], AbstractFile], **kwargs) -> List[np.ndarray]:
+    def from_images(cls, files: Sequence[AbstractFile] | AbstractFile, **kwargs) -> list[np.ndarray]:
         """Read an image file (or a collection of image files) and convert it into an image in numpy format
 
         >>> from doctr.io import DocumentFile
         >>> pages = DocumentFile.from_images(["path/to/your/page1.png", "path/to/your/page2.png"])
 
         Args:
-        ----
             files: the path to the image file or a binary stream, or a collection of those
             **kwargs: additional parameters to :meth:`doctr.io.image.read_img_as_numpy`
 
         Returns:
-        -------
             the list of pages decoded as numpy ndarray of shape H x W x 3
         """
         if isinstance(files, (str, Path, bytes)):
doctr/models/_utils.py
CHANGED
@@ -1,11 +1,11 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 from math import floor
 from statistics import median_low
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any
 
 import cv2
 import numpy as np
@@ -20,11 +20,9 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float:
     """Get the maximum shape ratio of a contour.
 
     Args:
-    ----
         contour: the contour from cv2.findContour
 
     Returns:
-    -------
         the maximum shape ratio
     """
     _, (w, h), _ = cv2.minAreaRect(contour)
@@ -33,7 +31,7 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float:
 
 def estimate_orientation(
     img: np.ndarray,
-    general_page_orientation: Optional[Tuple[int, float]] = None,
+    general_page_orientation: tuple[int, float] | None = None,
     n_ct: int = 70,
     ratio_threshold_for_lines: float = 3,
     min_confidence: float = 0.2,
@@ -43,7 +41,6 @@ def estimate_orientation(
     lines of the document and the assumption that they should be horizontal.
 
     Args:
-    ----
         img: the img or bitmap to analyze (H, W, C)
         general_page_orientation: the general orientation of the page (angle [0, 90, 180, 270 (-90)], confidence)
             estimated by a model
@@ -53,7 +50,6 @@
         lower_area: the minimum area of a contour to be considered
 
     Returns:
-    -------
         the estimated angle of the page (clockwise, negative for left side rotation, positive for right side rotation)
     """
     assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
@@ -64,13 +60,13 @@
         gray_img = cv2.medianBlur(gray_img, 5)
         thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
     else:
-        thresh = img.astype(np.uint8)
+        thresh = img.astype(np.uint8)
 
     page_orientation, orientation_confidence = general_page_orientation or (None, 0.0)
     if page_orientation and orientation_confidence >= min_confidence:
         # We rotate the image to the general orientation which improves the detection
         # No expand needed bitmap is already padded
-        thresh = rotate_image(thresh, -page_orientation)
+        thresh = rotate_image(thresh, -page_orientation)
     else:  # That's only required if we do not work on the detection models bin map
         # try to merge words in lines
         (h, w) = img.shape[:2]
@@ -91,7 +87,7 @@
 
     angles = []
     for contour in contours[:n_ct]:
-        _, (w, h), angle = cv2.minAreaRect(contour)
+        _, (w, h), angle = cv2.minAreaRect(contour)
         if w / h > ratio_threshold_for_lines:  # select only contours with ratio like lines
             angles.append(angle)
         elif w / h < 1 / ratio_threshold_for_lines:  # if lines are vertical, substract 90 degree
@@ -119,9 +115,9 @@
 
 
 def rectify_crops(
-    crops: List[np.ndarray],
-    orientations: List[int],
-) -> List[np.ndarray]:
+    crops: list[np.ndarray],
+    orientations: list[int],
+) -> list[np.ndarray]:
     """Rotate each crop of the list according to the predicted orientation:
     0: already straight, no rotation
     1: 90 ccw, rotate 3 times ccw
@@ -139,8 +135,8 @@
 
 def rectify_loc_preds(
     page_loc_preds: np.ndarray,
-    orientations: List[int],
-) -> Optional[np.ndarray]:
+    orientations: list[int],
+) -> np.ndarray | None:
     """Orient the quadrangle (Polygon4P) according to the predicted orientation,
     so that the points are in this order: top L, top R, bot R, bot L if the crop is readable
     """
@@ -157,16 +153,14 @@
     )
 
 
-def get_language(text: str) -> Tuple[str, float]:
+def get_language(text: str) -> tuple[str, float]:
     """Get languages of a text using langdetect model.
     Get the language with the highest probability or no language if only a few words or a low probability
 
     Args:
-    ----
         text (str): text
 
     Returns:
-    -------
         The detected language in ISO 639 code and confidence score
     """
     try:
@@ -179,16 +173,14 @@ def get_language(text: str) -> Tuple[str, float]:
 
 
 def invert_data_structure(
-    x: Union[List[Dict[str, Any]], Dict[str, List[Any]]],
-) -> Union[List[Dict[str, Any]], Dict[str, List[Any]]]:
-    """Invert a List of Dict of elements to a Dict of List of elements and the other way around
+    x: list[dict[str, Any]] | dict[str, list[Any]],
+) -> list[dict[str, Any]] | dict[str, list[Any]]:
+    """Invert a list of dict of elements to a dict of list of elements and the other way around
 
     Args:
-    ----
         x: a list of dictionaries with the same keys or a dictionary of lists of the same length
 
     Returns:
-    -------
         dictionary of list when x is a list of dictionaries or a list of dictionaries when x is dictionary of lists
     """
     if isinstance(x, dict):
doctr/models/builder.py
CHANGED
@@ -1,10 +1,10 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any
 
 import numpy as np
 from scipy.cluster.hierarchy import fclusterdata
@@ -20,7 +20,6 @@ class DocumentBuilder(NestedObject):
     """Implements a document builder
 
     Args:
-    ----
         resolve_lines: whether words should be automatically grouped into lines
         resolve_blocks: whether lines should be automatically grouped into blocks
         paragraph_break: relative length of the minimum space separating paragraphs
@@ -41,15 +40,13 @@ class DocumentBuilder(NestedObject):
         self.export_as_straight_boxes = export_as_straight_boxes
 
     @staticmethod
-    def _sort_boxes(boxes: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+    def _sort_boxes(boxes: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
         """Sort bounding boxes from top to bottom, left to right
 
         Args:
-        ----
            boxes: bounding boxes of shape (N, 4) or (N, 4, 2) (in case of rotated bbox)
 
        Returns:
-        -------
            tuple: indices of ordered boxes of shape (N,), boxes
                If straight boxes are passed tpo the function, boxes are unchanged
                else: boxes returned are straight boxes fitted to the straightened rotated boxes
@@ -65,16 +62,14 @@ class DocumentBuilder(NestedObject):
         boxes = np.concatenate((boxes.min(1), boxes.max(1)), -1)
         return (boxes[:, 0] + 2 * boxes[:, 3] / np.median(boxes[:, 3] - boxes[:, 1])).argsort(), boxes
 
-    def _resolve_sub_lines(self, boxes: np.ndarray, word_idcs: List[int]) -> List[List[int]]:
+    def _resolve_sub_lines(self, boxes: np.ndarray, word_idcs: list[int]) -> list[list[int]]:
         """Split a line in sub_lines
 
         Args:
-        ----
             boxes: bounding boxes of shape (N, 4)
             word_idcs: list of indexes for the words of the line
 
         Returns:
-        -------
             A list of (sub-)lines computed from the original line (words)
         """
         lines = []
@@ -105,15 +100,13 @@ class DocumentBuilder(NestedObject):
 
         return lines
 
-    def _resolve_lines(self, boxes: np.ndarray) -> List[List[int]]:
+    def _resolve_lines(self, boxes: np.ndarray) -> list[list[int]]:
         """Order boxes to group them in lines
 
         Args:
-        ----
             boxes: bounding boxes of shape (N, 4) or (N, 4, 2) in case of rotated bbox
 
         Returns:
-        -------
             nested list of box indices
         """
         # Sort boxes, and straighten the boxes if they are rotated
@@ -153,16 +146,14 @@ class DocumentBuilder(NestedObject):
         return lines
 
     @staticmethod
-    def _resolve_blocks(boxes: np.ndarray, lines: List[List[int]]) -> List[List[List[int]]]:
+    def _resolve_blocks(boxes: np.ndarray, lines: list[list[int]]) -> list[list[list[int]]]:
         """Order lines to group them in blocks
 
         Args:
-        ----
             boxes: bounding boxes of shape (N, 4) or (N, 4, 2)
             lines: list of lines, each line is a list of idx
 
         Returns:
-        -------
             nested list of box indices
         """
         # Resolve enclosing boxes of lines
@@ -207,7 +198,7 @@ class DocumentBuilder(NestedObject):
         # Compute clusters
         clusters = fclusterdata(box_features, t=0.1, depth=4, criterion="distance", metric="euclidean")
 
-        _blocks: Dict[int, List[int]] = {}
+        _blocks: dict[int, list[int]] = {}
         # Form clusters
         for line_idx, cluster_idx in enumerate(clusters):
             if cluster_idx in _blocks.keys():
@@ -224,13 +215,12 @@
         self,
         boxes: np.ndarray,
         objectness_scores: np.ndarray,
-        word_preds: List[Tuple[str, float]],
-        crop_orientations: List[Dict[str, Any]],
-    ) -> List[Block]:
+        word_preds: list[tuple[str, float]],
+        crop_orientations: list[dict[str, Any]],
+    ) -> list[Block]:
         """Gather independent words in structured blocks
 
         Args:
-        ----
             boxes: bounding boxes of all detected words of the page, of shape (N, 4) or (N, 4, 2)
             objectness_scores: objectness scores of all detected words of the page, of shape N
             word_preds: list of all detected words of the page, of shape N
@@ -238,7 +228,6 @@ class DocumentBuilder(NestedObject):
             the general orientation (orientations + confidences) of the crops
 
         Returns:
-        -------
             list of block elements
         """
         if boxes.shape[0] != len(word_preds):
@@ -295,19 +284,18 @@ class DocumentBuilder(NestedObject):
 
     def __call__(
         self,
-        pages: List[np.ndarray],
-        boxes: List[np.ndarray],
-        objectness_scores: List[np.ndarray],
-        text_preds: List[List[Tuple[str, float]]],
-        page_shapes: List[Tuple[int, int]],
-        crop_orientations: List[Dict[str, Any]],
-        orientations: Optional[List[Dict[str, Any]]] = None,
-        languages: Optional[List[Dict[str, Any]]] = None,
+        pages: list[np.ndarray],
+        boxes: list[np.ndarray],
+        objectness_scores: list[np.ndarray],
+        text_preds: list[list[tuple[str, float]]],
+        page_shapes: list[tuple[int, int]],
+        crop_orientations: list[dict[str, Any]],
+        orientations: list[dict[str, Any]] | None = None,
+        languages: list[dict[str, Any]] | None = None,
     ) -> Document:
         """Re-arrange detected words into structured blocks
 
         Args:
-        ----
             pages: list of N elements, where each element represents the page image
             boxes: list of N elements, where each element represents the localization predictions, of shape (*, 4)
                 or (*, 4, 2) for all words for a given page
@@ -322,7 +310,6 @@ class DocumentBuilder(NestedObject):
             where each element is a dictionary containing the language (language + confidence)
 
         Returns:
-        -------
             document object
         """
         if len(boxes) != len(text_preds) != len(crop_orientations) != len(objectness_scores) or len(boxes) != len(
@@ -374,7 +361,6 @@ class KIEDocumentBuilder(DocumentBuilder):
     """Implements a KIE document builder
 
     Args:
-    ----
         resolve_lines: whether words should be automatically grouped into lines
         resolve_blocks: whether lines should be automatically grouped into blocks
         paragraph_break: relative length of the minimum space separating paragraphs
@@ -384,19 +370,18 @@ class KIEDocumentBuilder(DocumentBuilder):
 
     def __call__(  # type: ignore[override]
         self,
-        pages: List[np.ndarray],
-        boxes: List[Dict[str, np.ndarray]],
-        objectness_scores: List[Dict[str, np.ndarray]],
-        text_preds: List[Dict[str, List[Tuple[str, float]]]],
-        page_shapes: List[Tuple[int, int]],
-        crop_orientations: List[Dict[str, List[Dict[str, Any]]]],
-        orientations: Optional[List[Dict[str, Any]]] = None,
-        languages: Optional[List[Dict[str, Any]]] = None,
+        pages: list[np.ndarray],
+        boxes: list[dict[str, np.ndarray]],
+        objectness_scores: list[dict[str, np.ndarray]],
+        text_preds: list[dict[str, list[tuple[str, float]]]],
+        page_shapes: list[tuple[int, int]],
+        crop_orientations: list[dict[str, list[dict[str, Any]]]],
+        orientations: list[dict[str, Any]] | None = None,
+        languages: list[dict[str, Any]] | None = None,
     ) -> KIEDocument:
         """Re-arrange detected words into structured predictions
 
         Args:
-        ----
             pages: list of N elements, where each element represents the page image
             boxes: list of N dictionaries, where each element represents the localization predictions for a class,
                 of shape (*, 5) or (*, 6) for all predictions
@@ -411,7 +396,6 @@ class KIEDocumentBuilder(DocumentBuilder):
             where each element is a dictionary containing the language (language + confidence)
 
         Returns:
-        -------
             document object
         """
         if len(boxes) != len(text_preds) != len(crop_orientations) != len(objectness_scores) or len(boxes) != len(
@@ -425,7 +409,7 @@ class KIEDocumentBuilder(DocumentBuilder):
         if self.export_as_straight_boxes and len(boxes) > 0:
             # If boxes are already straight OK, else fit a bounding rect
             if next(iter(boxes[0].values())).ndim == 3:
-                straight_boxes: List[Dict[str, np.ndarray]] = []
+                straight_boxes: list[dict[str, np.ndarray]] = []
                 # Iterate over pages
                 for p_boxes in boxes:
                     # Iterate over boxes of the pages
@@ -471,20 +455,18 @@
         self,
         boxes: np.ndarray,
         objectness_scores: np.ndarray,
-        word_preds: List[Tuple[str, float]],
-        crop_orientations: List[Dict[str, Any]],
-    ) -> List[Prediction]:
+        word_preds: list[tuple[str, float]],
+        crop_orientations: list[dict[str, Any]],
+    ) -> list[Prediction]:
         """Gather independent words in structured blocks
 
         Args:
-        ----
             boxes: bounding boxes of all detected words of the page, of shape (N, 4) or (N, 4, 2)
             objectness_scores: objectness scores of all detected words of the page
             word_preds: list of all detected words of the page, of shape N
             crop_orientations: list of orientations for each word crop
 
         Returns:
-        -------
             list of block elements
         """
         if boxes.shape[0] != len(word_preds):
@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
 
-if is_tf_available():
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
     from .tensorflow import *
-elif is_torch_available():
-    from .pytorch import *  # type: ignore[assignment]