PyPI - python-doctr - Versions diffs - 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl - Mend

python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (162) hide show

doctr/contrib/__init__.py +1 -0
doctr/contrib/artefacts.py +7 -9
doctr/contrib/base.py +8 -17
doctr/datasets/cord.py +17 -7
doctr/datasets/datasets/__init__.py +4 -4
doctr/datasets/datasets/base.py +16 -16
doctr/datasets/datasets/pytorch.py +12 -12
doctr/datasets/datasets/tensorflow.py +10 -10
doctr/datasets/detection.py +6 -9
doctr/datasets/doc_artefacts.py +3 -4
doctr/datasets/funsd.py +17 -6
doctr/datasets/generator/__init__.py +4 -4
doctr/datasets/generator/base.py +16 -17
doctr/datasets/generator/pytorch.py +1 -3
doctr/datasets/generator/tensorflow.py +1 -3
doctr/datasets/ic03.py +14 -5
doctr/datasets/ic13.py +13 -5
doctr/datasets/iiit5k.py +31 -20
doctr/datasets/iiithws.py +4 -5
doctr/datasets/imgur5k.py +15 -5
doctr/datasets/loader.py +4 -7
doctr/datasets/mjsynth.py +6 -5
doctr/datasets/ocr.py +3 -4
doctr/datasets/orientation.py +3 -4
doctr/datasets/recognition.py +3 -4
doctr/datasets/sroie.py +16 -5
doctr/datasets/svhn.py +16 -5
doctr/datasets/svt.py +14 -5
doctr/datasets/synthtext.py +14 -5
doctr/datasets/utils.py +37 -27
doctr/datasets/vocabs.py +21 -7
doctr/datasets/wildreceipt.py +25 -10
doctr/file_utils.py +18 -4
doctr/io/elements.py +69 -81
doctr/io/html.py +1 -3
doctr/io/image/__init__.py +3 -3
doctr/io/image/base.py +2 -5
doctr/io/image/pytorch.py +3 -12
doctr/io/image/tensorflow.py +2 -11
doctr/io/pdf.py +5 -7
doctr/io/reader.py +5 -11
doctr/models/_utils.py +14 -22
doctr/models/builder.py +32 -50
doctr/models/classification/magc_resnet/__init__.py +3 -3
doctr/models/classification/magc_resnet/pytorch.py +10 -13
doctr/models/classification/magc_resnet/tensorflow.py +21 -17
doctr/models/classification/mobilenet/__init__.py +3 -3
doctr/models/classification/mobilenet/pytorch.py +7 -17
doctr/models/classification/mobilenet/tensorflow.py +22 -29
doctr/models/classification/predictor/__init__.py +4 -4
doctr/models/classification/predictor/pytorch.py +13 -11
doctr/models/classification/predictor/tensorflow.py +13 -11
doctr/models/classification/resnet/__init__.py +4 -4
doctr/models/classification/resnet/pytorch.py +21 -31
doctr/models/classification/resnet/tensorflow.py +41 -39
doctr/models/classification/textnet/__init__.py +3 -3
doctr/models/classification/textnet/pytorch.py +10 -17
doctr/models/classification/textnet/tensorflow.py +19 -20
doctr/models/classification/vgg/__init__.py +3 -3
doctr/models/classification/vgg/pytorch.py +5 -7
doctr/models/classification/vgg/tensorflow.py +18 -15
doctr/models/classification/vit/__init__.py +3 -3
doctr/models/classification/vit/pytorch.py +8 -14
doctr/models/classification/vit/tensorflow.py +16 -16
doctr/models/classification/zoo.py +36 -19
doctr/models/core.py +3 -3
doctr/models/detection/_utils/__init__.py +4 -4
doctr/models/detection/_utils/base.py +4 -7
doctr/models/detection/_utils/pytorch.py +1 -5
doctr/models/detection/_utils/tensorflow.py +1 -5
doctr/models/detection/core.py +2 -8
doctr/models/detection/differentiable_binarization/__init__.py +4 -4
doctr/models/detection/differentiable_binarization/base.py +7 -17
doctr/models/detection/differentiable_binarization/pytorch.py +27 -30
doctr/models/detection/differentiable_binarization/tensorflow.py +49 -37
doctr/models/detection/fast/__init__.py +4 -4
doctr/models/detection/fast/base.py +6 -14
doctr/models/detection/fast/pytorch.py +24 -31
doctr/models/detection/fast/tensorflow.py +28 -37
doctr/models/detection/linknet/__init__.py +4 -4
doctr/models/detection/linknet/base.py +6 -15
doctr/models/detection/linknet/pytorch.py +24 -27
doctr/models/detection/linknet/tensorflow.py +36 -33
doctr/models/detection/predictor/__init__.py +5 -5
doctr/models/detection/predictor/pytorch.py +6 -7
doctr/models/detection/predictor/tensorflow.py +7 -8
doctr/models/detection/zoo.py +27 -7
doctr/models/factory/hub.py +8 -13
doctr/models/kie_predictor/__init__.py +5 -5
doctr/models/kie_predictor/base.py +8 -5
doctr/models/kie_predictor/pytorch.py +22 -19
doctr/models/kie_predictor/tensorflow.py +21 -15
doctr/models/modules/layers/__init__.py +3 -3
doctr/models/modules/layers/pytorch.py +6 -9
doctr/models/modules/layers/tensorflow.py +5 -7
doctr/models/modules/transformer/__init__.py +3 -3
doctr/models/modules/transformer/pytorch.py +12 -13
doctr/models/modules/transformer/tensorflow.py +9 -12
doctr/models/modules/vision_transformer/__init__.py +3 -3
doctr/models/modules/vision_transformer/pytorch.py +3 -4
doctr/models/modules/vision_transformer/tensorflow.py +4 -4
doctr/models/predictor/__init__.py +5 -5
doctr/models/predictor/base.py +52 -41
doctr/models/predictor/pytorch.py +16 -13
doctr/models/predictor/tensorflow.py +16 -10
doctr/models/preprocessor/__init__.py +4 -4
doctr/models/preprocessor/pytorch.py +13 -17
doctr/models/preprocessor/tensorflow.py +11 -15
doctr/models/recognition/core.py +3 -7
doctr/models/recognition/crnn/__init__.py +4 -4
doctr/models/recognition/crnn/pytorch.py +20 -28
doctr/models/recognition/crnn/tensorflow.py +19 -29
doctr/models/recognition/master/__init__.py +3 -3
doctr/models/recognition/master/base.py +3 -7
doctr/models/recognition/master/pytorch.py +22 -24
doctr/models/recognition/master/tensorflow.py +21 -26
doctr/models/recognition/parseq/__init__.py +3 -3
doctr/models/recognition/parseq/base.py +3 -7
doctr/models/recognition/parseq/pytorch.py +26 -26
doctr/models/recognition/parseq/tensorflow.py +26 -30
doctr/models/recognition/predictor/__init__.py +5 -5
doctr/models/recognition/predictor/_utils.py +7 -10
doctr/models/recognition/predictor/pytorch.py +6 -6
doctr/models/recognition/predictor/tensorflow.py +5 -6
doctr/models/recognition/sar/__init__.py +4 -4
doctr/models/recognition/sar/pytorch.py +20 -21
doctr/models/recognition/sar/tensorflow.py +19 -24
doctr/models/recognition/utils.py +5 -10
doctr/models/recognition/vitstr/__init__.py +4 -4
doctr/models/recognition/vitstr/base.py +3 -7
doctr/models/recognition/vitstr/pytorch.py +18 -20
doctr/models/recognition/vitstr/tensorflow.py +21 -24
doctr/models/recognition/zoo.py +22 -11
doctr/models/utils/__init__.py +4 -4
doctr/models/utils/pytorch.py +13 -16
doctr/models/utils/tensorflow.py +31 -30
doctr/models/zoo.py +1 -5
doctr/transforms/functional/__init__.py +3 -3
doctr/transforms/functional/base.py +4 -11
doctr/transforms/functional/pytorch.py +21 -29
doctr/transforms/functional/tensorflow.py +10 -22
doctr/transforms/modules/__init__.py +4 -4
doctr/transforms/modules/base.py +48 -55
doctr/transforms/modules/pytorch.py +65 -28
doctr/transforms/modules/tensorflow.py +33 -44
doctr/utils/common_types.py +8 -9
doctr/utils/data.py +8 -12
doctr/utils/fonts.py +2 -7
doctr/utils/geometry.py +120 -64
doctr/utils/metrics.py +18 -38
doctr/utils/multithreading.py +4 -6
doctr/utils/reconstitution.py +157 -75
doctr/utils/repr.py +2 -3
doctr/utils/visualization.py +16 -29
doctr/version.py +1 -1
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/METADATA +59 -57
python_doctr-0.11.0.dist-info/RECORD +173 -0
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/WHEEL +1 -1
python_doctr-0.9.0.dist-info/RECORD +0 -173
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/LICENSE +0 -0
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/top_level.txt +0 -0
{python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/zip-safe +0 -0

doctr/models/predictor/base.py CHANGED Viewed

@@ -1,14 +1,15 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-from typing import Any, Callable, Dict, List, Optional, Tuple
+from collections.abc import Callable
+from typing import Any
 import numpy as np
 from doctr.models.builder import DocumentBuilder
-from doctr.utils.geometry import extract_crops, extract_rcrops, rotate_image
+from doctr.utils.geometry import extract_crops, extract_rcrops, remove_image_padding, rotate_image
 from .._utils import estimate_orientation, rectify_crops, rectify_loc_preds
 from ..classification import crop_orientation_predictor, page_orientation_predictor
@@ -21,7 +22,6 @@ class _OCRPredictor:
     """Implements an object able to localize and identify text elements in a set of documents
     Args:
-    ----
         assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
             without rotated textual elements.
         straighten_pages: if True, estimates the page general orientation based on the median line orientation.
@@ -34,8 +34,8 @@ class _OCRPredictor:
         **kwargs: keyword args of `DocumentBuilder`
     """
-    crop_orientation_predictor: Optional[OrientationPredictor]
-    page_orientation_predictor: Optional[OrientationPredictor]
+    crop_orientation_predictor: OrientationPredictor | None
+    page_orientation_predictor: OrientationPredictor | None
     def __init__(
         self,
@@ -48,21 +48,27 @@ class _OCRPredictor:
     ) -> None:
         self.assume_straight_pages = assume_straight_pages
         self.straighten_pages = straighten_pages
-        self.crop_orientation_predictor = None if assume_straight_pages else crop_orientation_predictor(pretrained=True)
+        self._page_orientation_disabled = kwargs.pop("disable_page_orientation", False)
+        self._crop_orientation_disabled = kwargs.pop("disable_crop_orientation", False)
+        self.crop_orientation_predictor = (
+            None
+            if assume_straight_pages
+            else crop_orientation_predictor(pretrained=True, disabled=self._crop_orientation_disabled)
+        )
         self.page_orientation_predictor = (
-            page_orientation_predictor(pretrained=True)
+            page_orientation_predictor(pretrained=True, disabled=self._page_orientation_disabled)
             if detect_orientation or straighten_pages or not assume_straight_pages
             else None
         )
         self.doc_builder = DocumentBuilder(**kwargs)
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
-        self.hooks: List[Callable] = []
+        self.hooks: list[Callable] = []
     def _general_page_orientations(
         self,
-        pages: List[np.ndarray],
-    ) -> List[Tuple[int, float]]:
+        pages: list[np.ndarray],
+    ) -> list[tuple[int, float]]:
         _, classes, probs = zip(self.page_orientation_predictor(pages))  # type: ignore[misc]
         # Flatten to list of tuples with (value, confidence)
         page_orientations = [
@@ -73,8 +79,8 @@ class _OCRPredictor:
         return page_orientations
     def _get_orientations(
-        self, pages: List[np.ndarray], seg_maps: List[np.ndarray]
-    ) -> Tuple[List[Tuple[int, float]], List[int]]:
+        self, pages: list[np.ndarray], seg_maps: list[np.ndarray]
+    ) -> tuple[list[tuple[int, float]], list[int]]:
         general_pages_orientations = self._general_page_orientations(pages)
         origin_page_orientations = [
             estimate_orientation(seq_map, general_orientation)
@@ -84,11 +90,11 @@ class _OCRPredictor:
     def _straighten_pages(
         self,
-        pages: List[np.ndarray],
-        seg_maps: List[np.ndarray],
-        general_pages_orientations: Optional[List[Tuple[int, float]]] = None,
-        origin_pages_orientations: Optional[List[int]] = None,
-    ) -> List[np.ndarray]:
+        pages: list[np.ndarray],
+        seg_maps: list[np.ndarray],
+        general_pages_orientations: list[tuple[int, float]] | None = None,
+        origin_pages_orientations: list[int] | None = None,
+    ) -> list[np.ndarray]:
         general_pages_orientations = (
             general_pages_orientations if general_pages_orientations else self._general_page_orientations(pages)
         )
@@ -101,34 +107,40 @@ class _OCRPredictor:
             ]
         )
         return [
-            # We exapnd if the page is wider than tall and the angle is 90 or -90
-            rotate_image(page, angle, expand=page.shape[1] > page.shape[0] and abs(angle) == 90)
+            # expand if height and width are not equal, then remove the padding
+            remove_image_padding(rotate_image(page, angle, expand=page.shape[0] != page.shape[1]))
             for page, angle in zip(pages, origin_pages_orientations)
         ]
     @staticmethod
     def _generate_crops(
-        pages: List[np.ndarray],
-        loc_preds: List[np.ndarray],
+        pages: list[np.ndarray],
+        loc_preds: list[np.ndarray],
         channels_last: bool,
         assume_straight_pages: bool = False,
-    ) -> List[List[np.ndarray]]:
-        extraction_fn = extract_crops if assume_straight_pages else extract_rcrops
-        crops = [
-            extraction_fn(page, _boxes[:, :4], channels_last=channels_last)  # type: ignore[operator]
-            for page, _boxes in zip(pages, loc_preds)
-        ]
+        assume_horizontal: bool = False,
+    ) -> list[list[np.ndarray]]:
+        if assume_straight_pages:
+            crops = [
+                extract_crops(page, _boxes[:, :4], channels_last=channels_last)
+                for page, _boxes in zip(pages, loc_preds)
+            ]
+        else:
+            crops = [
+                extract_rcrops(page, _boxes[:, :4], channels_last=channels_last, assume_horizontal=assume_horizontal)
+                for page, _boxes in zip(pages, loc_preds)
+            ]
         return crops
     @staticmethod
     def _prepare_crops(
-        pages: List[np.ndarray],
-        loc_preds: List[np.ndarray],
+        pages: list[np.ndarray],
+        loc_preds: list[np.ndarray],
         channels_last: bool,
         assume_straight_pages: bool = False,
-    ) -> Tuple[List[List[np.ndarray]], List[np.ndarray]]:
-        crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages)
+        assume_horizontal: bool = False,
+    ) -> tuple[list[list[np.ndarray]], list[np.ndarray]]:
+        crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages, assume_horizontal)
         # Avoid sending zero-sized crops
         is_kept = [[all(s > 0 for s in crop.shape) for crop in page_crops] for page_crops in crops]
@@ -142,9 +154,9 @@ class _OCRPredictor:
     def _rectify_crops(
         self,
-        crops: List[List[np.ndarray]],
-        loc_preds: List[np.ndarray],
-    ) -> Tuple[List[List[np.ndarray]], List[np.ndarray], List[Tuple[int, float]]]:
+        crops: list[list[np.ndarray]],
+        loc_preds: list[np.ndarray],
+    ) -> tuple[list[list[np.ndarray]], list[np.ndarray], list[tuple[int, float]]]:
         # Work at a page level
         orientations, classes, probs = zip(*[self.crop_orientation_predictor(page_crops) for page_crops in crops])  # type: ignore[misc]
         rect_crops = [rectify_crops(page_crops, orientation) for page_crops, orientation in zip(crops, orientations)]
@@ -162,10 +174,10 @@ class _OCRPredictor:
     @staticmethod
     def _process_predictions(
-        loc_preds: List[np.ndarray],
-        word_preds: List[Tuple[str, float]],
-        crop_orientations: List[Dict[str, Any]],
-    ) -> Tuple[List[np.ndarray], List[List[Tuple[str, float]]], List[List[Dict[str, Any]]]]:
+        loc_preds: list[np.ndarray],
+        word_preds: list[tuple[str, float]],
+        crop_orientations: list[dict[str, Any]],
+    ) -> tuple[list[np.ndarray], list[list[tuple[str, float]]], list[list[dict[str, Any]]]]:
         text_preds = []
         crop_orientation_preds = []
         if len(loc_preds) > 0:
@@ -182,7 +194,6 @@ class _OCRPredictor:
         """Add a hook to the predictor
         Args:
-        ----
             hook: a callable that takes as input the `loc_preds` and returns the modified `loc_preds`
         """
         self.hooks.append(hook)

doctr/models/predictor/pytorch.py CHANGED Viewed

@@ -1,9 +1,9 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-from typing import Any, List, Union
+from typing import Any
 import numpy as np
 import torch
@@ -24,7 +24,6 @@ class OCRPredictor(nn.Module, _OCRPredictor):
     """Implements an object able to localize and identify text elements in a set of documents
     Args:
-    ----
         det_predictor: detection module
         reco_predictor: recognition module
         assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
@@ -52,8 +51,8 @@ class OCRPredictor(nn.Module, _OCRPredictor):
         **kwargs: Any,
     ) -> None:
         nn.Module.__init__(self)
-        self.det_predictor = det_predictor.eval()  # type: ignore[attr-defined]
-        self.reco_predictor = reco_predictor.eval()  # type: ignore[attr-defined]
+        self.det_predictor = det_predictor.eval()
+        self.reco_predictor = reco_predictor.eval()
         _OCRPredictor.__init__(
             self,
             assume_straight_pages,
@@ -69,7 +68,7 @@ class OCRPredictor(nn.Module, _OCRPredictor):
     @torch.inference_mode()
     def forward(
         self,
-        pages: List[Union[np.ndarray, torch.Tensor]],
+        pages: list[np.ndarray | torch.Tensor],
         **kwargs: Any,
     ) -> Document:
         # Dimension check
@@ -87,7 +86,7 @@ class OCRPredictor(nn.Module, _OCRPredictor):
             for out_map in out_maps
         ]
         if self.detect_orientation:
-            general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps)  # type: ignore[arg-type]
+            general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps)
             orientations = [
                 {"value": orientation_page, "confidence": None} for orientation_page in origin_pages_orientations
             ]
@@ -96,13 +95,16 @@ class OCRPredictor(nn.Module, _OCRPredictor):
             general_pages_orientations = None
             origin_pages_orientations = None
         if self.straighten_pages:
-            pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations)  # type: ignore
+            pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations)
+            # update page shapes after straightening
+            origin_page_shapes = [page.shape[:2] for page in pages]
             # Forward again to get predictions on straight pages
             loc_preds = self.det_predictor(pages, **kwargs)
-        assert all(
-            len(loc_pred) == 1 for loc_pred in loc_preds
-        ), "Detection Model in ocr_predictor should output only one class"
+        assert all(len(loc_pred) == 1 for loc_pred in loc_preds), (
+            "Detection Model in ocr_predictor should output only one class"
+        )
         loc_preds = [list(loc_pred.values())[0] for loc_pred in loc_preds]
         # Detach objectness scores from loc_preds
@@ -116,10 +118,11 @@ class OCRPredictor(nn.Module, _OCRPredictor):
         # Crop images
         crops, loc_preds = self._prepare_crops(
-            pages,  # type: ignore[arg-type]
+            pages,
             loc_preds,
             channels_last=channels_last,
             assume_straight_pages=self.assume_straight_pages,
+            assume_horizontal=self._page_orientation_disabled,
         )
         # Rectify crop orientation and get crop orientation predictions
         crop_orientations: Any = []
@@ -143,7 +146,7 @@ class OCRPredictor(nn.Module, _OCRPredictor):
             languages_dict = None
         out = self.doc_builder(
-            pages,  # type: ignore[arg-type]
+            pages,
             boxes,
             objectness_scores,
             text_preds,

doctr/models/predictor/tensorflow.py CHANGED Viewed

@@ -1,9 +1,9 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-from typing import Any, List, Union
+from typing import Any
 import numpy as np
 import tensorflow as tf
@@ -24,7 +24,6 @@ class OCRPredictor(NestedObject, _OCRPredictor):
     """Implements an object able to localize and identify text elements in a set of documents
     Args:
-    ----
         det_predictor: detection module
         reco_predictor: recognition module
         assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
@@ -69,7 +68,7 @@ class OCRPredictor(NestedObject, _OCRPredictor):
     def __call__(
         self,
-        pages: List[Union[np.ndarray, tf.Tensor]],
+        pages: list[np.ndarray | tf.Tensor],
         **kwargs: Any,
     ) -> Document:
         # Dimension check
@@ -97,13 +96,16 @@ class OCRPredictor(NestedObject, _OCRPredictor):
             origin_pages_orientations = None
         if self.straighten_pages:
             pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations)
+            # update page shapes after straightening
+            origin_page_shapes = [page.shape[:2] for page in pages]
             # forward again to get predictions on straight pages
-            loc_preds_dict = self.det_predictor(pages, **kwargs)  # type: ignore[assignment]
+            loc_preds_dict = self.det_predictor(pages, **kwargs)
-        assert all(
-            len(loc_pred) == 1 for loc_pred in loc_preds_dict
-        ), "Detection Model in ocr_predictor should output only one class"
-        loc_preds: List[np.ndarray] = [list(loc_pred.values())[0] for loc_pred in loc_preds_dict]  # type: ignore[union-attr]
+        assert all(len(loc_pred) == 1 for loc_pred in loc_preds_dict), (
+            "Detection Model in ocr_predictor should output only one class"
+        )
+        loc_preds: list[np.ndarray] = [list(loc_pred.values())[0] for loc_pred in loc_preds_dict]
         # Detach objectness scores from loc_preds
         loc_preds, objectness_scores = detach_scores(loc_preds)
@@ -113,7 +115,11 @@ class OCRPredictor(NestedObject, _OCRPredictor):
         # Crop images
         crops, loc_preds = self._prepare_crops(
-            pages, loc_preds, channels_last=True, assume_straight_pages=self.assume_straight_pages
+            pages,
+            loc_preds,
+            channels_last=True,
+            assume_straight_pages=self.assume_straight_pages,
+            assume_horizontal=self._page_orientation_disabled,
         )
         # Rectify crop orientation and get crop orientation predictions
         crop_orientations: Any = []

doctr/models/preprocessor/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
-if is_tf_available():
-    from .tensorflow import *
-elif is_torch_available():
-    from .pytorch import *  # type: ignore[assignment]
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *  # type: ignore[assignment]

doctr/models/preprocessor/pytorch.py CHANGED Viewed

@@ -1,10 +1,10 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 import math
-from typing import Any, List, Tuple, Union
+from typing import Any
 import numpy as np
 import torch
@@ -22,19 +22,19 @@ class PreProcessor(nn.Module):
     """Implements an abstract preprocessor object which performs casting, resizing, batching and normalization.
     Args:
-    ----
         output_size: expected size of each page in format (H, W)
         batch_size: the size of page batches
         mean: mean value of the training distribution by channel
         std: standard deviation of the training distribution by channel
+        **kwargs: additional arguments for the resizing operation
     """
     def __init__(
         self,
-        output_size: Tuple[int, int],
+        output_size: tuple[int, int],
         batch_size: int,
-        mean: Tuple[float, float, float] = (0.5, 0.5, 0.5),
-        std: Tuple[float, float, float] = (1.0, 1.0, 1.0),
+        mean: tuple[float, float, float] = (0.5, 0.5, 0.5),
+        std: tuple[float, float, float] = (1.0, 1.0, 1.0),
         **kwargs: Any,
     ) -> None:
         super().__init__()
@@ -43,15 +43,13 @@ class PreProcessor(nn.Module):
         # Perform the division by 255 at the same time
         self.normalize = T.Normalize(mean, std)
-    def batch_inputs(self, samples: List[torch.Tensor]) -> List[torch.Tensor]:
+    def batch_inputs(self, samples: list[torch.Tensor]) -> list[torch.Tensor]:
         """Gather samples into batches for inference purposes
         Args:
-        ----
             samples: list of samples of shape (C, H, W)
         Returns:
-        -------
             list of batched samples (*, C, H, W)
         """
         num_batches = int(math.ceil(len(samples) / self.batch_size))
@@ -62,7 +60,7 @@ class PreProcessor(nn.Module):
         return batches
-    def sample_transforms(self, x: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
+    def sample_transforms(self, x: np.ndarray | torch.Tensor) -> torch.Tensor:
         if x.ndim != 3:
             raise AssertionError("expected list of 3D Tensors")
         if isinstance(x, np.ndarray):
@@ -79,17 +77,15 @@ class PreProcessor(nn.Module):
         else:
             x = x.to(dtype=torch.float32)  # type: ignore[union-attr]
-        return x
+        return x  # type: ignore[return-value]
-    def __call__(self, x: Union[torch.Tensor, np.ndarray, List[Union[torch.Tensor, np.ndarray]]]) -> List[torch.Tensor]:
+    def __call__(self, x: torch.Tensor | np.ndarray | list[torch.Tensor | np.ndarray]) -> list[torch.Tensor]:
         """Prepare document data for model forwarding
         Args:
-        ----
             x: list of images (np.array) or tensors (already resized and batched)
         Returns:
-        -------
             list of page batches
         """
         # Input type check
@@ -103,7 +99,7 @@ class PreProcessor(nn.Module):
             elif x.dtype not in (torch.uint8, torch.float16, torch.float32):
                 raise TypeError("unsupported data type for torch.Tensor")
             # Resizing
-            if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]:
+            if x.shape[-2] != self.resize.size[0] or x.shape[-1] != self.resize.size[1]:  # type: ignore[union-attr]
                 x = F.resize(
                     x, self.resize.size, interpolation=self.resize.interpolation, antialias=self.resize.antialias
                 )
@@ -118,11 +114,11 @@ class PreProcessor(nn.Module):
             # Sample transform (to tensor, resize)
             samples = list(multithread_exec(self.sample_transforms, x))
             # Batching
-            batches = self.batch_inputs(samples)
+            batches = self.batch_inputs(samples)  # type: ignore[assignment]
         else:
             raise TypeError(f"invalid input type: {type(x)}")
         # Batch transforms (normalize)
         batches = list(multithread_exec(self.normalize, batches))
-        return batches
+        return batches  # type: ignore[return-value]

doctr/models/preprocessor/tensorflow.py CHANGED Viewed

@@ -1,10 +1,10 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 import math
-from typing import Any, List, Tuple, Union
+from typing import Any
 import numpy as np
 import tensorflow as tf
@@ -20,38 +20,36 @@ class PreProcessor(NestedObject):
     """Implements an abstract preprocessor object which performs casting, resizing, batching and normalization.
     Args:
-    ----
         output_size: expected size of each page in format (H, W)
         batch_size: the size of page batches
         mean: mean value of the training distribution by channel
         std: standard deviation of the training distribution by channel
+        **kwargs: additional arguments for the resizing operation
     """
-    _children_names: List[str] = ["resize", "normalize"]
+    _children_names: list[str] = ["resize", "normalize"]
     def __init__(
         self,
-        output_size: Tuple[int, int],
+        output_size: tuple[int, int],
         batch_size: int,
-        mean: Tuple[float, float, float] = (0.5, 0.5, 0.5),
-        std: Tuple[float, float, float] = (1.0, 1.0, 1.0),
+        mean: tuple[float, float, float] = (0.5, 0.5, 0.5),
+        std: tuple[float, float, float] = (1.0, 1.0, 1.0),
         **kwargs: Any,
     ) -> None:
         self.batch_size = batch_size
         self.resize = Resize(output_size, **kwargs)
         # Perform the division by 255 at the same time
         self.normalize = Normalize(mean, std)
-        self._runs_on_cuda = tf.test.is_gpu_available()
+        self._runs_on_cuda = tf.config.list_physical_devices("GPU") != []
-    def batch_inputs(self, samples: List[tf.Tensor]) -> List[tf.Tensor]:
+    def batch_inputs(self, samples: list[tf.Tensor]) -> list[tf.Tensor]:
         """Gather samples into batches for inference purposes
         Args:
-        ----
             samples: list of samples (tf.Tensor)
         Returns:
-        -------
             list of batched samples
         """
         num_batches = int(math.ceil(len(samples) / self.batch_size))
@@ -62,7 +60,7 @@ class PreProcessor(NestedObject):
         return batches
-    def sample_transforms(self, x: Union[np.ndarray, tf.Tensor]) -> tf.Tensor:
+    def sample_transforms(self, x: np.ndarray | tf.Tensor) -> tf.Tensor:
         if x.ndim != 3:
             raise AssertionError("expected list of 3D Tensors")
         if isinstance(x, np.ndarray):
@@ -79,15 +77,13 @@ class PreProcessor(NestedObject):
         return x
-    def __call__(self, x: Union[tf.Tensor, np.ndarray, List[Union[tf.Tensor, np.ndarray]]]) -> List[tf.Tensor]:
+    def __call__(self, x: tf.Tensor | np.ndarray | list[tf.Tensor | np.ndarray]) -> list[tf.Tensor]:
         """Prepare document data for model forwarding
         Args:
-        ----
             x: list of images (np.array) or tensors (already resized and batched)
         Returns:
-        -------
             list of page batches
         """
         # Input type check

doctr/models/recognition/core.py CHANGED Viewed

@@ -1,9 +1,8 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
-from typing import List, Tuple
 import numpy as np
@@ -21,17 +20,15 @@ class RecognitionModel(NestedObject):
     def build_target(
         self,
-        gts: List[str],
-    ) -> Tuple[np.ndarray, List[int]]:
+        gts: list[str],
+    ) -> tuple[np.ndarray, list[int]]:
         """Encode a list of gts sequences into a np array and gives the corresponding*
         sequence lengths.
         Args:
-        ----
             gts: list of ground-truth labels
         Returns:
-        -------
             A tuple of 2 tensors: Encoded labels and sequence lengths (for each entry of the batch)
         """
         encoded = encode_sequences(sequences=gts, vocab=self.vocab, target_size=self.max_length, eos=len(self.vocab))
@@ -43,7 +40,6 @@ class RecognitionPostProcessor(NestedObject):
     """Abstract class to postprocess the raw output of the model
     Args:
-    ----
         vocab: string containing the ordered sequence of supported characters
     """

doctr/models/recognition/crnn/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
-if is_tf_available():
-    from .tensorflow import *
-elif is_torch_available():
-    from .pytorch import *  # type: ignore[assignment]
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *  # type: ignore[assignment]

python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl