python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/contrib/__init__.py +1 -0
- doctr/contrib/artefacts.py +7 -9
- doctr/contrib/base.py +8 -17
- doctr/datasets/cord.py +17 -7
- doctr/datasets/datasets/__init__.py +4 -4
- doctr/datasets/datasets/base.py +16 -16
- doctr/datasets/datasets/pytorch.py +12 -12
- doctr/datasets/datasets/tensorflow.py +10 -10
- doctr/datasets/detection.py +6 -9
- doctr/datasets/doc_artefacts.py +3 -4
- doctr/datasets/funsd.py +17 -6
- doctr/datasets/generator/__init__.py +4 -4
- doctr/datasets/generator/base.py +16 -17
- doctr/datasets/generator/pytorch.py +1 -3
- doctr/datasets/generator/tensorflow.py +1 -3
- doctr/datasets/ic03.py +14 -5
- doctr/datasets/ic13.py +13 -5
- doctr/datasets/iiit5k.py +31 -20
- doctr/datasets/iiithws.py +4 -5
- doctr/datasets/imgur5k.py +15 -5
- doctr/datasets/loader.py +4 -7
- doctr/datasets/mjsynth.py +6 -5
- doctr/datasets/ocr.py +3 -4
- doctr/datasets/orientation.py +3 -4
- doctr/datasets/recognition.py +3 -4
- doctr/datasets/sroie.py +16 -5
- doctr/datasets/svhn.py +16 -5
- doctr/datasets/svt.py +14 -5
- doctr/datasets/synthtext.py +14 -5
- doctr/datasets/utils.py +37 -27
- doctr/datasets/vocabs.py +21 -7
- doctr/datasets/wildreceipt.py +25 -10
- doctr/file_utils.py +18 -4
- doctr/io/elements.py +69 -81
- doctr/io/html.py +1 -3
- doctr/io/image/__init__.py +3 -3
- doctr/io/image/base.py +2 -5
- doctr/io/image/pytorch.py +3 -12
- doctr/io/image/tensorflow.py +2 -11
- doctr/io/pdf.py +5 -7
- doctr/io/reader.py +5 -11
- doctr/models/_utils.py +14 -22
- doctr/models/builder.py +32 -50
- doctr/models/classification/magc_resnet/__init__.py +3 -3
- doctr/models/classification/magc_resnet/pytorch.py +10 -13
- doctr/models/classification/magc_resnet/tensorflow.py +21 -17
- doctr/models/classification/mobilenet/__init__.py +3 -3
- doctr/models/classification/mobilenet/pytorch.py +7 -17
- doctr/models/classification/mobilenet/tensorflow.py +22 -29
- doctr/models/classification/predictor/__init__.py +4 -4
- doctr/models/classification/predictor/pytorch.py +13 -11
- doctr/models/classification/predictor/tensorflow.py +13 -11
- doctr/models/classification/resnet/__init__.py +4 -4
- doctr/models/classification/resnet/pytorch.py +21 -31
- doctr/models/classification/resnet/tensorflow.py +41 -39
- doctr/models/classification/textnet/__init__.py +3 -3
- doctr/models/classification/textnet/pytorch.py +10 -17
- doctr/models/classification/textnet/tensorflow.py +19 -20
- doctr/models/classification/vgg/__init__.py +3 -3
- doctr/models/classification/vgg/pytorch.py +5 -7
- doctr/models/classification/vgg/tensorflow.py +18 -15
- doctr/models/classification/vit/__init__.py +3 -3
- doctr/models/classification/vit/pytorch.py +8 -14
- doctr/models/classification/vit/tensorflow.py +16 -16
- doctr/models/classification/zoo.py +36 -19
- doctr/models/core.py +3 -3
- doctr/models/detection/_utils/__init__.py +4 -4
- doctr/models/detection/_utils/base.py +4 -7
- doctr/models/detection/_utils/pytorch.py +1 -5
- doctr/models/detection/_utils/tensorflow.py +1 -5
- doctr/models/detection/core.py +2 -8
- doctr/models/detection/differentiable_binarization/__init__.py +4 -4
- doctr/models/detection/differentiable_binarization/base.py +7 -17
- doctr/models/detection/differentiable_binarization/pytorch.py +27 -30
- doctr/models/detection/differentiable_binarization/tensorflow.py +49 -37
- doctr/models/detection/fast/__init__.py +4 -4
- doctr/models/detection/fast/base.py +6 -14
- doctr/models/detection/fast/pytorch.py +24 -31
- doctr/models/detection/fast/tensorflow.py +28 -37
- doctr/models/detection/linknet/__init__.py +4 -4
- doctr/models/detection/linknet/base.py +6 -15
- doctr/models/detection/linknet/pytorch.py +24 -27
- doctr/models/detection/linknet/tensorflow.py +36 -33
- doctr/models/detection/predictor/__init__.py +5 -5
- doctr/models/detection/predictor/pytorch.py +6 -7
- doctr/models/detection/predictor/tensorflow.py +7 -8
- doctr/models/detection/zoo.py +27 -7
- doctr/models/factory/hub.py +8 -13
- doctr/models/kie_predictor/__init__.py +5 -5
- doctr/models/kie_predictor/base.py +8 -5
- doctr/models/kie_predictor/pytorch.py +22 -19
- doctr/models/kie_predictor/tensorflow.py +21 -15
- doctr/models/modules/layers/__init__.py +3 -3
- doctr/models/modules/layers/pytorch.py +6 -9
- doctr/models/modules/layers/tensorflow.py +5 -7
- doctr/models/modules/transformer/__init__.py +3 -3
- doctr/models/modules/transformer/pytorch.py +12 -13
- doctr/models/modules/transformer/tensorflow.py +9 -12
- doctr/models/modules/vision_transformer/__init__.py +3 -3
- doctr/models/modules/vision_transformer/pytorch.py +3 -4
- doctr/models/modules/vision_transformer/tensorflow.py +4 -4
- doctr/models/predictor/__init__.py +5 -5
- doctr/models/predictor/base.py +52 -41
- doctr/models/predictor/pytorch.py +16 -13
- doctr/models/predictor/tensorflow.py +16 -10
- doctr/models/preprocessor/__init__.py +4 -4
- doctr/models/preprocessor/pytorch.py +13 -17
- doctr/models/preprocessor/tensorflow.py +11 -15
- doctr/models/recognition/core.py +3 -7
- doctr/models/recognition/crnn/__init__.py +4 -4
- doctr/models/recognition/crnn/pytorch.py +20 -28
- doctr/models/recognition/crnn/tensorflow.py +19 -29
- doctr/models/recognition/master/__init__.py +3 -3
- doctr/models/recognition/master/base.py +3 -7
- doctr/models/recognition/master/pytorch.py +22 -24
- doctr/models/recognition/master/tensorflow.py +21 -26
- doctr/models/recognition/parseq/__init__.py +3 -3
- doctr/models/recognition/parseq/base.py +3 -7
- doctr/models/recognition/parseq/pytorch.py +26 -26
- doctr/models/recognition/parseq/tensorflow.py +26 -30
- doctr/models/recognition/predictor/__init__.py +5 -5
- doctr/models/recognition/predictor/_utils.py +7 -10
- doctr/models/recognition/predictor/pytorch.py +6 -6
- doctr/models/recognition/predictor/tensorflow.py +5 -6
- doctr/models/recognition/sar/__init__.py +4 -4
- doctr/models/recognition/sar/pytorch.py +20 -21
- doctr/models/recognition/sar/tensorflow.py +19 -24
- doctr/models/recognition/utils.py +5 -10
- doctr/models/recognition/vitstr/__init__.py +4 -4
- doctr/models/recognition/vitstr/base.py +3 -7
- doctr/models/recognition/vitstr/pytorch.py +18 -20
- doctr/models/recognition/vitstr/tensorflow.py +21 -24
- doctr/models/recognition/zoo.py +22 -11
- doctr/models/utils/__init__.py +4 -4
- doctr/models/utils/pytorch.py +13 -16
- doctr/models/utils/tensorflow.py +31 -30
- doctr/models/zoo.py +1 -5
- doctr/transforms/functional/__init__.py +3 -3
- doctr/transforms/functional/base.py +4 -11
- doctr/transforms/functional/pytorch.py +21 -29
- doctr/transforms/functional/tensorflow.py +10 -22
- doctr/transforms/modules/__init__.py +4 -4
- doctr/transforms/modules/base.py +48 -55
- doctr/transforms/modules/pytorch.py +65 -28
- doctr/transforms/modules/tensorflow.py +33 -44
- doctr/utils/common_types.py +8 -9
- doctr/utils/data.py +8 -12
- doctr/utils/fonts.py +2 -7
- doctr/utils/geometry.py +120 -64
- doctr/utils/metrics.py +18 -38
- doctr/utils/multithreading.py +4 -6
- doctr/utils/reconstitution.py +157 -75
- doctr/utils/repr.py +2 -3
- doctr/utils/visualization.py +16 -29
- doctr/version.py +1 -1
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/METADATA +59 -57
- python_doctr-0.11.0.dist-info/RECORD +173 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/WHEEL +1 -1
- python_doctr-0.9.0.dist-info/RECORD +0 -173
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/LICENSE +0 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/zip-safe +0 -0
```diff
--- doctr/models/kie_predictor/__init__.py (0.9.0)
+++ doctr/models/kie_predictor/__init__.py (0.11.0)
@@ -1,6 +1,6 @@
-from doctr.file_utils import is_tf_available
+from doctr.file_utils import is_tf_available, is_torch_available
 
-if is_tf_available():
-    from .tensorflow import *
-else:
-    from .pytorch import *  # type: ignore[assignment]
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *  # type: ignore[assignment]
```
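This hunk, together with the identical change to `doctr/models/predictor/__init__.py` at the end of this diff, reverses the backend dispatch order: 0.9.0 checked for TensorFlow first, while 0.11.0 checks for PyTorch first, so the PyTorch implementations are the ones re-exported when both frameworks are installed. A minimal sketch of that selection logic, assuming simplified availability checks (doctr's real `file_utils` does more than this, e.g. handling explicit backend overrides):

```python
# Simplified stand-ins for doctr.file_utils.is_torch_available / is_tf_available;
# the real helpers are more involved.
import importlib.util


def is_torch_available() -> bool:
    return importlib.util.find_spec("torch") is not None


def is_tf_available() -> bool:
    return importlib.util.find_spec("tensorflow") is not None


# 0.11.0 order: PyTorch wins whenever it is installed, TensorFlow is the fallback.
if is_torch_available():
    backend = "pytorch"
elif is_tf_available():
    backend = "tensorflow"
else:
    backend = None  # neither framework installed

print(f"selected backend: {backend}")
```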
```diff
--- doctr/models/kie_predictor/base.py (0.9.0)
+++ doctr/models/kie_predictor/base.py (0.11.0)
@@ -1,9 +1,9 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any
+from typing import Any
 
 from doctr.models.builder import KIEDocumentBuilder
 
@@ -17,7 +17,6 @@ class _KIEPredictor(_OCRPredictor):
     """Implements an object able to localize and identify text elements in a set of documents
 
     Args:
-    ----
         assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
             without rotated textual elements.
         straighten_pages: if True, estimates the page general orientation based on the median line orientation.
@@ -30,8 +29,8 @@ class _KIEPredictor(_OCRPredictor):
         kwargs: keyword args of `DocumentBuilder`
     """
 
-    crop_orientation_predictor:
-    page_orientation_predictor:
+    crop_orientation_predictor: OrientationPredictor | None
+    page_orientation_predictor: OrientationPredictor | None
 
     def __init__(
         self,
@@ -46,4 +45,8 @@ class _KIEPredictor(_OCRPredictor):
             assume_straight_pages, straighten_pages, preserve_aspect_ratio, symmetric_pad, detect_orientation, **kwargs
         )
 
+        # Remove the following arguments from kwargs after initialization of the parent class
+        kwargs.pop("disable_page_orientation", None)
+        kwargs.pop("disable_crop_orientation", None)
+
         self.doc_builder: KIEDocumentBuilder = KIEDocumentBuilder(**kwargs)
```
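The new `kwargs.pop(...)` calls consume the predictor-level `disable_page_orientation` and `disable_crop_orientation` switches so that only builder-compatible keyword arguments reach `KIEDocumentBuilder(**kwargs)` on the last line. A small sketch of the pattern, using a hypothetical stand-in builder rather than doctr's class:

```python
# Hypothetical stand-in: like a real builder, it only accepts the keyword
# arguments it knows about and would raise TypeError on anything else.
class StubBuilder:
    def __init__(self, resolve_lines: bool = True) -> None:
        self.resolve_lines = resolve_lines


def build_predictor(**kwargs):
    # Predictor-level flags are popped (with defaults) before forwarding ...
    disable_page_orientation = kwargs.pop("disable_page_orientation", False)
    disable_crop_orientation = kwargs.pop("disable_crop_orientation", False)
    # ... so the remaining kwargs are safe to hand to the builder.
    builder = StubBuilder(**kwargs)
    return disable_page_orientation, disable_crop_orientation, builder


print(build_predictor(disable_page_orientation=True, resolve_lines=False))
```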
```diff
--- doctr/models/kie_predictor/pytorch.py (0.9.0)
+++ doctr/models/kie_predictor/pytorch.py (0.11.0)
@@ -1,9 +1,9 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any
+from typing import Any
 
 import numpy as np
 import torch
@@ -24,7 +24,6 @@ class KIEPredictor(nn.Module, _KIEPredictor):
     """Implements an object able to localize and identify text elements in a set of documents
 
     Args:
-    ----
         det_predictor: detection module
         reco_predictor: recognition module
         assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
@@ -52,8 +51,8 @@ class KIEPredictor(nn.Module, _KIEPredictor):
         **kwargs: Any,
     ) -> None:
         nn.Module.__init__(self)
-        self.det_predictor = det_predictor.eval()
-        self.reco_predictor = reco_predictor.eval()
+        self.det_predictor = det_predictor.eval()
+        self.reco_predictor = reco_predictor.eval()
         _KIEPredictor.__init__(
             self,
             assume_straight_pages,
@@ -69,7 +68,7 @@ class KIEPredictor(nn.Module, _KIEPredictor):
     @torch.inference_mode()
     def forward(
         self,
-        pages:
+        pages: list[np.ndarray | torch.Tensor],
         **kwargs: Any,
     ) -> Document:
         # Dimension check
@@ -89,7 +88,7 @@ class KIEPredictor(nn.Module, _KIEPredictor):
             for out_map in out_maps
         ]
         if self.detect_orientation:
-            general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps)
+            general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps)
             orientations = [
                 {"value": orientation_page, "confidence": None} for orientation_page in origin_pages_orientations
             ]
@@ -98,11 +97,14 @@ class KIEPredictor(nn.Module, _KIEPredictor):
             general_pages_orientations = None
             origin_pages_orientations = None
         if self.straighten_pages:
-            pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations)
+            pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations)
+            # update page shapes after straightening
+            origin_page_shapes = [page.shape[:2] for page in pages]
+
         # Forward again to get predictions on straight pages
         loc_preds = self.det_predictor(pages, **kwargs)
 
-        dict_loc_preds:
+        dict_loc_preds: dict[str, list[np.ndarray]] = invert_data_structure(loc_preds)  # type: ignore[assignment]
 
         # Detach objectness scores from loc_preds
         objectness_scores = {}
@@ -122,10 +124,11 @@ class KIEPredictor(nn.Module, _KIEPredictor):
         crops = {}
         for class_name in dict_loc_preds.keys():
             crops[class_name], dict_loc_preds[class_name] = self._prepare_crops(
-                pages,
+                pages,
                 dict_loc_preds[class_name],
                 channels_last=channels_last,
                 assume_straight_pages=self.assume_straight_pages,
+                assume_horizontal=self._page_orientation_disabled,
             )
         # Rectify crop orientation
         crop_orientations: Any = {}
@@ -146,18 +149,18 @@ class KIEPredictor(nn.Module, _KIEPredictor):
         if not crop_orientations:
             crop_orientations = {k: [{"value": 0, "confidence": None} for _ in word_preds[k]] for k in word_preds}
 
-        boxes:
-        text_preds:
-        word_crop_orientations:
+        boxes: dict = {}
+        text_preds: dict = {}
+        word_crop_orientations: dict = {}
         for class_name in dict_loc_preds.keys():
             boxes[class_name], text_preds[class_name], word_crop_orientations[class_name] = self._process_predictions(
                 dict_loc_preds[class_name], word_preds[class_name], crop_orientations[class_name]
             )
 
-        boxes_per_page:
-        objectness_scores_per_page:
-        text_preds_per_page:
-        crop_orientations_per_page:
+        boxes_per_page: list[dict] = invert_data_structure(boxes)  # type: ignore[assignment]
+        objectness_scores_per_page: list[dict] = invert_data_structure(objectness_scores)  # type: ignore[assignment]
+        text_preds_per_page: list[dict] = invert_data_structure(text_preds)  # type: ignore[assignment]
+        crop_orientations_per_page: list[dict] = invert_data_structure(word_crop_orientations)  # type: ignore[assignment]
 
         if self.detect_language:
             languages = [get_language(self.get_text(text_pred)) for text_pred in text_preds_per_page]
@@ -166,7 +169,7 @@ class KIEPredictor(nn.Module, _KIEPredictor):
             languages_dict = None
 
         out = self.doc_builder(
-            pages,
+            pages,
             boxes_per_page,
             objectness_scores_per_page,
             text_preds_per_page,
@@ -178,7 +181,7 @@ class KIEPredictor(nn.Module, _KIEPredictor):
         return out
 
     @staticmethod
-    def get_text(text_pred:
+    def get_text(text_pred: dict) -> str:
         text = []
         for value in text_pred.values():
             text += [item[0] for item in value]
```
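The new annotations make the role of `invert_data_structure` explicit: per-class collections are regrouped per page (and back). A rough, hypothetical re-implementation of the dict-of-lists to list-of-dicts direction used for the `*_per_page` variables above, for illustration only:

```python
# Hypothetical sketch; doctr's own helper also handles the reverse direction
# (list of per-page dicts -> dict of per-class lists).
def invert_data_structure(data: dict[str, list]) -> list[dict]:
    n_pages = len(next(iter(data.values()))) if data else 0
    return [{key: values[page] for key, values in data.items()} for page in range(n_pages)]


# Per-class results for two pages ...
boxes = {"words": ["w0", "w1"], "dates": ["d0", "d1"]}
# ... regrouped as one dict per page:
print(invert_data_structure(boxes))
# [{'words': 'w0', 'dates': 'd0'}, {'words': 'w1', 'dates': 'd1'}]
```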
```diff
--- doctr/models/kie_predictor/tensorflow.py (0.9.0)
+++ doctr/models/kie_predictor/tensorflow.py (0.11.0)
@@ -1,9 +1,9 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any
+from typing import Any
 
 import numpy as np
 import tensorflow as tf
@@ -24,7 +24,6 @@ class KIEPredictor(NestedObject, _KIEPredictor):
     """Implements an object able to localize and identify text elements in a set of documents
 
     Args:
-    ----
         det_predictor: detection module
         reco_predictor: recognition module
         assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
@@ -69,7 +68,7 @@ class KIEPredictor(NestedObject, _KIEPredictor):
 
     def __call__(
         self,
-        pages:
+        pages: list[np.ndarray | tf.Tensor],
         **kwargs: Any,
     ) -> Document:
         # Dimension check
@@ -99,10 +98,13 @@ class KIEPredictor(NestedObject, _KIEPredictor):
             origin_pages_orientations = None
         if self.straighten_pages:
             pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations)
+            # update page shapes after straightening
+            origin_page_shapes = [page.shape[:2] for page in pages]
+
         # Forward again to get predictions on straight pages
-        loc_preds = self.det_predictor(pages, **kwargs)
+        loc_preds = self.det_predictor(pages, **kwargs)
 
-        dict_loc_preds:
+        dict_loc_preds: dict[str, list[np.ndarray]] = invert_data_structure(loc_preds)  # type: ignore
 
         # Detach objectness scores from loc_preds
         objectness_scores = {}
@@ -119,7 +121,11 @@ class KIEPredictor(NestedObject, _KIEPredictor):
         crops = {}
         for class_name in dict_loc_preds.keys():
             crops[class_name], dict_loc_preds[class_name] = self._prepare_crops(
-                pages,
+                pages,
+                dict_loc_preds[class_name],
+                channels_last=True,
+                assume_straight_pages=self.assume_straight_pages,
+                assume_horizontal=self._page_orientation_disabled,
             )
 
         # Rectify crop orientation
@@ -141,18 +147,18 @@ class KIEPredictor(NestedObject, _KIEPredictor):
         if not crop_orientations:
             crop_orientations = {k: [{"value": 0, "confidence": None} for _ in word_preds[k]] for k in word_preds}
 
-        boxes:
-        text_preds:
-        word_crop_orientations:
+        boxes: dict = {}
+        text_preds: dict = {}
+        word_crop_orientations: dict = {}
         for class_name in dict_loc_preds.keys():
             boxes[class_name], text_preds[class_name], word_crop_orientations[class_name] = self._process_predictions(
                 dict_loc_preds[class_name], word_preds[class_name], crop_orientations[class_name]
             )
 
-        boxes_per_page:
-        objectness_scores_per_page:
-        text_preds_per_page:
-        crop_orientations_per_page:
+        boxes_per_page: list[dict] = invert_data_structure(boxes)  # type: ignore[assignment]
+        objectness_scores_per_page: list[dict] = invert_data_structure(objectness_scores)  # type: ignore[assignment]
+        text_preds_per_page: list[dict] = invert_data_structure(text_preds)  # type: ignore[assignment]
+        crop_orientations_per_page: list[dict] = invert_data_structure(word_crop_orientations)  # type: ignore[assignment]
 
         if self.detect_language:
             languages = [get_language(self.get_text(text_pred)) for text_pred in text_preds_per_page]
@@ -173,7 +179,7 @@ class KIEPredictor(NestedObject, _KIEPredictor):
         return out
 
     @staticmethod
-    def get_text(text_pred:
+    def get_text(text_pred: dict) -> str:
         text = []
         for value in text_pred.values():
             text += [item[0] for item in value]
```
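Both backends now refresh `origin_page_shapes` after `_straighten_pages`, since deskewing a page can change or swap its height and width, and shapes recorded before straightening would no longer describe the images handed to the document builder. A tiny numpy illustration of the effect (a sketch, not doctr's rotation code):

```python
import numpy as np

# A landscape 'page' before straightening ...
page = np.zeros((200, 400, 3), dtype=np.uint8)
print(page.shape[:2])  # (200, 400)

# ... after a 90-degree straightening rotation the height and width swap,
# so shapes captured earlier would be stale.
straightened = np.rot90(page)
print(straightened.shape[:2])  # (400, 200)
```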
```diff
--- doctr/models/modules/layers/__init__.py (0.9.0)
+++ doctr/models/modules/layers/__init__.py (0.11.0)
@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
 
-if is_tf_available():
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
     from .tensorflow import *
-elif is_torch_available():
-    from .pytorch import *  # type: ignore[assignment]
```
```diff
--- doctr/models/modules/layers/pytorch.py (0.9.0)
+++ doctr/models/modules/layers/pytorch.py (0.11.0)
@@ -1,9 +1,8 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Tuple, Union
 
 import numpy as np
 import torch
@@ -19,7 +18,7 @@ class FASTConvLayer(nn.Module):
         self,
         in_channels: int,
         out_channels: int,
-        kernel_size:
+        kernel_size: int | tuple[int, int],
         stride: int = 1,
         dilation: int = 1,
         groups: int = 1,
@@ -93,9 +92,7 @@
 
     # The following logic is used to reparametrize the layer
    # Borrowed from: https://github.com/czczup/FAST/blob/main/models/utils/nas_utils.py
-    def _identity_to_conv(
-        self, identity: Union[nn.BatchNorm2d, None]
-    ) -> Union[Tuple[torch.Tensor, torch.Tensor], Tuple[int, int]]:
+    def _identity_to_conv(self, identity: nn.BatchNorm2d | None) -> tuple[torch.Tensor, torch.Tensor] | tuple[int, int]:
         if identity is None or identity.running_var is None:
             return 0, 0
         if not hasattr(self, "id_tensor"):
@@ -106,18 +103,18 @@
             id_tensor = torch.from_numpy(kernel_value).to(identity.weight.device)
             self.id_tensor = self._pad_to_mxn_tensor(id_tensor)
         kernel = self.id_tensor
-        std = (identity.running_var + identity.eps).sqrt()
+        std = (identity.running_var + identity.eps).sqrt()  # type: ignore
         t = (identity.weight / std).reshape(-1, 1, 1, 1)
         return kernel * t, identity.bias - identity.running_mean * identity.weight / std
 
-    def _fuse_bn_tensor(self, conv: nn.Conv2d, bn: nn.BatchNorm2d) ->
+    def _fuse_bn_tensor(self, conv: nn.Conv2d, bn: nn.BatchNorm2d) -> tuple[torch.Tensor, torch.Tensor]:
         kernel = conv.weight
         kernel = self._pad_to_mxn_tensor(kernel)
         std = (bn.running_var + bn.eps).sqrt()  # type: ignore
         t = (bn.weight / std).reshape(-1, 1, 1, 1)
         return kernel * t, bn.bias - bn.running_mean * bn.weight / std
 
-    def _get_equivalent_kernel_bias(self) ->
+    def _get_equivalent_kernel_bias(self) -> tuple[torch.Tensor, torch.Tensor]:
         kernel_mxn, bias_mxn = self._fuse_bn_tensor(self.conv, self.bn)
         if self.ver_conv is not None:
             kernel_mx1, bias_mx1 = self._fuse_bn_tensor(self.ver_conv, self.ver_bn)  # type: ignore[arg-type]
```
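`_fuse_bn_tensor` and `_get_equivalent_kernel_bias` implement the usual conv + BatchNorm folding used to reparametrize `FASTConvLayer` for inference: the BN scale is absorbed into the kernel and the BN shift into the bias. A standalone PyTorch check of that identity (my own sketch, not doctr's code):

```python
import torch
from torch import nn

torch.manual_seed(0)
conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(8).eval()
bn.running_mean.uniform_(-1.0, 1.0)  # give BN non-trivial running statistics
bn.running_var.uniform_(0.5, 1.5)

# Fold BN into the convolution: w' = w * gamma / std, b' = beta - mean * gamma / std
std = (bn.running_var + bn.eps).sqrt()
fused_weight = conv.weight * (bn.weight / std).reshape(-1, 1, 1, 1)
fused_bias = bn.bias - bn.running_mean * bn.weight / std

fused = nn.Conv2d(3, 8, kernel_size=3, padding=1)
with torch.no_grad():
    fused.weight.copy_(fused_weight)
    fused.bias.copy_(fused_bias)

x = torch.randn(1, 3, 16, 16)
with torch.no_grad():
    # The folded convolution reproduces bn(conv(x)) up to float precision.
    print(torch.allclose(bn(conv(x)), fused(x), atol=1e-5))  # True
```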
```diff
--- doctr/models/modules/layers/tensorflow.py (0.9.0)
+++ doctr/models/modules/layers/tensorflow.py (0.11.0)
@@ -1,9 +1,9 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any
+from typing import Any
 
 import numpy as np
 import tensorflow as tf
@@ -21,7 +21,7 @@ class FASTConvLayer(layers.Layer, NestedObject):
         self,
         in_channels: int,
         out_channels: int,
-        kernel_size:
+        kernel_size: int | tuple[int, int],
         stride: int = 1,
         dilation: int = 1,
         groups: int = 1,
@@ -103,9 +103,7 @@ class FASTConvLayer(layers.Layer, NestedObject):
 
     # The following logic is used to reparametrize the layer
     # Adapted from: https://github.com/mindee/doctr/blob/main/doctr/models/modules/layers/pytorch.py
-    def _identity_to_conv(
-        self, identity: layers.BatchNormalization
-    ) -> Union[Tuple[tf.Tensor, tf.Tensor], Tuple[int, int]]:
+    def _identity_to_conv(self, identity: layers.BatchNormalization) -> tuple[tf.Tensor, tf.Tensor] | tuple[int, int]:
         if identity is None or not hasattr(identity, "moving_mean") or not hasattr(identity, "moving_variance"):
             return 0, 0
         if not hasattr(self, "id_tensor"):
@@ -120,7 +118,7 @@ class FASTConvLayer(layers.Layer, NestedObject):
         t = tf.reshape(identity.gamma / std, (1, 1, 1, -1))
         return kernel * t, identity.beta - identity.moving_mean * identity.gamma / std
 
-    def _fuse_bn_tensor(self, conv: layers.Conv2D, bn: layers.BatchNormalization) ->
+    def _fuse_bn_tensor(self, conv: layers.Conv2D, bn: layers.BatchNormalization) -> tuple[tf.Tensor, tf.Tensor]:
         kernel = conv.kernel
         kernel = self._pad_to_mxn_tensor(kernel)
         std = tf.sqrt(bn.moving_variance + bn.epsilon)
```
```diff
--- doctr/models/modules/transformer/__init__.py (0.9.0)
+++ doctr/models/modules/transformer/__init__.py (0.11.0)
@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
 
-if is_tf_available():
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
     from .tensorflow import *
-elif is_torch_available():
-    from .pytorch import *  # type: ignore[assignment]
```
```diff
--- doctr/models/modules/transformer/pytorch.py (0.9.0)
+++ doctr/models/modules/transformer/pytorch.py (0.11.0)
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,7 +6,8 @@
 # This module 'transformer.py' is inspired by https://github.com/wenwenyu/MASTER-pytorch and Decoder is borrowed
 
 import math
-from
+from collections.abc import Callable
+from typing import Any
 
 import torch
 from torch import nn
@@ -33,26 +34,24 @@ class PositionalEncoding(nn.Module):
         """Forward pass
 
         Args:
-        ----
             x: embeddings (batch, max_len, d_model)
 
-        Returns
-        -------
+        Returns:
             positional embeddings (batch, max_len, d_model)
         """
-        x = x + self.pe[:, : x.size(1)]
+        x = x + self.pe[:, : x.size(1)]  # type: ignore[index]
         return self.dropout(x)
 
 
 def scaled_dot_product_attention(
-    query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, mask:
-) ->
+    query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, mask: torch.Tensor | None = None
+) -> tuple[torch.Tensor, torch.Tensor]:
     """Scaled Dot-Product Attention"""
     scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))
     if mask is not None:
         # NOTE: to ensure the ONNX compatibility, masked_fill works only with int equal condition
-        scores = scores.masked_fill(mask == 0, float("-inf"))
-    p_attn = torch.softmax(scores, dim=-1)
+        scores = scores.masked_fill(mask == 0, float("-inf"))  # type: ignore[attr-defined]
+    p_attn = torch.softmax(scores, dim=-1)  # type: ignore[call-overload]
     return torch.matmul(p_attn, value), p_attn
 
 
@@ -130,7 +129,7 @@ class EncoderBlock(nn.Module):
             PositionwiseFeedForward(d_model, dff, dropout, activation_fct) for _ in range(self.num_layers)
         ])
 
-    def forward(self, x: torch.Tensor, mask:
+    def forward(self, x: torch.Tensor, mask: torch.Tensor | None = None) -> torch.Tensor:
         output = x
 
         for i in range(self.num_layers):
@@ -183,8 +182,8 @@ class Decoder(nn.Module):
         self,
         tgt: torch.Tensor,
         memory: torch.Tensor,
-        source_mask:
-        target_mask:
+        source_mask: torch.Tensor | None = None,
+        target_mask: torch.Tensor | None = None,
     ) -> torch.Tensor:
         tgt = self.embed(tgt) * math.sqrt(self.d_model)
         pos_enc_tgt = self.positional_encoding(tgt)
```
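The `scaled_dot_product_attention` helper touched above is the textbook formulation: scores are `QK^T / sqrt(d)`, and masked positions are set to `-inf` before the softmax so they receive zero attention weight. A compact PyTorch sketch mirroring the signature shown in the hunk:

```python
import math

import torch


def scaled_dot_product_attention(query, key, value, mask=None):
    # scores has shape (batch, heads, query_len, key_len)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))
    if mask is not None:
        # keys where mask == 0 get -inf scores and thus ~0 attention weight
        scores = scores.masked_fill(mask == 0, float("-inf"))
    p_attn = torch.softmax(scores, dim=-1)
    return torch.matmul(p_attn, value), p_attn


q = k = v = torch.randn(1, 2, 4, 8)               # (batch, heads, seq, dim)
mask = torch.ones(1, 1, 4, 4, dtype=torch.int32)
mask[..., -1] = 0                                  # hide the last key position
out, attn = scaled_dot_product_attention(q, k, v, mask)
print(out.shape, attn[..., -1].abs().max())        # weights on the masked key are 0
```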
```diff
--- doctr/models/modules/transformer/tensorflow.py (0.9.0)
+++ doctr/models/modules/transformer/tensorflow.py (0.11.0)
@@ -1,10 +1,11 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import math
-from
+from collections.abc import Callable
+from typing import Any
 
 import tensorflow as tf
 from tensorflow.keras import layers
@@ -13,8 +14,6 @@ from doctr.utils.repr import NestedObject
 
 __all__ = ["Decoder", "PositionalEncoding", "EncoderBlock", "PositionwiseFeedForward", "MultiHeadAttention"]
 
-tf.config.run_functions_eagerly(True)
-
 
 class PositionalEncoding(layers.Layer, NestedObject):
     """Compute positional encoding"""
@@ -45,12 +44,10 @@ class PositionalEncoding(layers.Layer, NestedObject):
         """Forward pass
 
         Args:
-        ----
             x: embeddings (batch, max_len, d_model)
             **kwargs: additional arguments
 
-        Returns
-        -------
+        Returns:
             positional embeddings (batch, max_len, d_model)
         """
         if x.dtype == tf.float16:  # amp fix: cast to half
@@ -62,8 +59,8 @@
 
 @tf.function
 def scaled_dot_product_attention(
-    query: tf.Tensor, key: tf.Tensor, value: tf.Tensor, mask:
-) ->
+    query: tf.Tensor, key: tf.Tensor, value: tf.Tensor, mask: tf.Tensor | None = None
+) -> tuple[tf.Tensor, tf.Tensor]:
     """Scaled Dot-Product Attention"""
     scores = tf.matmul(query, tf.transpose(key, perm=[0, 1, 3, 2])) / math.sqrt(query.shape[-1])
     if mask is not None:
@@ -162,7 +159,7 @@ class EncoderBlock(layers.Layer, NestedObject):
             PositionwiseFeedForward(d_model, dff, dropout, activation_fct) for _ in range(self.num_layers)
         ]
 
-    def call(self, x: tf.Tensor, mask:
+    def call(self, x: tf.Tensor, mask: tf.Tensor | None = None, **kwargs: Any) -> tf.Tensor:
         output = x
 
         for i in range(self.num_layers):
@@ -212,8 +209,8 @@ class Decoder(layers.Layer, NestedObject):
         self,
         tgt: tf.Tensor,
         memory: tf.Tensor,
-        source_mask:
-        target_mask:
+        source_mask: tf.Tensor | None = None,
+        target_mask: tf.Tensor | None = None,
         **kwargs: Any,
     ) -> tf.Tensor:
         tgt = self.embed(tgt, **kwargs) * math.sqrt(self.d_model)
```
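The `PositionalEncoding` layers patched in both backends add a precomputed sinusoidal table to the input embeddings (`x = x + self.pe[:, : x.size(1)]` in the PyTorch hunk). A small numpy sketch of how such a table is commonly built; doctr's exact construction may differ in detail:

```python
import numpy as np


def sinusoidal_position_encoding(max_len: int, d_model: int) -> np.ndarray:
    # pe[pos, 2i]   = sin(pos / 10000^(2i / d_model))
    # pe[pos, 2i+1] = cos(pos / 10000^(2i / d_model))
    position = np.arange(max_len)[:, None]
    div_term = np.exp(np.arange(0, d_model, 2) * (-np.log(10000.0) / d_model))
    pe = np.zeros((max_len, d_model))
    pe[:, 0::2] = np.sin(position * div_term)
    pe[:, 1::2] = np.cos(position * div_term)
    return pe[None]  # leading batch axis, like self.pe


x = np.random.rand(2, 10, 32)              # (batch, max_len, d_model)
pe = sinusoidal_position_encoding(100, 32)
out = x + pe[:, : x.shape[1]]              # same slicing as in the hunk above
print(out.shape)                           # (2, 10, 32)
```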
```diff
--- doctr/models/modules/vision_transformer/__init__.py (0.9.0)
+++ doctr/models/modules/vision_transformer/__init__.py (0.11.0)
@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
 
-if is_tf_available():
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
     from .tensorflow import *
-elif is_torch_available():
-    from .pytorch import *  # type: ignore[assignment]
```
```diff
--- doctr/models/modules/vision_transformer/pytorch.py (0.9.0)
+++ doctr/models/modules/vision_transformer/pytorch.py (0.11.0)
@@ -1,10 +1,9 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import math
-from typing import Tuple
 
 import torch
 from torch import nn
@@ -15,12 +14,12 @@ __all__ = ["PatchEmbedding"]
 class PatchEmbedding(nn.Module):
     """Compute 2D patch embeddings with cls token and positional encoding"""
 
-    def __init__(self, input_shape:
+    def __init__(self, input_shape: tuple[int, int, int], embed_dim: int, patch_size: tuple[int, int]) -> None:
         super().__init__()
         channels, height, width = input_shape
         self.patch_size = patch_size
         self.interpolate = True if patch_size[0] == patch_size[1] else False
-        self.grid_size = tuple(
+        self.grid_size = tuple(s // p for s, p in zip((height, width), self.patch_size))
         self.num_patches = self.grid_size[0] * self.grid_size[1]
 
         self.cls_token = nn.Parameter(torch.randn(1, 1, embed_dim))
```
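The rewritten `self.grid_size` line spells out the patch-grid computation: height and width are integer-divided by the corresponding patch dimension, and `num_patches` is the product of the two. A quick worked example with illustrative values (not taken from a doctr config):

```python
# e.g. a (channels, height, width) = (3, 32, 128) input split into (4, 8) patches
height, width = 32, 128
patch_size = (4, 8)

grid_size = tuple(s // p for s, p in zip((height, width), patch_size))
num_patches = grid_size[0] * grid_size[1]

print(grid_size, num_patches)  # (8, 16) 128
```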
```diff
--- doctr/models/modules/vision_transformer/tensorflow.py (0.9.0)
+++ doctr/models/modules/vision_transformer/tensorflow.py (0.11.0)
@@ -1,10 +1,10 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import math
-from typing import Any
+from typing import Any
 
 import tensorflow as tf
 from tensorflow.keras import layers
@@ -17,12 +17,12 @@ __all__ = ["PatchEmbedding"]
 class PatchEmbedding(layers.Layer, NestedObject):
     """Compute 2D patch embeddings with cls token and positional encoding"""
 
-    def __init__(self, input_shape:
+    def __init__(self, input_shape: tuple[int, int, int], embed_dim: int, patch_size: tuple[int, int]) -> None:
         super().__init__()
         height, width, _ = input_shape
         self.patch_size = patch_size
         self.interpolate = True if patch_size[0] == patch_size[1] else False
-        self.grid_size = tuple(
+        self.grid_size = tuple(s // p for s, p in zip((height, width), self.patch_size))
         self.num_patches = self.grid_size[0] * self.grid_size[1]
 
         self.cls_token = self.add_weight(shape=(1, 1, embed_dim), initializer="zeros", trainable=True, name="cls_token")
```
```diff
--- doctr/models/predictor/__init__.py (0.9.0)
+++ doctr/models/predictor/__init__.py (0.11.0)
@@ -1,6 +1,6 @@
-from doctr.file_utils import is_tf_available
+from doctr.file_utils import is_tf_available, is_torch_available
 
-if is_tf_available():
-    from .tensorflow import *
-else:
-    from .pytorch import *  # type: ignore[assignment]
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *  # type: ignore[assignment]
```