python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in the public registry.
- doctr/contrib/__init__.py +1 -0
- doctr/contrib/artefacts.py +7 -9
- doctr/contrib/base.py +8 -17
- doctr/datasets/cord.py +17 -7
- doctr/datasets/datasets/__init__.py +4 -4
- doctr/datasets/datasets/base.py +16 -16
- doctr/datasets/datasets/pytorch.py +12 -12
- doctr/datasets/datasets/tensorflow.py +10 -10
- doctr/datasets/detection.py +6 -9
- doctr/datasets/doc_artefacts.py +3 -4
- doctr/datasets/funsd.py +17 -6
- doctr/datasets/generator/__init__.py +4 -4
- doctr/datasets/generator/base.py +16 -17
- doctr/datasets/generator/pytorch.py +1 -3
- doctr/datasets/generator/tensorflow.py +1 -3
- doctr/datasets/ic03.py +14 -5
- doctr/datasets/ic13.py +13 -5
- doctr/datasets/iiit5k.py +31 -20
- doctr/datasets/iiithws.py +4 -5
- doctr/datasets/imgur5k.py +15 -5
- doctr/datasets/loader.py +4 -7
- doctr/datasets/mjsynth.py +6 -5
- doctr/datasets/ocr.py +3 -4
- doctr/datasets/orientation.py +3 -4
- doctr/datasets/recognition.py +3 -4
- doctr/datasets/sroie.py +16 -5
- doctr/datasets/svhn.py +16 -5
- doctr/datasets/svt.py +14 -5
- doctr/datasets/synthtext.py +14 -5
- doctr/datasets/utils.py +37 -27
- doctr/datasets/vocabs.py +21 -7
- doctr/datasets/wildreceipt.py +25 -10
- doctr/file_utils.py +18 -4
- doctr/io/elements.py +69 -81
- doctr/io/html.py +1 -3
- doctr/io/image/__init__.py +3 -3
- doctr/io/image/base.py +2 -5
- doctr/io/image/pytorch.py +3 -12
- doctr/io/image/tensorflow.py +2 -11
- doctr/io/pdf.py +5 -7
- doctr/io/reader.py +5 -11
- doctr/models/_utils.py +14 -22
- doctr/models/builder.py +32 -50
- doctr/models/classification/magc_resnet/__init__.py +3 -3
- doctr/models/classification/magc_resnet/pytorch.py +10 -13
- doctr/models/classification/magc_resnet/tensorflow.py +21 -17
- doctr/models/classification/mobilenet/__init__.py +3 -3
- doctr/models/classification/mobilenet/pytorch.py +7 -17
- doctr/models/classification/mobilenet/tensorflow.py +22 -29
- doctr/models/classification/predictor/__init__.py +4 -4
- doctr/models/classification/predictor/pytorch.py +13 -11
- doctr/models/classification/predictor/tensorflow.py +13 -11
- doctr/models/classification/resnet/__init__.py +4 -4
- doctr/models/classification/resnet/pytorch.py +21 -31
- doctr/models/classification/resnet/tensorflow.py +41 -39
- doctr/models/classification/textnet/__init__.py +3 -3
- doctr/models/classification/textnet/pytorch.py +10 -17
- doctr/models/classification/textnet/tensorflow.py +19 -20
- doctr/models/classification/vgg/__init__.py +3 -3
- doctr/models/classification/vgg/pytorch.py +5 -7
- doctr/models/classification/vgg/tensorflow.py +18 -15
- doctr/models/classification/vit/__init__.py +3 -3
- doctr/models/classification/vit/pytorch.py +8 -14
- doctr/models/classification/vit/tensorflow.py +16 -16
- doctr/models/classification/zoo.py +36 -19
- doctr/models/core.py +3 -3
- doctr/models/detection/_utils/__init__.py +4 -4
- doctr/models/detection/_utils/base.py +4 -7
- doctr/models/detection/_utils/pytorch.py +1 -5
- doctr/models/detection/_utils/tensorflow.py +1 -5
- doctr/models/detection/core.py +2 -8
- doctr/models/detection/differentiable_binarization/__init__.py +4 -4
- doctr/models/detection/differentiable_binarization/base.py +7 -17
- doctr/models/detection/differentiable_binarization/pytorch.py +27 -30
- doctr/models/detection/differentiable_binarization/tensorflow.py +49 -37
- doctr/models/detection/fast/__init__.py +4 -4
- doctr/models/detection/fast/base.py +6 -14
- doctr/models/detection/fast/pytorch.py +24 -31
- doctr/models/detection/fast/tensorflow.py +28 -37
- doctr/models/detection/linknet/__init__.py +4 -4
- doctr/models/detection/linknet/base.py +6 -15
- doctr/models/detection/linknet/pytorch.py +24 -27
- doctr/models/detection/linknet/tensorflow.py +36 -33
- doctr/models/detection/predictor/__init__.py +5 -5
- doctr/models/detection/predictor/pytorch.py +6 -7
- doctr/models/detection/predictor/tensorflow.py +7 -8
- doctr/models/detection/zoo.py +27 -7
- doctr/models/factory/hub.py +8 -13
- doctr/models/kie_predictor/__init__.py +5 -5
- doctr/models/kie_predictor/base.py +8 -5
- doctr/models/kie_predictor/pytorch.py +22 -19
- doctr/models/kie_predictor/tensorflow.py +21 -15
- doctr/models/modules/layers/__init__.py +3 -3
- doctr/models/modules/layers/pytorch.py +6 -9
- doctr/models/modules/layers/tensorflow.py +5 -7
- doctr/models/modules/transformer/__init__.py +3 -3
- doctr/models/modules/transformer/pytorch.py +12 -13
- doctr/models/modules/transformer/tensorflow.py +9 -12
- doctr/models/modules/vision_transformer/__init__.py +3 -3
- doctr/models/modules/vision_transformer/pytorch.py +3 -4
- doctr/models/modules/vision_transformer/tensorflow.py +4 -4
- doctr/models/predictor/__init__.py +5 -5
- doctr/models/predictor/base.py +52 -41
- doctr/models/predictor/pytorch.py +16 -13
- doctr/models/predictor/tensorflow.py +16 -10
- doctr/models/preprocessor/__init__.py +4 -4
- doctr/models/preprocessor/pytorch.py +13 -17
- doctr/models/preprocessor/tensorflow.py +11 -15
- doctr/models/recognition/core.py +3 -7
- doctr/models/recognition/crnn/__init__.py +4 -4
- doctr/models/recognition/crnn/pytorch.py +20 -28
- doctr/models/recognition/crnn/tensorflow.py +19 -29
- doctr/models/recognition/master/__init__.py +3 -3
- doctr/models/recognition/master/base.py +3 -7
- doctr/models/recognition/master/pytorch.py +22 -24
- doctr/models/recognition/master/tensorflow.py +21 -26
- doctr/models/recognition/parseq/__init__.py +3 -3
- doctr/models/recognition/parseq/base.py +3 -7
- doctr/models/recognition/parseq/pytorch.py +26 -26
- doctr/models/recognition/parseq/tensorflow.py +26 -30
- doctr/models/recognition/predictor/__init__.py +5 -5
- doctr/models/recognition/predictor/_utils.py +7 -10
- doctr/models/recognition/predictor/pytorch.py +6 -6
- doctr/models/recognition/predictor/tensorflow.py +5 -6
- doctr/models/recognition/sar/__init__.py +4 -4
- doctr/models/recognition/sar/pytorch.py +20 -21
- doctr/models/recognition/sar/tensorflow.py +19 -24
- doctr/models/recognition/utils.py +5 -10
- doctr/models/recognition/vitstr/__init__.py +4 -4
- doctr/models/recognition/vitstr/base.py +3 -7
- doctr/models/recognition/vitstr/pytorch.py +18 -20
- doctr/models/recognition/vitstr/tensorflow.py +21 -24
- doctr/models/recognition/zoo.py +22 -11
- doctr/models/utils/__init__.py +4 -4
- doctr/models/utils/pytorch.py +13 -16
- doctr/models/utils/tensorflow.py +31 -30
- doctr/models/zoo.py +1 -5
- doctr/transforms/functional/__init__.py +3 -3
- doctr/transforms/functional/base.py +4 -11
- doctr/transforms/functional/pytorch.py +21 -29
- doctr/transforms/functional/tensorflow.py +10 -22
- doctr/transforms/modules/__init__.py +4 -4
- doctr/transforms/modules/base.py +48 -55
- doctr/transforms/modules/pytorch.py +65 -28
- doctr/transforms/modules/tensorflow.py +33 -44
- doctr/utils/common_types.py +8 -9
- doctr/utils/data.py +8 -12
- doctr/utils/fonts.py +2 -7
- doctr/utils/geometry.py +120 -64
- doctr/utils/metrics.py +18 -38
- doctr/utils/multithreading.py +4 -6
- doctr/utils/reconstitution.py +157 -75
- doctr/utils/repr.py +2 -3
- doctr/utils/visualization.py +16 -29
- doctr/version.py +1 -1
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/METADATA +59 -57
- python_doctr-0.11.0.dist-info/RECORD +173 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/WHEEL +1 -1
- python_doctr-0.9.0.dist-info/RECORD +0 -173
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/LICENSE +0 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/zip-safe +0 -0
doctr/models/classification/mobilenet/pytorch.py

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,15 +6,17 @@
 # Greatly inspired by https://github.com/pytorch/vision/blob/master/torchvision/models/mobilenetv3.py
 
 from copy import deepcopy
-from typing import Any, Dict, List, Optional
+from typing import Any
 
 from torchvision.models import mobilenetv3
+from torchvision.models.mobilenetv3 import MobileNetV3
 
 from doctr.datasets import VOCABS
 
 from ...utils import load_pretrained_params
 
 __all__ = [
+    "MobileNetV3",
     "mobilenet_v3_small",
     "mobilenet_v3_small_r",
     "mobilenet_v3_large",
@@ -23,7 +25,7 @@ __all__ = [
     "mobilenet_v3_small_page_orientation",
 ]
 
-default_cfgs: Dict[str, Dict[str, Any]] = {
+default_cfgs: dict[str, dict[str, Any]] = {
     "mobilenet_v3_large": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
@@ -72,8 +74,8 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
 def _mobilenet_v3(
     arch: str,
     pretrained: bool,
-    rect_strides: Optional[List[str]] = None,
-    ignore_keys: Optional[List[str]] = None,
+    rect_strides: list[str] | None = None,
+    ignore_keys: list[str] | None = None,
     **kwargs: Any,
 ) -> mobilenetv3.MobileNetV3:
     kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
@@ -121,12 +123,10 @@ def mobilenet_v3_small(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.MobileNetV3:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
@@ -146,12 +146,10 @@ def mobilenet_v3_small_r(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.MobileNetV3:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
@@ -175,12 +173,10 @@ def mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.MobileNetV3:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
@@ -203,12 +199,10 @@ def mobilenet_v3_large_r(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.MobileNetV3:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
@@ -232,12 +226,10 @@ def mobilenet_v3_small_crop_orientation(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.MobileNetV3:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
@@ -260,12 +252,10 @@ def mobilenet_v3_small_page_orientation(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.MobileNetV3:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
```
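Most of the churn in this release follows the single mechanical pattern visible above: `typing.Dict`/`List`/`Optional`/`Tuple`/`Union` annotations are rewritten with PEP 585 builtin generics and PEP 604 unions (and, in the resnet module further down, `Callable` moves to `collections.abc`). A minimal before/after sketch of the pattern; the `_make` function is illustrative only, not part of doctr:

```python
from collections.abc import Callable
from typing import Any

# 0.9.0 style (removed): from typing import Any, Callable, Dict, List, Optional
# def _make(arch: str, ignore_keys: Optional[List[str]] = None) -> Dict[str, Any]: ...

# 0.11.0 style: builtin generics (PEP 585) and | unions (PEP 604)
def _make(
    arch: str,
    ignore_keys: list[str] | None = None,
    hook: Callable[[int], int] | None = None,
) -> dict[str, Any]:
    return {"arch": arch, "ignore_keys": ignore_keys, "hook": hook}
```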
doctr/models/classification/mobilenet/tensorflow.py

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,14 +6,14 @@
 # Greatly inspired by https://github.com/pytorch/vision/blob/master/torchvision/models/mobilenetv3.py
 
 from copy import deepcopy
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any
 
 import tensorflow as tf
 from tensorflow.keras import layers
 from tensorflow.keras.models import Sequential
 
 from ....datasets import VOCABS
-from ...utils import conv_sequence, load_pretrained_params
+from ...utils import _build_model, conv_sequence, load_pretrained_params
 
 __all__ = [
     "MobileNetV3",
@@ -26,48 +26,48 @@ __all__ = [
 ]
 
 
-default_cfgs: Dict[str, Dict[str, Any]] = {
+default_cfgs: dict[str, dict[str, Any]] = {
     "mobilenet_v3_large": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
         "input_shape": (32, 32, 3),
         "classes": list(VOCABS["french"]),
-        "url": "https://doctr-static.mindee.com/models?id=v0.
+        "url": "https://doctr-static.mindee.com/models?id=v0.9.0/mobilenet_v3_large-d857506e.weights.h5&src=0",
     },
     "mobilenet_v3_large_r": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
         "input_shape": (32, 32, 3),
         "classes": list(VOCABS["french"]),
-        "url": "https://doctr-static.mindee.com/models?id=v0.
+        "url": "https://doctr-static.mindee.com/models?id=v0.9.0/mobilenet_v3_large_r-eef2e3c6.weights.h5&src=0",
     },
     "mobilenet_v3_small": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
         "input_shape": (32, 32, 3),
         "classes": list(VOCABS["french"]),
-        "url": "https://doctr-static.mindee.com/models?id=v0.
+        "url": "https://doctr-static.mindee.com/models?id=v0.9.0/mobilenet_v3_small-3fcebad7.weights.h5&src=0",
     },
     "mobilenet_v3_small_r": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
         "input_shape": (32, 32, 3),
         "classes": list(VOCABS["french"]),
-        "url": "https://doctr-static.mindee.com/models?id=v0.
+        "url": "https://doctr-static.mindee.com/models?id=v0.9.0/mobilenet_v3_small_r-dd50218d.weights.h5&src=0",
     },
     "mobilenet_v3_small_crop_orientation": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
         "input_shape": (128, 128, 3),
         "classes": [0, -90, 180, 90],
-        "url": "https://doctr-static.mindee.com/models?id=v0.
+        "url": "https://doctr-static.mindee.com/models?id=v0.9.0/mobilenet_v3_small_crop_orientation-ef019b6b.weights.h5&src=0",
     },
     "mobilenet_v3_small_page_orientation": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
         "input_shape": (512, 512, 3),
         "classes": [0, -90, 180, 90],
-        "url": "https://doctr-static.mindee.com/models?id=v0.
+        "url": "https://doctr-static.mindee.com/models?id=v0.9.0/mobilenet_v3_small_page_orientation-0071d55d.weights.h5&src=0",
     },
 }
 
@@ -76,7 +76,7 @@ def hard_swish(x: tf.Tensor) -> tf.Tensor:
     return x * tf.nn.relu6(x + 3.0) / 6.0
 
 
-def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int:
+def _make_divisible(v: float, divisor: int, min_value: int | None = None) -> int:
     if min_value is None:
         min_value = divisor
     new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
@@ -112,7 +112,7 @@ class InvertedResidualConfig:
         out_channels: int,
         use_se: bool,
         activation: str,
-        stride: Union[int, Tuple[int, int]],
+        stride: int | tuple[int, int],
         width_mult: float = 1,
     ) -> None:
         self.input_channels = self.adjust_channels(input_channels, width_mult)
@@ -132,7 +132,6 @@ class InvertedResidual(layers.Layer):
     """InvertedResidual for mobilenet
 
     Args:
-    ----
         conf: configuration object for inverted residual
     """
 
@@ -201,12 +200,12 @@ class MobileNetV3(Sequential):
 
     def __init__(
         self,
-        layout: List[InvertedResidualConfig],
+        layout: list[InvertedResidualConfig],
         include_top: bool = True,
         head_chans: int = 1024,
         num_classes: int = 1000,
-        cfg: Optional[Dict[str, Any]] = None,
-        input_shape: Optional[Tuple[int, int, int]] = None,
+        cfg: dict[str, Any] | None = None,
+        input_shape: tuple[int, int, int] | None = None,
     ) -> None:
         _layers = [
             Sequential(
@@ -295,9 +294,15 @@ def _mobilenet_v3(arch: str, pretrained: bool, rect_strides: bool = False, **kwargs: Any) -> MobileNetV3:
         cfg=_cfg,
         **kwargs,
     )
+    _build_model(model)
+
     # Load pretrained parameters
     if pretrained:
-        load_pretrained_params(model, default_cfgs[arch]["url"])
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # skip the mismatching layers for fine tuning
+        load_pretrained_params(
+            model, default_cfgs[arch]["url"], skip_mismatch=kwargs["num_classes"] != len(default_cfgs[arch]["classes"])
+        )
 
     return model
 
@@ -314,12 +319,10 @@ def mobilenet_v3_small(pretrained: bool = False, **kwargs: Any) -> MobileNetV3:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_small", pretrained, False, **kwargs)
@@ -337,12 +340,10 @@ def mobilenet_v3_small_r(pretrained: bool = False, **kwargs: Any) -> MobileNetV3:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_small_r", pretrained, True, **kwargs)
@@ -360,12 +361,10 @@ def mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> MobileNetV3:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_large", pretrained, False, **kwargs)
@@ -383,12 +382,10 @@ def mobilenet_v3_large_r(pretrained: bool = False, **kwargs: Any) -> MobileNetV3:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_large_r", pretrained, True, **kwargs)
@@ -406,12 +403,10 @@ def mobilenet_v3_small_crop_orientation(pretrained: bool = False, **kwargs: Any) -> MobileNetV3:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_small_crop_orientation", pretrained, include_top=True, **kwargs)
@@ -429,12 +424,10 @@ def mobilenet_v3_small_page_orientation(pretrained: bool = False, **kwargs: Any) -> MobileNetV3:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_small_page_orientation", pretrained, include_top=True, **kwargs)
```
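Two behavioral additions stand out in the TensorFlow hunk above: `_build_model(model)` now runs before any weight loading, and `load_pretrained_params` gains a `skip_mismatch` flag that is set whenever the requested `num_classes` differs from the pretrained head (the checkpoint URLs also move to v0.9.0 `.weights.h5` artifacts). The sketch below shows the likely intent of `_build_model`; its real implementation lives in doctr/models/utils/tensorflow.py, which is changed in this diff but not shown here, so the body is an assumption:

```python
import tensorflow as tf

def _build_model_sketch(model: tf.keras.Model, input_shape: tuple[int, int, int] = (32, 32, 3)) -> None:
    # Keras creates layer weights lazily; a dummy forward pass materializes
    # every variable so a .weights.h5 checkpoint can be restored onto the model.
    model(tf.zeros((1, *input_shape)), training=False)
```

The `skip_mismatch=kwargs["num_classes"] != len(default_cfgs[arch]["classes"])` expression then enables fine-tuning with a custom vocabulary: backbone weights load from the checkpoint while the now incompatible classification head is skipped instead of raising a shape error.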
doctr/models/classification/predictor/__init__.py

```diff
@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
 
-if is_tf_available():
-    from .tensorflow import *
-elif is_torch_available():
-    from .pytorch import *  # type: ignore[assignment]
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *  # type: ignore[assignment]
```
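The hunk above inverts the backend dispatch order: 0.9.0 imported the TensorFlow implementation first, while 0.11.0 checks for PyTorch first, so PyTorch wins when both frameworks are installed. The same swap appears in the other `__init__.py` dispatchers in this release (see the identical resnet hunk below). To pin a backend explicitly in a dual-framework environment, doctr honors the `USE_TF`/`USE_TORCH` environment variables; a sketch, assuming both frameworks are installed:

```python
import os

# Set the backend before the first doctr import; afterwards it is fixed.
os.environ["USE_TORCH"] = "1"  # or: os.environ["USE_TF"] = "1"

from doctr.models.classification.predictor import OrientationPredictor  # PyTorch variant
```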
doctr/models/classification/predictor/pytorch.py

```diff
@@ -1,9 +1,8 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import List, Union
 
 import numpy as np
 import torch
@@ -20,35 +19,38 @@ class OrientationPredictor(nn.Module):
     4 possible orientations: 0, 90, 180, 270 (-90) degrees counter clockwise.
 
     Args:
-    ----
         pre_processor: transform inputs for easier batched model inference
         model: core classification architecture (backbone + classification head)
     """
 
     def __init__(
         self,
-        pre_processor: PreProcessor,
-        model: nn.Module,
+        pre_processor: PreProcessor | None,
+        model: nn.Module | None,
     ) -> None:
         super().__init__()
-        self.pre_processor = pre_processor
-        self.model = model.eval()
+        self.pre_processor = pre_processor if isinstance(pre_processor, PreProcessor) else None
+        self.model = model.eval() if isinstance(model, nn.Module) else None
 
     @torch.inference_mode()
     def forward(
         self,
-        inputs: List[Union[np.ndarray, torch.Tensor]],
-    ) -> List[Union[List[int], List[float]]]:
+        inputs: list[np.ndarray | torch.Tensor],
+    ) -> list[list[int] | list[float]]:
         # Dimension check
         if any(input.ndim != 3 for input in inputs):
             raise ValueError("incorrect input shape: all inputs are expected to be multi-channel 2D images.")
 
+        if self.model is None or self.pre_processor is None:
+            # predictor is disabled
+            return [[0] * len(inputs), [0] * len(inputs), [1.0] * len(inputs)]
+
         processed_batches = self.pre_processor(inputs)
         _params = next(self.model.parameters())
         self.model, processed_batches = set_device_and_dtype(
             self.model, processed_batches, _params.device, _params.dtype
         )
-        predicted_batches = [self.model(batch) for batch in processed_batches]
+        predicted_batches = [self.model(batch) for batch in processed_batches]  # type: ignore[misc]
         # confidence
         probs = [
             torch.max(torch.softmax(batch, dim=1), dim=1).values.cpu().detach().numpy() for batch in predicted_batches
@@ -57,7 +59,7 @@ class OrientationPredictor(nn.Module):
         predicted_batches = [out_batch.argmax(dim=1).cpu().detach().numpy() for out_batch in predicted_batches]
 
         class_idxs = [int(pred) for batch in predicted_batches for pred in batch]
-        classes = [int(self.model.cfg["classes"][idx]) for idx in class_idxs]
+        classes = [int(self.model.cfg["classes"][idx]) for idx in class_idxs]  # type: ignore
         confs = [round(float(p), 2) for prob in probs for p in prob]
 
         return [class_idxs, classes, confs]
```
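The `OrientationPredictor` changes above make both constructor arguments optional: when either one is `None`, the predictor is disabled and short-circuits to a neutral answer (class index 0, angle 0, confidence 1.0) instead of running inference. A small usage sketch of that behavior, as implied by the new early return; the TensorFlow variant in the next hunk behaves the same way:

```python
import numpy as np

from doctr.models.classification.predictor import OrientationPredictor

# Disabled predictor: no pre-processor, no model.
predictor = OrientationPredictor(pre_processor=None, model=None)

pages = [np.zeros((512, 512, 3), dtype=np.uint8)]
class_idxs, angles, confs = predictor(pages)
print(class_idxs, angles, confs)  # [0] [0] [1.0]
```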
doctr/models/classification/predictor/tensorflow.py

```diff
@@ -1,13 +1,12 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import List, Union
 
 import numpy as np
 import tensorflow as tf
-from tensorflow import keras
+from tensorflow.keras import Model
 
 from doctr.models.preprocessor import PreProcessor
 from doctr.utils.repr import NestedObject
@@ -20,29 +19,32 @@ class OrientationPredictor(NestedObject):
     4 possible orientations: 0, 90, 180, 270 (-90) degrees counter clockwise.
 
     Args:
-    ----
         pre_processor: transform inputs for easier batched model inference
         model: core classification architecture (backbone + classification head)
     """
 
-    _children_names: List[str] = ["pre_processor", "model"]
+    _children_names: list[str] = ["pre_processor", "model"]
 
     def __init__(
         self,
-        pre_processor: PreProcessor,
-        model: keras.Model,
+        pre_processor: PreProcessor | None,
+        model: Model | None,
     ) -> None:
-        self.pre_processor = pre_processor
-        self.model = model
+        self.pre_processor = pre_processor if isinstance(pre_processor, PreProcessor) else None
+        self.model = model if isinstance(model, Model) else None
 
     def __call__(
         self,
-        inputs: List[Union[np.ndarray, tf.Tensor]],
-    ) -> List[Union[List[int], List[float]]]:
+        inputs: list[np.ndarray | tf.Tensor],
+    ) -> list[list[int] | list[float]]:
         # Dimension check
         if any(input.ndim != 3 for input in inputs):
             raise ValueError("incorrect input shape: all inputs are expected to be multi-channel 2D images.")
 
+        if self.model is None or self.pre_processor is None:
+            # predictor is disabled
+            return [[0] * len(inputs), [0] * len(inputs), [1.0] * len(inputs)]
+
         processed_batches = self.pre_processor(inputs)
         predicted_batches = [self.model(batch, training=False) for batch in processed_batches]
 
```
doctr/models/classification/resnet/__init__.py

```diff
@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
 
-if is_tf_available():
-    from .tensorflow import *
-elif is_torch_available():
-    from .pytorch import *  # type: ignore[assignment]
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *  # type: ignore[assignment]
```
doctr/models/classification/resnet/pytorch.py

```diff
@@ -1,11 +1,12 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 
+from collections.abc import Callable
 from copy import deepcopy
-from typing import Any, Callable, Dict, List, Optional, Tuple
+from typing import Any
 
 from torch import nn
 from torchvision.models.resnet import BasicBlock
@@ -21,7 +22,7 @@ from ...utils import conv_sequence_pt, load_pretrained_params
 __all__ = ["ResNet", "resnet18", "resnet31", "resnet34", "resnet50", "resnet34_wide", "resnet_stage"]
 
 
-default_cfgs: Dict[str, Dict[str, Any]] = {
+default_cfgs: dict[str, dict[str, Any]] = {
     "resnet18": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
@@ -60,9 +61,9 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
 }
 
 
-def resnet_stage(in_channels: int, out_channels: int, num_blocks: int, stride: int) -> List[nn.Module]:
+def resnet_stage(in_channels: int, out_channels: int, num_blocks: int, stride: int) -> list[nn.Module]:
     """Build a ResNet stage"""
-    _layers: List[nn.Module] = []
+    _layers: list[nn.Module] = []
 
     in_chan = in_channels
     s = stride
@@ -84,7 +85,6 @@ class ResNet(nn.Sequential):
     Text Recognition" <https://arxiv.org/pdf/1811.00751.pdf>`_.
 
     Args:
-    ----
         num_blocks: number of resnet block in each stage
         output_channels: number of channels in each stage
         stage_conv: whether to add a conv_sequence after each stage
@@ -98,19 +98,19 @@ class ResNet(nn.Sequential):
 
     def __init__(
         self,
-        num_blocks: List[int],
-        output_channels: List[int],
-        stage_stride: List[int],
-        stage_conv: List[bool],
-        stage_pooling: List[Optional[Tuple[int, int]]],
+        num_blocks: list[int],
+        output_channels: list[int],
+        stage_stride: list[int],
+        stage_conv: list[bool],
+        stage_pooling: list[tuple[int, int] | None],
         origin_stem: bool = True,
         stem_channels: int = 64,
-        attn_module: Optional[Callable[[int], nn.Module]] = None,
+        attn_module: Callable[[int], nn.Module] | None = None,
         include_top: bool = True,
         num_classes: int = 1000,
-        cfg: Optional[Dict[str, Any]] = None,
+        cfg: dict[str, Any] | None = None,
     ) -> None:
-        _layers: List[nn.Module]
+        _layers: list[nn.Module]
         if origin_stem:
             _layers = [
                 *conv_sequence_pt(3, stem_channels, True, True, kernel_size=7, padding=3, stride=2),
@@ -156,12 +156,12 @@
 def _resnet(
     arch: str,
     pretrained: bool,
-    num_blocks: List[int],
-    output_channels: List[int],
-    stage_stride: List[int],
-    stage_conv: List[bool],
-    stage_pooling: List[Optional[Tuple[int, int]]],
-    ignore_keys: Optional[List[str]] = None,
+    num_blocks: list[int],
+    output_channels: list[int],
+    stage_stride: list[int],
+    stage_conv: list[bool],
+    stage_pooling: list[tuple[int, int] | None],
+    ignore_keys: list[str] | None = None,
     **kwargs: Any,
 ) -> ResNet:
     kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
@@ -188,7 +188,7 @@ def _tv_resnet(
     arch: str,
     pretrained: bool,
     arch_fn,
-    ignore_keys: Optional[List[str]] = None,
+    ignore_keys: list[str] | None = None,
     **kwargs: Any,
 ) -> TVResNet:
     kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
@@ -224,12 +224,10 @@ def resnet18(pretrained: bool = False, **kwargs: Any) -> TVResNet:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the ResNet architecture
 
     Returns:
-    -------
         A resnet18 model
     """
     return _tv_resnet(
@@ -253,12 +251,10 @@ def resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the ResNet architecture
 
     Returns:
-    -------
         A resnet31 model
     """
     return _resnet(
@@ -287,12 +283,10 @@ def resnet34(pretrained: bool = False, **kwargs: Any) -> TVResNet:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the ResNet architecture
 
     Returns:
-    -------
         A resnet34 model
     """
     return _tv_resnet(
@@ -315,12 +309,10 @@ def resnet34_wide(pretrained: bool = False, **kwargs: Any) -> ResNet:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the ResNet architecture
 
     Returns:
-    -------
         A resnet34_wide model
     """
     return _resnet(
@@ -349,12 +341,10 @@ def resnet50(pretrained: bool = False, **kwargs: Any) -> TVResNet:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the ResNet architecture
 
     Returns:
-    -------
         A resnet50 model
     """
     return _tv_resnet(
```