python-doctr 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- doctr/contrib/__init__.py +1 -0
- doctr/contrib/artefacts.py +7 -9
- doctr/contrib/base.py +8 -17
- doctr/datasets/__init__.py +1 -0
- doctr/datasets/coco_text.py +139 -0
- doctr/datasets/cord.py +10 -8
- doctr/datasets/datasets/__init__.py +4 -4
- doctr/datasets/datasets/base.py +16 -16
- doctr/datasets/datasets/pytorch.py +12 -12
- doctr/datasets/datasets/tensorflow.py +10 -10
- doctr/datasets/detection.py +6 -9
- doctr/datasets/doc_artefacts.py +3 -4
- doctr/datasets/funsd.py +9 -8
- doctr/datasets/generator/__init__.py +4 -4
- doctr/datasets/generator/base.py +16 -17
- doctr/datasets/generator/pytorch.py +1 -3
- doctr/datasets/generator/tensorflow.py +1 -3
- doctr/datasets/ic03.py +5 -6
- doctr/datasets/ic13.py +6 -6
- doctr/datasets/iiit5k.py +10 -6
- doctr/datasets/iiithws.py +4 -5
- doctr/datasets/imgur5k.py +15 -7
- doctr/datasets/loader.py +4 -7
- doctr/datasets/mjsynth.py +6 -5
- doctr/datasets/ocr.py +3 -4
- doctr/datasets/orientation.py +3 -4
- doctr/datasets/recognition.py +4 -5
- doctr/datasets/sroie.py +6 -5
- doctr/datasets/svhn.py +7 -6
- doctr/datasets/svt.py +6 -7
- doctr/datasets/synthtext.py +19 -7
- doctr/datasets/utils.py +41 -35
- doctr/datasets/vocabs.py +1107 -49
- doctr/datasets/wildreceipt.py +14 -10
- doctr/file_utils.py +11 -7
- doctr/io/elements.py +96 -82
- doctr/io/html.py +1 -3
- doctr/io/image/__init__.py +3 -3
- doctr/io/image/base.py +2 -5
- doctr/io/image/pytorch.py +3 -12
- doctr/io/image/tensorflow.py +2 -11
- doctr/io/pdf.py +5 -7
- doctr/io/reader.py +5 -11
- doctr/models/_utils.py +15 -23
- doctr/models/builder.py +30 -48
- doctr/models/classification/__init__.py +1 -0
- doctr/models/classification/magc_resnet/__init__.py +3 -3
- doctr/models/classification/magc_resnet/pytorch.py +11 -15
- doctr/models/classification/magc_resnet/tensorflow.py +11 -14
- doctr/models/classification/mobilenet/__init__.py +3 -3
- doctr/models/classification/mobilenet/pytorch.py +20 -18
- doctr/models/classification/mobilenet/tensorflow.py +19 -23
- doctr/models/classification/predictor/__init__.py +4 -4
- doctr/models/classification/predictor/pytorch.py +7 -9
- doctr/models/classification/predictor/tensorflow.py +6 -8
- doctr/models/classification/resnet/__init__.py +4 -4
- doctr/models/classification/resnet/pytorch.py +47 -34
- doctr/models/classification/resnet/tensorflow.py +45 -35
- doctr/models/classification/textnet/__init__.py +3 -3
- doctr/models/classification/textnet/pytorch.py +20 -18
- doctr/models/classification/textnet/tensorflow.py +19 -17
- doctr/models/classification/vgg/__init__.py +3 -3
- doctr/models/classification/vgg/pytorch.py +21 -8
- doctr/models/classification/vgg/tensorflow.py +20 -14
- doctr/models/classification/vip/__init__.py +4 -0
- doctr/models/classification/vip/layers/__init__.py +4 -0
- doctr/models/classification/vip/layers/pytorch.py +615 -0
- doctr/models/classification/vip/pytorch.py +505 -0
- doctr/models/classification/vit/__init__.py +3 -3
- doctr/models/classification/vit/pytorch.py +18 -15
- doctr/models/classification/vit/tensorflow.py +15 -12
- doctr/models/classification/zoo.py +23 -14
- doctr/models/core.py +3 -3
- doctr/models/detection/_utils/__init__.py +4 -4
- doctr/models/detection/_utils/base.py +4 -7
- doctr/models/detection/_utils/pytorch.py +1 -5
- doctr/models/detection/_utils/tensorflow.py +1 -5
- doctr/models/detection/core.py +2 -8
- doctr/models/detection/differentiable_binarization/__init__.py +4 -4
- doctr/models/detection/differentiable_binarization/base.py +10 -21
- doctr/models/detection/differentiable_binarization/pytorch.py +37 -31
- doctr/models/detection/differentiable_binarization/tensorflow.py +26 -29
- doctr/models/detection/fast/__init__.py +4 -4
- doctr/models/detection/fast/base.py +8 -17
- doctr/models/detection/fast/pytorch.py +37 -35
- doctr/models/detection/fast/tensorflow.py +24 -28
- doctr/models/detection/linknet/__init__.py +4 -4
- doctr/models/detection/linknet/base.py +8 -18
- doctr/models/detection/linknet/pytorch.py +34 -28
- doctr/models/detection/linknet/tensorflow.py +24 -25
- doctr/models/detection/predictor/__init__.py +5 -5
- doctr/models/detection/predictor/pytorch.py +6 -7
- doctr/models/detection/predictor/tensorflow.py +5 -6
- doctr/models/detection/zoo.py +27 -7
- doctr/models/factory/hub.py +6 -10
- doctr/models/kie_predictor/__init__.py +5 -5
- doctr/models/kie_predictor/base.py +4 -5
- doctr/models/kie_predictor/pytorch.py +19 -20
- doctr/models/kie_predictor/tensorflow.py +14 -15
- doctr/models/modules/layers/__init__.py +3 -3
- doctr/models/modules/layers/pytorch.py +55 -10
- doctr/models/modules/layers/tensorflow.py +5 -7
- doctr/models/modules/transformer/__init__.py +3 -3
- doctr/models/modules/transformer/pytorch.py +12 -13
- doctr/models/modules/transformer/tensorflow.py +9 -10
- doctr/models/modules/vision_transformer/__init__.py +3 -3
- doctr/models/modules/vision_transformer/pytorch.py +2 -3
- doctr/models/modules/vision_transformer/tensorflow.py +3 -3
- doctr/models/predictor/__init__.py +5 -5
- doctr/models/predictor/base.py +28 -29
- doctr/models/predictor/pytorch.py +13 -14
- doctr/models/predictor/tensorflow.py +9 -10
- doctr/models/preprocessor/__init__.py +4 -4
- doctr/models/preprocessor/pytorch.py +13 -17
- doctr/models/preprocessor/tensorflow.py +10 -14
- doctr/models/recognition/__init__.py +1 -0
- doctr/models/recognition/core.py +3 -7
- doctr/models/recognition/crnn/__init__.py +4 -4
- doctr/models/recognition/crnn/pytorch.py +30 -29
- doctr/models/recognition/crnn/tensorflow.py +21 -24
- doctr/models/recognition/master/__init__.py +3 -3
- doctr/models/recognition/master/base.py +3 -7
- doctr/models/recognition/master/pytorch.py +32 -25
- doctr/models/recognition/master/tensorflow.py +22 -25
- doctr/models/recognition/parseq/__init__.py +3 -3
- doctr/models/recognition/parseq/base.py +3 -7
- doctr/models/recognition/parseq/pytorch.py +47 -29
- doctr/models/recognition/parseq/tensorflow.py +29 -27
- doctr/models/recognition/predictor/__init__.py +5 -5
- doctr/models/recognition/predictor/_utils.py +111 -52
- doctr/models/recognition/predictor/pytorch.py +9 -9
- doctr/models/recognition/predictor/tensorflow.py +8 -9
- doctr/models/recognition/sar/__init__.py +4 -4
- doctr/models/recognition/sar/pytorch.py +30 -22
- doctr/models/recognition/sar/tensorflow.py +22 -24
- doctr/models/recognition/utils.py +57 -53
- doctr/models/recognition/viptr/__init__.py +4 -0
- doctr/models/recognition/viptr/pytorch.py +277 -0
- doctr/models/recognition/vitstr/__init__.py +4 -4
- doctr/models/recognition/vitstr/base.py +3 -7
- doctr/models/recognition/vitstr/pytorch.py +28 -21
- doctr/models/recognition/vitstr/tensorflow.py +22 -23
- doctr/models/recognition/zoo.py +27 -11
- doctr/models/utils/__init__.py +4 -4
- doctr/models/utils/pytorch.py +41 -34
- doctr/models/utils/tensorflow.py +31 -23
- doctr/models/zoo.py +1 -5
- doctr/transforms/functional/__init__.py +3 -3
- doctr/transforms/functional/base.py +4 -11
- doctr/transforms/functional/pytorch.py +20 -28
- doctr/transforms/functional/tensorflow.py +10 -22
- doctr/transforms/modules/__init__.py +4 -4
- doctr/transforms/modules/base.py +48 -55
- doctr/transforms/modules/pytorch.py +58 -22
- doctr/transforms/modules/tensorflow.py +18 -32
- doctr/utils/common_types.py +8 -9
- doctr/utils/data.py +9 -13
- doctr/utils/fonts.py +2 -7
- doctr/utils/geometry.py +17 -48
- doctr/utils/metrics.py +17 -37
- doctr/utils/multithreading.py +4 -6
- doctr/utils/reconstitution.py +9 -13
- doctr/utils/repr.py +2 -3
- doctr/utils/visualization.py +16 -29
- doctr/version.py +1 -1
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/METADATA +70 -52
- python_doctr-0.12.0.dist-info/RECORD +180 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/WHEEL +1 -1
- python_doctr-0.10.0.dist-info/RECORD +0 -173
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info/licenses}/LICENSE +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/zip-safe +0 -0
doctr/models/classification/magc_resnet/pytorch.py

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -7,20 +7,19 @@
 import math
 from copy import deepcopy
 from functools import partial
-from typing import Any
+from typing import Any
 
 import torch
 from torch import nn
 
 from doctr.datasets import VOCABS
 
-from ...utils.pytorch import load_pretrained_params
 from ..resnet.pytorch import ResNet
 
 __all__ = ["magc_resnet31"]
 
 
-default_cfgs:
+default_cfgs: dict[str, dict[str, Any]] = {
     "magc_resnet31": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
@@ -36,7 +35,6 @@ class MAGC(nn.Module):
     <https://arxiv.org/pdf/1910.02562.pdf>`_.
 
     Args:
-    ----
         inplanes: input channels
         headers: number of headers to split channels
         attn_scale: if True, re-scale attention to counteract the variance distibutions
@@ -50,7 +48,7 @@ class MAGC(nn.Module):
         headers: int = 8,
         attn_scale: bool = False,
         ratio: float = 0.0625,  # bottleneck ratio of 1/16 as described in paper
-        cfg:
+        cfg: dict[str, Any] | None = None,
     ) -> None:
         super().__init__()
@@ -105,12 +103,12 @@ class MAGC(nn.Module):
 def _magc_resnet(
     arch: str,
     pretrained: bool,
-    num_blocks:
-    output_channels:
-    stage_stride:
-    stage_conv:
-    stage_pooling:
-    ignore_keys:
+    num_blocks: list[int],
+    output_channels: list[int],
+    stage_stride: list[int],
+    stage_conv: list[bool],
+    stage_pooling: list[tuple[int, int] | None],
+    ignore_keys: list[str] | None = None,
     **kwargs: Any,
 ) -> ResNet:
     kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
@@ -137,7 +135,7 @@ def _magc_resnet(
         # The number of classes is not the same as the number of classes in the pretrained model =>
         # remove the last layer weights
         _ignore_keys = ignore_keys if kwargs["num_classes"] != len(default_cfgs[arch]["classes"]) else None
-
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
 
     return model
 
@@ -154,12 +152,10 @@ def magc_resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the ResNet architecture
 
     Returns:
-    -------
         A feature extractor model
     """
     return _magc_resnet(
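Two systematic changes run through this file and the rest of the release: typing annotations move to the PEP 585/604 builtins (`dict[str, Any]`, `list[int]`, `X | None`), which is why `from typing import Any` is the only typing import left, and pretrained-weight loading moves from the module-level `load_pretrained_params(...)` helper to a `model.from_pretrained(url, ignore_keys=...)` method. A minimal sketch of what key-ignoring checkpoint loading amounts to in plain PyTorch; the helper name below is hypothetical, doctr's real logic lives in `doctr.models.utils.load_pretrained_params`:

```python
import torch
from torch import nn


def load_ignoring(model: nn.Module, state_dict: dict, ignore_keys: list[str] | None = None) -> None:
    """Drop the listed keys (e.g. classifier-head weights) before loading."""
    if ignore_keys:
        state_dict = {k: v for k, v in state_dict.items() if k not in ignore_keys}
    # strict=False tolerates the keys we just dropped
    model.load_state_dict(state_dict, strict=False)


# Fine-tuning with a different num_classes: head weights are skipped, the rest loads
model = nn.Linear(8, 4)
ckpt = {"weight": torch.zeros(4, 8), "bias": torch.ones(4)}
load_ignoring(model, ckpt, ignore_keys=["bias"])
```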
doctr/models/classification/magc_resnet/tensorflow.py

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,7 +6,7 @@
 import math
 from copy import deepcopy
 from functools import partial
-from typing import Any
+from typing import Any
 
 import tensorflow as tf
 from tensorflow.keras import activations, layers
@@ -14,13 +14,13 @@ from tensorflow.keras.models import Sequential
 
 from doctr.datasets import VOCABS
 
-from ...utils import _build_model
+from ...utils import _build_model
 from ..resnet.tensorflow import ResNet
 
 __all__ = ["magc_resnet31"]
 
 
-default_cfgs:
+default_cfgs: dict[str, dict[str, Any]] = {
     "magc_resnet31": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
@@ -36,7 +36,6 @@ class MAGC(layers.Layer):
     <https://arxiv.org/pdf/1910.02562.pdf>`_.
 
     Args:
-    ----
         inplanes: input channels
         headers: number of headers to split channels
         attn_scale: if True, re-scale attention to counteract the variance distibutions
@@ -122,11 +121,11 @@ class MAGC(layers.Layer):
 def _magc_resnet(
     arch: str,
     pretrained: bool,
-    num_blocks:
-    output_channels:
-    stage_downsample:
-    stage_conv:
-    stage_pooling:
+    num_blocks: list[int],
+    output_channels: list[int],
+    stage_downsample: list[bool],
+    stage_conv: list[bool],
+    stage_pooling: list[tuple[int, int] | None],
     origin_stem: bool = True,
     **kwargs: Any,
 ) -> ResNet:
@@ -158,8 +157,8 @@ def _magc_resnet(
     if pretrained:
         # The number of classes is not the same as the number of classes in the pretrained model =>
         # skip the mismatching layers for fine tuning
-
-
+        model.from_pretrained(
+            default_cfgs[arch]["url"], skip_mismatch=kwargs["num_classes"] != len(default_cfgs[arch]["classes"])
         )
 
     return model
@@ -177,12 +176,10 @@ def magc_resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the ResNet architecture
 
     Returns:
-    -------
         A feature extractor model
     """
     return _magc_resnet(
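The TensorFlow twin makes the same move, but expresses "ignore the mismatching head" through Keras' `skip_mismatch` loading rather than an explicit key list. A minimal sketch of that underlying mechanism, assuming TF 2.x and HDF5 weights (doctr's `from_pretrained` wraps its own `load_pretrained_params` around the same idea):

```python
import tensorflow as tf


def make_model(num_classes: int) -> tf.keras.Model:
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(16, name="backbone"),
        tf.keras.layers.Dense(num_classes, name="head"),
    ])
    model.build((None, 8))
    return model


make_model(10).save_weights("pretrained.h5")

# Fine-tuning with 4 classes: backbone weights load, the mismatched head is skipped
finetuned = make_model(4)
finetuned.load_weights("pretrained.h5", by_name=True, skip_mismatch=True)
```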
doctr/models/classification/mobilenet/pytorch.py

@@ -1,12 +1,13 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 # Greatly inspired by https://github.com/pytorch/vision/blob/master/torchvision/models/mobilenetv3.py
 
+import types
 from copy import deepcopy
-from typing import Any
+from typing import Any
 
 from torchvision.models import mobilenetv3
 from torchvision.models.mobilenetv3 import MobileNetV3
@@ -25,7 +26,7 @@ __all__ = [
     "mobilenet_v3_small_page_orientation",
 ]
 
-default_cfgs:
+default_cfgs: dict[str, dict[str, Any]] = {
     "mobilenet_v3_large": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
@@ -74,8 +75,8 @@ default_cfgs: Dict[str, Dict[str, Any]] = {
 def _mobilenet_v3(
     arch: str,
     pretrained: bool,
-    rect_strides:
-    ignore_keys:
+    rect_strides: list[str] | None = None,
+    ignore_keys: list[str] | None = None,
     **kwargs: Any,
 ) -> mobilenetv3.MobileNetV3:
     kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
@@ -99,12 +100,25 @@ def _mobilenet_v3(
             m = getattr(m, child)
         m.stride = (2, 1)
 
+    # monkeypatch the model to allow for loading pretrained parameters
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:  # noqa: D417
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+    # Bind method to the instance
+    model.from_pretrained = types.MethodType(from_pretrained, model)
+
     # Load pretrained parameters
     if pretrained:
         # The number of classes is not the same as the number of classes in the pretrained model =>
         # remove the last layer weights
         _ignore_keys = ignore_keys if kwargs["num_classes"] != len(default_cfgs[arch]["classes"]) else None
-
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
 
     model.cfg = _cfg
 
@@ -123,12 +137,10 @@ def mobilenet_v3_small(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.M
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
@@ -148,12 +160,10 @@ def mobilenet_v3_small_r(pretrained: bool = False, **kwargs: Any) -> mobilenetv3
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
@@ -177,12 +187,10 @@ def mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.M
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
@@ -205,12 +213,10 @@ def mobilenet_v3_large_r(pretrained: bool = False, **kwargs: Any) -> mobilenetv3
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
@@ -234,12 +240,10 @@ def mobilenet_v3_small_crop_orientation(pretrained: bool = False, **kwargs: Any)
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
@@ -262,12 +266,10 @@ def mobilenet_v3_small_page_orientation(pretrained: bool = False, **kwargs: Any)
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a torch.nn.Module
     """
     return _mobilenet_v3(
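Here the backbone is torchvision's own `MobileNetV3`, instantiated directly rather than subclassed, so there is no doctr class to hang `from_pretrained` on; the builder instead binds the method onto each instance with `types.MethodType`. A standalone sketch of that binding pattern (the class and URL below are illustrative only):

```python
import types


class ThirdParty:
    """Stand-in for a class we don't control, e.g. torchvision's MobileNetV3."""


def from_pretrained(self, path_or_url: str) -> None:
    # `self` is the bound instance, exactly as in a regular method
    print(f"loading weights for {type(self).__name__} from {path_or_url}")


obj = ThirdParty()
# MethodType binds the function to this one instance only
obj.from_pretrained = types.MethodType(from_pretrained, obj)
obj.from_pretrained("https://example.com/checkpoint.pt")
```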
doctr/models/classification/mobilenet/tensorflow.py

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,7 +6,7 @@
 # Greatly inspired by https://github.com/pytorch/vision/blob/master/torchvision/models/mobilenetv3.py
 
 from copy import deepcopy
-from typing import Any
+from typing import Any
 
 import tensorflow as tf
 from tensorflow.keras import layers
@@ -26,7 +26,7 @@ __all__ = [
 ]
 
 
-default_cfgs:
+default_cfgs: dict[str, dict[str, Any]] = {
     "mobilenet_v3_large": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
@@ -76,7 +76,7 @@ def hard_swish(x: tf.Tensor) -> tf.Tensor:
     return x * tf.nn.relu6(x + 3.0) / 6.0
 
 
-def _make_divisible(v: float, divisor: int, min_value:
+def _make_divisible(v: float, divisor: int, min_value: int | None = None) -> int:
     if min_value is None:
         min_value = divisor
     new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
@@ -112,7 +112,7 @@ class InvertedResidualConfig:
         out_channels: int,
         use_se: bool,
         activation: str,
-        stride:
+        stride: int | tuple[int, int],
         width_mult: float = 1,
     ) -> None:
         self.input_channels = self.adjust_channels(input_channels, width_mult)
@@ -132,7 +132,6 @@ class InvertedResidual(layers.Layer):
     """InvertedResidual for mobilenet
 
     Args:
-    ----
         conf: configuration object for inverted residual
     """
 
@@ -201,12 +200,12 @@ class MobileNetV3(Sequential):
 
     def __init__(
         self,
-        layout:
+        layout: list[InvertedResidualConfig],
         include_top: bool = True,
         head_chans: int = 1024,
         num_classes: int = 1000,
-        cfg:
-        input_shape:
+        cfg: dict[str, Any] | None = None,
+        input_shape: tuple[int, int, int] | None = None,
     ) -> None:
         _layers = [
             Sequential(
@@ -237,6 +236,15 @@ class MobileNetV3(Sequential):
         super().__init__(_layers)
         self.cfg = cfg
 
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
 
 def _mobilenet_v3(arch: str, pretrained: bool, rect_strides: bool = False, **kwargs: Any) -> MobileNetV3:
     kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
@@ -301,8 +309,8 @@ def _mobilenet_v3(arch: str, pretrained: bool, rect_strides: bool = False, **kwa
     if pretrained:
         # The number of classes is not the same as the number of classes in the pretrained model =>
         # skip the mismatching layers for fine tuning
-
-
+        model.from_pretrained(
+            default_cfgs[arch]["url"], skip_mismatch=kwargs["num_classes"] != len(default_cfgs[arch]["classes"])
         )
 
     return model
@@ -320,12 +328,10 @@ def mobilenet_v3_small(pretrained: bool = False, **kwargs: Any) -> MobileNetV3:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_small", pretrained, False, **kwargs)
@@ -343,12 +349,10 @@ def mobilenet_v3_small_r(pretrained: bool = False, **kwargs: Any) -> MobileNetV3
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_small_r", pretrained, True, **kwargs)
@@ -366,12 +370,10 @@ def mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> MobileNetV3:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_large", pretrained, False, **kwargs)
@@ -389,12 +391,10 @@ def mobilenet_v3_large_r(pretrained: bool = False, **kwargs: Any) -> MobileNetV3
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_large_r", pretrained, True, **kwargs)
@@ -412,12 +412,10 @@ def mobilenet_v3_small_crop_orientation(pretrained: bool = False, **kwargs: Any)
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_small_crop_orientation", pretrained, include_top=True, **kwargs)
@@ -435,12 +433,10 @@ def mobilenet_v3_small_page_orientation(pretrained: bool = False, **kwargs: Any)
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained: boolean, True if model is pretrained
         **kwargs: keyword arguments of the MobileNetV3 architecture
 
     Returns:
-    -------
         a keras.Model
     """
     return _mobilenet_v3("mobilenet_v3_small_page_orientation", pretrained, include_top=True, **kwargs)
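One detail worth pausing on: `_make_divisible` is the channel-rounding helper used when `width_mult` scales layer widths, rounding to the nearest multiple of `divisor` without ever dropping more than 10% below the requested width. The hunk only shows its first lines; the sketch below completes it following the torchvision reference this file cites, so treat the tail as an assumption:

```python
def _make_divisible(v: float, divisor: int, min_value: int | None = None) -> int:
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # torchvision's reference adds this guard: never round down by more than 10%
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


assert _make_divisible(12.0, 8) == 16   # nearest multiple of 8
assert _make_divisible(20.0, 8) == 24
assert _make_divisible(18.0, 8) == 24   # 16 would lose >10%, so bump up
```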
doctr/models/classification/predictor/__init__.py

@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
 
-if
-    from .
-elif
-    from .
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *  # type: ignore[assignment]
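With `is_torch_available()` now tested first, the PyTorch implementation wins whenever both backends are installed. The dispatch idiom itself is simple; a minimal sketch with hypothetical re-implementations of the checks (doctr's real ones live in `doctr.file_utils`):

```python
import importlib.util


def is_torch_available() -> bool:  # hypothetical stand-in
    return importlib.util.find_spec("torch") is not None


def is_tf_available() -> bool:  # hypothetical stand-in
    return importlib.util.find_spec("tensorflow") is not None


if is_torch_available():        # preferred backend is tested first
    BACKEND = "pytorch"
elif is_tf_available():
    BACKEND = "tensorflow"
else:
    raise ImportError("Install torch or tensorflow to use this package.")
```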
doctr/models/classification/predictor/pytorch.py

@@ -1,9 +1,8 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import List, Optional, Union
 
 import numpy as np
 import torch
@@ -20,15 +19,14 @@ class OrientationPredictor(nn.Module):
     4 possible orientations: 0, 90, 180, 270 (-90) degrees counter clockwise.
 
     Args:
-    ----
         pre_processor: transform inputs for easier batched model inference
         model: core classification architecture (backbone + classification head)
     """
 
     def __init__(
         self,
-        pre_processor:
-        model:
+        pre_processor: PreProcessor | None,
+        model: nn.Module | None,
     ) -> None:
         super().__init__()
         self.pre_processor = pre_processor if isinstance(pre_processor, PreProcessor) else None
@@ -37,8 +35,8 @@ class OrientationPredictor(nn.Module):
     @torch.inference_mode()
     def forward(
         self,
-        inputs:
-    ) ->
+        inputs: list[np.ndarray | torch.Tensor],
+    ) -> list[list[int] | list[float]]:
         # Dimension check
         if any(input.ndim != 3 for input in inputs):
             raise ValueError("incorrect input shape: all inputs are expected to be multi-channel 2D images.")
@@ -52,7 +50,7 @@ class OrientationPredictor(nn.Module):
         self.model, processed_batches = set_device_and_dtype(
             self.model, processed_batches, _params.device, _params.dtype
         )
-        predicted_batches = [self.model(batch) for batch in processed_batches]
+        predicted_batches = [self.model(batch) for batch in processed_batches]
         # confidence
         probs = [
             torch.max(torch.softmax(batch, dim=1), dim=1).values.cpu().detach().numpy() for batch in predicted_batches
@@ -61,7 +59,7 @@ class OrientationPredictor(nn.Module):
         predicted_batches = [out_batch.argmax(dim=1).cpu().detach().numpy() for out_batch in predicted_batches]
 
         class_idxs = [int(pred) for batch in predicted_batches for pred in batch]
-        classes = [int(self.model.cfg["classes"][idx]) for idx in class_idxs]  # type: ignore
+        classes = [int(self.model.cfg["classes"][idx]) for idx in class_idxs]  # type: ignore
         confs = [round(float(p), 2) for prob in probs for p in prob]
 
         return [class_idxs, classes, confs]
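The rewritten signature now documents what `forward` actually returns: three parallel lists holding the raw class indices, the angles they map to through `model.cfg["classes"]`, and confidences rounded to two decimals. A minimal sketch of that post-processing on dummy logits; the label set below is an assumption, the real one is read from the model config:

```python
import torch

logits = torch.tensor([[2.0, 0.1, 0.1, 0.1], [0.1, 0.1, 3.0, 0.1]])
classes_cfg = [0, -90, 180, 90]  # assumed ordering of the 4 supported angles

# confidence = highest softmax probability per sample
probs = torch.max(torch.softmax(logits, dim=1), dim=1).values
class_idxs = logits.argmax(dim=1).tolist()
classes = [int(classes_cfg[i]) for i in class_idxs]
confs = [round(float(p), 2) for p in probs]

print([class_idxs, classes, confs])  # [[0, 2], [0, 180], [0.69, 0.86]]
```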
doctr/models/classification/predictor/tensorflow.py

@@ -1,9 +1,8 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import List, Optional, Union
 
 import numpy as np
 import tensorflow as tf
@@ -20,25 +19,24 @@ class OrientationPredictor(NestedObject):
     4 possible orientations: 0, 90, 180, 270 (-90) degrees counter clockwise.
 
     Args:
-    ----
         pre_processor: transform inputs for easier batched model inference
         model: core classification architecture (backbone + classification head)
     """
 
-    _children_names:
+    _children_names: list[str] = ["pre_processor", "model"]
 
     def __init__(
         self,
-        pre_processor:
-        model:
+        pre_processor: PreProcessor | None,
+        model: Model | None,
     ) -> None:
         self.pre_processor = pre_processor if isinstance(pre_processor, PreProcessor) else None
         self.model = model if isinstance(model, Model) else None
 
     def __call__(
         self,
-        inputs:
-    ) ->
+        inputs: list[np.ndarray | tf.Tensor],
+    ) -> list[list[int] | list[float]]:
         # Dimension check
         if any(input.ndim != 3 for input in inputs):
             raise ValueError("incorrect input shape: all inputs are expected to be multi-channel 2D images.")
doctr/models/classification/resnet/__init__.py

@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
 
-if
-    from .
-elif
-    from .
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *  # type: ignore[assignment]