python-doctr 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that public registry.
- doctr/contrib/__init__.py +1 -0
- doctr/contrib/artefacts.py +7 -9
- doctr/contrib/base.py +8 -17
- doctr/datasets/cord.py +8 -7
- doctr/datasets/datasets/__init__.py +4 -4
- doctr/datasets/datasets/base.py +16 -16
- doctr/datasets/datasets/pytorch.py +12 -12
- doctr/datasets/datasets/tensorflow.py +10 -10
- doctr/datasets/detection.py +6 -9
- doctr/datasets/doc_artefacts.py +3 -4
- doctr/datasets/funsd.py +7 -6
- doctr/datasets/generator/__init__.py +4 -4
- doctr/datasets/generator/base.py +16 -17
- doctr/datasets/generator/pytorch.py +1 -3
- doctr/datasets/generator/tensorflow.py +1 -3
- doctr/datasets/ic03.py +4 -5
- doctr/datasets/ic13.py +4 -5
- doctr/datasets/iiit5k.py +6 -5
- doctr/datasets/iiithws.py +4 -5
- doctr/datasets/imgur5k.py +6 -5
- doctr/datasets/loader.py +4 -7
- doctr/datasets/mjsynth.py +6 -5
- doctr/datasets/ocr.py +3 -4
- doctr/datasets/orientation.py +3 -4
- doctr/datasets/recognition.py +3 -4
- doctr/datasets/sroie.py +6 -5
- doctr/datasets/svhn.py +6 -5
- doctr/datasets/svt.py +4 -5
- doctr/datasets/synthtext.py +4 -5
- doctr/datasets/utils.py +34 -29
- doctr/datasets/vocabs.py +17 -7
- doctr/datasets/wildreceipt.py +14 -10
- doctr/file_utils.py +2 -7
- doctr/io/elements.py +59 -79
- doctr/io/html.py +1 -3
- doctr/io/image/__init__.py +3 -3
- doctr/io/image/base.py +2 -5
- doctr/io/image/pytorch.py +3 -12
- doctr/io/image/tensorflow.py +2 -11
- doctr/io/pdf.py +5 -7
- doctr/io/reader.py +5 -11
- doctr/models/_utils.py +14 -22
- doctr/models/builder.py +30 -48
- doctr/models/classification/magc_resnet/__init__.py +3 -3
- doctr/models/classification/magc_resnet/pytorch.py +10 -13
- doctr/models/classification/magc_resnet/tensorflow.py +8 -11
- doctr/models/classification/mobilenet/__init__.py +3 -3
- doctr/models/classification/mobilenet/pytorch.py +5 -17
- doctr/models/classification/mobilenet/tensorflow.py +8 -21
- doctr/models/classification/predictor/__init__.py +4 -4
- doctr/models/classification/predictor/pytorch.py +6 -8
- doctr/models/classification/predictor/tensorflow.py +6 -8
- doctr/models/classification/resnet/__init__.py +4 -4
- doctr/models/classification/resnet/pytorch.py +21 -31
- doctr/models/classification/resnet/tensorflow.py +20 -31
- doctr/models/classification/textnet/__init__.py +3 -3
- doctr/models/classification/textnet/pytorch.py +10 -17
- doctr/models/classification/textnet/tensorflow.py +8 -15
- doctr/models/classification/vgg/__init__.py +3 -3
- doctr/models/classification/vgg/pytorch.py +5 -7
- doctr/models/classification/vgg/tensorflow.py +9 -12
- doctr/models/classification/vit/__init__.py +3 -3
- doctr/models/classification/vit/pytorch.py +8 -14
- doctr/models/classification/vit/tensorflow.py +6 -12
- doctr/models/classification/zoo.py +19 -14
- doctr/models/core.py +3 -3
- doctr/models/detection/_utils/__init__.py +4 -4
- doctr/models/detection/_utils/base.py +4 -7
- doctr/models/detection/_utils/pytorch.py +1 -5
- doctr/models/detection/_utils/tensorflow.py +1 -5
- doctr/models/detection/core.py +2 -8
- doctr/models/detection/differentiable_binarization/__init__.py +4 -4
- doctr/models/detection/differentiable_binarization/base.py +7 -17
- doctr/models/detection/differentiable_binarization/pytorch.py +27 -30
- doctr/models/detection/differentiable_binarization/tensorflow.py +15 -25
- doctr/models/detection/fast/__init__.py +4 -4
- doctr/models/detection/fast/base.py +6 -14
- doctr/models/detection/fast/pytorch.py +24 -31
- doctr/models/detection/fast/tensorflow.py +14 -26
- doctr/models/detection/linknet/__init__.py +4 -4
- doctr/models/detection/linknet/base.py +6 -15
- doctr/models/detection/linknet/pytorch.py +24 -27
- doctr/models/detection/linknet/tensorflow.py +14 -23
- doctr/models/detection/predictor/__init__.py +5 -5
- doctr/models/detection/predictor/pytorch.py +6 -7
- doctr/models/detection/predictor/tensorflow.py +5 -6
- doctr/models/detection/zoo.py +27 -7
- doctr/models/factory/hub.py +3 -7
- doctr/models/kie_predictor/__init__.py +5 -5
- doctr/models/kie_predictor/base.py +4 -5
- doctr/models/kie_predictor/pytorch.py +18 -19
- doctr/models/kie_predictor/tensorflow.py +13 -14
- doctr/models/modules/layers/__init__.py +3 -3
- doctr/models/modules/layers/pytorch.py +6 -9
- doctr/models/modules/layers/tensorflow.py +5 -7
- doctr/models/modules/transformer/__init__.py +3 -3
- doctr/models/modules/transformer/pytorch.py +12 -13
- doctr/models/modules/transformer/tensorflow.py +9 -10
- doctr/models/modules/vision_transformer/__init__.py +3 -3
- doctr/models/modules/vision_transformer/pytorch.py +2 -3
- doctr/models/modules/vision_transformer/tensorflow.py +3 -3
- doctr/models/predictor/__init__.py +5 -5
- doctr/models/predictor/base.py +28 -29
- doctr/models/predictor/pytorch.py +12 -13
- doctr/models/predictor/tensorflow.py +8 -9
- doctr/models/preprocessor/__init__.py +4 -4
- doctr/models/preprocessor/pytorch.py +13 -17
- doctr/models/preprocessor/tensorflow.py +10 -14
- doctr/models/recognition/core.py +3 -7
- doctr/models/recognition/crnn/__init__.py +4 -4
- doctr/models/recognition/crnn/pytorch.py +20 -28
- doctr/models/recognition/crnn/tensorflow.py +11 -23
- doctr/models/recognition/master/__init__.py +3 -3
- doctr/models/recognition/master/base.py +3 -7
- doctr/models/recognition/master/pytorch.py +22 -24
- doctr/models/recognition/master/tensorflow.py +12 -22
- doctr/models/recognition/parseq/__init__.py +3 -3
- doctr/models/recognition/parseq/base.py +3 -7
- doctr/models/recognition/parseq/pytorch.py +26 -26
- doctr/models/recognition/parseq/tensorflow.py +16 -22
- doctr/models/recognition/predictor/__init__.py +5 -5
- doctr/models/recognition/predictor/_utils.py +7 -10
- doctr/models/recognition/predictor/pytorch.py +6 -6
- doctr/models/recognition/predictor/tensorflow.py +5 -6
- doctr/models/recognition/sar/__init__.py +4 -4
- doctr/models/recognition/sar/pytorch.py +20 -21
- doctr/models/recognition/sar/tensorflow.py +12 -21
- doctr/models/recognition/utils.py +5 -10
- doctr/models/recognition/vitstr/__init__.py +4 -4
- doctr/models/recognition/vitstr/base.py +3 -7
- doctr/models/recognition/vitstr/pytorch.py +18 -20
- doctr/models/recognition/vitstr/tensorflow.py +12 -20
- doctr/models/recognition/zoo.py +22 -11
- doctr/models/utils/__init__.py +4 -4
- doctr/models/utils/pytorch.py +14 -17
- doctr/models/utils/tensorflow.py +17 -16
- doctr/models/zoo.py +1 -5
- doctr/transforms/functional/__init__.py +3 -3
- doctr/transforms/functional/base.py +4 -11
- doctr/transforms/functional/pytorch.py +20 -28
- doctr/transforms/functional/tensorflow.py +10 -22
- doctr/transforms/modules/__init__.py +4 -4
- doctr/transforms/modules/base.py +48 -55
- doctr/transforms/modules/pytorch.py +58 -22
- doctr/transforms/modules/tensorflow.py +18 -32
- doctr/utils/common_types.py +8 -9
- doctr/utils/data.py +8 -12
- doctr/utils/fonts.py +2 -7
- doctr/utils/geometry.py +16 -47
- doctr/utils/metrics.py +17 -37
- doctr/utils/multithreading.py +4 -6
- doctr/utils/reconstitution.py +9 -13
- doctr/utils/repr.py +2 -3
- doctr/utils/visualization.py +16 -29
- doctr/version.py +1 -1
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/METADATA +54 -52
- python_doctr-0.11.0.dist-info/RECORD +173 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/WHEEL +1 -1
- python_doctr-0.10.0.dist-info/RECORD +0 -173
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/LICENSE +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/zip-safe +0 -0
doctr/models/recognition/vitstr/tensorflow.py
CHANGED

@@ -1,10 +1,10 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 from copy import deepcopy
-from typing import Any
+from typing import Any
 
 import tensorflow as tf
 from tensorflow.keras import Model, layers
@@ -17,7 +17,7 @@ from .base import _ViTSTR, _ViTSTRPostProcessor
 
 __all__ = ["ViTSTR", "vitstr_small", "vitstr_base"]
 
-default_cfgs:
+default_cfgs: dict[str, dict[str, Any]] = {
     "vitstr_small": {
         "mean": (0.694, 0.695, 0.693),
         "std": (0.299, 0.296, 0.301),
@@ -40,7 +40,6 @@ class ViTSTR(_ViTSTR, Model):
     Efficient Scene Text Recognition" <https://arxiv.org/pdf/2105.08582.pdf>`_.
 
     Args:
-    ----
         feature_extractor: the backbone serving as feature extractor
         vocab: vocabulary used for encoding
         embedding_units: number of embedding units
@@ -51,7 +50,7 @@ class ViTSTR(_ViTSTR, Model):
         cfg: dictionary containing information about the model
     """
 
-    _children_names:
+    _children_names: list[str] = ["feat_extractor", "postprocessor"]
 
     def __init__(
         self,
@@ -60,9 +59,9 @@ class ViTSTR(_ViTSTR, Model):
         embedding_units: int,
         max_length: int = 32,
         dropout_prob: float = 0.0,
-        input_shape:
+        input_shape: tuple[int, int, int] = (32, 128, 3),  # different from paper
         exportable: bool = False,
-        cfg:
+        cfg: dict[str, Any] | None = None,
     ) -> None:
         super().__init__()
         self.vocab = vocab
@@ -79,19 +78,17 @@ class ViTSTR(_ViTSTR, Model):
     def compute_loss(
         model_output: tf.Tensor,
         gt: tf.Tensor,
-        seq_len:
+        seq_len: list[int],
     ) -> tf.Tensor:
         """Compute categorical cross-entropy loss for the model.
         Sequences are masked after the EOS character.
 
         Args:
-        ----
             model_output: predicted logits of the model
             gt: the encoded tensor with gt labels
             seq_len: lengths of each gt word inside the batch
 
         Returns:
-        -------
             The loss of the model on the batch
         """
         # Input length : number of steps
@@ -114,11 +111,11 @@ class ViTSTR(_ViTSTR, Model):
     def call(
         self,
         x: tf.Tensor,
-        target:
+        target: list[str] | None = None,
         return_model_output: bool = False,
         return_preds: bool = False,
         **kwargs: Any,
-    ) ->
+    ) -> dict[str, Any]:
         features = self.feat_extractor(x, **kwargs)  # (batch_size, patches_seqlen, d_model)
 
         if target is not None:
@@ -136,7 +133,7 @@ class ViTSTR(_ViTSTR, Model):
         )  # (batch_size, max_length, vocab + 1)
         decoded_features = _bf16_to_float32(logits[:, 1:])  # remove cls_token
 
-        out:
+        out: dict[str, tf.Tensor] = {}
         if self.exportable:
             out["logits"] = decoded_features
             return out
@@ -158,14 +155,13 @@ class ViTSTRPostProcessor(_ViTSTRPostProcessor):
     """Post processor for ViTSTR architecture
 
     Args:
-    ----
         vocab: string containing the ordered sequence of supported characters
     """
 
     def __call__(
         self,
         logits: tf.Tensor,
-    ) ->
+    ) -> list[tuple[str, float]]:
         # compute pred with argmax for attention models
         out_idxs = tf.math.argmax(logits, axis=2)
         preds_prob = tf.math.reduce_max(tf.nn.softmax(logits, axis=-1), axis=-1)
@@ -191,7 +187,7 @@ def _vitstr(
     arch: str,
     pretrained: bool,
     backbone_fn,
-    input_shape:
+    input_shape: tuple[int, int, int] | None = None,
     **kwargs: Any,
 ) -> ViTSTR:
     # Patch the config
@@ -239,12 +235,10 @@ def vitstr_small(pretrained: bool = False, **kwargs: Any) -> ViTSTR:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
         **kwargs: keyword arguments of the ViTSTR architecture
 
     Returns:
-    -------
         text recognition architecture
     """
     return _vitstr(
@@ -268,12 +262,10 @@ def vitstr_base(pretrained: bool = False, **kwargs: Any) -> ViTSTR:
     >>> out = model(input_tensor)
 
     Args:
-    ----
         pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
         **kwargs: keyword arguments of the ViTSTR architecture
 
     Returns:
-    -------
         text recognition architecture
     """
     return _vitstr(
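For context, a minimal sketch of how the updated TensorFlow ViTSTR builders are typically exercised, assuming a TensorFlow-backend install of doctr; the (32, 128, 3) shape mirrors the default input_shape shown in the diff above, and the output structure follows the annotated call/post-processor return types.

import tensorflow as tf
from doctr.models import vitstr_small

# Random-weight model; pass pretrained=True to download the released checkpoint
model = vitstr_small(pretrained=False)
# Dummy batch matching the default input_shape (32, 128, 3) from the diff above
input_tensor = tf.random.uniform(shape=[1, 32, 128, 3], maxval=1.0, dtype=tf.float32)
out = model(input_tensor)
print(out["preds"])  # list of (word, confidence) pairs from ViTSTRPostProcessor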
doctr/models/recognition/zoo.py
CHANGED

@@ -1,11 +1,11 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any
+from typing import Any
 
-from doctr.file_utils import is_tf_available
+from doctr.file_utils import is_tf_available, is_torch_available
 from doctr.models.preprocessor import PreProcessor
 
 from .. import recognition
@@ -14,7 +14,7 @@ from .predictor import RecognitionPredictor
 
 __all__ = ["recognition_predictor"]
 
 
-ARCHS:
+ARCHS: list[str] = [
     "crnn_vgg16_bn",
     "crnn_mobilenet_v3_small",
     "crnn_mobilenet_v3_large",
@@ -35,9 +35,14 @@ def _predictor(arch: Any, pretrained: bool, **kwargs: Any) -> RecognitionPredict
             pretrained=pretrained, pretrained_backbone=kwargs.get("pretrained_backbone", True)
         )
     else:
-
-
-
+        allowed_archs = [recognition.CRNN, recognition.SAR, recognition.MASTER, recognition.ViTSTR, recognition.PARSeq]
+        if is_torch_available():
+            # Adding the type for torch compiled models to the allowed architectures
+            from doctr.models.utils import _CompiledModule
+
+            allowed_archs.append(_CompiledModule)
+
+        if not isinstance(arch, tuple(allowed_archs)):
             raise ValueError(f"unknown architecture: {type(arch)}")
         _model = arch
 
@@ -52,7 +57,13 @@ def _predictor(arch: Any, pretrained: bool, **kwargs: Any) -> RecognitionPredict
     return predictor
 
 
-def recognition_predictor(
+def recognition_predictor(
+    arch: Any = "crnn_vgg16_bn",
+    pretrained: bool = False,
+    symmetric_pad: bool = False,
+    batch_size: int = 128,
+    **kwargs: Any,
+) -> RecognitionPredictor:
     """Text recognition architecture.
 
     Example::
@@ -63,13 +74,13 @@ def recognition_predictor(arch: Any = "crnn_vgg16_bn", pretrained: bool = False,
     >>> out = model([input_page])
 
     Args:
-    ----
         arch: name of the architecture or model itself to use (e.g. 'crnn_vgg16_bn')
         pretrained: If True, returns a model pre-trained on our text recognition dataset
+        symmetric_pad: if True, pad the image symmetrically instead of padding at the bottom-right
+        batch_size: number of samples the model processes in parallel
         **kwargs: optional parameters to be passed to the architecture
 
     Returns:
-    -------
         Recognition predictor
     """
-    return _predictor(arch, pretrained, **kwargs)
+    return _predictor(arch=arch, pretrained=pretrained, symmetric_pad=symmetric_pad, batch_size=batch_size, **kwargs)
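A short usage sketch of the expanded recognition_predictor signature, with the symmetric_pad and batch_size keywords now promoted to the function signature above; the image path is a placeholder.

from doctr.io import DocumentFile
from doctr.models import recognition_predictor

predictor = recognition_predictor(
    arch="crnn_vgg16_bn",
    pretrained=True,
    symmetric_pad=True,  # pad crops symmetrically instead of bottom-right
    batch_size=64,       # number of crops processed per forward pass
)
crops = DocumentFile.from_images("path/to/word_crop.png")  # placeholder path
out = predictor(crops)  # list of (word, confidence) tuples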
doctr/models/utils/__init__.py
CHANGED

@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
 
-if
-    from .
-elif
-    from .
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *  # type: ignore[assignment]
doctr/models/utils/pytorch.py
CHANGED

@@ -1,10 +1,10 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import logging
-from typing import Any
+from typing import Any
 
 import torch
 from torch import nn
@@ -18,8 +18,12 @@ __all__ = [
     "export_model_to_onnx",
     "_copy_tensor",
     "_bf16_to_float32",
+    "_CompiledModule",
 ]
 
+# torch compiled model type
+_CompiledModule = torch._dynamo.eval_frame.OptimizedModule
+
 
 def _copy_tensor(x: torch.Tensor) -> torch.Tensor:
     return x.clone().detach()
@@ -32,9 +36,9 @@ def _bf16_to_float32(x: torch.Tensor) -> torch.Tensor:
 
 def load_pretrained_params(
     model: nn.Module,
-    url:
-    hash_prefix:
-    ignore_keys:
+    url: str | None = None,
+    hash_prefix: str | None = None,
+    ignore_keys: list[str] | None = None,
     **kwargs: Any,
 ) -> None:
     """Load a set of parameters onto a model
@@ -43,7 +47,6 @@ def load_pretrained_params(
     >>> load_pretrained_params(model, "https://yoursource.com/yourcheckpoint-yourhash.zip")
 
     Args:
-    ----
         model: the PyTorch model to be loaded
         url: URL of the zipped set of parameters
         hash_prefix: first characters of SHA256 expected hash
@@ -76,7 +79,7 @@ def conv_sequence_pt(
     relu: bool = False,
     bn: bool = False,
     **kwargs: Any,
-) ->
+) -> list[nn.Module]:
     """Builds a convolutional-based layer sequence
 
     >>> from torch.nn import Sequential
@@ -84,7 +87,6 @@ def conv_sequence_pt(
     >>> module = Sequential(conv_sequence(3, 32, True, True, kernel_size=3))
 
     Args:
-    ----
         in_channels: number of input channels
         out_channels: number of output channels
         relu: whether ReLU should be used
@@ -92,13 +94,12 @@ def conv_sequence_pt(
         **kwargs: additional arguments to be passed to the convolutional layer
 
     Returns:
-    -------
         list of layers
     """
     # No bias before Batch norm
     kwargs["bias"] = kwargs.get("bias", not bn)
     # Add activation directly to the conv if there is no BN
-    conv_seq:
+    conv_seq: list[nn.Module] = [nn.Conv2d(in_channels, out_channels, **kwargs)]
 
     if bn:
         conv_seq.append(nn.BatchNorm2d(out_channels))
@@ -110,8 +111,8 @@ def conv_sequence_pt(
 
 
 def set_device_and_dtype(
-    model: Any, batches:
-) ->
+    model: Any, batches: list[torch.Tensor], device: str | torch.device, dtype: torch.dtype
+) -> tuple[Any, list[torch.Tensor]]:
     """Set the device and dtype of a model and its batches
 
     >>> import torch
@@ -122,14 +123,12 @@ def set_device_and_dtype(
     >>> model, batches = set_device_and_dtype(model, batches, device="cuda", dtype=torch.float16)
 
     Args:
-    ----
         model: the model to be set
         batches: the batches to be set
         device: the device to be used
         dtype: the dtype to be used
 
     Returns:
-    -------
         the model and batches set
     """
     return model.to(device=device, dtype=dtype), [batch.to(device=device, dtype=dtype) for batch in batches]
@@ -145,19 +144,17 @@ def export_model_to_onnx(model: nn.Module, model_name: str, dummy_input: torch.T
     >>> export_model_to_onnx(model, "my_model", dummy_input=torch.randn(1, 3, 32, 32))
 
     Args:
-    ----
         model: the PyTorch model to be exported
         model_name: the name for the exported model
         dummy_input: the dummy input to the model
         kwargs: additional arguments to be passed to torch.onnx.export
 
     Returns:
-    -------
         the path to the exported model
     """
     torch.onnx.export(
         model,
-        dummy_input,
+        dummy_input,
         f"{model_name}.onnx",
         input_names=["input"],
         output_names=["logits"],
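A sketch of what the new _CompiledModule alias enables, assuming a PyTorch-backend install: torch.compile wraps a module in torch._dynamo.eval_frame.OptimizedModule, which the recognition zoo above now accepts in its isinstance check. The model choice is illustrative.

import torch
from doctr.models import crnn_vgg16_bn, recognition_predictor

model = crnn_vgg16_bn(pretrained=True)
compiled = torch.compile(model)  # an OptimizedModule, i.e. a _CompiledModule

# Since 0.11.0 the predictor factories also accept the compiled wrapper
predictor = recognition_predictor(arch=compiled)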
doctr/models/utils/tensorflow.py
CHANGED

@@ -1,10 +1,11 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import logging
-from
+from collections.abc import Callable
+from typing import Any
 
 import tensorflow as tf
 import tf2onnx
@@ -39,7 +40,6 @@ def _build_model(model: Model):
     """Build a model by calling it once with dummy input
 
     Args:
-    ----
         model: the model to be built
     """
     model(tf.zeros((1, *model.cfg["input_shape"])), training=False)
@@ -47,8 +47,8 @@ def _build_model(model: Model):
 
 def load_pretrained_params(
     model: Model,
-    url:
-    hash_prefix:
+    url: str | None = None,
+    hash_prefix: str | None = None,
     skip_mismatch: bool = False,
     **kwargs: Any,
 ) -> None:
@@ -58,7 +58,6 @@ def load_pretrained_params(
     >>> load_pretrained_params(model, "https://yoursource.com/yourcheckpoint-yourhash.weights.h5")
 
     Args:
-    ----
         model: the keras model to be loaded
         url: URL of the zipped set of parameters
         hash_prefix: first characters of SHA256 expected hash
@@ -75,12 +74,12 @@ def load_pretrained_params(
 
 def conv_sequence(
     out_channels: int,
-    activation:
+    activation: str | Callable | None = None,
     bn: bool = False,
     padding: str = "same",
     kernel_initializer: str = "he_normal",
     **kwargs: Any,
-) ->
+) -> list[layers.Layer]:
     """Builds a convolutional-based layer sequence
 
     >>> from tensorflow.keras import Sequential
@@ -88,7 +87,6 @@ def conv_sequence(
     >>> module = Sequential(conv_sequence(32, 'relu', True, kernel_size=3, input_shape=[224, 224, 3]))
 
     Args:
-    ----
         out_channels: number of output channels
         activation: activation to be used (default: no activation)
         bn: should a batch normalization layer be added
@@ -97,7 +95,6 @@ def conv_sequence(
         **kwargs: additional arguments to be passed to the convolutional layer
 
     Returns:
-    -------
         list of layers
     """
     # No bias before Batch norm
@@ -125,12 +122,11 @@ class IntermediateLayerGetter(Model):
     >>> feat_extractor = IntermediateLayerGetter(ResNet50(include_top=False, pooling=False), target_layers)
 
     Args:
-    ----
         model: the model to extract feature maps from
         layer_names: the list of layers to retrieve the feature map from
     """
 
-    def __init__(self, model: Model, layer_names:
+    def __init__(self, model: Model, layer_names: list[str]) -> None:
         intermediate_fmaps = [model.get_layer(layer_name).get_output_at(0) for layer_name in layer_names]
         super().__init__(model.input, outputs=intermediate_fmaps)
 
@@ -139,8 +135,8 @@ class IntermediateLayerGetter(Model):
 
 
 def export_model_to_onnx(
-    model: Model, model_name: str, dummy_input:
-) ->
+    model: Model, model_name: str, dummy_input: list[tf.TensorSpec], **kwargs: Any
+) -> tuple[str, list[str]]:
     """Export model to ONNX format.
 
     >>> import tensorflow as tf
@@ -151,16 +147,18 @@ def export_model_to_onnx(
     >>> dummy_input=[tf.TensorSpec([None, 32, 32, 3], tf.float32, name="input")])
 
     Args:
-    ----
         model: the keras model to be exported
         model_name: the name for the exported model
         dummy_input: the dummy input to the model
         kwargs: additional arguments to be passed to tf2onnx
 
     Returns:
-    -------
         the path to the exported model and a list with the output layer names
     """
+    # get the users eager mode
+    eager_mode = tf.executing_eagerly()
+    # set eager mode to true to avoid issues with tf2onnx
+    tf.config.run_functions_eagerly(True)
     large_model = kwargs.get("large_model", False)
     model_proto, _ = tf2onnx.convert.from_keras(
         model,
@@ -171,6 +169,9 @@ def export_model_to_onnx(
     # Get the output layer names
     output = [n.name for n in model_proto.graph.output]
 
+    # reset the eager mode to the users mode
+    tf.config.run_functions_eagerly(eager_mode)
+
     # models which are too large (weights > 2GB while converting to ONNX) needs to be handled
     # about an external tensor storage where the graph and weights are seperatly stored in a archive
     if large_model:
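A sketch of the TensorFlow export path that now saves and restores the user's eager mode, following the dummy-input pattern from the function's own docstring; the classification model and 32x32 shape are illustrative choices.

import tensorflow as tf
from doctr.models.classification import resnet18
from doctr.models.utils import export_model_to_onnx

model = resnet18(pretrained=False)
# Build the graph once with a dummy batch (mirrors _build_model above)
model(tf.zeros((1, 32, 32, 3)), training=False)
model_path, output_names = export_model_to_onnx(
    model,
    model_name="resnet18",
    dummy_input=[tf.TensorSpec([None, 32, 32, 3], tf.float32, name="input")],
)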
doctr/models/zoo.py
CHANGED

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -83,7 +83,6 @@ def ocr_predictor(
     >>> out = model([input_page])
 
     Args:
-    ----
         det_arch: name of the detection architecture or the model itself to use
             (e.g. 'db_resnet50', 'db_mobilenet_v3_large')
         reco_arch: name of the recognition architecture or the model itself to use
@@ -108,7 +107,6 @@ def ocr_predictor(
         kwargs: keyword args of `OCRPredictor`
 
     Returns:
-    -------
         OCR predictor
     """
     return _predictor(
@@ -197,7 +195,6 @@ def kie_predictor(
     >>> out = model([input_page])
 
     Args:
-    ----
         det_arch: name of the detection architecture or the model itself to use
             (e.g. 'db_resnet50', 'db_mobilenet_v3_large')
         reco_arch: name of the recognition architecture or the model itself to use
@@ -222,7 +219,6 @@ def kie_predictor(
         kwargs: keyword args of `OCRPredictor`
 
     Returns:
-    -------
         KIE predictor
     """
     return _kie_predictor(
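For reference, a minimal end-to-end sketch of the ocr_predictor entry point whose docstring is touched above, using the detection and recognition architectures cited in that docstring; the PDF path is a placeholder.

from doctr.io import DocumentFile
from doctr.models import ocr_predictor

# Detection + recognition pipeline
model = ocr_predictor(det_arch="db_resnet50", reco_arch="crnn_vgg16_bn", pretrained=True)
doc = DocumentFile.from_pdf("path/to/document.pdf")  # placeholder path
result = model(doc)
print(result.render())  # plain-text export of the reconstructed pages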
doctr/transforms/functional/base.py
CHANGED

@@ -1,9 +1,8 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Tuple, Union
 
 import cv2
 import numpy as np
@@ -15,17 +14,15 @@ __all__ = ["crop_boxes", "create_shadow_mask"]
 
 def crop_boxes(
     boxes: np.ndarray,
-    crop_box:
+    crop_box: tuple[int, int, int, int] | tuple[float, float, float, float],
 ) -> np.ndarray:
     """Crop localization boxes
 
     Args:
-    ----
         boxes: ndarray of shape (N, 4) in relative or abs coordinates
         crop_box: box (xmin, ymin, xmax, ymax) to crop the image, in the same coord format that the boxes
 
     Returns:
-    -------
         the cropped boxes
     """
     is_box_rel = boxes.max() <= 1
@@ -49,17 +46,15 @@ def crop_boxes(
     return boxes[is_valid]
 
 
-def expand_line(line: np.ndarray, target_shape:
+def expand_line(line: np.ndarray, target_shape: tuple[int, int]) -> tuple[float, float]:
     """Expands a 2-point line, so that the first is on the edge. In other terms, we extend the line in
     the same direction until we meet one of the edges.
 
     Args:
-    ----
         line: array of shape (2, 2) of the point supposed to be on one edge, and the shadow tip.
         target_shape: the desired mask shape
 
     Returns:
-    -------
         2D coordinates of the first point once we extended the line (on one of the edges)
     """
     if any(coord == 0 or coord == size for coord, size in zip(line[0], target_shape[::-1])):
@@ -112,7 +107,7 @@ def expand_line(line: np.ndarray, target_shape: Tuple[int, int]) -> Tuple[float,
 
 
 def create_shadow_mask(
-    target_shape:
+    target_shape: tuple[int, int],
     min_base_width=0.3,
     max_tip_width=0.5,
     max_tip_height=0.3,
@@ -120,14 +115,12 @@ def create_shadow_mask(
     """Creates a random shadow mask
 
     Args:
-    ----
         target_shape: the target shape (H, W)
         min_base_width: the relative minimum shadow base width
         max_tip_width: the relative maximum shadow tip width
         max_tip_height: the relative maximum shadow tip height
 
     Returns:
-    -------
         a numpy ndarray of shape (H, W, 1) with values in the range [0, 1]
     """
     # Default base is top