python-doctr 0.12.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/__init__.py +0 -1
- doctr/datasets/__init__.py +0 -5
- doctr/datasets/datasets/__init__.py +1 -6
- doctr/datasets/datasets/pytorch.py +2 -2
- doctr/datasets/generator/__init__.py +1 -6
- doctr/datasets/vocabs.py +0 -2
- doctr/file_utils.py +2 -101
- doctr/io/image/__init__.py +1 -7
- doctr/io/image/pytorch.py +1 -1
- doctr/models/_utils.py +3 -3
- doctr/models/classification/magc_resnet/__init__.py +1 -6
- doctr/models/classification/magc_resnet/pytorch.py +2 -2
- doctr/models/classification/mobilenet/__init__.py +1 -6
- doctr/models/classification/predictor/__init__.py +1 -6
- doctr/models/classification/predictor/pytorch.py +1 -1
- doctr/models/classification/resnet/__init__.py +1 -6
- doctr/models/classification/textnet/__init__.py +1 -6
- doctr/models/classification/textnet/pytorch.py +1 -1
- doctr/models/classification/vgg/__init__.py +1 -6
- doctr/models/classification/vip/__init__.py +1 -4
- doctr/models/classification/vip/layers/__init__.py +1 -4
- doctr/models/classification/vip/layers/pytorch.py +1 -1
- doctr/models/classification/vit/__init__.py +1 -6
- doctr/models/classification/vit/pytorch.py +2 -2
- doctr/models/classification/zoo.py +6 -11
- doctr/models/detection/_utils/__init__.py +1 -6
- doctr/models/detection/core.py +1 -1
- doctr/models/detection/differentiable_binarization/__init__.py +1 -6
- doctr/models/detection/differentiable_binarization/base.py +4 -12
- doctr/models/detection/differentiable_binarization/pytorch.py +3 -3
- doctr/models/detection/fast/__init__.py +1 -6
- doctr/models/detection/fast/base.py +4 -14
- doctr/models/detection/fast/pytorch.py +4 -4
- doctr/models/detection/linknet/__init__.py +1 -6
- doctr/models/detection/linknet/base.py +3 -12
- doctr/models/detection/linknet/pytorch.py +2 -2
- doctr/models/detection/predictor/__init__.py +1 -6
- doctr/models/detection/predictor/pytorch.py +1 -1
- doctr/models/detection/zoo.py +15 -32
- doctr/models/factory/hub.py +8 -21
- doctr/models/kie_predictor/__init__.py +1 -6
- doctr/models/kie_predictor/pytorch.py +2 -6
- doctr/models/modules/layers/__init__.py +1 -6
- doctr/models/modules/layers/pytorch.py +3 -3
- doctr/models/modules/transformer/__init__.py +1 -6
- doctr/models/modules/transformer/pytorch.py +2 -2
- doctr/models/modules/vision_transformer/__init__.py +1 -6
- doctr/models/predictor/__init__.py +1 -6
- doctr/models/predictor/base.py +3 -8
- doctr/models/predictor/pytorch.py +2 -5
- doctr/models/preprocessor/__init__.py +1 -6
- doctr/models/preprocessor/pytorch.py +27 -32
- doctr/models/recognition/crnn/__init__.py +1 -6
- doctr/models/recognition/crnn/pytorch.py +6 -6
- doctr/models/recognition/master/__init__.py +1 -6
- doctr/models/recognition/master/pytorch.py +5 -5
- doctr/models/recognition/parseq/__init__.py +1 -6
- doctr/models/recognition/parseq/pytorch.py +5 -5
- doctr/models/recognition/predictor/__init__.py +1 -6
- doctr/models/recognition/predictor/_utils.py +7 -16
- doctr/models/recognition/predictor/pytorch.py +1 -2
- doctr/models/recognition/sar/__init__.py +1 -6
- doctr/models/recognition/sar/pytorch.py +3 -3
- doctr/models/recognition/viptr/__init__.py +1 -4
- doctr/models/recognition/viptr/pytorch.py +3 -3
- doctr/models/recognition/vitstr/__init__.py +1 -6
- doctr/models/recognition/vitstr/pytorch.py +3 -3
- doctr/models/recognition/zoo.py +13 -13
- doctr/models/utils/__init__.py +1 -6
- doctr/models/utils/pytorch.py +1 -1
- doctr/transforms/functional/__init__.py +1 -6
- doctr/transforms/functional/pytorch.py +4 -4
- doctr/transforms/modules/__init__.py +1 -7
- doctr/transforms/modules/base.py +26 -92
- doctr/transforms/modules/pytorch.py +28 -26
- doctr/utils/geometry.py +6 -10
- doctr/utils/visualization.py +1 -1
- doctr/version.py +1 -1
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/METADATA +18 -75
- python_doctr-1.0.0.dist-info/RECORD +149 -0
- doctr/datasets/datasets/tensorflow.py +0 -59
- doctr/datasets/generator/tensorflow.py +0 -58
- doctr/datasets/loader.py +0 -94
- doctr/io/image/tensorflow.py +0 -101
- doctr/models/classification/magc_resnet/tensorflow.py +0 -196
- doctr/models/classification/mobilenet/tensorflow.py +0 -442
- doctr/models/classification/predictor/tensorflow.py +0 -60
- doctr/models/classification/resnet/tensorflow.py +0 -418
- doctr/models/classification/textnet/tensorflow.py +0 -275
- doctr/models/classification/vgg/tensorflow.py +0 -125
- doctr/models/classification/vit/tensorflow.py +0 -201
- doctr/models/detection/_utils/tensorflow.py +0 -34
- doctr/models/detection/differentiable_binarization/tensorflow.py +0 -421
- doctr/models/detection/fast/tensorflow.py +0 -427
- doctr/models/detection/linknet/tensorflow.py +0 -377
- doctr/models/detection/predictor/tensorflow.py +0 -70
- doctr/models/kie_predictor/tensorflow.py +0 -187
- doctr/models/modules/layers/tensorflow.py +0 -171
- doctr/models/modules/transformer/tensorflow.py +0 -235
- doctr/models/modules/vision_transformer/tensorflow.py +0 -100
- doctr/models/predictor/tensorflow.py +0 -155
- doctr/models/preprocessor/tensorflow.py +0 -122
- doctr/models/recognition/crnn/tensorflow.py +0 -317
- doctr/models/recognition/master/tensorflow.py +0 -320
- doctr/models/recognition/parseq/tensorflow.py +0 -516
- doctr/models/recognition/predictor/tensorflow.py +0 -79
- doctr/models/recognition/sar/tensorflow.py +0 -423
- doctr/models/recognition/vitstr/tensorflow.py +0 -285
- doctr/models/utils/tensorflow.py +0 -189
- doctr/transforms/functional/tensorflow.py +0 -254
- doctr/transforms/modules/tensorflow.py +0 -562
- python_doctr-0.12.0.dist-info/RECORD +0 -180
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/WHEEL +0 -0
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.0.dist-info}/zip-safe +0 -0
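
Every tensorflow.py backend module listed above is removed in 1.0.0, leaving PyTorch as the only supported backend. A minimal usage sketch, assuming the high-level ocr_predictor entry point is unchanged, a PyTorch install of docTR 1.0.0, and a local "sample.pdf" that you supply yourself:

# Hedged sketch: the high-level API call pattern, now always running on the PyTorch backend.
from doctr.io import DocumentFile
from doctr.models import ocr_predictor

doc = DocumentFile.from_pdf("sample.pdf")   # pages are loaded as numpy arrays
predictor = ocr_predictor(pretrained=True)  # detection + recognition pipeline
result = predictor(doc)                     # run end-to-end OCR
print(result.render())                      # plain-text export of the parsed pages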
doctr/models/recognition/parseq/pytorch.py
CHANGED

@@ -19,7 +19,7 @@ from doctr.datasets import VOCABS
 from doctr.models.modules.transformer import MultiHeadAttention, PositionwiseFeedForward

 from ...classification import vit_s
-from ...utils
+from ...utils import _bf16_to_float32, load_pretrained_params
 from .base import _PARSeq, _PARSeqPostProcessor

 __all__ = ["PARSeq", "parseq"]

@@ -299,7 +299,7 @@ class PARSeq(_PARSeq, nn.Module):

 # Stop decoding if all sequences have reached the EOS token
 # NOTE: `break` isn't correctly translated to Onnx so we don't break here if we want to export
-if not self.exportable and max_len is None and (ys == self.vocab_size).any(dim=-1).all():
+if not self.exportable and max_len is None and (ys == self.vocab_size).any(dim=-1).all():
 break

 logits = torch.cat(pos_logits, dim=1) # (N, max_length, vocab_size + 1)

@@ -314,7 +314,7 @@ class PARSeq(_PARSeq, nn.Module):

 # Create padding mask for refined target input maskes all behind EOS token as False
 # (N, 1, 1, max_length)
-target_pad_mask = ~((ys == self.vocab_size).int().cumsum(-1) > 0).unsqueeze(1).unsqueeze(1)
+target_pad_mask = ~((ys == self.vocab_size).int().cumsum(-1) > 0).unsqueeze(1).unsqueeze(1)
 mask = (target_pad_mask.bool() & query_mask[:, : ys.shape[1]].bool()).int()
 logits = self.head(self.decode(ys, features, mask, target_query=pos_queries))

@@ -367,7 +367,7 @@ class PARSeq(_PARSeq, nn.Module):
 # remove the [EOS] tokens for the succeeding perms
 if i == 1:
 gt_out = torch.where(gt_out == self.vocab_size, self.vocab_size + 2, gt_out)
-n = (gt_out != self.vocab_size + 2).sum().item()
+n = (gt_out != self.vocab_size + 2).sum().item()

 loss /= loss_numel

@@ -391,7 +391,7 @@ class PARSeq(_PARSeq, nn.Module):

 if target is None or return_preds:
 # Disable for torch.compile compatibility
-@torch.compiler.disable
+@torch.compiler.disable
 def _postprocess(logits: torch.Tensor) -> list[tuple[str, float]]:
 return self.postprocessor(logits)

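The @torch.compiler.disable decorator seen above (and in the SAR, VIPTR, and ViTSTR diffs below) keeps the Python-level postprocessing out of the captured graph so the model itself can still be wrapped with torch.compile. A minimal standalone sketch of the pattern, assuming a recent PyTorch (roughly 2.1+); TinyModel and _postprocess are illustrative names, not part of docTR:

import torch
import torch.nn as nn

class TinyModel(nn.Module):
    # Illustrative module, not a docTR class.
    def __init__(self) -> None:
        super().__init__()
        self.linear = nn.Linear(8, 4)

    def forward(self, x: torch.Tensor) -> list[int]:
        logits = self.linear(x)

        @torch.compiler.disable  # runs eagerly, excluded from graph capture
        def _postprocess(out: torch.Tensor) -> list[int]:
            return out.argmax(-1).tolist()

        return _postprocess(logits)

compiled = torch.compile(TinyModel())
print(compiled(torch.rand(2, 8)))  # e.g. [3, 1]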
doctr/models/recognition/predictor/_utils.py
CHANGED

@@ -18,17 +18,15 @@ def split_crops(
 max_ratio: float,
 target_ratio: int,
 split_overlap_ratio: float,
-channels_last: bool = True,
 ) -> tuple[list[np.ndarray], list[int | tuple[int, int, float]], bool]:
 """
 Split crops horizontally if they exceed a given aspect ratio.

 Args:
-crops: List of image crops (H, W, C)
+crops: List of image crops (H, W, C).
 max_ratio: Aspect ratio threshold above which crops are split.
 target_ratio: Target aspect ratio after splitting (e.g., 4 for 128x32).
 split_overlap_ratio: Desired overlap between splits (as a fraction of split width).
-channels_last: Whether the crops are in channels-last format.

 Returns:
 A tuple containing:

@@ -44,14 +42,14 @@ def split_crops(
 crop_map: list[int | tuple[int, int, float]] = []

 for crop in crops:
-h, w = crop.shape[:2]
+h, w = crop.shape[:2]
 aspect_ratio = w / h

 if aspect_ratio > max_ratio:
 split_width = max(1, math.ceil(h * target_ratio))
 overlap_width = max(0, math.floor(split_width * split_overlap_ratio))

-splits, last_overlap = _split_horizontally(crop, split_width, overlap_width
+splits, last_overlap = _split_horizontally(crop, split_width, overlap_width)

 # Remove any empty splits
 splits = [s for s in splits if all(dim > 0 for dim in s.shape)]

@@ -70,23 +68,20 @@
 return new_crops, crop_map, remap_required


-def _split_horizontally(
-image: np.ndarray, split_width: int, overlap_width: int, channels_last: bool
-) -> tuple[list[np.ndarray], float]:
+def _split_horizontally(image: np.ndarray, split_width: int, overlap_width: int) -> tuple[list[np.ndarray], float]:
 """
 Horizontally split a single image with overlapping regions.

 Args:
-image: The image to split (H, W, C)
+image: The image to split (H, W, C).
 split_width: Width of each split.
 overlap_width: Width of the overlapping region.
-channels_last: Whether the image is in channels-last format.

 Returns:
 - A list of horizontal image slices.
 - The actual overlap ratio of the last split.
 """
-image_width = image.shape[1]
+image_width = image.shape[1]
 if image_width <= split_width:
 return [image], 0.0

@@ -101,11 +96,7 @@ def _split_horizontally(
 splits = []
 for start_col in starts:
 end_col = start_col + split_width
-
-split = image[:, start_col:end_col, :]
-else:
-split = image[:, :, start_col:end_col]
-splits.append(split)
+splits.append(image[:, start_col:end_col, :])

 # Calculate the last overlap ratio, if only one split no overlap
 last_overlap = 0

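With the channels_last flag gone, split_crops now always expects (H, W, C) numpy crops. A hedged sketch of calling the updated helper; it is a private utility, and the threshold values below are illustrative only:

import numpy as np
from doctr.models.recognition.predictor._utils import split_crops

crops = [np.random.randint(0, 255, (32, 512, 3), dtype=np.uint8)]  # one very wide (H, W, C) crop
new_crops, crop_map, remapped = split_crops(
    crops,
    max_ratio=8.0,            # only split crops wider than 8:1
    target_ratio=4,           # aim for roughly 4:1 slices (e.g. 128x32)
    split_overlap_ratio=0.5,  # 50% overlap between consecutive slices
)
print(len(new_crops), crop_map, remapped)  # split pieces, remapping info, whether remapping is needed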
doctr/models/recognition/predictor/pytorch.py
CHANGED

@@ -44,7 +44,7 @@ class RecognitionPredictor(nn.Module):
 @torch.inference_mode()
 def forward(
 self,
-crops: Sequence[np.ndarray
+crops: Sequence[np.ndarray],
 **kwargs: Any,
 ) -> list[tuple[str, float]]:
 if len(crops) == 0:

@@ -61,7 +61,6 @@ class RecognitionPredictor(nn.Module):
 self.critical_ar,
 self.target_ar,
 self.overlap_ratio,
-isinstance(crops[0], np.ndarray),
 )
 if remapped:
 crops = new_crops

doctr/models/recognition/sar/pytorch.py
CHANGED

@@ -15,7 +15,7 @@ from torchvision.models._utils import IntermediateLayerGetter
 from doctr.datasets import VOCABS

 from ...classification import resnet31
-from ...utils
+from ...utils import _bf16_to_float32, load_pretrained_params
 from ..core import RecognitionModel, RecognitionPostProcessor

 __all__ = ["SAR", "sar_resnet31"]

@@ -272,7 +272,7 @@ class SAR(nn.Module, RecognitionModel):

 if target is None or return_preds:
 # Disable for torch.compile compatibility
-@torch.compiler.disable
+@torch.compiler.disable
 def _postprocess(decoded_features: torch.Tensor) -> list[tuple[str, float]]:
 return self.postprocessor(decoded_features)

@@ -304,7 +304,7 @@ class SAR(nn.Module, RecognitionModel):
 # Input length : number of timesteps
 input_len = model_output.shape[1]
 # Add one for additional <eos> token
-seq_len = seq_len + 1
+seq_len = seq_len + 1
 # Compute loss
 # (N, L, vocab_size + 1)
 cce = F.cross_entropy(model_output.permute(0, 2, 1), gt, reduction="none")

doctr/models/recognition/viptr/pytorch.py
CHANGED

@@ -16,7 +16,7 @@ from torchvision.models._utils import IntermediateLayerGetter
 from doctr.datasets import VOCABS, decode_sequence

 from ...classification import vip_tiny
-from ...utils
+from ...utils import _bf16_to_float32, load_pretrained_params
 from ..core import RecognitionModel, RecognitionPostProcessor

 __all__ = ["VIPTR", "viptr_tiny"]

@@ -70,7 +70,7 @@ class VIPTRPostProcessor(RecognitionPostProcessor):

 def __call__(self, logits: torch.Tensor) -> list[tuple[str, float]]:
 """Performs decoding of raw output with CTC and decoding of CTC predictions
-with label_to_idx mapping
+with label_to_idx mapping dictionary

 Args:
 logits: raw output of the model, shape (N, C + 1, seq_len)

@@ -166,7 +166,7 @@ class VIPTR(RecognitionModel, nn.Module):

 if target is None or return_preds:
 # Disable for torch.compile compatibility
-@torch.compiler.disable
+@torch.compiler.disable
 def _postprocess(decoded_features: torch.Tensor) -> list[tuple[str, float]]:
 return self.postprocessor(decoded_features)

doctr/models/recognition/vitstr/pytorch.py
CHANGED

@@ -15,7 +15,7 @@ from torchvision.models._utils import IntermediateLayerGetter
 from doctr.datasets import VOCABS

 from ...classification import vit_b, vit_s
-from ...utils
+from ...utils import _bf16_to_float32, load_pretrained_params
 from .base import _ViTSTR, _ViTSTRPostProcessor

 __all__ = ["ViTSTR", "vitstr_small", "vitstr_base"]

@@ -117,7 +117,7 @@ class ViTSTR(_ViTSTR, nn.Module):

 if target is None or return_preds:
 # Disable for torch.compile compatibility
-@torch.compiler.disable
+@torch.compiler.disable
 def _postprocess(decoded_features: torch.Tensor) -> list[tuple[str, float]]:
 return self.postprocessor(decoded_features)

@@ -149,7 +149,7 @@ class ViTSTR(_ViTSTR, nn.Module):
 # Input length : number of steps
 input_len = model_output.shape[1]
 # Add one for additional <eos> token (sos disappear in shift!)
-seq_len = seq_len + 1
+seq_len = seq_len + 1
 # Compute loss: don't forget to shift gt! Otherwise the model learns to output the gt[t-1]!
 # The "masked" first gt char is <sos>.
 cce = F.cross_entropy(model_output.permute(0, 2, 1), gt[:, 1:], reduction="none")

doctr/models/recognition/zoo.py
CHANGED
@@ -5,8 +5,8 @@

 from typing import Any

-from doctr.file_utils import is_tf_available, is_torch_available
 from doctr.models.preprocessor import PreProcessor
+from doctr.models.utils import _CompiledModule

 from .. import recognition
 from .predictor import RecognitionPredictor

@@ -23,11 +23,9 @@ ARCHS: list[str] = [
 "vitstr_small",
 "vitstr_base",
 "parseq",
+"viptr_tiny",
 ]

-if is_torch_available():
-ARCHS.extend(["viptr_tiny"])
-

 def _predictor(arch: Any, pretrained: bool, **kwargs: Any) -> RecognitionPredictor:
 if isinstance(arch, str):

@@ -38,14 +36,16 @@ def _predictor(arch: Any, pretrained: bool, **kwargs: Any) -> RecognitionPredict
 pretrained=pretrained, pretrained_backbone=kwargs.get("pretrained_backbone", True)
 )
 else:
-
-
-
-
-
-
-
-
+# Adding the type for torch compiled models to the allowed architectures
+allowed_archs = [
+recognition.CRNN,
+recognition.SAR,
+recognition.MASTER,
+recognition.ViTSTR,
+recognition.PARSeq,
+recognition.VIPTR,
+_CompiledModule,
+]

 if not isinstance(arch, tuple(allowed_archs)):
 raise ValueError(f"unknown architecture: {type(arch)}")

@@ -56,7 +56,7 @@ def _predictor(arch: Any, pretrained: bool, **kwargs: Any) -> RecognitionPredict
 kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
 kwargs["std"] = kwargs.get("std", _model.cfg["std"])
 kwargs["batch_size"] = kwargs.get("batch_size", 128)
-input_shape = _model.cfg["input_shape"][
+input_shape = _model.cfg["input_shape"][-2:]
 predictor = RecognitionPredictor(PreProcessor(input_shape, preserve_aspect_ratio=True, **kwargs), _model)

 return predictor

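Since the is_torch_available() gate is gone, "viptr_tiny" is always part of ARCHS. A minimal sketch using the public recognition predictor, assuming pretrained weights can be downloaded:

import numpy as np
from doctr.models import recognition_predictor

reco = recognition_predictor("viptr_tiny", pretrained=True)
word_crop = np.random.randint(0, 255, (32, 128, 3), dtype=np.uint8)  # dummy (H, W, C) word crop
print(reco([word_crop]))  # list of (word, confidence) tuples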
doctr/models/utils/__init__.py
CHANGED
doctr/models/utils/pytorch.py
CHANGED
@@ -164,7 +164,7 @@ def export_model_to_onnx(model: nn.Module, model_name: str, dummy_input: torch.T
 """
 torch.onnx.export(
 model,
-dummy_input,
+dummy_input, # type: ignore[arg-type]
 f"{model_name}.onnx",
 input_names=["input"],
 output_names=["logits"],

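A hedged sketch of the exporter touched above, following the ONNX export recipe from the docTR documentation; the architecture and dummy-input shape are illustrative:

import torch
from doctr.models import crnn_vgg16_bn
from doctr.models.utils import export_model_to_onnx

model = crnn_vgg16_bn(pretrained=True).eval()
dummy_input = torch.rand((1, 3, 32, 128), dtype=torch.float32)  # (N, C, H, W) recognition input
model_path = export_model_to_onnx(model, model_name="crnn_vgg16_bn", dummy_input=dummy_input)
print(model_path)  # expected to be "crnn_vgg16_bn.onnx"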
doctr/transforms/functional/pytorch.py
CHANGED

@@ -33,9 +33,9 @@ def invert_colors(img: torch.Tensor, min_val: float = 0.6) -> torch.Tensor:
 rgb_shift = min_val + (1 - min_val) * torch.rand(shift_shape)
 # Inverse the color
 if out.dtype == torch.uint8:
-out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8)
+out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8)
 else:
-out = out * rgb_shift.to(dtype=out.dtype)
+out = out * rgb_shift.to(dtype=out.dtype)
 # Inverse the color
 out = 255 - out if out.dtype == torch.uint8 else 1 - out
 return out

@@ -77,7 +77,7 @@ def rotate_sample(
 rotated_geoms: np.ndarray = rotate_abs_geoms(
 _geoms,
 angle,
-img.shape[1:],
+img.shape[1:], # type: ignore[arg-type]
 expand,
 ).astype(np.float32)

@@ -124,7 +124,7 @@ def random_shadow(img: torch.Tensor, opacity_range: tuple[float, float], **kwarg
 Returns:
 Shadowed image as a PyTorch tensor (same shape as input).
 """
-shadow_mask = create_shadow_mask(img.shape[1:], **kwargs)
+shadow_mask = create_shadow_mask(img.shape[1:], **kwargs) # type: ignore[arg-type]
 opacity = np.random.uniform(*opacity_range)

 # Apply Gaussian blur to the shadow mask

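For reference, a hedged sketch of the functional helper whose call site gains a type: ignore above; it expects a channels-first tensor, and the opacity range here is illustrative:

import torch
from doctr.transforms.functional import random_shadow

img = torch.rand(3, 64, 64)  # (C, H, W) image in [0, 1]
shadowed = random_shadow(img, opacity_range=(0.2, 0.8))
print(shadowed.shape)  # same shape as the input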
doctr/transforms/modules/base.py
CHANGED
@@ -20,27 +20,13 @@ __all__ = ["SampleCompose", "ImageTransform", "ColorInversion", "OneOf", "Random
 class SampleCompose(NestedObject):
 """Implements a wrapper that will apply transformations sequentially on both image and target

-..
+.. code:: python

-
-
-
-
-
->>> import torch
->>> from doctr.transforms import SampleCompose, ImageTransform, ColorInversion, RandomRotate
->>> transfos = SampleCompose([ImageTransform(ColorInversion((32, 32))), RandomRotate(30)])
->>> out, out_boxes = transfos(torch.rand(8, 64, 64, 3), np.zeros((2, 4)))
-
-.. tab:: TensorFlow
-
-.. code:: python
-
->>> import numpy as np
->>> import tensorflow as tf
->>> from doctr.transforms import SampleCompose, ImageTransform, ColorInversion, RandomRotate
->>> transfo = SampleCompose([ImageTransform(ColorInversion((32, 32))), RandomRotate(30)])
->>> out, out_boxes = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1), np.zeros((2, 4)))
+>>> import numpy as np
+>>> import torch
+>>> from doctr.transforms import SampleCompose, ImageTransform, ColorInversion, RandomRotate
+>>> transfos = SampleCompose([ImageTransform(ColorInversion((32, 32))), RandomRotate(30)])
+>>> out, out_boxes = transfos(torch.rand(8, 64, 64, 3), np.zeros((2, 4)))

 Args:
 transforms: list of transformation modules

@@ -61,25 +47,12 @@ class SampleCompose(NestedObject):
 class ImageTransform(NestedObject):
 """Implements a transform wrapper to turn an image-only transformation into an image+target transform

-..
-
-.. tab:: PyTorch
-
-.. code:: python
+.. code:: python

-
-
-
-
-
-.. tab:: TensorFlow
-
-.. code:: python
-
->>> import tensorflow as tf
->>> from doctr.transforms import ImageTransform, ColorInversion
->>> transfo = ImageTransform(ColorInversion((32, 32)))
->>> out, _ = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1), None)
+>>> import torch
+>>> from doctr.transforms import ImageTransform, ColorInversion
+>>> transfo = ImageTransform(ColorInversion((32, 32)))
+>>> out, _ = transfo(torch.rand(8, 64, 64, 3), None)

 Args:
 transform: the image transformation module to wrap

@@ -99,25 +72,12 @@ class ColorInversion(NestedObject):
 """Applies the following tranformation to a tensor (image or batch of images):
 convert to grayscale, colorize (shift 0-values randomly), and then invert colors

-..
-
-.. tab:: PyTorch
-
-.. code:: python
-
->>> import torch
->>> from doctr.transforms import ColorInversion
->>> transfo = ColorInversion(min_val=0.6)
->>> out = transfo(torch.rand(8, 64, 64, 3))
+.. code:: python

-
-
-
-
->>> import tensorflow as tf
->>> from doctr.transforms import ColorInversion
->>> transfo = ColorInversion(min_val=0.6)
->>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
+>>> import torch
+>>> from doctr.transforms import ColorInversion
+>>> transfo = ColorInversion(min_val=0.6)
+>>> out = transfo(torch.rand(8, 64, 64, 3))

 Args:
 min_val: range [min_val, 1] to colorize RGB pixels

@@ -136,25 +96,12 @@ class ColorInversion(NestedObject):
 class OneOf(NestedObject):
 """Randomly apply one of the input transformations

-..
-
-.. tab:: PyTorch
-
-.. code:: python
-
->>> import torch
->>> from doctr.transforms import OneOf
->>> transfo = OneOf([JpegQuality(), Gamma()])
->>> out = transfo(torch.rand(1, 64, 64, 3))
-
-.. tab:: TensorFlow
+.. code:: python

-
-
-
-
->>> transfo = OneOf([JpegQuality(), Gamma()])
->>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
+>>> import torch
+>>> from doctr.transforms import OneOf
+>>> transfo = OneOf([JpegQuality(), Gamma()])
+>>> out = transfo(torch.rand(1, 64, 64, 3))

 Args:
 transforms: list of transformations, one only will be picked

@@ -175,25 +122,12 @@ class OneOf(NestedObject):
 class RandomApply(NestedObject):
 """Apply with a probability p the input transformation

-..
-
-.. tab:: PyTorch
-
-.. code:: python
-
->>> import torch
->>> from doctr.transforms import RandomApply
->>> transfo = RandomApply(Gamma(), p=.5)
->>> out = transfo(torch.rand(1, 64, 64, 3))
-
-.. tab:: TensorFlow
-
-.. code:: python
+.. code:: python

-
-
-
-
+>>> import torch
+>>> from doctr.transforms import RandomApply
+>>> transfo = RandomApply(Gamma(), p=.5)
+>>> out = transfo(torch.rand(1, 64, 64, 3))

 Args:
 transform: transformation to apply

doctr/transforms/modules/pytorch.py
CHANGED

@@ -13,7 +13,7 @@ from torch.nn.functional import pad
 from torchvision.transforms import functional as F
 from torchvision.transforms import transforms as T

-from ..functional
+from ..functional import random_shadow

 __all__ = [
 "Resize",

@@ -27,7 +27,21 @@ __all__ = [


 class Resize(T.Resize):
-"""Resize the input image to the given size
+"""Resize the input image to the given size
+
+>>> import torch
+>>> from doctr.transforms import Resize
+>>> transfo = Resize((64, 64), preserve_aspect_ratio=True, symmetric_pad=True)
+>>> out = transfo(torch.rand((3, 64, 64)))
+
+Args:
+size: output size in pixels, either a tuple (height, width) or a single integer for square images
+interpolation: interpolation mode to use for resizing, default is bilinear
+preserve_aspect_ratio: whether to preserve the aspect ratio of the image,
+if True, the image will be resized to fit within the target size while maintaining its aspect ratio
+symmetric_pad: whether to symmetrically pad the image to the target size,
+if True, the image will be padded equally on both sides to fit the target size
+"""

 def __init__(
 self,

@@ -36,25 +50,19 @@ class Resize(T.Resize):
 preserve_aspect_ratio: bool = False,
 symmetric_pad: bool = False,
 ) -> None:
-super().__init__(size, interpolation, antialias=True)
+super().__init__(size if isinstance(size, (list, tuple)) else (size, size), interpolation, antialias=True)
 self.preserve_aspect_ratio = preserve_aspect_ratio
 self.symmetric_pad = symmetric_pad

-if not isinstance(self.size, (int, tuple, list)):
-raise AssertionError("size should be either a tuple, a list or an int")
-
 def forward(
 self,
 img: torch.Tensor,
 target: np.ndarray | None = None,
 ) -> torch.Tensor | tuple[torch.Tensor, np.ndarray]:
-
-target_ratio = img.shape[-2] / img.shape[-1]
-else:
-target_ratio = self.size[0] / self.size[1]
+target_ratio = self.size[0] / self.size[1]
 actual_ratio = img.shape[-2] / img.shape[-1]

-if not self.preserve_aspect_ratio or (target_ratio == actual_ratio
+if not self.preserve_aspect_ratio or (target_ratio == actual_ratio):
 # If we don't preserve the aspect ratio or the wanted aspect ratio is the same than the original one
 # We can use with the regular resize
 if target is not None:

@@ -62,16 +70,10 @@ class Resize(T.Resize):
 return super().forward(img)
 else:
 # Resize
-if
-
-
-
-tmp_size = (max(int(self.size[1] * actual_ratio), 1), self.size[1])
-elif isinstance(self.size, int): # self.size is the longest side, infer the other
-if img.shape[-2] <= img.shape[-1]:
-tmp_size = (max(int(self.size * actual_ratio), 1), self.size)
-else:
-tmp_size = (self.size, max(int(self.size / actual_ratio), 1))
+if actual_ratio > target_ratio:
+tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
+else:
+tmp_size = (max(int(self.size[1] * actual_ratio), 1), self.size[1])

 # Scale image
 img = F.resize(img, tmp_size, self.interpolation, antialias=True)

@@ -93,14 +95,14 @@ class Resize(T.Resize):
 if self.preserve_aspect_ratio:
 # Get absolute coords
 if target.shape[1:] == (4,):
-if
+if self.symmetric_pad:
 target[:, [0, 2]] = offset[0] + target[:, [0, 2]] * raw_shape[-1] / img.shape[-1]
 target[:, [1, 3]] = offset[1] + target[:, [1, 3]] * raw_shape[-2] / img.shape[-2]
 else:
 target[:, [0, 2]] *= raw_shape[-1] / img.shape[-1]
 target[:, [1, 3]] *= raw_shape[-2] / img.shape[-2]
 elif target.shape[1:] == (4, 2):
-if
+if self.symmetric_pad:
 target[..., 0] = offset[0] + target[..., 0] * raw_shape[-1] / img.shape[-1]
 target[..., 1] = offset[1] + target[..., 1] * raw_shape[-2] / img.shape[-2]
 else:

@@ -143,9 +145,9 @@ class GaussianNoise(torch.nn.Module):
 # Reshape the distribution
 noise = self.mean + 2 * self.std * torch.rand(x.shape, device=x.device) - self.std
 if x.dtype == torch.uint8:
-return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)
+return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)
 else:
-return (x + noise.to(dtype=x.dtype)).clamp(0, 1)
+return (x + noise.to(dtype=x.dtype)).clamp(0, 1)

 def extra_repr(self) -> str:
 return f"mean={self.mean}, std={self.std}"

@@ -233,7 +235,7 @@ class RandomShadow(torch.nn.Module):
 try:
 if x.dtype == torch.uint8:
 return (
-(
+(
 255
 * random_shadow(
 x.to(dtype=torch.float32) / 255,