python-doctr 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- doctr/contrib/__init__.py +1 -0
- doctr/contrib/artefacts.py +7 -9
- doctr/contrib/base.py +8 -17
- doctr/datasets/__init__.py +1 -0
- doctr/datasets/coco_text.py +139 -0
- doctr/datasets/cord.py +10 -8
- doctr/datasets/datasets/__init__.py +4 -4
- doctr/datasets/datasets/base.py +16 -16
- doctr/datasets/datasets/pytorch.py +12 -12
- doctr/datasets/datasets/tensorflow.py +10 -10
- doctr/datasets/detection.py +6 -9
- doctr/datasets/doc_artefacts.py +3 -4
- doctr/datasets/funsd.py +9 -8
- doctr/datasets/generator/__init__.py +4 -4
- doctr/datasets/generator/base.py +16 -17
- doctr/datasets/generator/pytorch.py +1 -3
- doctr/datasets/generator/tensorflow.py +1 -3
- doctr/datasets/ic03.py +5 -6
- doctr/datasets/ic13.py +6 -6
- doctr/datasets/iiit5k.py +10 -6
- doctr/datasets/iiithws.py +4 -5
- doctr/datasets/imgur5k.py +15 -7
- doctr/datasets/loader.py +4 -7
- doctr/datasets/mjsynth.py +6 -5
- doctr/datasets/ocr.py +3 -4
- doctr/datasets/orientation.py +3 -4
- doctr/datasets/recognition.py +4 -5
- doctr/datasets/sroie.py +6 -5
- doctr/datasets/svhn.py +7 -6
- doctr/datasets/svt.py +6 -7
- doctr/datasets/synthtext.py +19 -7
- doctr/datasets/utils.py +41 -35
- doctr/datasets/vocabs.py +1107 -49
- doctr/datasets/wildreceipt.py +14 -10
- doctr/file_utils.py +11 -7
- doctr/io/elements.py +96 -82
- doctr/io/html.py +1 -3
- doctr/io/image/__init__.py +3 -3
- doctr/io/image/base.py +2 -5
- doctr/io/image/pytorch.py +3 -12
- doctr/io/image/tensorflow.py +2 -11
- doctr/io/pdf.py +5 -7
- doctr/io/reader.py +5 -11
- doctr/models/_utils.py +15 -23
- doctr/models/builder.py +30 -48
- doctr/models/classification/__init__.py +1 -0
- doctr/models/classification/magc_resnet/__init__.py +3 -3
- doctr/models/classification/magc_resnet/pytorch.py +11 -15
- doctr/models/classification/magc_resnet/tensorflow.py +11 -14
- doctr/models/classification/mobilenet/__init__.py +3 -3
- doctr/models/classification/mobilenet/pytorch.py +20 -18
- doctr/models/classification/mobilenet/tensorflow.py +19 -23
- doctr/models/classification/predictor/__init__.py +4 -4
- doctr/models/classification/predictor/pytorch.py +7 -9
- doctr/models/classification/predictor/tensorflow.py +6 -8
- doctr/models/classification/resnet/__init__.py +4 -4
- doctr/models/classification/resnet/pytorch.py +47 -34
- doctr/models/classification/resnet/tensorflow.py +45 -35
- doctr/models/classification/textnet/__init__.py +3 -3
- doctr/models/classification/textnet/pytorch.py +20 -18
- doctr/models/classification/textnet/tensorflow.py +19 -17
- doctr/models/classification/vgg/__init__.py +3 -3
- doctr/models/classification/vgg/pytorch.py +21 -8
- doctr/models/classification/vgg/tensorflow.py +20 -14
- doctr/models/classification/vip/__init__.py +4 -0
- doctr/models/classification/vip/layers/__init__.py +4 -0
- doctr/models/classification/vip/layers/pytorch.py +615 -0
- doctr/models/classification/vip/pytorch.py +505 -0
- doctr/models/classification/vit/__init__.py +3 -3
- doctr/models/classification/vit/pytorch.py +18 -15
- doctr/models/classification/vit/tensorflow.py +15 -12
- doctr/models/classification/zoo.py +23 -14
- doctr/models/core.py +3 -3
- doctr/models/detection/_utils/__init__.py +4 -4
- doctr/models/detection/_utils/base.py +4 -7
- doctr/models/detection/_utils/pytorch.py +1 -5
- doctr/models/detection/_utils/tensorflow.py +1 -5
- doctr/models/detection/core.py +2 -8
- doctr/models/detection/differentiable_binarization/__init__.py +4 -4
- doctr/models/detection/differentiable_binarization/base.py +10 -21
- doctr/models/detection/differentiable_binarization/pytorch.py +37 -31
- doctr/models/detection/differentiable_binarization/tensorflow.py +26 -29
- doctr/models/detection/fast/__init__.py +4 -4
- doctr/models/detection/fast/base.py +8 -17
- doctr/models/detection/fast/pytorch.py +37 -35
- doctr/models/detection/fast/tensorflow.py +24 -28
- doctr/models/detection/linknet/__init__.py +4 -4
- doctr/models/detection/linknet/base.py +8 -18
- doctr/models/detection/linknet/pytorch.py +34 -28
- doctr/models/detection/linknet/tensorflow.py +24 -25
- doctr/models/detection/predictor/__init__.py +5 -5
- doctr/models/detection/predictor/pytorch.py +6 -7
- doctr/models/detection/predictor/tensorflow.py +5 -6
- doctr/models/detection/zoo.py +27 -7
- doctr/models/factory/hub.py +6 -10
- doctr/models/kie_predictor/__init__.py +5 -5
- doctr/models/kie_predictor/base.py +4 -5
- doctr/models/kie_predictor/pytorch.py +19 -20
- doctr/models/kie_predictor/tensorflow.py +14 -15
- doctr/models/modules/layers/__init__.py +3 -3
- doctr/models/modules/layers/pytorch.py +55 -10
- doctr/models/modules/layers/tensorflow.py +5 -7
- doctr/models/modules/transformer/__init__.py +3 -3
- doctr/models/modules/transformer/pytorch.py +12 -13
- doctr/models/modules/transformer/tensorflow.py +9 -10
- doctr/models/modules/vision_transformer/__init__.py +3 -3
- doctr/models/modules/vision_transformer/pytorch.py +2 -3
- doctr/models/modules/vision_transformer/tensorflow.py +3 -3
- doctr/models/predictor/__init__.py +5 -5
- doctr/models/predictor/base.py +28 -29
- doctr/models/predictor/pytorch.py +13 -14
- doctr/models/predictor/tensorflow.py +9 -10
- doctr/models/preprocessor/__init__.py +4 -4
- doctr/models/preprocessor/pytorch.py +13 -17
- doctr/models/preprocessor/tensorflow.py +10 -14
- doctr/models/recognition/__init__.py +1 -0
- doctr/models/recognition/core.py +3 -7
- doctr/models/recognition/crnn/__init__.py +4 -4
- doctr/models/recognition/crnn/pytorch.py +30 -29
- doctr/models/recognition/crnn/tensorflow.py +21 -24
- doctr/models/recognition/master/__init__.py +3 -3
- doctr/models/recognition/master/base.py +3 -7
- doctr/models/recognition/master/pytorch.py +32 -25
- doctr/models/recognition/master/tensorflow.py +22 -25
- doctr/models/recognition/parseq/__init__.py +3 -3
- doctr/models/recognition/parseq/base.py +3 -7
- doctr/models/recognition/parseq/pytorch.py +47 -29
- doctr/models/recognition/parseq/tensorflow.py +29 -27
- doctr/models/recognition/predictor/__init__.py +5 -5
- doctr/models/recognition/predictor/_utils.py +111 -52
- doctr/models/recognition/predictor/pytorch.py +9 -9
- doctr/models/recognition/predictor/tensorflow.py +8 -9
- doctr/models/recognition/sar/__init__.py +4 -4
- doctr/models/recognition/sar/pytorch.py +30 -22
- doctr/models/recognition/sar/tensorflow.py +22 -24
- doctr/models/recognition/utils.py +57 -53
- doctr/models/recognition/viptr/__init__.py +4 -0
- doctr/models/recognition/viptr/pytorch.py +277 -0
- doctr/models/recognition/vitstr/__init__.py +4 -4
- doctr/models/recognition/vitstr/base.py +3 -7
- doctr/models/recognition/vitstr/pytorch.py +28 -21
- doctr/models/recognition/vitstr/tensorflow.py +22 -23
- doctr/models/recognition/zoo.py +27 -11
- doctr/models/utils/__init__.py +4 -4
- doctr/models/utils/pytorch.py +41 -34
- doctr/models/utils/tensorflow.py +31 -23
- doctr/models/zoo.py +1 -5
- doctr/transforms/functional/__init__.py +3 -3
- doctr/transforms/functional/base.py +4 -11
- doctr/transforms/functional/pytorch.py +20 -28
- doctr/transforms/functional/tensorflow.py +10 -22
- doctr/transforms/modules/__init__.py +4 -4
- doctr/transforms/modules/base.py +48 -55
- doctr/transforms/modules/pytorch.py +58 -22
- doctr/transforms/modules/tensorflow.py +18 -32
- doctr/utils/common_types.py +8 -9
- doctr/utils/data.py +9 -13
- doctr/utils/fonts.py +2 -7
- doctr/utils/geometry.py +17 -48
- doctr/utils/metrics.py +17 -37
- doctr/utils/multithreading.py +4 -6
- doctr/utils/reconstitution.py +9 -13
- doctr/utils/repr.py +2 -3
- doctr/utils/visualization.py +16 -29
- doctr/version.py +1 -1
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/METADATA +70 -52
- python_doctr-0.12.0.dist-info/RECORD +180 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/WHEEL +1 -1
- python_doctr-0.10.0.dist-info/RECORD +0 -173
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info/licenses}/LICENSE +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/zip-safe +0 -0
doctr/models/utils/tensorflow.py
CHANGED
@@ -1,13 +1,15 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import logging
-from typing import Any, Callable, List, Optional, Tuple, Union
+from collections.abc import Callable
+from typing import Any
 
 import tensorflow as tf
 import tf2onnx
+import validators
 from tensorflow.keras import Model, layers
 
 from doctr.utils.data import download_from_url
@@ -39,7 +41,6 @@ def _build_model(model: Model):
     """Build a model by calling it once with dummy input
 
     Args:
-    ----
         model: the model to be built
     """
     model(tf.zeros((1, *model.cfg["input_shape"])), training=False)
@@ -47,8 +48,8 @@ def _build_model(model: Model):
 
 def load_pretrained_params(
     model: Model,
-    url: Optional[str] = None,
-    hash_prefix: Optional[str] = None,
+    path_or_url: str | None = None,
+    hash_prefix: str | None = None,
     skip_mismatch: bool = False,
     **kwargs: Any,
 ) -> None:
@@ -58,29 +59,34 @@
     >>> load_pretrained_params(model, "https://yoursource.com/yourcheckpoint-yourhash.weights.h5")
 
     Args:
-    ----
         model: the keras model to be loaded
-        url: URL of the zipped set of parameters
+        path_or_url: the path or URL to the model parameters (checkpoint)
         hash_prefix: first characters of SHA256 expected hash
         skip_mismatch: skip loading layers with mismatched shapes
         **kwargs: additional arguments to be passed to `doctr.utils.data.download_from_url`
     """
-    if url is None:
-        logging.warning("Invalid model URL, using default initialization.")
-    else:
-        archive_path = download_from_url(url, hash_prefix=hash_prefix, cache_subdir="models", **kwargs)
-        # Load weights
-        model.load_weights(archive_path, skip_mismatch=skip_mismatch)
+    if path_or_url is None:
+        logging.warning("No model URL or Path provided, using default initialization.")
+        return
+
+    archive_path = (
+        download_from_url(path_or_url, hash_prefix=hash_prefix, cache_subdir="models", **kwargs)
+        if validators.url(path_or_url)
+        else path_or_url
+    )
+
+    # Load weights
+    model.load_weights(archive_path, skip_mismatch=skip_mismatch)
 
 
 def conv_sequence(
     out_channels: int,
-    activation: Optional[Union[str, Callable]] = None,
+    activation: str | Callable | None = None,
     bn: bool = False,
     padding: str = "same",
     kernel_initializer: str = "he_normal",
     **kwargs: Any,
-) -> List[layers.Layer]:
+) -> list[layers.Layer]:
     """Builds a convolutional-based layer sequence
 
     >>> from tensorflow.keras import Sequential
@@ -88,7 +94,6 @@ def conv_sequence(
     >>> module = Sequential(conv_sequence(32, 'relu', True, kernel_size=3, input_shape=[224, 224, 3]))
 
     Args:
-    ----
         out_channels: number of output channels
         activation: activation to be used (default: no activation)
         bn: should a batch normalization layer be added
@@ -97,7 +102,6 @@ def conv_sequence(
         **kwargs: additional arguments to be passed to the convolutional layer
 
     Returns:
-    -------
         list of layers
     """
     # No bias before Batch norm
@@ -125,12 +129,11 @@ class IntermediateLayerGetter(Model):
     >>> feat_extractor = IntermediateLayerGetter(ResNet50(include_top=False, pooling=False), target_layers)
 
     Args:
-    ----
         model: the model to extract feature maps from
         layer_names: the list of layers to retrieve the feature map from
     """
 
-    def __init__(self, model: Model, layer_names: List[str]) -> None:
+    def __init__(self, model: Model, layer_names: list[str]) -> None:
         intermediate_fmaps = [model.get_layer(layer_name).get_output_at(0) for layer_name in layer_names]
         super().__init__(model.input, outputs=intermediate_fmaps)
 
@@ -139,8 +142,8 @@ class IntermediateLayerGetter(Model):
 
 
 def export_model_to_onnx(
-    model: Model, model_name: str, dummy_input: List[tf.TensorSpec], **kwargs: Any
-) -> Tuple[str, List[str]]:
+    model: Model, model_name: str, dummy_input: list[tf.TensorSpec], **kwargs: Any
+) -> tuple[str, list[str]]:
     """Export model to ONNX format.
 
     >>> import tensorflow as tf
@@ -151,16 +154,18 @@ def export_model_to_onnx(
     >>> dummy_input=[tf.TensorSpec([None, 32, 32, 3], tf.float32, name="input")])
 
     Args:
-    ----
         model: the keras model to be exported
         model_name: the name for the exported model
         dummy_input: the dummy input to the model
         kwargs: additional arguments to be passed to tf2onnx
 
     Returns:
-    -------
         the path to the exported model and a list with the output layer names
     """
+    # get the users eager mode
+    eager_mode = tf.executing_eagerly()
+    # set eager mode to true to avoid issues with tf2onnx
+    tf.config.run_functions_eagerly(True)
     large_model = kwargs.get("large_model", False)
     model_proto, _ = tf2onnx.convert.from_keras(
         model,
@@ -171,6 +176,9 @@ def export_model_to_onnx(
     # Get the output layer names
     output = [n.name for n in model_proto.graph.output]
 
+    # reset the eager mode to the users mode
+    tf.config.run_functions_eagerly(eager_mode)
+
     # models which are too large (weights > 2GB while converting to ONNX) needs to be handled
     # about an external tensor storage where the graph and weights are seperatly stored in a archive
     if large_model:
doctr/models/zoo.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -83,7 +83,6 @@ def ocr_predictor(
     >>> out = model([input_page])
 
     Args:
-    ----
         det_arch: name of the detection architecture or the model itself to use
             (e.g. 'db_resnet50', 'db_mobilenet_v3_large')
         reco_arch: name of the recognition architecture or the model itself to use
@@ -108,7 +107,6 @@ def ocr_predictor(
         kwargs: keyword args of `OCRPredictor`
 
     Returns:
-    -------
         OCR predictor
     """
     return _predictor(
@@ -197,7 +195,6 @@ def kie_predictor(
     >>> out = model([input_page])
 
    Args:
-    ----
         det_arch: name of the detection architecture or the model itself to use
             (e.g. 'db_resnet50', 'db_mobilenet_v3_large')
         reco_arch: name of the recognition architecture or the model itself to use
@@ -222,7 +219,6 @@ def kie_predictor(
         kwargs: keyword args of `OCRPredictor`
 
     Returns:
-    -------
         KIE predictor
     """
     return _kie_predictor(
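The zoo.py changes are cosmetic: the copyright year is bumped and the numpydoc-style `----`/`-------` separators are stripped from the `ocr_predictor` and `kie_predictor` docstrings. The public API is untouched; a typical call, consistent with the docstring examples above (the file path is a placeholder):

    from doctr.io import DocumentFile
    from doctr.models import ocr_predictor

    # Build the default end-to-end predictor and run it on a PDF
    model = ocr_predictor(det_arch="db_resnet50", reco_arch="crnn_vgg16_bn", pretrained=True)
    doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
    out = model(doc)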
doctr/transforms/functional/base.py
CHANGED
@@ -1,9 +1,8 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Tuple, Union
 
 import cv2
 import numpy as np
@@ -15,17 +14,15 @@ __all__ = ["crop_boxes", "create_shadow_mask"]
 
 def crop_boxes(
     boxes: np.ndarray,
-    crop_box: Union[Tuple[int, int, int, int], Tuple[float, float, float, float]],
+    crop_box: tuple[int, int, int, int] | tuple[float, float, float, float],
 ) -> np.ndarray:
     """Crop localization boxes
 
     Args:
-    ----
         boxes: ndarray of shape (N, 4) in relative or abs coordinates
         crop_box: box (xmin, ymin, xmax, ymax) to crop the image, in the same coord format that the boxes
 
     Returns:
-    -------
         the cropped boxes
     """
     is_box_rel = boxes.max() <= 1
@@ -49,17 +46,15 @@ def crop_boxes(
     return boxes[is_valid]
 
 
-def expand_line(line: np.ndarray, target_shape: Tuple[int, int]) -> Tuple[float, float]:
+def expand_line(line: np.ndarray, target_shape: tuple[int, int]) -> tuple[float, float]:
     """Expands a 2-point line, so that the first is on the edge. In other terms, we extend the line in
     the same direction until we meet one of the edges.
 
     Args:
-    ----
         line: array of shape (2, 2) of the point supposed to be on one edge, and the shadow tip.
         target_shape: the desired mask shape
 
     Returns:
-    -------
         2D coordinates of the first point once we extended the line (on one of the edges)
     """
     if any(coord == 0 or coord == size for coord, size in zip(line[0], target_shape[::-1])):
@@ -112,7 +107,7 @@ def expand_line(line: np.ndarray, target_shape: Tuple[int, int]) -> Tuple[float,
 
 
 def create_shadow_mask(
-    target_shape: Tuple[int, int],
+    target_shape: tuple[int, int],
     min_base_width=0.3,
     max_tip_width=0.5,
     max_tip_height=0.3,
@@ -120,14 +115,12 @@ def create_shadow_mask(
     """Creates a random shadow mask
 
     Args:
-    ----
         target_shape: the target shape (H, W)
         min_base_width: the relative minimum shadow base width
         max_tip_width: the relative maximum shadow tip width
         max_tip_height: the relative maximum shadow tip height
 
     Returns:
-    -------
         a numpy ndarray of shape (H, W, 1) with values in the range [0, 1]
     """
     # Default base is top
doctr/transforms/functional/pytorch.py
CHANGED
@@ -1,13 +1,13 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 from copy import deepcopy
-from typing import Tuple
 
 import numpy as np
 import torch
+from scipy.ndimage import gaussian_filter
 from torchvision.transforms import functional as F
 
 from doctr.utils.geometry import rotate_abs_geoms
@@ -21,12 +21,10 @@ def invert_colors(img: torch.Tensor, min_val: float = 0.6) -> torch.Tensor:
     """Invert the colors of an image
 
     Args:
-    ----
         img : torch.Tensor, the image to invert
         min_val : minimum value of the random shift
 
     Returns:
-    -------
         the inverted image
     """
     out = F.rgb_to_grayscale(img, num_output_channels=3)
@@ -35,9 +33,9 @@ def invert_colors(img: torch.Tensor, min_val: float = 0.6) -> torch.Tensor:
     rgb_shift = min_val + (1 - min_val) * torch.rand(shift_shape)
     # Inverse the color
     if out.dtype == torch.uint8:
-        out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8)
+        out = (out.to(dtype=rgb_shift.dtype) * rgb_shift).to(dtype=torch.uint8)  # type: ignore[attr-defined]
     else:
-        out = out * rgb_shift.to(dtype=out.dtype)
+        out = out * rgb_shift.to(dtype=out.dtype)  # type: ignore[attr-defined]
     # Inverse the color
     out = 255 - out if out.dtype == torch.uint8 else 1 - out
     return out
@@ -48,18 +46,16 @@ def rotate_sample(
     geoms: np.ndarray,
     angle: float,
     expand: bool = False,
-) -> Tuple[torch.Tensor, np.ndarray]:
+) -> tuple[torch.Tensor, np.ndarray]:
     """Rotate image around the center, interpolation=NEAREST, pad with 0 (black)
 
     Args:
-    ----
         img: image to rotate
         geoms: array of geometries of shape (N, 4) or (N, 4, 2)
         angle: angle in degrees. +: counter-clockwise, -: clockwise
         expand: whether the image should be padded before the rotation
 
     Returns:
-    -------
         A tuple of rotated img (tensor), rotated geometries of shape (N, 4, 2)
     """
     rotated_img = F.rotate(img, angle=angle, fill=0, expand=expand)  # Interpolation NEAREST by default
@@ -81,7 +77,7 @@ def rotate_sample(
     rotated_geoms: np.ndarray = rotate_abs_geoms(
         _geoms,
         angle,
-        img.shape[1:],
+        img.shape[1:],
         expand,
     ).astype(np.float32)
 
@@ -93,18 +89,16 @@ def rotate_sample(
 
 
 def crop_detection(
-    img: torch.Tensor, boxes: np.ndarray, crop_box: Tuple[float, float, float, float]
-) -> Tuple[torch.Tensor, np.ndarray]:
+    img: torch.Tensor, boxes: np.ndarray, crop_box: tuple[float, float, float, float]
+) -> tuple[torch.Tensor, np.ndarray]:
     """Crop and image and associated bboxes
 
     Args:
-    ----
         img: image to crop
         boxes: array of boxes to clip, absolute (int) or relative (float)
         crop_box: box (xmin, ymin, xmax, ymax) to crop the image. Relative coords.
 
     Returns:
-    -------
         A tuple of cropped image, cropped boxes, where the image is not resized.
     """
     if any(val < 0 or val > 1 for val in crop_box):
@@ -119,27 +113,25 @@ def crop_detection(
     return cropped_img, boxes
 
 
-def random_shadow(img: torch.Tensor, opacity_range: Tuple[float, float], **kwargs) -> torch.Tensor:
-    """Crop and image and associated bboxes
+def random_shadow(img: torch.Tensor, opacity_range: tuple[float, float], **kwargs) -> torch.Tensor:
+    """Apply a random shadow effect to an image using NumPy for blurring.
 
     Args:
-    ----
-        img: image to modify
-        opacity_range: the minimum and maximum desired opacity of the shadow
-        **kwargs: additional arguments to pass to `create_shadow_mask`
+        img: Image to modify (C, H, W) as a PyTorch tensor.
+        opacity_range: The minimum and maximum desired opacity of the shadow.
+        **kwargs: Additional arguments to pass to `create_shadow_mask`.
 
     Returns:
-    -------
-        shaded image
+        Shadowed image as a PyTorch tensor (same shape as input).
     """
-    shadow_mask = create_shadow_mask(img.shape[1:], **kwargs)
-
+    shadow_mask = create_shadow_mask(img.shape[1:], **kwargs)
     opacity = np.random.uniform(*opacity_range)
-    shadow_tensor = 1 - torch.from_numpy(shadow_mask[None, ...])
 
-    # Add some blur to make it believable
-    k = 7 + 2 * int(4 * np.random.rand(1))
+    # Apply Gaussian blur to the shadow mask
     sigma = np.random.uniform(0.5, 5.0)
-    shadow_tensor = F.gaussian_blur(shadow_tensor, k, sigma=[sigma, sigma])
+    blurred_mask = gaussian_filter(shadow_mask, sigma=sigma)
+
+    shadow_tensor = 1 - torch.from_numpy(blurred_mask).float()
+    shadow_tensor = shadow_tensor.to(img.device).unsqueeze(0)  # Add channel dimension
 
     return opacity * shadow_tensor * img + (1 - opacity) * img
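`random_shadow` is the one substantive rework here: the shadow mask is now blurred with `scipy.ndimage.gaussian_filter` while it is still a NumPy array (making scipy an explicit import of this module), and the channel dimension and target device are applied only afterwards. The signature is unchanged, so existing calls keep working; a small sketch, assuming `random_shadow` is re-exported through the star imports shown in the `__init__.py` diff below:

    import torch

    from doctr.transforms.functional import random_shadow

    img = torch.rand(3, 64, 64)  # (C, H, W) tensor in [0, 1]
    shadowed = random_shadow(img, opacity_range=(0.2, 0.8))
    print(shadowed.shape)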
doctr/transforms/functional/tensorflow.py
CHANGED
@@ -1,12 +1,12 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import math
 import random
+from collections.abc import Iterable
 from copy import deepcopy
-from typing import Iterable, Optional, Tuple, Union
 
 import numpy as np
 import tensorflow as tf
@@ -22,12 +22,10 @@ def invert_colors(img: tf.Tensor, min_val: float = 0.6) -> tf.Tensor:
     """Invert the colors of an image
 
     Args:
-    ----
         img : tf.Tensor, the image to invert
         min_val : minimum value of the random shift
 
     Returns:
-    -------
         the inverted image
     """
     out = tf.image.rgb_to_grayscale(img)  # Convert to gray
@@ -48,13 +46,11 @@ def rotated_img_tensor(img: tf.Tensor, angle: float, expand: bool = False) -> tf.Tensor:
     """Rotate image around the center, interpolation=NEAREST, pad with 0 (black)
 
     Args:
-    ----
         img: image to rotate
         angle: angle in degrees. +: counter-clockwise, -: clockwise
         expand: whether the image should be padded before the rotation
 
     Returns:
-    -------
         the rotated image (tensor)
     """
     # Compute the expanded padding
@@ -103,18 +99,16 @@ def rotate_sample(
     geoms: np.ndarray,
     angle: float,
     expand: bool = False,
-) -> Tuple[tf.Tensor, np.ndarray]:
+) -> tuple[tf.Tensor, np.ndarray]:
     """Rotate image around the center, interpolation=NEAREST, pad with 0 (black)
 
     Args:
-    ----
         img: image to rotate
         geoms: array of geometries of shape (N, 4) or (N, 4, 2)
         angle: angle in degrees. +: counter-clockwise, -: clockwise
         expand: whether the image should be padded before the rotation
 
     Returns:
-    -------
         A tuple of rotated img (tensor), rotated boxes (np array)
     """
     # Rotated the image
@@ -140,22 +134,20 @@ def rotate_sample(
     rotated_geoms[..., 0] = rotated_geoms[..., 0] / rotated_img.shape[1]
     rotated_geoms[..., 1] = rotated_geoms[..., 1] / rotated_img.shape[0]
 
-    return rotated_img, np.clip(rotated_geoms, 0, 1)
+    return rotated_img, np.clip(np.around(rotated_geoms, decimals=15), 0, 1)
 
 
 def crop_detection(
-    img: tf.Tensor, boxes: np.ndarray, crop_box: Tuple[float, float, float, float]
-) -> Tuple[tf.Tensor, np.ndarray]:
+    img: tf.Tensor, boxes: np.ndarray, crop_box: tuple[float, float, float, float]
+) -> tuple[tf.Tensor, np.ndarray]:
     """Crop and image and associated bboxes
 
     Args:
-    ----
         img: image to crop
         boxes: array of boxes to clip, absolute (int) or relative (float)
         crop_box: box (xmin, ymin, xmax, ymax) to crop the image. Relative coords.
 
     Returns:
-    -------
         A tuple of cropped image, cropped boxes, where the image is not resized.
     """
     if any(val < 0 or val > 1 for val in crop_box):
@@ -172,16 +164,15 @@ def crop_detection(
 
 def _gaussian_filter(
     img: tf.Tensor,
-    kernel_size: Union[int, Iterable[int]],
+    kernel_size: int | Iterable[int],
     sigma: float,
-    mode: Optional[str] = None,
-    pad_value: Optional[int] = 0,
+    mode: str | None = None,
+    pad_value: int = 0,
 ):
     """Apply Gaussian filter to image.
     Adapted from: https://github.com/tensorflow/addons/blob/master/tensorflow_addons/image/filters.py
 
     Args:
-    ----
         img: image to filter of shape (N, H, W, C)
         kernel_size: kernel size of the filter
         sigma: standard deviation of the Gaussian filter
@@ -189,7 +180,6 @@ def _gaussian_filter(
         pad_value: value to pad the image with
 
     Returns:
-    -------
         A tensor of shape (N, H, W, C)
     """
     ksize = tf.convert_to_tensor(tf.broadcast_to(kernel_size, [2]), dtype=tf.int32)
@@ -235,17 +225,15 @@ def _gaussian_filter(
     return tf.nn.depthwise_conv2d(img, g, [1, 1, 1, 1], padding="VALID", data_format="NHWC")
 
 
-def random_shadow(img: tf.Tensor, opacity_range: Tuple[float, float], **kwargs) -> tf.Tensor:
+def random_shadow(img: tf.Tensor, opacity_range: tuple[float, float], **kwargs) -> tf.Tensor:
     """Apply a random shadow to a given image
 
     Args:
-    ----
         img: image to modify
         opacity_range: the minimum and maximum desired opacity of the shadow
         **kwargs: additional arguments to pass to `create_shadow_mask`
 
     Returns:
-    -------
         shadowed image
     """
     shadow_mask = create_shadow_mask(img.shape[:2], **kwargs)
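One substantive change hides among the docstring cleanups: `rotate_sample` now rounds the relative geometry coordinates to 15 decimal places before clipping. Clipping alone only handles values outside [0, 1]; the rounding also snaps accumulated float noise inside the interval back to the intended value. A quick standalone illustration (not doctr code):

    import numpy as np

    x = 0.1 + 0.2
    print(x)  # 0.30000000000000004 -- accumulated float noise
    geoms = np.array([x, 1.0 + 2e-16])
    cleaned = np.clip(np.around(geoms, decimals=15), 0, 1)
    print(cleaned[0] == 0.3, cleaned[1] == 1.0)  # True True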
doctr/transforms/functional/__init__.py
CHANGED
@@ -2,7 +2,7 @@ from doctr.file_utils import is_tf_available, is_torch_available
 
 from .base import *
 
-if is_tf_available():
-    from .tensorflow import *
-elif is_torch_available():
-    from .pytorch import *  # type: ignore[assignment]
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *  # type: ignore[assignment]
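
This flips the framework priority for the functional transforms: previously the TensorFlow implementations won when both backends were installed, now the PyTorch ones do, consistent with the PyTorch-only additions in the file list above (e.g. the `vip` and `viptr` modules). The guard functions are public, so you can check which backend a given environment resolves to:

    from doctr.file_utils import is_tf_available, is_torch_available

    # Mirrors the new import order: PyTorch wins when both are installed.
    if is_torch_available():
        print("using the PyTorch implementations")
    elif is_tf_available():
        print("using the TensorFlow implementations")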
|