python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/contrib/__init__.py +1 -0
- doctr/contrib/artefacts.py +7 -9
- doctr/contrib/base.py +8 -17
- doctr/datasets/cord.py +17 -7
- doctr/datasets/datasets/__init__.py +4 -4
- doctr/datasets/datasets/base.py +16 -16
- doctr/datasets/datasets/pytorch.py +12 -12
- doctr/datasets/datasets/tensorflow.py +10 -10
- doctr/datasets/detection.py +6 -9
- doctr/datasets/doc_artefacts.py +3 -4
- doctr/datasets/funsd.py +17 -6
- doctr/datasets/generator/__init__.py +4 -4
- doctr/datasets/generator/base.py +16 -17
- doctr/datasets/generator/pytorch.py +1 -3
- doctr/datasets/generator/tensorflow.py +1 -3
- doctr/datasets/ic03.py +14 -5
- doctr/datasets/ic13.py +13 -5
- doctr/datasets/iiit5k.py +31 -20
- doctr/datasets/iiithws.py +4 -5
- doctr/datasets/imgur5k.py +15 -5
- doctr/datasets/loader.py +4 -7
- doctr/datasets/mjsynth.py +6 -5
- doctr/datasets/ocr.py +3 -4
- doctr/datasets/orientation.py +3 -4
- doctr/datasets/recognition.py +3 -4
- doctr/datasets/sroie.py +16 -5
- doctr/datasets/svhn.py +16 -5
- doctr/datasets/svt.py +14 -5
- doctr/datasets/synthtext.py +14 -5
- doctr/datasets/utils.py +37 -27
- doctr/datasets/vocabs.py +21 -7
- doctr/datasets/wildreceipt.py +25 -10
- doctr/file_utils.py +18 -4
- doctr/io/elements.py +69 -81
- doctr/io/html.py +1 -3
- doctr/io/image/__init__.py +3 -3
- doctr/io/image/base.py +2 -5
- doctr/io/image/pytorch.py +3 -12
- doctr/io/image/tensorflow.py +2 -11
- doctr/io/pdf.py +5 -7
- doctr/io/reader.py +5 -11
- doctr/models/_utils.py +14 -22
- doctr/models/builder.py +32 -50
- doctr/models/classification/magc_resnet/__init__.py +3 -3
- doctr/models/classification/magc_resnet/pytorch.py +10 -13
- doctr/models/classification/magc_resnet/tensorflow.py +21 -17
- doctr/models/classification/mobilenet/__init__.py +3 -3
- doctr/models/classification/mobilenet/pytorch.py +7 -17
- doctr/models/classification/mobilenet/tensorflow.py +22 -29
- doctr/models/classification/predictor/__init__.py +4 -4
- doctr/models/classification/predictor/pytorch.py +13 -11
- doctr/models/classification/predictor/tensorflow.py +13 -11
- doctr/models/classification/resnet/__init__.py +4 -4
- doctr/models/classification/resnet/pytorch.py +21 -31
- doctr/models/classification/resnet/tensorflow.py +41 -39
- doctr/models/classification/textnet/__init__.py +3 -3
- doctr/models/classification/textnet/pytorch.py +10 -17
- doctr/models/classification/textnet/tensorflow.py +19 -20
- doctr/models/classification/vgg/__init__.py +3 -3
- doctr/models/classification/vgg/pytorch.py +5 -7
- doctr/models/classification/vgg/tensorflow.py +18 -15
- doctr/models/classification/vit/__init__.py +3 -3
- doctr/models/classification/vit/pytorch.py +8 -14
- doctr/models/classification/vit/tensorflow.py +16 -16
- doctr/models/classification/zoo.py +36 -19
- doctr/models/core.py +3 -3
- doctr/models/detection/_utils/__init__.py +4 -4
- doctr/models/detection/_utils/base.py +4 -7
- doctr/models/detection/_utils/pytorch.py +1 -5
- doctr/models/detection/_utils/tensorflow.py +1 -5
- doctr/models/detection/core.py +2 -8
- doctr/models/detection/differentiable_binarization/__init__.py +4 -4
- doctr/models/detection/differentiable_binarization/base.py +7 -17
- doctr/models/detection/differentiable_binarization/pytorch.py +27 -30
- doctr/models/detection/differentiable_binarization/tensorflow.py +49 -37
- doctr/models/detection/fast/__init__.py +4 -4
- doctr/models/detection/fast/base.py +6 -14
- doctr/models/detection/fast/pytorch.py +24 -31
- doctr/models/detection/fast/tensorflow.py +28 -37
- doctr/models/detection/linknet/__init__.py +4 -4
- doctr/models/detection/linknet/base.py +6 -15
- doctr/models/detection/linknet/pytorch.py +24 -27
- doctr/models/detection/linknet/tensorflow.py +36 -33
- doctr/models/detection/predictor/__init__.py +5 -5
- doctr/models/detection/predictor/pytorch.py +6 -7
- doctr/models/detection/predictor/tensorflow.py +7 -8
- doctr/models/detection/zoo.py +27 -7
- doctr/models/factory/hub.py +8 -13
- doctr/models/kie_predictor/__init__.py +5 -5
- doctr/models/kie_predictor/base.py +8 -5
- doctr/models/kie_predictor/pytorch.py +22 -19
- doctr/models/kie_predictor/tensorflow.py +21 -15
- doctr/models/modules/layers/__init__.py +3 -3
- doctr/models/modules/layers/pytorch.py +6 -9
- doctr/models/modules/layers/tensorflow.py +5 -7
- doctr/models/modules/transformer/__init__.py +3 -3
- doctr/models/modules/transformer/pytorch.py +12 -13
- doctr/models/modules/transformer/tensorflow.py +9 -12
- doctr/models/modules/vision_transformer/__init__.py +3 -3
- doctr/models/modules/vision_transformer/pytorch.py +3 -4
- doctr/models/modules/vision_transformer/tensorflow.py +4 -4
- doctr/models/predictor/__init__.py +5 -5
- doctr/models/predictor/base.py +52 -41
- doctr/models/predictor/pytorch.py +16 -13
- doctr/models/predictor/tensorflow.py +16 -10
- doctr/models/preprocessor/__init__.py +4 -4
- doctr/models/preprocessor/pytorch.py +13 -17
- doctr/models/preprocessor/tensorflow.py +11 -15
- doctr/models/recognition/core.py +3 -7
- doctr/models/recognition/crnn/__init__.py +4 -4
- doctr/models/recognition/crnn/pytorch.py +20 -28
- doctr/models/recognition/crnn/tensorflow.py +19 -29
- doctr/models/recognition/master/__init__.py +3 -3
- doctr/models/recognition/master/base.py +3 -7
- doctr/models/recognition/master/pytorch.py +22 -24
- doctr/models/recognition/master/tensorflow.py +21 -26
- doctr/models/recognition/parseq/__init__.py +3 -3
- doctr/models/recognition/parseq/base.py +3 -7
- doctr/models/recognition/parseq/pytorch.py +26 -26
- doctr/models/recognition/parseq/tensorflow.py +26 -30
- doctr/models/recognition/predictor/__init__.py +5 -5
- doctr/models/recognition/predictor/_utils.py +7 -10
- doctr/models/recognition/predictor/pytorch.py +6 -6
- doctr/models/recognition/predictor/tensorflow.py +5 -6
- doctr/models/recognition/sar/__init__.py +4 -4
- doctr/models/recognition/sar/pytorch.py +20 -21
- doctr/models/recognition/sar/tensorflow.py +19 -24
- doctr/models/recognition/utils.py +5 -10
- doctr/models/recognition/vitstr/__init__.py +4 -4
- doctr/models/recognition/vitstr/base.py +3 -7
- doctr/models/recognition/vitstr/pytorch.py +18 -20
- doctr/models/recognition/vitstr/tensorflow.py +21 -24
- doctr/models/recognition/zoo.py +22 -11
- doctr/models/utils/__init__.py +4 -4
- doctr/models/utils/pytorch.py +13 -16
- doctr/models/utils/tensorflow.py +31 -30
- doctr/models/zoo.py +1 -5
- doctr/transforms/functional/__init__.py +3 -3
- doctr/transforms/functional/base.py +4 -11
- doctr/transforms/functional/pytorch.py +21 -29
- doctr/transforms/functional/tensorflow.py +10 -22
- doctr/transforms/modules/__init__.py +4 -4
- doctr/transforms/modules/base.py +48 -55
- doctr/transforms/modules/pytorch.py +65 -28
- doctr/transforms/modules/tensorflow.py +33 -44
- doctr/utils/common_types.py +8 -9
- doctr/utils/data.py +8 -12
- doctr/utils/fonts.py +2 -7
- doctr/utils/geometry.py +120 -64
- doctr/utils/metrics.py +18 -38
- doctr/utils/multithreading.py +4 -6
- doctr/utils/reconstitution.py +157 -75
- doctr/utils/repr.py +2 -3
- doctr/utils/visualization.py +16 -29
- doctr/version.py +1 -1
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/METADATA +59 -57
- python_doctr-0.11.0.dist-info/RECORD +173 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/WHEEL +1 -1
- python_doctr-0.9.0.dist-info/RECORD +0 -173
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/LICENSE +0 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/zip-safe +0 -0
doctr/contrib/__init__.py
CHANGED

```diff
@@ -0,0 +1 @@
+from .artefacts import ArtefactDetector
```
doctr/contrib/artefacts.py
CHANGED

```diff
@@ -1,9 +1,9 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any
 
 import cv2
 import numpy as np
@@ -14,7 +14,7 @@ from .base import _BasePredictor
 
 __all__ = ["ArtefactDetector"]
 
-default_cfgs: Dict[str, Dict[str, Any]] = {
+default_cfgs: dict[str, dict[str, Any]] = {
     "yolov8_artefact": {
         "input_shape": (3, 1024, 1024),
         "labels": ["bar_code", "qr_code", "logo", "photo"],
@@ -34,7 +34,6 @@ class ArtefactDetector(_BasePredictor):
     >>> results = detector(doc)
 
     Args:
-    ----
         arch: the architecture to use
         batch_size: the batch size to use
         model_path: the path to the model to use
@@ -50,9 +49,9 @@ class ArtefactDetector(_BasePredictor):
         self,
         arch: str = "yolov8_artefact",
         batch_size: int = 2,
-        model_path: Optional[str] = None,
-        labels: Optional[List[str]] = None,
-        input_shape: Optional[Tuple[int, int, int]] = None,
+        model_path: str | None = None,
+        labels: list[str] | None = None,
+        input_shape: tuple[int, int, int] | None = None,
         conf_threshold: float = 0.5,
         iou_threshold: float = 0.5,
         **kwargs: Any,
@@ -66,7 +65,7 @@ class ArtefactDetector(_BasePredictor):
     def preprocess(self, img: np.ndarray) -> np.ndarray:
         return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)
 
-    def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> List[List[Dict[str, Any]]]:
+    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> list[list[dict[str, Any]]]:
         results = []
 
         for batch in zip(output, input_images):
@@ -109,7 +108,6 @@ class ArtefactDetector(_BasePredictor):
         Display the results
 
         Args:
-        ----
             **kwargs: additional keyword arguments to be passed to `plt.show`
         """
         requires_package("matplotlib", "`.show()` requires matplotlib installed")
```
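The `ArtefactDetector` changes above are signature-only (builtin generics and `X | None` unions). A minimal usage sketch of that signature, assuming the `onnxruntime` dependency required by `doctr.contrib` is installed and that `doc` is a list of HWC `uint8` numpy arrays; the contents of the result dicts are illustrative:

```python
import numpy as np

from doctr.contrib import ArtefactDetector

detector = ArtefactDetector(
    arch="yolov8_artefact",  # the only entry in default_cfgs
    batch_size=2,
    conf_threshold=0.5,  # confidence threshold, matching the defaults above
    iou_threshold=0.5,   # IoU threshold used when filtering overlapping boxes
)
doc = [np.zeros((1024, 1024, 3), dtype=np.uint8)]  # placeholder page
results = detector(doc)  # one list of detection dicts per input image
detector.show()  # plots the detections; requires matplotlib
```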
doctr/contrib/base.py
CHANGED

```diff
@@ -1,9 +1,9 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any, List, Optional
+from typing import Any
 
 import numpy as np
 
@@ -16,32 +16,29 @@ class _BasePredictor:
     Base class for all predictors
 
     Args:
-    ----
         batch_size: the batch size to use
         url: the url to use to download a model if needed
         model_path: the path to the model to use
         **kwargs: additional arguments to be passed to `download_from_url`
     """
 
-    def __init__(self, batch_size: int, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs) -> None:
+    def __init__(self, batch_size: int, url: str | None = None, model_path: str | None = None, **kwargs) -> None:
         self.batch_size = batch_size
         self.session = self._init_model(url, model_path, **kwargs)
 
-        self._inputs: List[np.ndarray] = []
-        self._results: List[Any] = []
+        self._inputs: list[np.ndarray] = []
+        self._results: list[Any] = []
 
-    def _init_model(self, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs: Any) -> Any:
+    def _init_model(self, url: str | None = None, model_path: str | None = None, **kwargs: Any) -> Any:
         """
         Download the model from the given url if needed
 
         Args:
-        ----
             url: the url to use
             model_path: the path to the model to use
             **kwargs: additional arguments to be passed to `download_from_url`
 
         Returns:
-        -------
             Any: the ONNX loaded model
         """
         requires_package("onnxruntime", "`.contrib` module requires `onnxruntime` to be installed.")
@@ -57,40 +54,34 @@ class _BasePredictor:
         Preprocess the input image
 
         Args:
-        ----
             img: the input image to preprocess
 
         Returns:
-        -------
             np.ndarray: the preprocessed image
         """
         raise NotImplementedError
 
-    def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> Any:
+    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
         """
         Postprocess the model output
 
         Args:
-        ----
             output: the model output to postprocess
             input_images: the input images used to generate the output
 
         Returns:
-        -------
             Any: the postprocessed output
         """
         raise NotImplementedError
 
-    def __call__(self, inputs: List[np.ndarray]) -> Any:
+    def __call__(self, inputs: list[np.ndarray]) -> Any:
         """
         Call the model on the given inputs
 
         Args:
-        ----
             inputs: the inputs to use
 
         Returns:
-        -------
             Any: the postprocessed output
         """
         self._inputs = inputs
```
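For context, `_BasePredictor` keeps the ONNX session setup and batching; the two `NotImplementedError` methods above define the contract a concrete predictor fills in (as `ArtefactDetector` does). A hypothetical subclass sketch, using the signatures shown in the diff:

```python
from typing import Any

import numpy as np

from doctr.contrib.base import _BasePredictor


class MyPredictor(_BasePredictor):  # hypothetical example
    def preprocess(self, img: np.ndarray) -> np.ndarray:
        # bring the HWC uint8 image to the CHW float32 layout an ONNX model expects
        return img.transpose(2, 0, 1).astype(np.float32) / 255.0

    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
        # map raw session outputs back onto the original images
        return output
```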
doctr/datasets/cord.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,7 +6,7 @@
 import json
 import os
 from pathlib import Path
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any
 
 import numpy as np
 from tqdm import tqdm
@@ -29,10 +29,10 @@ class CORD(VisionDataset):
     >>> img, target = train_set[0]
 
     Args:
-    ----
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """
 
@@ -53,6 +53,7 @@ class CORD(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         url, sha256, name = self.TRAIN if train else self.TEST
@@ -64,13 +65,20 @@ class CORD(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
 
-        # List images
+        # list images
         tmp_root = os.path.join(self.root, "image")
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+        self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
         self.train = train
         np_dtype = np.float32
-        for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking CORD", total=len(os.listdir(tmp_root))):
+        for img_path in tqdm(
+            iterable=os.listdir(tmp_root), desc="Preparing and Loading CORD", total=len(os.listdir(tmp_root))
+        ):
             # File existence check
             if not os.path.exists(os.path.join(tmp_root, img_path)):
                 raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_path)}")
@@ -84,7 +92,7 @@ class CORD(VisionDataset):
                     if len(word["text"]) > 0:
                         x = word["quad"]["x1"], word["quad"]["x2"], word["quad"]["x3"], word["quad"]["x4"]
                         y = word["quad"]["y1"], word["quad"]["y2"], word["quad"]["y3"], word["quad"]["y4"]
-                        box: Union[List[float], np.ndarray]
+                        box: list[float] | np.ndarray
                         if use_polygons:
                             # (x, y) coordinates of top left, top right, bottom right, bottom left corners
                             box = np.array(
@@ -109,6 +117,8 @@ class CORD(VisionDataset):
                 )
                 for crop, label in zip(crops, list(text_targets)):
                     self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
             else:
                 self.data.append((
                     img_path,
```
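With the new flag, `CORD` can be loaded in three mutually exclusive modes; combining both task flags raises the `ValueError` added above:

```python
from doctr.datasets import CORD

full_set = CORD(train=True, download=True)  # images with boxes and labels
reco_set = CORD(train=True, download=True, recognition_task=True)  # word crops and strings
det_set = CORD(train=True, download=True, detection_task=True)  # images and boxes only

# raises ValueError: the two task flags are mutually exclusive
CORD(train=True, download=True, recognition_task=True, detection_task=True)
```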
doctr/datasets/datasets/__init__.py
CHANGED

```diff
@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
 
-if is_tf_available():
-    from .tensorflow import *
-elif is_torch_available():
-    from .pytorch import *  # type: ignore[assignment]
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *  # type: ignore[assignment]
```
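These backend stubs now resolve to the PyTorch implementation first when both frameworks are installed. doctr's `file_utils` also reads the `USE_TF`/`USE_TORCH` environment variables, so a backend can still be forced explicitly; a sketch:

```python
import os

os.environ["USE_TORCH"] = "1"  # must be set before the first doctr import

from doctr.datasets import CORD  # now backed by the PyTorch dataset classes
```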
doctr/datasets/datasets/base.py
CHANGED

```diff
@@ -1,12 +1,13 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import os
 import shutil
+from collections.abc import Callable
 from pathlib import Path
-from typing import Any, Callable, List, Optional, Tuple, Union
+from typing import Any
 
 import numpy as np
 
@@ -19,15 +20,15 @@ __all__ = ["_AbstractDataset", "_VisionDataset"]
 
 
 class _AbstractDataset:
-    data: List[Any] = []
-    _pre_transforms: Optional[Callable[[Any, Any], Tuple[Any, Any]]] = None
+    data: list[Any] = []
+    _pre_transforms: Callable[[Any, Any], tuple[Any, Any]] | None = None
 
     def __init__(
         self,
-        root: Union[str, Path],
-        img_transforms: Optional[Callable[[Any], Any]] = None,
-        sample_transforms: Optional[Callable[[Any, Any], Tuple[Any, Any]]] = None,
-        pre_transforms: Optional[Callable[[Any, Any], Tuple[Any, Any]]] = None,
+        root: str | Path,
+        img_transforms: Callable[[Any], Any] | None = None,
+        sample_transforms: Callable[[Any, Any], tuple[Any, Any]] | None = None,
+        pre_transforms: Callable[[Any, Any], tuple[Any, Any]] | None = None,
     ) -> None:
         if not Path(root).is_dir():
             raise ValueError(f"expected a path to a reachable folder: {root}")
@@ -41,10 +42,10 @@ class _AbstractDataset:
     def __len__(self) -> int:
         return len(self.data)
 
-    def _read_sample(self, index: int) -> Tuple[Any, Any]:
+    def _read_sample(self, index: int) -> tuple[Any, Any]:
         raise NotImplementedError
 
-    def __getitem__(self, index: int) -> Tuple[Any, Any]:
+    def __getitem__(self, index: int) -> tuple[Any, Any]:
         # Read image
         img, target = self._read_sample(index)
         # Pre-transforms (format conversion at run-time etc.)
@@ -82,7 +83,6 @@ class _VisionDataset(_AbstractDataset):
     """Implements an abstract dataset
 
     Args:
-    ----
         url: URL of the dataset
         file_name: name of the file once downloaded
         file_hash: expected SHA256 of the file
@@ -96,13 +96,13 @@ class _VisionDataset(_AbstractDataset):
     def __init__(
         self,
         url: str,
-        file_name: Optional[str] = None,
-        file_hash: Optional[str] = None,
+        file_name: str | None = None,
+        file_hash: str | None = None,
         extract_archive: bool = False,
         download: bool = False,
         overwrite: bool = False,
-        cache_dir: Optional[str] = None,
-        cache_subdir: Optional[str] = None,
+        cache_dir: str | None = None,
+        cache_subdir: str | None = None,
         **kwargs: Any,
     ) -> None:
         cache_dir = (
@@ -115,7 +115,7 @@ class _VisionDataset(_AbstractDataset):
 
         file_name = file_name if isinstance(file_name, str) else os.path.basename(url)
         # Download the file if not present
-        archive_path: Union[str, Path] = os.path.join(cache_dir, cache_subdir, file_name)
+        archive_path: str | Path = os.path.join(cache_dir, cache_subdir, file_name)
 
         if not os.path.exists(archive_path) and not download:
             raise ValueError("the dataset needs to be downloaded first with download=True")
```
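The three hooks typed above compose at read time. A pseudo-trace of `__getitem__`, assuming its body simply chains the optional hooks in order (consistent with the context lines shown in the diff):

```python
def getitem(dataset, index: int):
    img, target = dataset._read_sample(index)  # backend-specific image read
    # Pre-transforms (format conversion at run-time etc.)
    if dataset._pre_transforms is not None:
        img, target = dataset._pre_transforms(img, target)
    if dataset.img_transforms is not None:
        img = dataset.img_transforms(img)  # image-only transforms
    if dataset.sample_transforms is not None:
        img, target = dataset.sample_transforms(img, target)  # joint transforms
    return img, target
```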
doctr/datasets/datasets/pytorch.py
CHANGED

```diff
@@ -1,11 +1,11 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import os
 from copy import deepcopy
-from typing import Any, List, Tuple
+from typing import Any
 
 import numpy as np
 import torch
@@ -20,7 +20,7 @@ __all__ = ["AbstractDataset", "VisionDataset"]
 class AbstractDataset(_AbstractDataset):
     """Abstract class for all datasets"""
 
-    def _read_sample(self, index: int) -> Tuple[torch.Tensor, Any]:
+    def _read_sample(self, index: int) -> tuple[torch.Tensor, Any]:
         img_name, target = self.data[index]
 
         # Check target
@@ -29,14 +29,14 @@ class AbstractDataset(_AbstractDataset):
             assert "labels" in target, "Target should contain 'labels' key"
         elif isinstance(target, tuple):
             assert len(target) == 2
-            assert isinstance(target[0], str) or isinstance(
-                target[0], np.ndarray
-            ), "first element of the tuple should be a string or a numpy array"
+            assert isinstance(target[0], str) or isinstance(target[0], np.ndarray), (
+                "first element of the tuple should be a string or a numpy array"
+            )
             assert isinstance(target[1], list), "second element of the tuple should be a list"
         else:
-            assert isinstance(target, str) or isinstance(
-                target, np.ndarray
-            ), "Target should be a string or a numpy array"
+            assert isinstance(target, str) or isinstance(target, np.ndarray), (
+                "Target should be a string or a numpy array"
+            )
 
         # Read image
         img = (
@@ -48,11 +48,11 @@ class AbstractDataset(_AbstractDataset):
         return img, deepcopy(target)
 
     @staticmethod
-    def collate_fn(samples: List[Tuple[torch.Tensor, Any]]) -> Tuple[torch.Tensor, List[Any]]:
+    def collate_fn(samples: list[tuple[torch.Tensor, Any]]) -> tuple[torch.Tensor, list[Any]]:
         images, targets = zip(*samples)
-        images = torch.stack(images, dim=0)  # type: ignore[assignment]
+        images = torch.stack(images, dim=0)
 
-        return images, list(targets)  # type: ignore[return-value]
+        return images, list(targets)
 
 
 class VisionDataset(AbstractDataset, _VisionDataset):  # noqa: D101
```
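`collate_fn` stacks the image tensors but deliberately keeps the heterogeneous targets as a plain list, which makes these datasets directly usable with a torch `DataLoader`; a sketch:

```python
from torch.utils.data import DataLoader

from doctr.datasets import CORD

train_set = CORD(train=True, download=True)
loader = DataLoader(train_set, batch_size=8, collate_fn=train_set.collate_fn)
images, targets = next(iter(loader))  # (8, C, H, W) tensor and a list of 8 targets
```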
doctr/datasets/datasets/tensorflow.py
CHANGED

```diff
@@ -1,11 +1,11 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import os
 from copy import deepcopy
-from typing import Any, List, Tuple
+from typing import Any
 
 import numpy as np
 import tensorflow as tf
@@ -20,7 +20,7 @@ __all__ = ["AbstractDataset", "VisionDataset"]
 class AbstractDataset(_AbstractDataset):
     """Abstract class for all datasets"""
 
-    def _read_sample(self, index: int) -> Tuple[tf.Tensor, Any]:
+    def _read_sample(self, index: int) -> tuple[tf.Tensor, Any]:
         img_name, target = self.data[index]
 
         # Check target
@@ -29,14 +29,14 @@ class AbstractDataset(_AbstractDataset):
             assert "labels" in target, "Target should contain 'labels' key"
         elif isinstance(target, tuple):
             assert len(target) == 2
-            assert isinstance(target[0], str) or isinstance(
-                target[0], np.ndarray
-            ), "first element of the tuple should be a string or a numpy array"
+            assert isinstance(target[0], str) or isinstance(target[0], np.ndarray), (
+                "first element of the tuple should be a string or a numpy array"
+            )
             assert isinstance(target[1], list), "second element of the tuple should be a list"
         else:
-            assert isinstance(target, str) or isinstance(
-                target, np.ndarray
-            ), "Target should be a string or a numpy array"
+            assert isinstance(target, str) or isinstance(target, np.ndarray), (
+                "Target should be a string or a numpy array"
+            )
 
         # Read image
         img = (
@@ -48,7 +48,7 @@ class AbstractDataset(_AbstractDataset):
         return img, deepcopy(target)
 
     @staticmethod
-    def collate_fn(samples: List[Tuple[tf.Tensor, Any]]) -> Tuple[tf.Tensor, List[Any]]:
+    def collate_fn(samples: list[tuple[tf.Tensor, Any]]) -> tuple[tf.Tensor, list[Any]]:
         images, targets = zip(*samples)
         images = tf.stack(images, axis=0)
 
```
doctr/datasets/detection.py
CHANGED

```diff
@@ -1,11 +1,11 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import json
 import os
-from typing import Any, Dict, List, Tuple, Type, Union
+from typing import Any
 
 import numpy as np
 
@@ -26,7 +26,6 @@ class DetectionDataset(AbstractDataset):
     >>> img, target = train_set[0]
 
     Args:
-    ----
         img_folder: folder with all the images of the dataset
         label_path: path to the annotations of each image
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
@@ -47,13 +46,13 @@ class DetectionDataset(AbstractDataset):
         )
 
         # File existence check
-        self._class_names: List = []
+        self._class_names: list = []
         if not os.path.exists(label_path):
             raise FileNotFoundError(f"unable to locate {label_path}")
         with open(label_path, "rb") as f:
             labels = json.load(f)
 
-        self.data: List[Tuple[str, Tuple[np.ndarray, List[str]]]] = []
+        self.data: list[tuple[str, tuple[np.ndarray, list[str]]]] = []
         np_dtype = np.float32
         for img_name, label in labels.items():
             # File existence check
@@ -65,18 +64,16 @@ class DetectionDataset(AbstractDataset):
             self.data.append((img_name, (np.asarray(geoms, dtype=np_dtype), polygons_classes)))
 
     def format_polygons(
-        self, polygons: Union[List, Dict], use_polygons: bool, np_dtype: Type
-    ) -> Tuple[np.ndarray, List[str]]:
+        self, polygons: list | dict, use_polygons: bool, np_dtype: type
+    ) -> tuple[np.ndarray, list[str]]:
         """Format polygons into an array
 
         Args:
-        ----
             polygons: the bounding boxes
             use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
             np_dtype: dtype of array
 
         Returns:
-        -------
             geoms: bounding boxes as np array
             polygons_classes: list of classes for each bounding box
         """
```
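`DetectionDataset` is the file-backed dataset used for detection training: a single JSON file maps each image name to its polygons, and per the annotations above each target is a `(boxes, class_names)` pair. A usage sketch with hypothetical paths:

```python
from doctr.datasets import DetectionDataset

train_set = DetectionDataset(
    img_folder="path/to/images",  # hypothetical location
    label_path="path/to/labels.json",  # single JSON annotation file
    use_polygons=False,  # straight boxes instead of rotated polygons
)
img, target = train_set[0]
```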
doctr/datasets/doc_artefacts.py
CHANGED

```diff
@@ -1,11 +1,11 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import json
 import os
-from typing import Any, Dict, List, Tuple
+from typing import Any
 
 import numpy as np
 
@@ -26,7 +26,6 @@ class DocArtefacts(VisionDataset):
     >>> img, target = train_set[0]
 
     Args:
-    ----
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         **kwargs: keyword arguments from `VisionDataset`.
@@ -51,7 +50,7 @@ class DocArtefacts(VisionDataset):
         tmp_root = os.path.join(self.root, "images")
         with open(os.path.join(self.root, "labels.json"), "rb") as f:
             labels = json.load(f)
-        self.data: List[Tuple[str, Dict[str, Any]]] = []
+        self.data: list[tuple[str, dict[str, Any]]] = []
         img_list = os.listdir(tmp_root)
         if len(labels) != len(img_list):
             raise AssertionError("the number of images and labels do not match")
```
doctr/datasets/funsd.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,7 +6,7 @@
 import json
 import os
 from pathlib import Path
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any
 
 import numpy as np
 from tqdm import tqdm
@@ -29,10 +29,10 @@ class FUNSD(VisionDataset):
     >>> img, target = train_set[0]
 
     Args:
-    ----
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
         **kwargs: keyword arguments from `VisionDataset`.
     """
 
@@ -45,6 +45,7 @@ class FUNSD(VisionDataset):
         train: bool = True,
         use_polygons: bool = False,
         recognition_task: bool = False,
+        detection_task: bool = False,
         **kwargs: Any,
     ) -> None:
         super().__init__(
@@ -55,16 +56,24 @@ class FUNSD(VisionDataset):
             pre_transforms=convert_target_to_relative if not recognition_task else None,
             **kwargs,
         )
+        if recognition_task and detection_task:
+            raise ValueError(
+                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                + "To get the whole dataset with boxes and labels leave both parameters to False."
+            )
+
         self.train = train
         np_dtype = np.float32
 
         # Use the subset
         subfolder = os.path.join("dataset", "training_data" if train else "testing_data")
 
-        # # List images
+        # # list images
         tmp_root = os.path.join(self.root, subfolder, "images")
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
-        for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking FUNSD", total=len(os.listdir(tmp_root))):
+        self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
+        for img_path in tqdm(
+            iterable=os.listdir(tmp_root), desc="Preparing and Loading FUNSD", total=len(os.listdir(tmp_root))
+        ):
             # File existence check
             if not os.path.exists(os.path.join(tmp_root, img_path)):
                 raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_path)}")
@@ -100,6 +109,8 @@ class FUNSD(VisionDataset):
                     # filter labels with unknown characters
                     if not any(char in label for char in ["☑", "☐", "\uf703", "\uf702"]):
                         self.data.append((crop, label))
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype)))
             else:
                 self.data.append((
                     img_path,
```
doctr/datasets/generator/__init__.py
CHANGED

```diff
@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
 
-if is_tf_available():
-    from .tensorflow import *
-elif is_torch_available():
-    from .pytorch import *  # type: ignore[assignment]
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *  # type: ignore[assignment]
```