python-doctr 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/contrib/__init__.py +1 -0
- doctr/contrib/artefacts.py +7 -9
- doctr/contrib/base.py +8 -17
- doctr/datasets/__init__.py +1 -0
- doctr/datasets/coco_text.py +139 -0
- doctr/datasets/cord.py +10 -8
- doctr/datasets/datasets/__init__.py +4 -4
- doctr/datasets/datasets/base.py +16 -16
- doctr/datasets/datasets/pytorch.py +12 -12
- doctr/datasets/datasets/tensorflow.py +10 -10
- doctr/datasets/detection.py +6 -9
- doctr/datasets/doc_artefacts.py +3 -4
- doctr/datasets/funsd.py +9 -8
- doctr/datasets/generator/__init__.py +4 -4
- doctr/datasets/generator/base.py +16 -17
- doctr/datasets/generator/pytorch.py +1 -3
- doctr/datasets/generator/tensorflow.py +1 -3
- doctr/datasets/ic03.py +5 -6
- doctr/datasets/ic13.py +6 -6
- doctr/datasets/iiit5k.py +10 -6
- doctr/datasets/iiithws.py +4 -5
- doctr/datasets/imgur5k.py +15 -7
- doctr/datasets/loader.py +4 -7
- doctr/datasets/mjsynth.py +6 -5
- doctr/datasets/ocr.py +3 -4
- doctr/datasets/orientation.py +3 -4
- doctr/datasets/recognition.py +4 -5
- doctr/datasets/sroie.py +6 -5
- doctr/datasets/svhn.py +7 -6
- doctr/datasets/svt.py +6 -7
- doctr/datasets/synthtext.py +19 -7
- doctr/datasets/utils.py +41 -35
- doctr/datasets/vocabs.py +1107 -49
- doctr/datasets/wildreceipt.py +14 -10
- doctr/file_utils.py +11 -7
- doctr/io/elements.py +96 -82
- doctr/io/html.py +1 -3
- doctr/io/image/__init__.py +3 -3
- doctr/io/image/base.py +2 -5
- doctr/io/image/pytorch.py +3 -12
- doctr/io/image/tensorflow.py +2 -11
- doctr/io/pdf.py +5 -7
- doctr/io/reader.py +5 -11
- doctr/models/_utils.py +15 -23
- doctr/models/builder.py +30 -48
- doctr/models/classification/__init__.py +1 -0
- doctr/models/classification/magc_resnet/__init__.py +3 -3
- doctr/models/classification/magc_resnet/pytorch.py +11 -15
- doctr/models/classification/magc_resnet/tensorflow.py +11 -14
- doctr/models/classification/mobilenet/__init__.py +3 -3
- doctr/models/classification/mobilenet/pytorch.py +20 -18
- doctr/models/classification/mobilenet/tensorflow.py +19 -23
- doctr/models/classification/predictor/__init__.py +4 -4
- doctr/models/classification/predictor/pytorch.py +7 -9
- doctr/models/classification/predictor/tensorflow.py +6 -8
- doctr/models/classification/resnet/__init__.py +4 -4
- doctr/models/classification/resnet/pytorch.py +47 -34
- doctr/models/classification/resnet/tensorflow.py +45 -35
- doctr/models/classification/textnet/__init__.py +3 -3
- doctr/models/classification/textnet/pytorch.py +20 -18
- doctr/models/classification/textnet/tensorflow.py +19 -17
- doctr/models/classification/vgg/__init__.py +3 -3
- doctr/models/classification/vgg/pytorch.py +21 -8
- doctr/models/classification/vgg/tensorflow.py +20 -14
- doctr/models/classification/vip/__init__.py +4 -0
- doctr/models/classification/vip/layers/__init__.py +4 -0
- doctr/models/classification/vip/layers/pytorch.py +615 -0
- doctr/models/classification/vip/pytorch.py +505 -0
- doctr/models/classification/vit/__init__.py +3 -3
- doctr/models/classification/vit/pytorch.py +18 -15
- doctr/models/classification/vit/tensorflow.py +15 -12
- doctr/models/classification/zoo.py +23 -14
- doctr/models/core.py +3 -3
- doctr/models/detection/_utils/__init__.py +4 -4
- doctr/models/detection/_utils/base.py +4 -7
- doctr/models/detection/_utils/pytorch.py +1 -5
- doctr/models/detection/_utils/tensorflow.py +1 -5
- doctr/models/detection/core.py +2 -8
- doctr/models/detection/differentiable_binarization/__init__.py +4 -4
- doctr/models/detection/differentiable_binarization/base.py +10 -21
- doctr/models/detection/differentiable_binarization/pytorch.py +37 -31
- doctr/models/detection/differentiable_binarization/tensorflow.py +26 -29
- doctr/models/detection/fast/__init__.py +4 -4
- doctr/models/detection/fast/base.py +8 -17
- doctr/models/detection/fast/pytorch.py +37 -35
- doctr/models/detection/fast/tensorflow.py +24 -28
- doctr/models/detection/linknet/__init__.py +4 -4
- doctr/models/detection/linknet/base.py +8 -18
- doctr/models/detection/linknet/pytorch.py +34 -28
- doctr/models/detection/linknet/tensorflow.py +24 -25
- doctr/models/detection/predictor/__init__.py +5 -5
- doctr/models/detection/predictor/pytorch.py +6 -7
- doctr/models/detection/predictor/tensorflow.py +5 -6
- doctr/models/detection/zoo.py +27 -7
- doctr/models/factory/hub.py +6 -10
- doctr/models/kie_predictor/__init__.py +5 -5
- doctr/models/kie_predictor/base.py +4 -5
- doctr/models/kie_predictor/pytorch.py +19 -20
- doctr/models/kie_predictor/tensorflow.py +14 -15
- doctr/models/modules/layers/__init__.py +3 -3
- doctr/models/modules/layers/pytorch.py +55 -10
- doctr/models/modules/layers/tensorflow.py +5 -7
- doctr/models/modules/transformer/__init__.py +3 -3
- doctr/models/modules/transformer/pytorch.py +12 -13
- doctr/models/modules/transformer/tensorflow.py +9 -10
- doctr/models/modules/vision_transformer/__init__.py +3 -3
- doctr/models/modules/vision_transformer/pytorch.py +2 -3
- doctr/models/modules/vision_transformer/tensorflow.py +3 -3
- doctr/models/predictor/__init__.py +5 -5
- doctr/models/predictor/base.py +28 -29
- doctr/models/predictor/pytorch.py +13 -14
- doctr/models/predictor/tensorflow.py +9 -10
- doctr/models/preprocessor/__init__.py +4 -4
- doctr/models/preprocessor/pytorch.py +13 -17
- doctr/models/preprocessor/tensorflow.py +10 -14
- doctr/models/recognition/__init__.py +1 -0
- doctr/models/recognition/core.py +3 -7
- doctr/models/recognition/crnn/__init__.py +4 -4
- doctr/models/recognition/crnn/pytorch.py +30 -29
- doctr/models/recognition/crnn/tensorflow.py +21 -24
- doctr/models/recognition/master/__init__.py +3 -3
- doctr/models/recognition/master/base.py +3 -7
- doctr/models/recognition/master/pytorch.py +32 -25
- doctr/models/recognition/master/tensorflow.py +22 -25
- doctr/models/recognition/parseq/__init__.py +3 -3
- doctr/models/recognition/parseq/base.py +3 -7
- doctr/models/recognition/parseq/pytorch.py +47 -29
- doctr/models/recognition/parseq/tensorflow.py +29 -27
- doctr/models/recognition/predictor/__init__.py +5 -5
- doctr/models/recognition/predictor/_utils.py +111 -52
- doctr/models/recognition/predictor/pytorch.py +9 -9
- doctr/models/recognition/predictor/tensorflow.py +8 -9
- doctr/models/recognition/sar/__init__.py +4 -4
- doctr/models/recognition/sar/pytorch.py +30 -22
- doctr/models/recognition/sar/tensorflow.py +22 -24
- doctr/models/recognition/utils.py +57 -53
- doctr/models/recognition/viptr/__init__.py +4 -0
- doctr/models/recognition/viptr/pytorch.py +277 -0
- doctr/models/recognition/vitstr/__init__.py +4 -4
- doctr/models/recognition/vitstr/base.py +3 -7
- doctr/models/recognition/vitstr/pytorch.py +28 -21
- doctr/models/recognition/vitstr/tensorflow.py +22 -23
- doctr/models/recognition/zoo.py +27 -11
- doctr/models/utils/__init__.py +4 -4
- doctr/models/utils/pytorch.py +41 -34
- doctr/models/utils/tensorflow.py +31 -23
- doctr/models/zoo.py +1 -5
- doctr/transforms/functional/__init__.py +3 -3
- doctr/transforms/functional/base.py +4 -11
- doctr/transforms/functional/pytorch.py +20 -28
- doctr/transforms/functional/tensorflow.py +10 -22
- doctr/transforms/modules/__init__.py +4 -4
- doctr/transforms/modules/base.py +48 -55
- doctr/transforms/modules/pytorch.py +58 -22
- doctr/transforms/modules/tensorflow.py +18 -32
- doctr/utils/common_types.py +8 -9
- doctr/utils/data.py +9 -13
- doctr/utils/fonts.py +2 -7
- doctr/utils/geometry.py +17 -48
- doctr/utils/metrics.py +17 -37
- doctr/utils/multithreading.py +4 -6
- doctr/utils/reconstitution.py +9 -13
- doctr/utils/repr.py +2 -3
- doctr/utils/visualization.py +16 -29
- doctr/version.py +1 -1
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/METADATA +70 -52
- python_doctr-0.12.0.dist-info/RECORD +180 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/WHEEL +1 -1
- python_doctr-0.10.0.dist-info/RECORD +0 -173
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info/licenses}/LICENSE +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/zip-safe +0 -0
doctr/datasets/funsd.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,7 +6,7 @@
 import json
 import os
 from pathlib import Path
-from typing import Any
+from typing import Any
 
 import numpy as np
 from tqdm import tqdm
@@ -29,7 +29,6 @@ class FUNSD(VisionDataset):
     >>> img, target = train_set[0]
 
     Args:
-    ----
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
@@ -69,10 +68,12 @@ class FUNSD(VisionDataset):
         # Use the subset
         subfolder = os.path.join("dataset", "training_data" if train else "testing_data")
 
-        # #
+        # # list images
         tmp_root = os.path.join(self.root, subfolder, "images")
-        self.data:
-        for img_path in tqdm(
+        self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
+        for img_path in tqdm(
+            iterable=os.listdir(tmp_root), desc="Preparing and Loading FUNSD", total=len(os.listdir(tmp_root))
+        ):
             # File existence check
             if not os.path.exists(os.path.join(tmp_root, img_path)):
                 raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_path)}")
@@ -106,8 +107,8 @@ class FUNSD(VisionDataset):
                 )
                 for crop, label in zip(crops, list(text_targets)):
                     # filter labels with unknown characters
-                    if not any(char in label for char in ["☑", "☐", "\uf703", "\uf702"]):
-                        self.data.append((crop, label))
+                    if not any(char in label for char in ["☑", "☐", "\u03bf", "\uf703", "\uf702", " "]):
+                        self.data.append((crop, label.replace("–", "-")))
             elif detection_task:
                 self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype)))
             else:
```
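The new crop filter above rejects labels containing characters outside the recognition vocabulary (checkbox glyphs, a Greek omicron, private-use glyphs, spaces) and normalizes en dashes. A standalone sketch of that predicate; the sample labels are invented for illustration:

```python
# Standalone sketch of the filter logic shown in the hunk above; sample labels are invented.
UNSUPPORTED = ["☑", "☐", "\u03bf", "\uf703", "\uf702", " "]  # \u03bf is a Greek small omicron

def keep(label: str) -> bool:
    # Reject any label containing a character the recognition vocab cannot represent
    return not any(char in label for char in UNSUPPORTED)

for label in ["Total–due", "☑ yes", "two words"]:
    if keep(label):
        print(label.replace("–", "-"))  # en dash normalized -> "Total-due"
```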
doctr/datasets/generator/__init__.py
CHANGED

```diff
@@ -1,6 +1,6 @@
 from doctr.file_utils import is_tf_available, is_torch_available
 
-if
-    from .
-elif
-    from .
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *  # type: ignore[assignment]
```
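With this dispatch in place, importing the generators resolves to the PyTorch implementation whenever torch is importable, falling back to TensorFlow otherwise. A hedged usage sketch, with constructor arguments taken from the shared `_WordGenerator` base shown below:

```python
# Sketch only: which concrete class you get depends on the installed backend.
from doctr.datasets import WordGenerator

ds = WordGenerator(vocab="abcdef", min_chars=2, max_chars=5, num_samples=10)
img, target = ds[0]  # a backend-native image tensor and the generated word string
print(type(img), target)
```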
doctr/datasets/generator/base.py
CHANGED

```diff
@@ -1,10 +1,11 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import random
-from
+from collections.abc import Callable
+from typing import Any
 
 from PIL import Image, ImageDraw
 
@@ -17,14 +18,13 @@ from ..datasets import AbstractDataset
 def synthesize_text_img(
     text: str,
     font_size: int = 32,
-    font_family:
-    background_color:
-    text_color:
+    font_family: str | None = None,
+    background_color: tuple[int, int, int] | None = None,
+    text_color: tuple[int, int, int] | None = None,
 ) -> Image.Image:
     """Generate a synthetic text image
 
     Args:
-    ----
         text: the text to render as an image
         font_size: the size of the font
         font_family: the font family (has to be installed on your system)
@@ -32,7 +32,6 @@ def synthesize_text_img(
         text_color: text color on the final image
 
     Returns:
-    -------
         PIL image of the text
     """
     background_color = (0, 0, 0) if background_color is None else background_color
@@ -61,9 +60,9 @@ class _CharacterGenerator(AbstractDataset):
         vocab: str,
         num_samples: int,
         cache_samples: bool = False,
-        font_family:
-        img_transforms:
-        sample_transforms:
+        font_family: str | list[str] | None = None,
+        img_transforms: Callable[[Any], Any] | None = None,
+        sample_transforms: Callable[[Any, Any], tuple[Any, Any]] | None = None,
     ) -> None:
         self.vocab = vocab
         self._num_samples = num_samples
@@ -78,7 +77,7 @@ class _CharacterGenerator(AbstractDataset):
         self.img_transforms = img_transforms
         self.sample_transforms = sample_transforms
 
-        self._data:
+        self._data: list[Image.Image] = []
         if cache_samples:
             self._data = [
                 (synthesize_text_img(char, font_family=font), idx)  # type: ignore[misc]
@@ -89,7 +88,7 @@ class _CharacterGenerator(AbstractDataset):
     def __len__(self) -> int:
         return self._num_samples
 
-    def _read_sample(self, index: int) ->
+    def _read_sample(self, index: int) -> tuple[Any, int]:
         # Samples are already cached
         if len(self._data) > 0:
             idx = index % len(self._data)
@@ -110,9 +109,9 @@ class _WordGenerator(AbstractDataset):
         max_chars: int,
         num_samples: int,
         cache_samples: bool = False,
-        font_family:
-        img_transforms:
-        sample_transforms:
+        font_family: str | list[str] | None = None,
+        img_transforms: Callable[[Any], Any] | None = None,
+        sample_transforms: Callable[[Any, Any], tuple[Any, Any]] | None = None,
     ) -> None:
         self.vocab = vocab
         self.wordlen_range = (min_chars, max_chars)
@@ -128,7 +127,7 @@ class _WordGenerator(AbstractDataset):
         self.img_transforms = img_transforms
         self.sample_transforms = sample_transforms
 
-        self._data:
+        self._data: list[Image.Image] = []
         if cache_samples:
             _words = [self._generate_string(*self.wordlen_range) for _ in range(num_samples)]
             self._data = [
@@ -143,7 +142,7 @@ class _WordGenerator(AbstractDataset):
     def __len__(self) -> int:
         return self._num_samples
 
-    def _read_sample(self, index: int) ->
+    def _read_sample(self, index: int) -> tuple[Any, str]:
         # Samples are already cached
         if len(self._data) > 0:
             pil_img, target = self._data[index]  # type: ignore[misc]
```
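For reference, `synthesize_text_img`, whose signature is being modernized here, renders a string onto a solid background and returns a PIL image. A minimal usage sketch under the defaults visible above (the colors are illustrative):

```python
# Minimal sketch against the signature shown above; the import path mirrors this file's location.
from doctr.datasets.generator.base import synthesize_text_img

img = synthesize_text_img("doctr", font_size=32, text_color=(255, 255, 255))
print(img.size, img.mode)  # a PIL.Image.Image; background defaults to black per the code above
```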
doctr/datasets/generator/pytorch.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -18,7 +18,6 @@ class CharacterGenerator(_CharacterGenerator):
     >>> img, target = ds[0]
 
     Args:
-    ----
         vocab: vocabulary to take the character from
         num_samples: number of samples that will be generated iterating over the dataset
         cache_samples: whether generated images should be cached firsthand
@@ -40,7 +39,6 @@ class WordGenerator(_WordGenerator):
     >>> img, target = ds[0]
 
     Args:
-    ----
         vocab: vocabulary to take the character from
         min_chars: minimum number of characters in a word
         max_chars: maximum number of characters in a word
```
doctr/datasets/generator/tensorflow.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -18,7 +18,6 @@ class CharacterGenerator(_CharacterGenerator):
     >>> img, target = ds[0]
 
     Args:
-    ----
         vocab: vocabulary to take the character from
         num_samples: number of samples that will be generated iterating over the dataset
         cache_samples: whether generated images should be cached firsthand
@@ -46,7 +45,6 @@ class WordGenerator(_WordGenerator):
     >>> img, target = ds[0]
 
     Args:
-    ----
         vocab: vocabulary to take the character from
         min_chars: minimum number of characters in a word
         max_chars: maximum number of characters in a word
```
doctr/datasets/ic03.py
CHANGED

```diff
@@ -1,10 +1,10 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import os
-from typing import Any
+from typing import Any
 
 import defusedxml.ElementTree as ET
 import numpy as np
@@ -28,7 +28,6 @@ class IC03(VisionDataset):
     >>> img, target = train_set[0]
 
     Args:
-    ----
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
@@ -71,7 +70,7 @@ class IC03(VisionDataset):
         )
 
         self.train = train
-        self.data:
+        self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
         np_dtype = np.float32
 
         # Load xml data
@@ -81,7 +80,7 @@ class IC03(VisionDataset):
         xml_tree = ET.parse(os.path.join(tmp_root, "words.xml"))
         xml_root = xml_tree.getroot()
 
-        for image in tqdm(iterable=xml_root, desc="
+        for image in tqdm(iterable=xml_root, desc="Preparing and Loading IC03", total=len(xml_root)):
             name, _resolution, rectangles = image
 
             # File existence check
@@ -123,7 +122,7 @@ class IC03(VisionDataset):
             if recognition_task:
                 crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, name.text), geoms=boxes)
                 for crop, label in zip(crops, labels):
-                    if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
+                    if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0 and " " not in label:
                         self.data.append((crop, label))
             elif detection_task:
                 self.data.append((name.text, boxes))
```
doctr/datasets/ic13.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,7 +6,7 @@
 import csv
 import os
 from pathlib import Path
-from typing import Any
+from typing import Any
 
 import numpy as np
 from tqdm import tqdm
@@ -33,7 +33,6 @@ class IC13(AbstractDataset):
     >>> img, target = test_set[0]
 
     Args:
-    ----
         img_folder: folder with all the images of the dataset
         label_folder: folder with all annotation files for the images
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
@@ -66,12 +65,12 @@ class IC13(AbstractDataset):
                 f"unable to locate {label_folder if not os.path.exists(label_folder) else img_folder}"
             )
 
-        self.data:
+        self.data: list[tuple[Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
         np_dtype = np.float32
 
         img_names = os.listdir(img_folder)
 
-        for img_name in tqdm(iterable=img_names, desc="
+        for img_name in tqdm(iterable=img_names, desc="Preparing and Loading IC13", total=len(img_names)):
             img_path = Path(img_folder, img_name)
             label_path = Path(label_folder, "gt_" + Path(img_name).stem + ".txt")
 
@@ -101,7 +100,8 @@ class IC13(AbstractDataset):
             if recognition_task:
                 crops = crop_bboxes_from_image(img_path=img_path, geoms=box_targets)
                 for crop, label in zip(crops, labels):
-                    self.data.append((crop, label))
+                    if " " not in label:
+                        self.data.append((crop, label))
             elif detection_task:
                 self.data.append((img_path, box_targets))
             else:
```
doctr/datasets/iiit5k.py
CHANGED

```diff
@@ -1,13 +1,14 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import os
-from typing import Any
+from typing import Any
 
 import numpy as np
 import scipy.io as sio
+from PIL import Image
 from tqdm import tqdm
 
 from .datasets import VisionDataset
@@ -30,7 +31,6 @@ class IIIT5K(VisionDataset):
     >>> img, target = train_set[0]
 
     Args:
-    ----
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
@@ -70,10 +70,12 @@ class IIIT5K(VisionDataset):
         mat_file = "trainCharBound" if self.train else "testCharBound"
         mat_data = sio.loadmat(os.path.join(tmp_root, f"{mat_file}.mat"))[mat_file][0]
 
-        self.data:
+        self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
         np_dtype = np.float32
 
-        for img_path, label, box_targets in tqdm(
+        for img_path, label, box_targets in tqdm(
+            iterable=mat_data, desc="Preparing and Loading IIIT5K", total=len(mat_data)
+        ):
             _raw_path = img_path[0]
             _raw_label = label[0]
 
@@ -97,7 +99,9 @@ class IIIT5K(VisionDataset):
             box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]
 
             if recognition_task:
-
+                if " " not in _raw_label:
+                    with Image.open(os.path.join(tmp_root, _raw_path)) as pil_img:
+                        self.data.append((np.array(pil_img.convert("RGB")), _raw_label))
             elif detection_task:
                 self.data.append((_raw_path, np.asarray(box_targets, dtype=np_dtype)))
             else:
```
doctr/datasets/iiithws.py
CHANGED

```diff
@@ -1,11 +1,11 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import os
 from random import sample
-from typing import Any
+from typing import Any
 
 from tqdm import tqdm
 
@@ -32,7 +32,6 @@ class IIITHWS(AbstractDataset):
     >>> img, target = test_set[0]
 
     Args:
-    ----
         img_folder: folder with all the images of the dataset
         label_path: path to the file with the labels
         train: whether the subset should be the training one
@@ -52,7 +51,7 @@ class IIITHWS(AbstractDataset):
         if not os.path.exists(label_path) or not os.path.exists(img_folder):
             raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")
 
-        self.data:
+        self.data: list[tuple[str, str]] = []
         self.train = train
 
         with open(label_path) as f:
@@ -64,7 +63,7 @@ class IIITHWS(AbstractDataset):
         set_slice = slice(train_samples) if self.train else slice(train_samples, None)
 
         for annotation in tqdm(
-            iterable=annotations[set_slice], desc="
+            iterable=annotations[set_slice], desc="Preparing and Loading IIITHWS", total=len(annotations[set_slice])
         ):
             img_path, label = annotation.split()[0:2]
             img_path = os.path.join(img_folder, img_path)
```
doctr/datasets/imgur5k.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -7,7 +7,7 @@ import glob
 import json
 import os
 from pathlib import Path
-from typing import Any
+from typing import Any
 
 import cv2
 import numpy as np
@@ -40,7 +40,6 @@ class IMGUR5K(AbstractDataset):
     >>> img, target = test_set[0]
 
     Args:
-    ----
         img_folder: folder with all the images of the dataset
         label_path: path to the annotations file of the dataset
         train: whether the subset should be the training one
@@ -73,7 +72,7 @@ class IMGUR5K(AbstractDataset):
         if not os.path.exists(label_path) or not os.path.exists(img_folder):
             raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")
 
-        self.data:
+        self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
         self.train = train
         np_dtype = np.float32
 
@@ -96,7 +95,9 @@ class IMGUR5K(AbstractDataset):
         with open(label_path) as f:
             annotation_file = json.load(f)
 
-        for img_name in tqdm(
+        for img_name in tqdm(
+            iterable=img_names[set_slice], desc="Preparing and Loading IMGUR5K", total=len(img_names[set_slice])
+        ):
             img_path = Path(img_folder, img_name)
             img_id = img_name.split(".")[0]
 
@@ -132,7 +133,13 @@ class IMGUR5K(AbstractDataset):
                     img_path=os.path.join(self.root, img_name), geoms=np.asarray(box_targets, dtype=np_dtype)
                 )
                 for crop, label in zip(crops, labels):
-                    if
+                    if (
+                        crop.shape[0] > 0
+                        and crop.shape[1] > 0
+                        and len(label) > 0
+                        and len(label) < 30
+                        and " " not in label
+                    ):
                         # write data to disk
                         with open(os.path.join(reco_folder_path, f"{reco_images_counter}.txt"), "w") as f:
                             f.write(label)
@@ -151,6 +158,7 @@ class IMGUR5K(AbstractDataset):
         return f"train={self.train}"
 
     def _read_from_folder(self, path: str) -> None:
-
+        img_paths = glob.glob(os.path.join(path, "*.png"))
+        for img_path in tqdm(iterable=img_paths, desc="Preparing and Loading IMGUR5K", total=len(img_paths)):
             with open(os.path.join(path, f"{os.path.basename(img_path)[:-4]}.txt"), "r") as f:
                 self.data.append((img_path, f.read()))
```
doctr/datasets/loader.py
CHANGED

```diff
@@ -1,10 +1,10 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import math
-from
+from collections.abc import Callable
 
 import numpy as np
 import tensorflow as tf
@@ -16,12 +16,10 @@ def default_collate(samples):
     """Collate multiple elements into batches
 
     Args:
-    ----
         samples: list of N tuples containing M elements
 
     Returns:
-    -------
-        Tuple of M sequences contianing N elements each
+        tuple of M sequences containing N elements each
     """
     batch_data = zip(*samples)
 
@@ -40,7 +38,6 @@ class DataLoader:
     >>> images, targets = next(train_iter)
 
     Args:
-    ----
         dataset: the dataset
         shuffle: whether the samples should be shuffled before passing it to the iterator
         batch_size: number of elements in each batch
@@ -54,7 +51,7 @@ class DataLoader:
         shuffle: bool = True,
         batch_size: int = 1,
         drop_last: bool = False,
-        collate_fn:
+        collate_fn: Callable | None = None,
     ) -> None:
         self.dataset = dataset
         self.shuffle = shuffle
```
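The corrected `default_collate` docstring describes a transposition: `zip(*samples)` turns N sample tuples of M elements into M sequences of N elements each. A quick illustration with plain tuples:

```python
# Illustration of the zip(*samples) transposition used by default_collate.
samples = [("img0", "tgt0"), ("img1", "tgt1"), ("img2", "tgt2")]  # N=3 tuples of M=2 elements
images, targets = zip(*samples)  # M=2 sequences of N=3 elements each
print(images)   # ('img0', 'img1', 'img2')
print(targets)  # ('tgt0', 'tgt1', 'tgt2')
```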
doctr/datasets/mjsynth.py
CHANGED

```diff
@@ -1,10 +1,10 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import os
-from typing import Any
+from typing import Any
 
 from tqdm import tqdm
 
@@ -30,7 +30,6 @@ class MJSynth(AbstractDataset):
     >>> img, target = test_set[0]
 
     Args:
-    ----
         img_folder: folder with all the images of the dataset
         label_path: path to the file with the labels
         train: whether the subset should be the training one
@@ -86,7 +85,7 @@ class MJSynth(AbstractDataset):
         if not os.path.exists(label_path) or not os.path.exists(img_folder):
             raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")
 
-        self.data:
+        self.data: list[tuple[str, str]] = []
         self.train = train
 
         with open(label_path) as f:
@@ -95,7 +94,9 @@ class MJSynth(AbstractDataset):
         train_samples = int(len(img_paths) * 0.9)
         set_slice = slice(train_samples) if self.train else slice(train_samples, None)
 
-        for path in tqdm(
+        for path in tqdm(
+            iterable=img_paths[set_slice], desc="Preparing and Loading MJSynth", total=len(img_paths[set_slice])
+        ):
             if path not in self.BLACKLIST:
                 label = path.split("_")[1]
                 img_path = os.path.join(img_folder, path[2:]).strip()
```
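Like IIITHWS above, MJSynth carves a 90/10 train/test split out of a single annotation list with a `slice` object, as the context lines show. A standalone sketch of the pattern; the paths are invented:

```python
# Standalone sketch of the slice-based 90/10 split visible in the hunk above; paths are invented.
paths = [f"img_{i}.jpg" for i in range(10)]
train = True

train_samples = int(len(paths) * 0.9)  # 9
set_slice = slice(train_samples) if train else slice(train_samples, None)
print(paths[set_slice])  # first 9 paths for the train split, the remaining 1 for test
```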
doctr/datasets/ocr.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,7 +6,7 @@
 import json
 import os
 from pathlib import Path
-from typing import Any
+from typing import Any
 
 import numpy as np
 
@@ -24,7 +24,6 @@ class OCRDataset(AbstractDataset):
     >>> img, target = train_set[0]
 
     Args:
-    ----
         img_folder: local path to image folder (all jpg at the root)
         label_file: local path to the label file
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
@@ -41,7 +40,7 @@ class OCRDataset(AbstractDataset):
         super().__init__(img_folder, **kwargs)
 
         # List images
-        self.data:
+        self.data: list[tuple[Path, dict[str, Any]]] = []
         np_dtype = np.float32
         with open(label_file, "rb") as f:
             data = json.load(f)
```
doctr/datasets/orientation.py
CHANGED

```diff
@@ -1,10 +1,10 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 import os
-from typing import Any
+from typing import Any
 
 import numpy as np
 
@@ -21,7 +21,6 @@ class OrientationDataset(AbstractDataset):
     >>> img, target = train_set[0]
 
     Args:
-    ----
         img_folder: folder with all the images of the dataset
         **kwargs: keyword arguments from `AbstractDataset`.
     """
@@ -37,4 +36,4 @@ class OrientationDataset(AbstractDataset):
         )
 
         # initialize dataset with 0 degree rotation targets
-        self.data:
+        self.data: list[tuple[str, np.ndarray]] = [(img_name, np.array([0])) for img_name in os.listdir(self.root)]
```
doctr/datasets/recognition.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,7 +6,7 @@
 import json
 import os
 from pathlib import Path
-from typing import Any
+from typing import Any
 
 from .datasets import AbstractDataset
 
@@ -22,9 +22,8 @@ class RecognitionDataset(AbstractDataset):
     >>> img, target = train_set[0]
 
     Args:
-    ----
         img_folder: path to the images folder
-        labels_path:
+        labels_path: path to the json file containing all labels (character sequences)
         **kwargs: keyword arguments from `AbstractDataset`.
     """
 
@@ -36,7 +35,7 @@ class RecognitionDataset(AbstractDataset):
     ) -> None:
         super().__init__(img_folder, **kwargs)
 
-        self.data:
+        self.data: list[tuple[str, str]] = []
        with open(labels_path, encoding="utf-8") as f:
            labels = json.load(f)
 
```