python-doctr 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/contrib/__init__.py +1 -0
- doctr/contrib/artefacts.py +7 -9
- doctr/contrib/base.py +8 -17
- doctr/datasets/cord.py +8 -7
- doctr/datasets/datasets/__init__.py +4 -4
- doctr/datasets/datasets/base.py +16 -16
- doctr/datasets/datasets/pytorch.py +12 -12
- doctr/datasets/datasets/tensorflow.py +10 -10
- doctr/datasets/detection.py +6 -9
- doctr/datasets/doc_artefacts.py +3 -4
- doctr/datasets/funsd.py +7 -6
- doctr/datasets/generator/__init__.py +4 -4
- doctr/datasets/generator/base.py +16 -17
- doctr/datasets/generator/pytorch.py +1 -3
- doctr/datasets/generator/tensorflow.py +1 -3
- doctr/datasets/ic03.py +4 -5
- doctr/datasets/ic13.py +4 -5
- doctr/datasets/iiit5k.py +6 -5
- doctr/datasets/iiithws.py +4 -5
- doctr/datasets/imgur5k.py +6 -5
- doctr/datasets/loader.py +4 -7
- doctr/datasets/mjsynth.py +6 -5
- doctr/datasets/ocr.py +3 -4
- doctr/datasets/orientation.py +3 -4
- doctr/datasets/recognition.py +3 -4
- doctr/datasets/sroie.py +6 -5
- doctr/datasets/svhn.py +6 -5
- doctr/datasets/svt.py +4 -5
- doctr/datasets/synthtext.py +4 -5
- doctr/datasets/utils.py +34 -29
- doctr/datasets/vocabs.py +17 -7
- doctr/datasets/wildreceipt.py +14 -10
- doctr/file_utils.py +2 -7
- doctr/io/elements.py +59 -79
- doctr/io/html.py +1 -3
- doctr/io/image/__init__.py +3 -3
- doctr/io/image/base.py +2 -5
- doctr/io/image/pytorch.py +3 -12
- doctr/io/image/tensorflow.py +2 -11
- doctr/io/pdf.py +5 -7
- doctr/io/reader.py +5 -11
- doctr/models/_utils.py +14 -22
- doctr/models/builder.py +30 -48
- doctr/models/classification/magc_resnet/__init__.py +3 -3
- doctr/models/classification/magc_resnet/pytorch.py +10 -13
- doctr/models/classification/magc_resnet/tensorflow.py +8 -11
- doctr/models/classification/mobilenet/__init__.py +3 -3
- doctr/models/classification/mobilenet/pytorch.py +5 -17
- doctr/models/classification/mobilenet/tensorflow.py +8 -21
- doctr/models/classification/predictor/__init__.py +4 -4
- doctr/models/classification/predictor/pytorch.py +6 -8
- doctr/models/classification/predictor/tensorflow.py +6 -8
- doctr/models/classification/resnet/__init__.py +4 -4
- doctr/models/classification/resnet/pytorch.py +21 -31
- doctr/models/classification/resnet/tensorflow.py +20 -31
- doctr/models/classification/textnet/__init__.py +3 -3
- doctr/models/classification/textnet/pytorch.py +10 -17
- doctr/models/classification/textnet/tensorflow.py +8 -15
- doctr/models/classification/vgg/__init__.py +3 -3
- doctr/models/classification/vgg/pytorch.py +5 -7
- doctr/models/classification/vgg/tensorflow.py +9 -12
- doctr/models/classification/vit/__init__.py +3 -3
- doctr/models/classification/vit/pytorch.py +8 -14
- doctr/models/classification/vit/tensorflow.py +6 -12
- doctr/models/classification/zoo.py +19 -14
- doctr/models/core.py +3 -3
- doctr/models/detection/_utils/__init__.py +4 -4
- doctr/models/detection/_utils/base.py +4 -7
- doctr/models/detection/_utils/pytorch.py +1 -5
- doctr/models/detection/_utils/tensorflow.py +1 -5
- doctr/models/detection/core.py +2 -8
- doctr/models/detection/differentiable_binarization/__init__.py +4 -4
- doctr/models/detection/differentiable_binarization/base.py +7 -17
- doctr/models/detection/differentiable_binarization/pytorch.py +27 -30
- doctr/models/detection/differentiable_binarization/tensorflow.py +15 -25
- doctr/models/detection/fast/__init__.py +4 -4
- doctr/models/detection/fast/base.py +6 -14
- doctr/models/detection/fast/pytorch.py +24 -31
- doctr/models/detection/fast/tensorflow.py +14 -26
- doctr/models/detection/linknet/__init__.py +4 -4
- doctr/models/detection/linknet/base.py +6 -15
- doctr/models/detection/linknet/pytorch.py +24 -27
- doctr/models/detection/linknet/tensorflow.py +14 -23
- doctr/models/detection/predictor/__init__.py +5 -5
- doctr/models/detection/predictor/pytorch.py +6 -7
- doctr/models/detection/predictor/tensorflow.py +5 -6
- doctr/models/detection/zoo.py +27 -7
- doctr/models/factory/hub.py +3 -7
- doctr/models/kie_predictor/__init__.py +5 -5
- doctr/models/kie_predictor/base.py +4 -5
- doctr/models/kie_predictor/pytorch.py +18 -19
- doctr/models/kie_predictor/tensorflow.py +13 -14
- doctr/models/modules/layers/__init__.py +3 -3
- doctr/models/modules/layers/pytorch.py +6 -9
- doctr/models/modules/layers/tensorflow.py +5 -7
- doctr/models/modules/transformer/__init__.py +3 -3
- doctr/models/modules/transformer/pytorch.py +12 -13
- doctr/models/modules/transformer/tensorflow.py +9 -10
- doctr/models/modules/vision_transformer/__init__.py +3 -3
- doctr/models/modules/vision_transformer/pytorch.py +2 -3
- doctr/models/modules/vision_transformer/tensorflow.py +3 -3
- doctr/models/predictor/__init__.py +5 -5
- doctr/models/predictor/base.py +28 -29
- doctr/models/predictor/pytorch.py +12 -13
- doctr/models/predictor/tensorflow.py +8 -9
- doctr/models/preprocessor/__init__.py +4 -4
- doctr/models/preprocessor/pytorch.py +13 -17
- doctr/models/preprocessor/tensorflow.py +10 -14
- doctr/models/recognition/core.py +3 -7
- doctr/models/recognition/crnn/__init__.py +4 -4
- doctr/models/recognition/crnn/pytorch.py +20 -28
- doctr/models/recognition/crnn/tensorflow.py +11 -23
- doctr/models/recognition/master/__init__.py +3 -3
- doctr/models/recognition/master/base.py +3 -7
- doctr/models/recognition/master/pytorch.py +22 -24
- doctr/models/recognition/master/tensorflow.py +12 -22
- doctr/models/recognition/parseq/__init__.py +3 -3
- doctr/models/recognition/parseq/base.py +3 -7
- doctr/models/recognition/parseq/pytorch.py +26 -26
- doctr/models/recognition/parseq/tensorflow.py +16 -22
- doctr/models/recognition/predictor/__init__.py +5 -5
- doctr/models/recognition/predictor/_utils.py +7 -10
- doctr/models/recognition/predictor/pytorch.py +6 -6
- doctr/models/recognition/predictor/tensorflow.py +5 -6
- doctr/models/recognition/sar/__init__.py +4 -4
- doctr/models/recognition/sar/pytorch.py +20 -21
- doctr/models/recognition/sar/tensorflow.py +12 -21
- doctr/models/recognition/utils.py +5 -10
- doctr/models/recognition/vitstr/__init__.py +4 -4
- doctr/models/recognition/vitstr/base.py +3 -7
- doctr/models/recognition/vitstr/pytorch.py +18 -20
- doctr/models/recognition/vitstr/tensorflow.py +12 -20
- doctr/models/recognition/zoo.py +22 -11
- doctr/models/utils/__init__.py +4 -4
- doctr/models/utils/pytorch.py +14 -17
- doctr/models/utils/tensorflow.py +17 -16
- doctr/models/zoo.py +1 -5
- doctr/transforms/functional/__init__.py +3 -3
- doctr/transforms/functional/base.py +4 -11
- doctr/transforms/functional/pytorch.py +20 -28
- doctr/transforms/functional/tensorflow.py +10 -22
- doctr/transforms/modules/__init__.py +4 -4
- doctr/transforms/modules/base.py +48 -55
- doctr/transforms/modules/pytorch.py +58 -22
- doctr/transforms/modules/tensorflow.py +18 -32
- doctr/utils/common_types.py +8 -9
- doctr/utils/data.py +8 -12
- doctr/utils/fonts.py +2 -7
- doctr/utils/geometry.py +16 -47
- doctr/utils/metrics.py +17 -37
- doctr/utils/multithreading.py +4 -6
- doctr/utils/reconstitution.py +9 -13
- doctr/utils/repr.py +2 -3
- doctr/utils/visualization.py +16 -29
- doctr/version.py +1 -1
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/METADATA +54 -52
- python_doctr-0.11.0.dist-info/RECORD +173 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/WHEEL +1 -1
- python_doctr-0.10.0.dist-info/RECORD +0 -173
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/LICENSE +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.10.0.dist-info → python_doctr-0.11.0.dist-info}/zip-safe +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -18,7 +18,6 @@ class CharacterGenerator(_CharacterGenerator):
|
|
|
18
18
|
>>> img, target = ds[0]
|
|
19
19
|
|
|
20
20
|
Args:
|
|
21
|
-
----
|
|
22
21
|
vocab: vocabulary to take the character from
|
|
23
22
|
num_samples: number of samples that will be generated iterating over the dataset
|
|
24
23
|
cache_samples: whether generated images should be cached firsthand
|
|
@@ -40,7 +39,6 @@ class WordGenerator(_WordGenerator):
|
|
|
40
39
|
>>> img, target = ds[0]
|
|
41
40
|
|
|
42
41
|
Args:
|
|
43
|
-
----
|
|
44
42
|
vocab: vocabulary to take the character from
|
|
45
43
|
min_chars: minimum number of characters in a word
|
|
46
44
|
max_chars: maximum number of characters in a word
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -18,7 +18,6 @@ class CharacterGenerator(_CharacterGenerator):
|
|
|
18
18
|
>>> img, target = ds[0]
|
|
19
19
|
|
|
20
20
|
Args:
|
|
21
|
-
----
|
|
22
21
|
vocab: vocabulary to take the character from
|
|
23
22
|
num_samples: number of samples that will be generated iterating over the dataset
|
|
24
23
|
cache_samples: whether generated images should be cached firsthand
|
|
@@ -46,7 +45,6 @@ class WordGenerator(_WordGenerator):
|
|
|
46
45
|
>>> img, target = ds[0]
|
|
47
46
|
|
|
48
47
|
Args:
|
|
49
|
-
----
|
|
50
48
|
vocab: vocabulary to take the character from
|
|
51
49
|
min_chars: minimum number of characters in a word
|
|
52
50
|
max_chars: maximum number of characters in a word
|
doctr/datasets/ic03.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import os
|
|
7
|
-
from typing import Any, Dict, List, Tuple, Union
|
|
7
|
+
from typing import Any
|
|
8
8
|
|
|
9
9
|
import defusedxml.ElementTree as ET
|
|
10
10
|
import numpy as np
|
|
@@ -28,7 +28,6 @@ class IC03(VisionDataset):
|
|
|
28
28
|
>>> img, target = train_set[0]
|
|
29
29
|
|
|
30
30
|
Args:
|
|
31
|
-
----
|
|
32
31
|
train: whether the subset should be the training one
|
|
33
32
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
34
33
|
recognition_task: whether the dataset should be used for recognition task
|
|
@@ -71,7 +70,7 @@ class IC03(VisionDataset):
|
|
|
71
70
|
)
|
|
72
71
|
|
|
73
72
|
self.train = train
|
|
74
|
-
self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
|
|
73
|
+
self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
|
|
75
74
|
np_dtype = np.float32
|
|
76
75
|
|
|
77
76
|
# Load xml data
|
|
@@ -81,7 +80,7 @@ class IC03(VisionDataset):
|
|
|
81
80
|
xml_tree = ET.parse(os.path.join(tmp_root, "words.xml"))
|
|
82
81
|
xml_root = xml_tree.getroot()
|
|
83
82
|
|
|
84
|
-
for image in tqdm(iterable=xml_root, desc="Unpacking IC03", total=len(xml_root)):
|
|
83
|
+
for image in tqdm(iterable=xml_root, desc="Preparing and Loading IC03", total=len(xml_root)):
|
|
85
84
|
name, _resolution, rectangles = image
|
|
86
85
|
|
|
87
86
|
# File existence check
|
doctr/datasets/ic13.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
import csv
|
|
7
7
|
import os
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import Any, Dict, List, Tuple, Union
|
|
9
|
+
from typing import Any
|
|
10
10
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
from tqdm import tqdm
|
|
@@ -33,7 +33,6 @@ class IC13(AbstractDataset):
|
|
|
33
33
|
>>> img, target = test_set[0]
|
|
34
34
|
|
|
35
35
|
Args:
|
|
36
|
-
----
|
|
37
36
|
img_folder: folder with all the images of the dataset
|
|
38
37
|
label_folder: folder with all annotation files for the images
|
|
39
38
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
@@ -66,12 +65,12 @@ class IC13(AbstractDataset):
|
|
|
66
65
|
f"unable to locate {label_folder if not os.path.exists(label_folder) else img_folder}"
|
|
67
66
|
)
|
|
68
67
|
|
|
69
|
-
self.data: List[Tuple[Union[Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
|
|
68
|
+
self.data: list[tuple[Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
|
|
70
69
|
np_dtype = np.float32
|
|
71
70
|
|
|
72
71
|
img_names = os.listdir(img_folder)
|
|
73
72
|
|
|
74
|
-
for img_name in tqdm(iterable=img_names, desc="Unpacking IC13", total=len(img_names)):
|
|
73
|
+
for img_name in tqdm(iterable=img_names, desc="Preparing and Loading IC13", total=len(img_names)):
|
|
75
74
|
img_path = Path(img_folder, img_name)
|
|
76
75
|
label_path = Path(label_folder, "gt_" + Path(img_name).stem + ".txt")
|
|
77
76
|
|
doctr/datasets/iiit5k.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import os
|
|
7
|
-
from typing import Any, Dict, List, Tuple, Union
|
|
7
|
+
from typing import Any
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
import scipy.io as sio
|
|
@@ -30,7 +30,6 @@ class IIIT5K(VisionDataset):
|
|
|
30
30
|
>>> img, target = train_set[0]
|
|
31
31
|
|
|
32
32
|
Args:
|
|
33
|
-
----
|
|
34
33
|
train: whether the subset should be the training one
|
|
35
34
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
36
35
|
recognition_task: whether the dataset should be used for recognition task
|
|
@@ -70,10 +69,12 @@ class IIIT5K(VisionDataset):
|
|
|
70
69
|
mat_file = "trainCharBound" if self.train else "testCharBound"
|
|
71
70
|
mat_data = sio.loadmat(os.path.join(tmp_root, f"{mat_file}.mat"))[mat_file][0]
|
|
72
71
|
|
|
73
|
-
self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
|
|
72
|
+
self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
|
|
74
73
|
np_dtype = np.float32
|
|
75
74
|
|
|
76
|
-
for img_path, label, box_targets in tqdm(iterable=mat_data, desc="Unpacking IIIT5K", total=len(mat_data)):
|
|
75
|
+
for img_path, label, box_targets in tqdm(
|
|
76
|
+
iterable=mat_data, desc="Preparing and Loading IIIT5K", total=len(mat_data)
|
|
77
|
+
):
|
|
77
78
|
_raw_path = img_path[0]
|
|
78
79
|
_raw_label = label[0]
|
|
79
80
|
|
doctr/datasets/iiithws.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import os
|
|
7
7
|
from random import sample
|
|
8
|
-
from typing import Any, List, Tuple
|
|
8
|
+
from typing import Any
|
|
9
9
|
|
|
10
10
|
from tqdm import tqdm
|
|
11
11
|
|
|
@@ -32,7 +32,6 @@ class IIITHWS(AbstractDataset):
|
|
|
32
32
|
>>> img, target = test_set[0]
|
|
33
33
|
|
|
34
34
|
Args:
|
|
35
|
-
----
|
|
36
35
|
img_folder: folder with all the images of the dataset
|
|
37
36
|
label_path: path to the file with the labels
|
|
38
37
|
train: whether the subset should be the training one
|
|
@@ -52,7 +51,7 @@ class IIITHWS(AbstractDataset):
|
|
|
52
51
|
if not os.path.exists(label_path) or not os.path.exists(img_folder):
|
|
53
52
|
raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")
|
|
54
53
|
|
|
55
|
-
self.data: List[Tuple[str, str]] = []
|
|
54
|
+
self.data: list[tuple[str, str]] = []
|
|
56
55
|
self.train = train
|
|
57
56
|
|
|
58
57
|
with open(label_path) as f:
|
|
@@ -64,7 +63,7 @@ class IIITHWS(AbstractDataset):
|
|
|
64
63
|
set_slice = slice(train_samples) if self.train else slice(train_samples, None)
|
|
65
64
|
|
|
66
65
|
for annotation in tqdm(
|
|
67
|
-
iterable=annotations[set_slice], desc="Unpacking IIITHWS", total=len(annotations[set_slice])
|
|
66
|
+
iterable=annotations[set_slice], desc="Preparing and Loading IIITHWS", total=len(annotations[set_slice])
|
|
68
67
|
):
|
|
69
68
|
img_path, label = annotation.split()[0:2]
|
|
70
69
|
img_path = os.path.join(img_folder, img_path)
|
doctr/datasets/imgur5k.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -7,7 +7,7 @@ import glob
|
|
|
7
7
|
import json
|
|
8
8
|
import os
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import Any, Dict, List, Tuple, Union
|
|
10
|
+
from typing import Any
|
|
11
11
|
|
|
12
12
|
import cv2
|
|
13
13
|
import numpy as np
|
|
@@ -40,7 +40,6 @@ class IMGUR5K(AbstractDataset):
|
|
|
40
40
|
>>> img, target = test_set[0]
|
|
41
41
|
|
|
42
42
|
Args:
|
|
43
|
-
----
|
|
44
43
|
img_folder: folder with all the images of the dataset
|
|
45
44
|
label_path: path to the annotations file of the dataset
|
|
46
45
|
train: whether the subset should be the training one
|
|
@@ -73,7 +72,7 @@ class IMGUR5K(AbstractDataset):
|
|
|
73
72
|
if not os.path.exists(label_path) or not os.path.exists(img_folder):
|
|
74
73
|
raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")
|
|
75
74
|
|
|
76
|
-
self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
|
|
75
|
+
self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
|
|
77
76
|
self.train = train
|
|
78
77
|
np_dtype = np.float32
|
|
79
78
|
|
|
@@ -96,7 +95,9 @@ class IMGUR5K(AbstractDataset):
|
|
|
96
95
|
with open(label_path) as f:
|
|
97
96
|
annotation_file = json.load(f)
|
|
98
97
|
|
|
99
|
-
for img_name in tqdm(iterable=img_names[set_slice], desc="Unpacking IMGUR5K", total=len(img_names[set_slice])):
|
|
98
|
+
for img_name in tqdm(
|
|
99
|
+
iterable=img_names[set_slice], desc="Preparing and Loading IMGUR5K", total=len(img_names[set_slice])
|
|
100
|
+
):
|
|
100
101
|
img_path = Path(img_folder, img_name)
|
|
101
102
|
img_id = img_name.split(".")[0]
|
|
102
103
|
|
doctr/datasets/loader.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import math
|
|
7
|
-
from typing import Callable, Optional
|
|
7
|
+
from collections.abc import Callable
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
import tensorflow as tf
|
|
@@ -16,12 +16,10 @@ def default_collate(samples):
|
|
|
16
16
|
"""Collate multiple elements into batches
|
|
17
17
|
|
|
18
18
|
Args:
|
|
19
|
-
----
|
|
20
19
|
samples: list of N tuples containing M elements
|
|
21
20
|
|
|
22
21
|
Returns:
|
|
23
|
-
|
|
24
|
-
Tuple of M sequences contianing N elements each
|
|
22
|
+
tuple of M sequences contianing N elements each
|
|
25
23
|
"""
|
|
26
24
|
batch_data = zip(*samples)
|
|
27
25
|
|
|
@@ -40,7 +38,6 @@ class DataLoader:
|
|
|
40
38
|
>>> images, targets = next(train_iter)
|
|
41
39
|
|
|
42
40
|
Args:
|
|
43
|
-
----
|
|
44
41
|
dataset: the dataset
|
|
45
42
|
shuffle: whether the samples should be shuffled before passing it to the iterator
|
|
46
43
|
batch_size: number of elements in each batch
|
|
@@ -54,7 +51,7 @@ class DataLoader:
|
|
|
54
51
|
shuffle: bool = True,
|
|
55
52
|
batch_size: int = 1,
|
|
56
53
|
drop_last: bool = False,
|
|
57
|
-
collate_fn: Optional[Callable] = None,
|
|
54
|
+
collate_fn: Callable | None = None,
|
|
58
55
|
) -> None:
|
|
59
56
|
self.dataset = dataset
|
|
60
57
|
self.shuffle = shuffle
|
doctr/datasets/mjsynth.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import os
|
|
7
|
-
from typing import Any, List, Tuple
|
|
7
|
+
from typing import Any
|
|
8
8
|
|
|
9
9
|
from tqdm import tqdm
|
|
10
10
|
|
|
@@ -30,7 +30,6 @@ class MJSynth(AbstractDataset):
|
|
|
30
30
|
>>> img, target = test_set[0]
|
|
31
31
|
|
|
32
32
|
Args:
|
|
33
|
-
----
|
|
34
33
|
img_folder: folder with all the images of the dataset
|
|
35
34
|
label_path: path to the file with the labels
|
|
36
35
|
train: whether the subset should be the training one
|
|
@@ -86,7 +85,7 @@ class MJSynth(AbstractDataset):
|
|
|
86
85
|
if not os.path.exists(label_path) or not os.path.exists(img_folder):
|
|
87
86
|
raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")
|
|
88
87
|
|
|
89
|
-
self.data: List[Tuple[str, str]] = []
|
|
88
|
+
self.data: list[tuple[str, str]] = []
|
|
90
89
|
self.train = train
|
|
91
90
|
|
|
92
91
|
with open(label_path) as f:
|
|
@@ -95,7 +94,9 @@ class MJSynth(AbstractDataset):
|
|
|
95
94
|
train_samples = int(len(img_paths) * 0.9)
|
|
96
95
|
set_slice = slice(train_samples) if self.train else slice(train_samples, None)
|
|
97
96
|
|
|
98
|
-
for path in tqdm(iterable=img_paths[set_slice], desc="Unpacking MJSynth", total=len(img_paths[set_slice])):
|
|
97
|
+
for path in tqdm(
|
|
98
|
+
iterable=img_paths[set_slice], desc="Preparing and Loading MJSynth", total=len(img_paths[set_slice])
|
|
99
|
+
):
|
|
99
100
|
if path not in self.BLACKLIST:
|
|
100
101
|
label = path.split("_")[1]
|
|
101
102
|
img_path = os.path.join(img_folder, path[2:]).strip()
|
doctr/datasets/ocr.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
import json
|
|
7
7
|
import os
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import Any, Dict, List, Tuple
|
|
9
|
+
from typing import Any
|
|
10
10
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
|
|
@@ -24,7 +24,6 @@ class OCRDataset(AbstractDataset):
|
|
|
24
24
|
>>> img, target = train_set[0]
|
|
25
25
|
|
|
26
26
|
Args:
|
|
27
|
-
----
|
|
28
27
|
img_folder: local path to image folder (all jpg at the root)
|
|
29
28
|
label_file: local path to the label file
|
|
30
29
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
@@ -41,7 +40,7 @@ class OCRDataset(AbstractDataset):
|
|
|
41
40
|
super().__init__(img_folder, **kwargs)
|
|
42
41
|
|
|
43
42
|
# List images
|
|
44
|
-
self.data: List[Tuple[str, Dict[str, Any]]] = []
|
|
43
|
+
self.data: list[tuple[str, dict[str, Any]]] = []
|
|
45
44
|
np_dtype = np.float32
|
|
46
45
|
with open(label_file, "rb") as f:
|
|
47
46
|
data = json.load(f)
|
doctr/datasets/orientation.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import os
|
|
7
|
-
from typing import Any, List, Tuple
|
|
7
|
+
from typing import Any
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
|
|
@@ -21,7 +21,6 @@ class OrientationDataset(AbstractDataset):
|
|
|
21
21
|
>>> img, target = train_set[0]
|
|
22
22
|
|
|
23
23
|
Args:
|
|
24
|
-
----
|
|
25
24
|
img_folder: folder with all the images of the dataset
|
|
26
25
|
**kwargs: keyword arguments from `AbstractDataset`.
|
|
27
26
|
"""
|
|
@@ -37,4 +36,4 @@ class OrientationDataset(AbstractDataset):
|
|
|
37
36
|
)
|
|
38
37
|
|
|
39
38
|
# initialize dataset with 0 degree rotation targets
|
|
40
|
-
self.data: List[Tuple[str, np.ndarray]] = [(img_name, np.array([0])) for img_name in os.listdir(self.root)]
|
|
39
|
+
self.data: list[tuple[str, np.ndarray]] = [(img_name, np.array([0])) for img_name in os.listdir(self.root)]
|
doctr/datasets/recognition.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
import json
|
|
7
7
|
import os
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import Any, List, Tuple
|
|
9
|
+
from typing import Any
|
|
10
10
|
|
|
11
11
|
from .datasets import AbstractDataset
|
|
12
12
|
|
|
@@ -22,7 +22,6 @@ class RecognitionDataset(AbstractDataset):
|
|
|
22
22
|
>>> img, target = train_set[0]
|
|
23
23
|
|
|
24
24
|
Args:
|
|
25
|
-
----
|
|
26
25
|
img_folder: path to the images folder
|
|
27
26
|
labels_path: pathe to the json file containing all labels (character sequences)
|
|
28
27
|
**kwargs: keyword arguments from `AbstractDataset`.
|
|
@@ -36,7 +35,7 @@ class RecognitionDataset(AbstractDataset):
|
|
|
36
35
|
) -> None:
|
|
37
36
|
super().__init__(img_folder, **kwargs)
|
|
38
37
|
|
|
39
|
-
self.data: List[Tuple[str, str]] = []
|
|
38
|
+
self.data: list[tuple[str, str]] = []
|
|
40
39
|
with open(labels_path, encoding="utf-8") as f:
|
|
41
40
|
labels = json.load(f)
|
|
42
41
|
|
doctr/datasets/sroie.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
import csv
|
|
7
7
|
import os
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import Any, Dict, List, Tuple, Union
|
|
9
|
+
from typing import Any
|
|
10
10
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
from tqdm import tqdm
|
|
@@ -29,7 +29,6 @@ class SROIE(VisionDataset):
|
|
|
29
29
|
>>> img, target = train_set[0]
|
|
30
30
|
|
|
31
31
|
Args:
|
|
32
|
-
----
|
|
33
32
|
train: whether the subset should be the training one
|
|
34
33
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
35
34
|
recognition_task: whether the dataset should be used for recognition task
|
|
@@ -74,10 +73,12 @@ class SROIE(VisionDataset):
|
|
|
74
73
|
self.train = train
|
|
75
74
|
|
|
76
75
|
tmp_root = os.path.join(self.root, "images")
|
|
77
|
-
self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
|
|
76
|
+
self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
|
|
78
77
|
np_dtype = np.float32
|
|
79
78
|
|
|
80
|
-
for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking SROIE", total=len(os.listdir(tmp_root))):
|
|
79
|
+
for img_path in tqdm(
|
|
80
|
+
iterable=os.listdir(tmp_root), desc="Preparing and Loading SROIE", total=len(os.listdir(tmp_root))
|
|
81
|
+
):
|
|
81
82
|
# File existence check
|
|
82
83
|
if not os.path.exists(os.path.join(tmp_root, img_path)):
|
|
83
84
|
raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_path)}")
|
doctr/datasets/svhn.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import os
|
|
7
|
-
from typing import Any, Dict, List, Tuple, Union
|
|
7
|
+
from typing import Any
|
|
8
8
|
|
|
9
9
|
import h5py
|
|
10
10
|
import numpy as np
|
|
@@ -28,7 +28,6 @@ class SVHN(VisionDataset):
|
|
|
28
28
|
>>> img, target = train_set[0]
|
|
29
29
|
|
|
30
30
|
Args:
|
|
31
|
-
----
|
|
32
31
|
train: whether the subset should be the training one
|
|
33
32
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
34
33
|
recognition_task: whether the dataset should be used for recognition task
|
|
@@ -72,7 +71,7 @@ class SVHN(VisionDataset):
|
|
|
72
71
|
)
|
|
73
72
|
|
|
74
73
|
self.train = train
|
|
75
|
-
self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
|
|
74
|
+
self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
|
|
76
75
|
np_dtype = np.float32
|
|
77
76
|
|
|
78
77
|
tmp_root = os.path.join(self.root, "train" if train else "test")
|
|
@@ -81,7 +80,9 @@ class SVHN(VisionDataset):
|
|
|
81
80
|
with h5py.File(os.path.join(tmp_root, "digitStruct.mat"), "r") as f:
|
|
82
81
|
img_refs = f["digitStruct/name"]
|
|
83
82
|
box_refs = f["digitStruct/bbox"]
|
|
84
|
-
for img_ref, box_ref in tqdm(iterable=zip(img_refs, box_refs), desc="Unpacking SVHN", total=len(img_refs)):
|
|
83
|
+
for img_ref, box_ref in tqdm(
|
|
84
|
+
iterable=zip(img_refs, box_refs), desc="Preparing and Loading SVHN", total=len(img_refs)
|
|
85
|
+
):
|
|
85
86
|
# convert ascii matrix to string
|
|
86
87
|
img_name = "".join(map(chr, f[img_ref[0]][()].flatten()))
|
|
87
88
|
|
doctr/datasets/svt.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import os
|
|
7
|
-
from typing import Any, Dict, List, Tuple, Union
|
|
7
|
+
from typing import Any
|
|
8
8
|
|
|
9
9
|
import defusedxml.ElementTree as ET
|
|
10
10
|
import numpy as np
|
|
@@ -28,7 +28,6 @@ class SVT(VisionDataset):
|
|
|
28
28
|
>>> img, target = train_set[0]
|
|
29
29
|
|
|
30
30
|
Args:
|
|
31
|
-
----
|
|
32
31
|
train: whether the subset should be the training one
|
|
33
32
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
34
33
|
recognition_task: whether the dataset should be used for recognition task
|
|
@@ -62,7 +61,7 @@ class SVT(VisionDataset):
|
|
|
62
61
|
)
|
|
63
62
|
|
|
64
63
|
self.train = train
|
|
65
|
-
self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
|
|
64
|
+
self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
|
|
66
65
|
np_dtype = np.float32
|
|
67
66
|
|
|
68
67
|
# Load xml data
|
|
@@ -74,7 +73,7 @@ class SVT(VisionDataset):
|
|
|
74
73
|
)
|
|
75
74
|
xml_root = xml_tree.getroot()
|
|
76
75
|
|
|
77
|
-
for image in tqdm(iterable=xml_root, desc="Unpacking SVT", total=len(xml_root)):
|
|
76
|
+
for image in tqdm(iterable=xml_root, desc="Preparing and Loading SVT", total=len(xml_root)):
|
|
78
77
|
name, _, _, _resolution, rectangles = image
|
|
79
78
|
|
|
80
79
|
# File existence check
|
doctr/datasets/synthtext.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import glob
|
|
7
7
|
import os
|
|
8
|
-
from typing import Any, Dict, List, Tuple, Union
|
|
8
|
+
from typing import Any
|
|
9
9
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
from PIL import Image
|
|
@@ -31,7 +31,6 @@ class SynthText(VisionDataset):
|
|
|
31
31
|
>>> img, target = train_set[0]
|
|
32
32
|
|
|
33
33
|
Args:
|
|
34
|
-
----
|
|
35
34
|
train: whether the subset should be the training one
|
|
36
35
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
37
36
|
recognition_task: whether the dataset should be used for recognition task
|
|
@@ -65,7 +64,7 @@ class SynthText(VisionDataset):
|
|
|
65
64
|
)
|
|
66
65
|
|
|
67
66
|
self.train = train
|
|
68
|
-
self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
|
|
67
|
+
self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
|
|
69
68
|
np_dtype = np.float32
|
|
70
69
|
|
|
71
70
|
# Load mat data
|
|
@@ -91,7 +90,7 @@ class SynthText(VisionDataset):
|
|
|
91
90
|
del mat_data
|
|
92
91
|
|
|
93
92
|
for img_path, word_boxes, txt in tqdm(
|
|
94
|
-
iterable=zip(paths, boxes, labels), desc="Unpacking SynthText", total=len(paths)
|
|
93
|
+
iterable=zip(paths, boxes, labels), desc="Preparing and Loading SynthText", total=len(paths)
|
|
95
94
|
):
|
|
96
95
|
# File existence check
|
|
97
96
|
if not os.path.exists(os.path.join(tmp_root, img_path[0])):
|