python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/datasets/__init__.py +2 -0
- doctr/datasets/cord.py +6 -4
- doctr/datasets/datasets/base.py +3 -2
- doctr/datasets/datasets/pytorch.py +4 -2
- doctr/datasets/datasets/tensorflow.py +4 -2
- doctr/datasets/detection.py +6 -3
- doctr/datasets/doc_artefacts.py +2 -1
- doctr/datasets/funsd.py +7 -8
- doctr/datasets/generator/base.py +3 -2
- doctr/datasets/generator/pytorch.py +3 -1
- doctr/datasets/generator/tensorflow.py +3 -1
- doctr/datasets/ic03.py +3 -2
- doctr/datasets/ic13.py +2 -1
- doctr/datasets/iiit5k.py +6 -4
- doctr/datasets/iiithws.py +2 -1
- doctr/datasets/imgur5k.py +3 -2
- doctr/datasets/loader.py +4 -2
- doctr/datasets/mjsynth.py +2 -1
- doctr/datasets/ocr.py +2 -1
- doctr/datasets/orientation.py +40 -0
- doctr/datasets/recognition.py +3 -2
- doctr/datasets/sroie.py +2 -1
- doctr/datasets/svhn.py +2 -1
- doctr/datasets/svt.py +3 -2
- doctr/datasets/synthtext.py +2 -1
- doctr/datasets/utils.py +27 -11
- doctr/datasets/vocabs.py +26 -1
- doctr/datasets/wildreceipt.py +111 -0
- doctr/file_utils.py +3 -1
- doctr/io/elements.py +52 -35
- doctr/io/html.py +5 -3
- doctr/io/image/base.py +5 -4
- doctr/io/image/pytorch.py +12 -7
- doctr/io/image/tensorflow.py +11 -6
- doctr/io/pdf.py +5 -4
- doctr/io/reader.py +13 -5
- doctr/models/_utils.py +30 -53
- doctr/models/artefacts/barcode.py +4 -3
- doctr/models/artefacts/face.py +4 -2
- doctr/models/builder.py +58 -43
- doctr/models/classification/__init__.py +1 -0
- doctr/models/classification/magc_resnet/pytorch.py +5 -2
- doctr/models/classification/magc_resnet/tensorflow.py +5 -2
- doctr/models/classification/mobilenet/pytorch.py +16 -4
- doctr/models/classification/mobilenet/tensorflow.py +29 -20
- doctr/models/classification/predictor/pytorch.py +3 -2
- doctr/models/classification/predictor/tensorflow.py +2 -1
- doctr/models/classification/resnet/pytorch.py +23 -13
- doctr/models/classification/resnet/tensorflow.py +33 -26
- doctr/models/classification/textnet/__init__.py +6 -0
- doctr/models/classification/textnet/pytorch.py +275 -0
- doctr/models/classification/textnet/tensorflow.py +267 -0
- doctr/models/classification/vgg/pytorch.py +4 -2
- doctr/models/classification/vgg/tensorflow.py +5 -2
- doctr/models/classification/vit/pytorch.py +9 -3
- doctr/models/classification/vit/tensorflow.py +9 -3
- doctr/models/classification/zoo.py +7 -2
- doctr/models/core.py +1 -1
- doctr/models/detection/__init__.py +1 -0
- doctr/models/detection/_utils/pytorch.py +7 -1
- doctr/models/detection/_utils/tensorflow.py +7 -3
- doctr/models/detection/core.py +9 -3
- doctr/models/detection/differentiable_binarization/base.py +37 -25
- doctr/models/detection/differentiable_binarization/pytorch.py +80 -104
- doctr/models/detection/differentiable_binarization/tensorflow.py +74 -55
- doctr/models/detection/fast/__init__.py +6 -0
- doctr/models/detection/fast/base.py +256 -0
- doctr/models/detection/fast/pytorch.py +442 -0
- doctr/models/detection/fast/tensorflow.py +428 -0
- doctr/models/detection/linknet/base.py +12 -5
- doctr/models/detection/linknet/pytorch.py +28 -15
- doctr/models/detection/linknet/tensorflow.py +68 -88
- doctr/models/detection/predictor/pytorch.py +16 -6
- doctr/models/detection/predictor/tensorflow.py +13 -5
- doctr/models/detection/zoo.py +19 -16
- doctr/models/factory/hub.py +20 -10
- doctr/models/kie_predictor/base.py +2 -1
- doctr/models/kie_predictor/pytorch.py +28 -36
- doctr/models/kie_predictor/tensorflow.py +27 -27
- doctr/models/modules/__init__.py +1 -0
- doctr/models/modules/layers/__init__.py +6 -0
- doctr/models/modules/layers/pytorch.py +166 -0
- doctr/models/modules/layers/tensorflow.py +175 -0
- doctr/models/modules/transformer/pytorch.py +24 -22
- doctr/models/modules/transformer/tensorflow.py +6 -4
- doctr/models/modules/vision_transformer/pytorch.py +2 -4
- doctr/models/modules/vision_transformer/tensorflow.py +2 -4
- doctr/models/obj_detection/faster_rcnn/pytorch.py +4 -2
- doctr/models/predictor/base.py +14 -3
- doctr/models/predictor/pytorch.py +26 -29
- doctr/models/predictor/tensorflow.py +25 -22
- doctr/models/preprocessor/pytorch.py +14 -9
- doctr/models/preprocessor/tensorflow.py +10 -5
- doctr/models/recognition/core.py +4 -1
- doctr/models/recognition/crnn/pytorch.py +23 -16
- doctr/models/recognition/crnn/tensorflow.py +25 -17
- doctr/models/recognition/master/base.py +4 -1
- doctr/models/recognition/master/pytorch.py +20 -9
- doctr/models/recognition/master/tensorflow.py +20 -8
- doctr/models/recognition/parseq/base.py +4 -1
- doctr/models/recognition/parseq/pytorch.py +28 -22
- doctr/models/recognition/parseq/tensorflow.py +22 -11
- doctr/models/recognition/predictor/_utils.py +3 -2
- doctr/models/recognition/predictor/pytorch.py +3 -2
- doctr/models/recognition/predictor/tensorflow.py +2 -1
- doctr/models/recognition/sar/pytorch.py +14 -7
- doctr/models/recognition/sar/tensorflow.py +23 -14
- doctr/models/recognition/utils.py +5 -1
- doctr/models/recognition/vitstr/base.py +4 -1
- doctr/models/recognition/vitstr/pytorch.py +22 -13
- doctr/models/recognition/vitstr/tensorflow.py +21 -10
- doctr/models/recognition/zoo.py +4 -2
- doctr/models/utils/pytorch.py +24 -6
- doctr/models/utils/tensorflow.py +22 -3
- doctr/models/zoo.py +21 -3
- doctr/transforms/functional/base.py +8 -3
- doctr/transforms/functional/pytorch.py +23 -6
- doctr/transforms/functional/tensorflow.py +25 -5
- doctr/transforms/modules/base.py +12 -5
- doctr/transforms/modules/pytorch.py +10 -12
- doctr/transforms/modules/tensorflow.py +17 -9
- doctr/utils/common_types.py +1 -1
- doctr/utils/data.py +4 -2
- doctr/utils/fonts.py +3 -2
- doctr/utils/geometry.py +95 -26
- doctr/utils/metrics.py +36 -22
- doctr/utils/multithreading.py +5 -3
- doctr/utils/repr.py +3 -1
- doctr/utils/visualization.py +31 -8
- doctr/version.py +1 -1
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/METADATA +67 -31
- python_doctr-0.8.1.dist-info/RECORD +173 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/WHEEL +1 -1
- python_doctr-0.7.0.dist-info/RECORD +0 -161
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/LICENSE +0 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/top_level.txt +0 -0
- {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/zip-safe +0 -0
doctr/datasets/__init__.py
CHANGED
|
@@ -13,12 +13,14 @@ from .imgur5k import *
|
|
|
13
13
|
from .mjsynth import *
|
|
14
14
|
from .ocr import *
|
|
15
15
|
from .recognition import *
|
|
16
|
+
from .orientation import *
|
|
16
17
|
from .sroie import *
|
|
17
18
|
from .svhn import *
|
|
18
19
|
from .svt import *
|
|
19
20
|
from .synthtext import *
|
|
20
21
|
from .utils import *
|
|
21
22
|
from .vocabs import *
|
|
23
|
+
from .wildreceipt import *
|
|
22
24
|
|
|
23
25
|
if is_tf_available():
|
|
24
26
|
from .loader import *
|
doctr/datasets/cord.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -29,6 +29,7 @@ class CORD(VisionDataset):
|
|
|
29
29
|
>>> img, target = train_set[0]
|
|
30
30
|
|
|
31
31
|
Args:
|
|
32
|
+
----
|
|
32
33
|
train: whether the subset should be the training one
|
|
33
34
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
34
35
|
recognition_task: whether the dataset should be used for recognition task
|
|
@@ -109,9 +110,10 @@ class CORD(VisionDataset):
|
|
|
109
110
|
for crop, label in zip(crops, list(text_targets)):
|
|
110
111
|
self.data.append((crop, label))
|
|
111
112
|
else:
|
|
112
|
-
self.data.append(
|
|
113
|
-
|
|
114
|
-
|
|
113
|
+
self.data.append((
|
|
114
|
+
img_path,
|
|
115
|
+
dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)),
|
|
116
|
+
))
|
|
115
117
|
|
|
116
118
|
self.root = tmp_root
|
|
117
119
|
|
doctr/datasets/datasets/base.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -59,7 +59,7 @@ class _AbstractDataset:
|
|
|
59
59
|
# Conditions to assess it is detection model with multiple classes and avoid confusion with other tasks.
|
|
60
60
|
if (
|
|
61
61
|
isinstance(target, dict)
|
|
62
|
-
and all(
|
|
62
|
+
and all(isinstance(item, np.ndarray) for item in target.values())
|
|
63
63
|
and set(target.keys()) != {"boxes", "labels"} # avoid confusion with obj detection target
|
|
64
64
|
):
|
|
65
65
|
img_transformed = _copy_tensor(img)
|
|
@@ -82,6 +82,7 @@ class _VisionDataset(_AbstractDataset):
|
|
|
82
82
|
"""Implements an abstract dataset
|
|
83
83
|
|
|
84
84
|
Args:
|
|
85
|
+
----
|
|
85
86
|
url: URL of the dataset
|
|
86
87
|
file_name: name of the file once downloaded
|
|
87
88
|
file_hash: expected SHA256 of the file
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -18,6 +18,8 @@ __all__ = ["AbstractDataset", "VisionDataset"]
|
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class AbstractDataset(_AbstractDataset):
|
|
21
|
+
"""Abstract class for all datasets"""
|
|
22
|
+
|
|
21
23
|
def _read_sample(self, index: int) -> Tuple[torch.Tensor, Any]:
|
|
22
24
|
img_name, target = self.data[index]
|
|
23
25
|
|
|
@@ -53,5 +55,5 @@ class AbstractDataset(_AbstractDataset):
|
|
|
53
55
|
return images, list(targets)
|
|
54
56
|
|
|
55
57
|
|
|
56
|
-
class VisionDataset(AbstractDataset, _VisionDataset):
|
|
58
|
+
class VisionDataset(AbstractDataset, _VisionDataset): # noqa: D101
|
|
57
59
|
pass
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -18,6 +18,8 @@ __all__ = ["AbstractDataset", "VisionDataset"]
|
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class AbstractDataset(_AbstractDataset):
|
|
21
|
+
"""Abstract class for all datasets"""
|
|
22
|
+
|
|
21
23
|
def _read_sample(self, index: int) -> Tuple[tf.Tensor, Any]:
|
|
22
24
|
img_name, target = self.data[index]
|
|
23
25
|
|
|
@@ -53,5 +55,5 @@ class AbstractDataset(_AbstractDataset):
|
|
|
53
55
|
return images, list(targets)
|
|
54
56
|
|
|
55
57
|
|
|
56
|
-
class VisionDataset(AbstractDataset, _VisionDataset):
|
|
58
|
+
class VisionDataset(AbstractDataset, _VisionDataset): # noqa: D101
|
|
57
59
|
pass
|
doctr/datasets/detection.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -26,6 +26,7 @@ class DetectionDataset(AbstractDataset):
|
|
|
26
26
|
>>> img, target = train_set[0]
|
|
27
27
|
|
|
28
28
|
Args:
|
|
29
|
+
----
|
|
29
30
|
img_folder: folder with all the images of the dataset
|
|
30
31
|
label_path: path to the annotations of each image
|
|
31
32
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
@@ -66,14 +67,16 @@ class DetectionDataset(AbstractDataset):
|
|
|
66
67
|
def format_polygons(
|
|
67
68
|
self, polygons: Union[List, Dict], use_polygons: bool, np_dtype: Type
|
|
68
69
|
) -> Tuple[np.ndarray, List[str]]:
|
|
69
|
-
"""
|
|
70
|
+
"""Format polygons into an array
|
|
70
71
|
|
|
71
72
|
Args:
|
|
73
|
+
----
|
|
72
74
|
polygons: the bounding boxes
|
|
73
75
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
74
76
|
np_dtype: dtype of array
|
|
75
77
|
|
|
76
78
|
Returns:
|
|
79
|
+
-------
|
|
77
80
|
geoms: bounding boxes as np array
|
|
78
81
|
polygons_classes: list of classes for each bounding box
|
|
79
82
|
"""
|
|
@@ -92,4 +95,4 @@ class DetectionDataset(AbstractDataset):
|
|
|
92
95
|
|
|
93
96
|
@property
|
|
94
97
|
def class_names(self):
|
|
95
|
-
return sorted(
|
|
98
|
+
return sorted(set(self._class_names))
|
doctr/datasets/doc_artefacts.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -26,6 +26,7 @@ class DocArtefacts(VisionDataset):
|
|
|
26
26
|
>>> img, target = train_set[0]
|
|
27
27
|
|
|
28
28
|
Args:
|
|
29
|
+
----
|
|
29
30
|
train: whether the subset should be the training one
|
|
30
31
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
31
32
|
**kwargs: keyword arguments from `VisionDataset`.
|
doctr/datasets/funsd.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -29,6 +29,7 @@ class FUNSD(VisionDataset):
|
|
|
29
29
|
>>> img, target = train_set[0]
|
|
30
30
|
|
|
31
31
|
Args:
|
|
32
|
+
----
|
|
32
33
|
train: whether the subset should be the training one
|
|
33
34
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
34
35
|
recognition_task: whether the dataset should be used for recognition task
|
|
@@ -81,7 +82,7 @@ class FUNSD(VisionDataset):
|
|
|
81
82
|
text_targets, box_targets = zip(*_targets)
|
|
82
83
|
if use_polygons:
|
|
83
84
|
# xmin, ymin, xmax, ymax -> (x, y) coordinates of top left, top right, bottom right, bottom left corners
|
|
84
|
-
box_targets = [
|
|
85
|
+
box_targets = [ # type: ignore[assignment]
|
|
85
86
|
[
|
|
86
87
|
[box[0], box[1]],
|
|
87
88
|
[box[2], box[1]],
|
|
@@ -100,12 +101,10 @@ class FUNSD(VisionDataset):
|
|
|
100
101
|
if not any(char in label for char in ["☑", "☐", "\uf703", "\uf702"]):
|
|
101
102
|
self.data.append((crop, label))
|
|
102
103
|
else:
|
|
103
|
-
self.data.append(
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
)
|
|
108
|
-
)
|
|
104
|
+
self.data.append((
|
|
105
|
+
img_path,
|
|
106
|
+
dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=list(text_targets)),
|
|
107
|
+
))
|
|
109
108
|
|
|
110
109
|
self.root = tmp_root
|
|
111
110
|
|
doctr/datasets/generator/base.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -24,6 +24,7 @@ def synthesize_text_img(
|
|
|
24
24
|
"""Generate a synthetic text image
|
|
25
25
|
|
|
26
26
|
Args:
|
|
27
|
+
----
|
|
27
28
|
text: the text to render as an image
|
|
28
29
|
font_size: the size of the font
|
|
29
30
|
font_family: the font family (has to be installed on your system)
|
|
@@ -31,9 +32,9 @@ def synthesize_text_img(
|
|
|
31
32
|
text_color: text color on the final image
|
|
32
33
|
|
|
33
34
|
Returns:
|
|
35
|
+
-------
|
|
34
36
|
PIL image of the text
|
|
35
37
|
"""
|
|
36
|
-
|
|
37
38
|
background_color = (0, 0, 0) if background_color is None else background_color
|
|
38
39
|
text_color = (255, 255, 255) if text_color is None else text_color
|
|
39
40
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -18,6 +18,7 @@ class CharacterGenerator(_CharacterGenerator):
|
|
|
18
18
|
>>> img, target = ds[0]
|
|
19
19
|
|
|
20
20
|
Args:
|
|
21
|
+
----
|
|
21
22
|
vocab: vocabulary to take the character from
|
|
22
23
|
num_samples: number of samples that will be generated iterating over the dataset
|
|
23
24
|
cache_samples: whether generated images should be cached firsthand
|
|
@@ -39,6 +40,7 @@ class WordGenerator(_WordGenerator):
|
|
|
39
40
|
>>> img, target = ds[0]
|
|
40
41
|
|
|
41
42
|
Args:
|
|
43
|
+
----
|
|
42
44
|
vocab: vocabulary to take the character from
|
|
43
45
|
min_chars: minimum number of characters in a word
|
|
44
46
|
max_chars: maximum number of characters in a word
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -18,6 +18,7 @@ class CharacterGenerator(_CharacterGenerator):
|
|
|
18
18
|
>>> img, target = ds[0]
|
|
19
19
|
|
|
20
20
|
Args:
|
|
21
|
+
----
|
|
21
22
|
vocab: vocabulary to take the character from
|
|
22
23
|
num_samples: number of samples that will be generated iterating over the dataset
|
|
23
24
|
cache_samples: whether generated images should be cached firsthand
|
|
@@ -45,6 +46,7 @@ class WordGenerator(_WordGenerator):
|
|
|
45
46
|
>>> img, target = ds[0]
|
|
46
47
|
|
|
47
48
|
Args:
|
|
49
|
+
----
|
|
48
50
|
vocab: vocabulary to take the character from
|
|
49
51
|
min_chars: minimum number of characters in a word
|
|
50
52
|
max_chars: maximum number of characters in a word
|
doctr/datasets/ic03.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -28,6 +28,7 @@ class IC03(VisionDataset):
|
|
|
28
28
|
>>> img, target = train_set[0]
|
|
29
29
|
|
|
30
30
|
Args:
|
|
31
|
+
----
|
|
31
32
|
train: whether the subset should be the training one
|
|
32
33
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
33
34
|
recognition_task: whether the dataset should be used for recognition task
|
|
@@ -73,7 +74,7 @@ class IC03(VisionDataset):
|
|
|
73
74
|
xml_root = xml_tree.getroot()
|
|
74
75
|
|
|
75
76
|
for image in tqdm(iterable=xml_root, desc="Unpacking IC03", total=len(xml_root)):
|
|
76
|
-
name,
|
|
77
|
+
name, _resolution, rectangles = image
|
|
77
78
|
|
|
78
79
|
# File existence check
|
|
79
80
|
if not os.path.exists(os.path.join(tmp_root, name.text)):
|
doctr/datasets/ic13.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -33,6 +33,7 @@ class IC13(AbstractDataset):
|
|
|
33
33
|
>>> img, target = test_set[0]
|
|
34
34
|
|
|
35
35
|
Args:
|
|
36
|
+
----
|
|
36
37
|
img_folder: folder with all the images of the dataset
|
|
37
38
|
label_folder: folder with all annotation files for the images
|
|
38
39
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
doctr/datasets/iiit5k.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -30,6 +30,7 @@ class IIIT5K(VisionDataset):
|
|
|
30
30
|
>>> img, target = train_set[0]
|
|
31
31
|
|
|
32
32
|
Args:
|
|
33
|
+
----
|
|
33
34
|
train: whether the subset should be the training one
|
|
34
35
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
35
36
|
recognition_task: whether the dataset should be used for recognition task
|
|
@@ -91,9 +92,10 @@ class IIIT5K(VisionDataset):
|
|
|
91
92
|
box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]
|
|
92
93
|
|
|
93
94
|
# label are casted to list where each char corresponds to the character's bounding box
|
|
94
|
-
self.data.append(
|
|
95
|
-
|
|
96
|
-
|
|
95
|
+
self.data.append((
|
|
96
|
+
_raw_path,
|
|
97
|
+
dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=list(_raw_label)),
|
|
98
|
+
))
|
|
97
99
|
|
|
98
100
|
self.root = tmp_root
|
|
99
101
|
|
doctr/datasets/iiithws.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -32,6 +32,7 @@ class IIITHWS(AbstractDataset):
|
|
|
32
32
|
>>> img, target = test_set[0]
|
|
33
33
|
|
|
34
34
|
Args:
|
|
35
|
+
----
|
|
35
36
|
img_folder: folder with all the images of the dataset
|
|
36
37
|
label_path: path to the file with the labels
|
|
37
38
|
train: whether the subset should be the training one
|
doctr/datasets/imgur5k.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -40,6 +40,7 @@ class IMGUR5K(AbstractDataset):
|
|
|
40
40
|
>>> img, target = test_set[0]
|
|
41
41
|
|
|
42
42
|
Args:
|
|
43
|
+
----
|
|
43
44
|
img_folder: folder with all the images of the dataset
|
|
44
45
|
label_path: path to the annotations file of the dataset
|
|
45
46
|
train: whether the subset should be the training one
|
|
@@ -111,7 +112,7 @@ class IMGUR5K(AbstractDataset):
|
|
|
111
112
|
if ann["word"] != "."
|
|
112
113
|
]
|
|
113
114
|
# (x, y) coordinates of top left, top right, bottom right, bottom left corners
|
|
114
|
-
box_targets = [cv2.boxPoints(((box[0], box[1]), (box[2], box[3]), box[4])) for box in _boxes]
|
|
115
|
+
box_targets = [cv2.boxPoints(((box[0], box[1]), (box[2], box[3]), box[4])) for box in _boxes] # type: ignore[arg-type]
|
|
115
116
|
|
|
116
117
|
if not use_polygons:
|
|
117
118
|
# xmin, ymin, xmax, ymax
|
doctr/datasets/loader.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -18,12 +18,13 @@ def default_collate(samples):
|
|
|
18
18
|
"""Collate multiple elements into batches
|
|
19
19
|
|
|
20
20
|
Args:
|
|
21
|
+
----
|
|
21
22
|
samples: list of N tuples containing M elements
|
|
22
23
|
|
|
23
24
|
Returns:
|
|
25
|
+
-------
|
|
24
26
|
Tuple of M sequences contianing N elements each
|
|
25
27
|
"""
|
|
26
|
-
|
|
27
28
|
batch_data = zip(*samples)
|
|
28
29
|
|
|
29
30
|
tf_data = tuple(tf.stack(elt, axis=0) for elt in batch_data)
|
|
@@ -41,6 +42,7 @@ class DataLoader:
|
|
|
41
42
|
>>> images, targets = next(train_iter)
|
|
42
43
|
|
|
43
44
|
Args:
|
|
45
|
+
----
|
|
44
46
|
dataset: the dataset
|
|
45
47
|
shuffle: whether the samples should be shuffled before passing it to the iterator
|
|
46
48
|
batch_size: number of elements in each batch
|
doctr/datasets/mjsynth.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -30,6 +30,7 @@ class MJSynth(AbstractDataset):
|
|
|
30
30
|
>>> img, target = test_set[0]
|
|
31
31
|
|
|
32
32
|
Args:
|
|
33
|
+
----
|
|
33
34
|
img_folder: folder with all the images of the dataset
|
|
34
35
|
label_path: path to the file with the labels
|
|
35
36
|
train: whether the subset should be the training one
|
doctr/datasets/ocr.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -24,6 +24,7 @@ class OCRDataset(AbstractDataset):
|
|
|
24
24
|
>>> img, target = train_set[0]
|
|
25
25
|
|
|
26
26
|
Args:
|
|
27
|
+
----
|
|
27
28
|
img_folder: local path to image folder (all jpg at the root)
|
|
28
29
|
label_file: local path to the label file
|
|
29
30
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
|
+
|
|
3
|
+
# This program is licensed under the Apache License 2.0.
|
|
4
|
+
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
from typing import Any, List, Tuple
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
from .datasets import AbstractDataset
|
|
12
|
+
|
|
13
|
+
__all__ = ["OrientationDataset"]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class OrientationDataset(AbstractDataset):
|
|
17
|
+
"""Implements a basic image dataset where targets are filled with zeros.
|
|
18
|
+
|
|
19
|
+
>>> from doctr.datasets import OrientationDataset
|
|
20
|
+
>>> train_set = OrientationDataset(img_folder="/path/to/images")
|
|
21
|
+
>>> img, target = train_set[0]
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
----
|
|
25
|
+
img_folder: folder with all the images of the dataset
|
|
26
|
+
**kwargs: keyword arguments from `AbstractDataset`.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
def __init__(
|
|
30
|
+
self,
|
|
31
|
+
img_folder: str,
|
|
32
|
+
**kwargs: Any,
|
|
33
|
+
) -> None:
|
|
34
|
+
super().__init__(
|
|
35
|
+
img_folder,
|
|
36
|
+
**kwargs,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# initialize dataset with 0 degree rotation targets
|
|
40
|
+
self.data: List[Tuple[str, np.ndarray]] = [(img_name, np.array([0])) for img_name in os.listdir(self.root)]
|
doctr/datasets/recognition.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -22,6 +22,7 @@ class RecognitionDataset(AbstractDataset):
|
|
|
22
22
|
>>> img, target = train_set[0]
|
|
23
23
|
|
|
24
24
|
Args:
|
|
25
|
+
----
|
|
25
26
|
img_folder: path to the images folder
|
|
26
27
|
labels_path: pathe to the json file containing all labels (character sequences)
|
|
27
28
|
**kwargs: keyword arguments from `AbstractDataset`.
|
|
@@ -36,7 +37,7 @@ class RecognitionDataset(AbstractDataset):
|
|
|
36
37
|
super().__init__(img_folder, **kwargs)
|
|
37
38
|
|
|
38
39
|
self.data: List[Tuple[str, str]] = []
|
|
39
|
-
with open(labels_path) as f:
|
|
40
|
+
with open(labels_path, encoding="utf-8") as f:
|
|
40
41
|
labels = json.load(f)
|
|
41
42
|
|
|
42
43
|
for img_name, label in labels.items():
|
doctr/datasets/sroie.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -29,6 +29,7 @@ class SROIE(VisionDataset):
|
|
|
29
29
|
>>> img, target = train_set[0]
|
|
30
30
|
|
|
31
31
|
Args:
|
|
32
|
+
----
|
|
32
33
|
train: whether the subset should be the training one
|
|
33
34
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
34
35
|
recognition_task: whether the dataset should be used for recognition task
|
doctr/datasets/svhn.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -28,6 +28,7 @@ class SVHN(VisionDataset):
|
|
|
28
28
|
>>> img, target = train_set[0]
|
|
29
29
|
|
|
30
30
|
Args:
|
|
31
|
+
----
|
|
31
32
|
train: whether the subset should be the training one
|
|
32
33
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
33
34
|
recognition_task: whether the dataset should be used for recognition task
|
doctr/datasets/svt.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -28,6 +28,7 @@ class SVT(VisionDataset):
|
|
|
28
28
|
>>> img, target = train_set[0]
|
|
29
29
|
|
|
30
30
|
Args:
|
|
31
|
+
----
|
|
31
32
|
train: whether the subset should be the training one
|
|
32
33
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
33
34
|
recognition_task: whether the dataset should be used for recognition task
|
|
@@ -66,7 +67,7 @@ class SVT(VisionDataset):
|
|
|
66
67
|
xml_root = xml_tree.getroot()
|
|
67
68
|
|
|
68
69
|
for image in tqdm(iterable=xml_root, desc="Unpacking SVT", total=len(xml_root)):
|
|
69
|
-
name, _, _,
|
|
70
|
+
name, _, _, _resolution, rectangles = image
|
|
70
71
|
|
|
71
72
|
# File existence check
|
|
72
73
|
if not os.path.exists(os.path.join(tmp_root, name.text)):
|
doctr/datasets/synthtext.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-
|
|
1
|
+
# Copyright (C) 2021-2024, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -31,6 +31,7 @@ class SynthText(VisionDataset):
|
|
|
31
31
|
>>> img, target = train_set[0]
|
|
32
32
|
|
|
33
33
|
Args:
|
|
34
|
+
----
|
|
34
35
|
train: whether the subset should be the training one
|
|
35
36
|
use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
|
|
36
37
|
recognition_task: whether the dataset should be used for recognition task
|