python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/contrib/__init__.py +1 -0
- doctr/contrib/artefacts.py +7 -9
- doctr/contrib/base.py +8 -17
- doctr/datasets/cord.py +17 -7
- doctr/datasets/datasets/__init__.py +4 -4
- doctr/datasets/datasets/base.py +16 -16
- doctr/datasets/datasets/pytorch.py +12 -12
- doctr/datasets/datasets/tensorflow.py +10 -10
- doctr/datasets/detection.py +6 -9
- doctr/datasets/doc_artefacts.py +3 -4
- doctr/datasets/funsd.py +17 -6
- doctr/datasets/generator/__init__.py +4 -4
- doctr/datasets/generator/base.py +16 -17
- doctr/datasets/generator/pytorch.py +1 -3
- doctr/datasets/generator/tensorflow.py +1 -3
- doctr/datasets/ic03.py +14 -5
- doctr/datasets/ic13.py +13 -5
- doctr/datasets/iiit5k.py +31 -20
- doctr/datasets/iiithws.py +4 -5
- doctr/datasets/imgur5k.py +15 -5
- doctr/datasets/loader.py +4 -7
- doctr/datasets/mjsynth.py +6 -5
- doctr/datasets/ocr.py +3 -4
- doctr/datasets/orientation.py +3 -4
- doctr/datasets/recognition.py +3 -4
- doctr/datasets/sroie.py +16 -5
- doctr/datasets/svhn.py +16 -5
- doctr/datasets/svt.py +14 -5
- doctr/datasets/synthtext.py +14 -5
- doctr/datasets/utils.py +37 -27
- doctr/datasets/vocabs.py +21 -7
- doctr/datasets/wildreceipt.py +25 -10
- doctr/file_utils.py +18 -4
- doctr/io/elements.py +69 -81
- doctr/io/html.py +1 -3
- doctr/io/image/__init__.py +3 -3
- doctr/io/image/base.py +2 -5
- doctr/io/image/pytorch.py +3 -12
- doctr/io/image/tensorflow.py +2 -11
- doctr/io/pdf.py +5 -7
- doctr/io/reader.py +5 -11
- doctr/models/_utils.py +14 -22
- doctr/models/builder.py +32 -50
- doctr/models/classification/magc_resnet/__init__.py +3 -3
- doctr/models/classification/magc_resnet/pytorch.py +10 -13
- doctr/models/classification/magc_resnet/tensorflow.py +21 -17
- doctr/models/classification/mobilenet/__init__.py +3 -3
- doctr/models/classification/mobilenet/pytorch.py +7 -17
- doctr/models/classification/mobilenet/tensorflow.py +22 -29
- doctr/models/classification/predictor/__init__.py +4 -4
- doctr/models/classification/predictor/pytorch.py +13 -11
- doctr/models/classification/predictor/tensorflow.py +13 -11
- doctr/models/classification/resnet/__init__.py +4 -4
- doctr/models/classification/resnet/pytorch.py +21 -31
- doctr/models/classification/resnet/tensorflow.py +41 -39
- doctr/models/classification/textnet/__init__.py +3 -3
- doctr/models/classification/textnet/pytorch.py +10 -17
- doctr/models/classification/textnet/tensorflow.py +19 -20
- doctr/models/classification/vgg/__init__.py +3 -3
- doctr/models/classification/vgg/pytorch.py +5 -7
- doctr/models/classification/vgg/tensorflow.py +18 -15
- doctr/models/classification/vit/__init__.py +3 -3
- doctr/models/classification/vit/pytorch.py +8 -14
- doctr/models/classification/vit/tensorflow.py +16 -16
- doctr/models/classification/zoo.py +36 -19
- doctr/models/core.py +3 -3
- doctr/models/detection/_utils/__init__.py +4 -4
- doctr/models/detection/_utils/base.py +4 -7
- doctr/models/detection/_utils/pytorch.py +1 -5
- doctr/models/detection/_utils/tensorflow.py +1 -5
- doctr/models/detection/core.py +2 -8
- doctr/models/detection/differentiable_binarization/__init__.py +4 -4
- doctr/models/detection/differentiable_binarization/base.py +7 -17
- doctr/models/detection/differentiable_binarization/pytorch.py +27 -30
- doctr/models/detection/differentiable_binarization/tensorflow.py +49 -37
- doctr/models/detection/fast/__init__.py +4 -4
- doctr/models/detection/fast/base.py +6 -14
- doctr/models/detection/fast/pytorch.py +24 -31
- doctr/models/detection/fast/tensorflow.py +28 -37
- doctr/models/detection/linknet/__init__.py +4 -4
- doctr/models/detection/linknet/base.py +6 -15
- doctr/models/detection/linknet/pytorch.py +24 -27
- doctr/models/detection/linknet/tensorflow.py +36 -33
- doctr/models/detection/predictor/__init__.py +5 -5
- doctr/models/detection/predictor/pytorch.py +6 -7
- doctr/models/detection/predictor/tensorflow.py +7 -8
- doctr/models/detection/zoo.py +27 -7
- doctr/models/factory/hub.py +8 -13
- doctr/models/kie_predictor/__init__.py +5 -5
- doctr/models/kie_predictor/base.py +8 -5
- doctr/models/kie_predictor/pytorch.py +22 -19
- doctr/models/kie_predictor/tensorflow.py +21 -15
- doctr/models/modules/layers/__init__.py +3 -3
- doctr/models/modules/layers/pytorch.py +6 -9
- doctr/models/modules/layers/tensorflow.py +5 -7
- doctr/models/modules/transformer/__init__.py +3 -3
- doctr/models/modules/transformer/pytorch.py +12 -13
- doctr/models/modules/transformer/tensorflow.py +9 -12
- doctr/models/modules/vision_transformer/__init__.py +3 -3
- doctr/models/modules/vision_transformer/pytorch.py +3 -4
- doctr/models/modules/vision_transformer/tensorflow.py +4 -4
- doctr/models/predictor/__init__.py +5 -5
- doctr/models/predictor/base.py +52 -41
- doctr/models/predictor/pytorch.py +16 -13
- doctr/models/predictor/tensorflow.py +16 -10
- doctr/models/preprocessor/__init__.py +4 -4
- doctr/models/preprocessor/pytorch.py +13 -17
- doctr/models/preprocessor/tensorflow.py +11 -15
- doctr/models/recognition/core.py +3 -7
- doctr/models/recognition/crnn/__init__.py +4 -4
- doctr/models/recognition/crnn/pytorch.py +20 -28
- doctr/models/recognition/crnn/tensorflow.py +19 -29
- doctr/models/recognition/master/__init__.py +3 -3
- doctr/models/recognition/master/base.py +3 -7
- doctr/models/recognition/master/pytorch.py +22 -24
- doctr/models/recognition/master/tensorflow.py +21 -26
- doctr/models/recognition/parseq/__init__.py +3 -3
- doctr/models/recognition/parseq/base.py +3 -7
- doctr/models/recognition/parseq/pytorch.py +26 -26
- doctr/models/recognition/parseq/tensorflow.py +26 -30
- doctr/models/recognition/predictor/__init__.py +5 -5
- doctr/models/recognition/predictor/_utils.py +7 -10
- doctr/models/recognition/predictor/pytorch.py +6 -6
- doctr/models/recognition/predictor/tensorflow.py +5 -6
- doctr/models/recognition/sar/__init__.py +4 -4
- doctr/models/recognition/sar/pytorch.py +20 -21
- doctr/models/recognition/sar/tensorflow.py +19 -24
- doctr/models/recognition/utils.py +5 -10
- doctr/models/recognition/vitstr/__init__.py +4 -4
- doctr/models/recognition/vitstr/base.py +3 -7
- doctr/models/recognition/vitstr/pytorch.py +18 -20
- doctr/models/recognition/vitstr/tensorflow.py +21 -24
- doctr/models/recognition/zoo.py +22 -11
- doctr/models/utils/__init__.py +4 -4
- doctr/models/utils/pytorch.py +13 -16
- doctr/models/utils/tensorflow.py +31 -30
- doctr/models/zoo.py +1 -5
- doctr/transforms/functional/__init__.py +3 -3
- doctr/transforms/functional/base.py +4 -11
- doctr/transforms/functional/pytorch.py +21 -29
- doctr/transforms/functional/tensorflow.py +10 -22
- doctr/transforms/modules/__init__.py +4 -4
- doctr/transforms/modules/base.py +48 -55
- doctr/transforms/modules/pytorch.py +65 -28
- doctr/transforms/modules/tensorflow.py +33 -44
- doctr/utils/common_types.py +8 -9
- doctr/utils/data.py +8 -12
- doctr/utils/fonts.py +2 -7
- doctr/utils/geometry.py +120 -64
- doctr/utils/metrics.py +18 -38
- doctr/utils/multithreading.py +4 -6
- doctr/utils/reconstitution.py +157 -75
- doctr/utils/repr.py +2 -3
- doctr/utils/visualization.py +16 -29
- doctr/version.py +1 -1
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/METADATA +59 -57
- python_doctr-0.11.0.dist-info/RECORD +173 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/WHEEL +1 -1
- python_doctr-0.9.0.dist-info/RECORD +0 -173
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/LICENSE +0 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/zip-safe +0 -0
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import random
|
|
7
|
-
from
|
|
7
|
+
from collections.abc import Callable, Iterable
|
|
8
|
+
from typing import Any
|
|
8
9
|
|
|
9
10
|
import numpy as np
|
|
10
11
|
import tensorflow as tf
|
|
@@ -43,13 +44,12 @@ class Compose(NestedObject):
|
|
|
43
44
|
>>> out = transfos(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
|
|
44
45
|
|
|
45
46
|
Args:
|
|
46
|
-
----
|
|
47
47
|
transforms: list of transformation modules
|
|
48
48
|
"""
|
|
49
49
|
|
|
50
|
-
_children_names:
|
|
50
|
+
_children_names: list[str] = ["transforms"]
|
|
51
51
|
|
|
52
|
-
def __init__(self, transforms:
|
|
52
|
+
def __init__(self, transforms: list[Callable[[Any], Any]]) -> None:
|
|
53
53
|
self.transforms = transforms
|
|
54
54
|
|
|
55
55
|
def __call__(self, x: Any) -> Any:
|
|
@@ -68,7 +68,6 @@ class Resize(NestedObject):
|
|
|
68
68
|
>>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
|
|
69
69
|
|
|
70
70
|
Args:
|
|
71
|
-
----
|
|
72
71
|
output_size: expected output size
|
|
73
72
|
method: interpolation method
|
|
74
73
|
preserve_aspect_ratio: if `True`, preserve aspect ratio and pad the rest with zeros
|
|
@@ -77,7 +76,7 @@ class Resize(NestedObject):
|
|
|
77
76
|
|
|
78
77
|
def __init__(
|
|
79
78
|
self,
|
|
80
|
-
output_size:
|
|
79
|
+
output_size: int | tuple[int, int],
|
|
81
80
|
method: str = "bilinear",
|
|
82
81
|
preserve_aspect_ratio: bool = False,
|
|
83
82
|
symmetric_pad: bool = False,
|
|
@@ -104,32 +103,37 @@ class Resize(NestedObject):
|
|
|
104
103
|
def __call__(
|
|
105
104
|
self,
|
|
106
105
|
img: tf.Tensor,
|
|
107
|
-
target:
|
|
108
|
-
) ->
|
|
106
|
+
target: np.ndarray | None = None,
|
|
107
|
+
) -> tf.Tensor | tuple[tf.Tensor, np.ndarray]:
|
|
109
108
|
input_dtype = img.dtype
|
|
109
|
+
self.output_size = (
|
|
110
|
+
(self.output_size, self.output_size) if isinstance(self.output_size, int) else self.output_size
|
|
111
|
+
)
|
|
110
112
|
|
|
111
113
|
img = tf.image.resize(img, self.wanted_size, self.method, self.preserve_aspect_ratio, self.antialias)
|
|
112
114
|
# It will produce an un-padded resized image, with a side shorter than wanted if we preserve aspect ratio
|
|
113
115
|
raw_shape = img.shape[:2]
|
|
116
|
+
if self.symmetric_pad:
|
|
117
|
+
half_pad = (int((self.output_size[0] - img.shape[0]) / 2), 0)
|
|
114
118
|
if self.preserve_aspect_ratio:
|
|
115
119
|
if isinstance(self.output_size, (tuple, list)):
|
|
116
120
|
# In that case we need to pad because we want to enforce both width and height
|
|
117
121
|
if not self.symmetric_pad:
|
|
118
|
-
|
|
122
|
+
half_pad = (0, 0)
|
|
119
123
|
elif self.output_size[0] == img.shape[0]:
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
img = tf.image.pad_to_bounding_box(img, *offset, *self.output_size)
|
|
124
|
+
half_pad = (0, int((self.output_size[1] - img.shape[1]) / 2))
|
|
125
|
+
# Pad image
|
|
126
|
+
img = tf.image.pad_to_bounding_box(img, *half_pad, *self.output_size)
|
|
124
127
|
|
|
125
128
|
# In case boxes are provided, resize boxes if needed (for detection task if preserve aspect ratio)
|
|
126
129
|
if target is not None:
|
|
130
|
+
if self.symmetric_pad:
|
|
131
|
+
offset = half_pad[0] / img.shape[0], half_pad[1] / img.shape[1]
|
|
132
|
+
|
|
127
133
|
if self.preserve_aspect_ratio:
|
|
128
134
|
# Get absolute coords
|
|
129
135
|
if target.shape[1:] == (4,):
|
|
130
136
|
if isinstance(self.output_size, (tuple, list)) and self.symmetric_pad:
|
|
131
|
-
if np.max(target) <= 1:
|
|
132
|
-
offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
|
|
133
137
|
target[:, [0, 2]] = offset[1] + target[:, [0, 2]] * raw_shape[1] / img.shape[1]
|
|
134
138
|
target[:, [1, 3]] = offset[0] + target[:, [1, 3]] * raw_shape[0] / img.shape[0]
|
|
135
139
|
else:
|
|
@@ -137,16 +141,15 @@ class Resize(NestedObject):
|
|
|
137
141
|
target[:, [1, 3]] *= raw_shape[0] / img.shape[0]
|
|
138
142
|
elif target.shape[1:] == (4, 2):
|
|
139
143
|
if isinstance(self.output_size, (tuple, list)) and self.symmetric_pad:
|
|
140
|
-
if np.max(target) <= 1:
|
|
141
|
-
offset = offset[0] / img.shape[0], offset[1] / img.shape[1]
|
|
142
144
|
target[..., 0] = offset[1] + target[..., 0] * raw_shape[1] / img.shape[1]
|
|
143
145
|
target[..., 1] = offset[0] + target[..., 1] * raw_shape[0] / img.shape[0]
|
|
144
146
|
else:
|
|
145
147
|
target[..., 0] *= raw_shape[1] / img.shape[1]
|
|
146
148
|
target[..., 1] *= raw_shape[0] / img.shape[0]
|
|
147
149
|
else:
|
|
148
|
-
raise AssertionError
|
|
149
|
-
|
|
150
|
+
raise AssertionError("Boxes should be in the format (n_boxes, 4, 2) or (n_boxes, 4)")
|
|
151
|
+
|
|
152
|
+
return tf.cast(img, dtype=input_dtype), np.clip(target, 0, 1)
|
|
150
153
|
|
|
151
154
|
return tf.cast(img, dtype=input_dtype)
|
|
152
155
|
|
|
@@ -160,12 +163,11 @@ class Normalize(NestedObject):
|
|
|
160
163
|
>>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
|
|
161
164
|
|
|
162
165
|
Args:
|
|
163
|
-
----
|
|
164
166
|
mean: average value per channel
|
|
165
167
|
std: standard deviation per channel
|
|
166
168
|
"""
|
|
167
169
|
|
|
168
|
-
def __init__(self, mean:
|
|
170
|
+
def __init__(self, mean: tuple[float, float, float], std: tuple[float, float, float]) -> None:
|
|
169
171
|
self.mean = tf.constant(mean)
|
|
170
172
|
self.std = tf.constant(std)
|
|
171
173
|
|
|
@@ -187,7 +189,6 @@ class LambdaTransformation(NestedObject):
|
|
|
187
189
|
>>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
|
|
188
190
|
|
|
189
191
|
Args:
|
|
190
|
-
----
|
|
191
192
|
fn: the function to be applied to the input tensor
|
|
192
193
|
"""
|
|
193
194
|
|
|
@@ -225,7 +226,6 @@ class RandomBrightness(NestedObject):
|
|
|
225
226
|
>>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
|
|
226
227
|
|
|
227
228
|
Args:
|
|
228
|
-
----
|
|
229
229
|
max_delta: offset to add to each pixel is randomly picked in [-max_delta, max_delta]
|
|
230
230
|
p: probability to apply transformation
|
|
231
231
|
"""
|
|
@@ -250,7 +250,6 @@ class RandomContrast(NestedObject):
|
|
|
250
250
|
>>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
|
|
251
251
|
|
|
252
252
|
Args:
|
|
253
|
-
----
|
|
254
253
|
delta: multiplicative factor is picked in [1-delta, 1+delta] (reduce contrast if factor<1)
|
|
255
254
|
"""
|
|
256
255
|
|
|
@@ -274,7 +273,6 @@ class RandomSaturation(NestedObject):
|
|
|
274
273
|
>>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
|
|
275
274
|
|
|
276
275
|
Args:
|
|
277
|
-
----
|
|
278
276
|
delta: multiplicative factor is picked in [1-delta, 1+delta] (reduce saturation if factor<1)
|
|
279
277
|
"""
|
|
280
278
|
|
|
@@ -297,7 +295,6 @@ class RandomHue(NestedObject):
|
|
|
297
295
|
>>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
|
|
298
296
|
|
|
299
297
|
Args:
|
|
300
|
-
----
|
|
301
298
|
max_delta: offset to add to each pixel is randomly picked in [-max_delta, max_delta]
|
|
302
299
|
"""
|
|
303
300
|
|
|
@@ -320,7 +317,6 @@ class RandomGamma(NestedObject):
|
|
|
320
317
|
>>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1))
|
|
321
318
|
|
|
322
319
|
Args:
|
|
323
|
-
----
|
|
324
320
|
min_gamma: non-negative real number, lower bound for gamma param
|
|
325
321
|
max_gamma: non-negative real number, upper bound for gamma
|
|
326
322
|
min_gain: lower bound for constant multiplier
|
|
@@ -358,7 +354,6 @@ class RandomJpegQuality(NestedObject):
|
|
|
358
354
|
>>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
|
|
359
355
|
|
|
360
356
|
Args:
|
|
361
|
-
----
|
|
362
357
|
min_quality: int between [0, 100]
|
|
363
358
|
max_quality: int between [0, 100]
|
|
364
359
|
"""
|
|
@@ -383,19 +378,17 @@ class GaussianBlur(NestedObject):
|
|
|
383
378
|
>>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
|
|
384
379
|
|
|
385
380
|
Args:
|
|
386
|
-
----
|
|
387
381
|
kernel_shape: size of the blurring kernel
|
|
388
382
|
std: min and max value of the standard deviation
|
|
389
383
|
"""
|
|
390
384
|
|
|
391
|
-
def __init__(self, kernel_shape:
|
|
385
|
+
def __init__(self, kernel_shape: int | Iterable[int], std: tuple[float, float]) -> None:
|
|
392
386
|
self.kernel_shape = kernel_shape
|
|
393
387
|
self.std = std
|
|
394
388
|
|
|
395
389
|
def extra_repr(self) -> str:
|
|
396
390
|
return f"kernel_shape={self.kernel_shape}, std={self.std}"
|
|
397
391
|
|
|
398
|
-
@tf.function
|
|
399
392
|
def __call__(self, img: tf.Tensor) -> tf.Tensor:
|
|
400
393
|
return tf.squeeze(
|
|
401
394
|
_gaussian_filter(
|
|
@@ -427,7 +420,6 @@ class GaussianNoise(NestedObject):
|
|
|
427
420
|
>>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
|
|
428
421
|
|
|
429
422
|
Args:
|
|
430
|
-
----
|
|
431
423
|
mean : mean of the gaussian distribution
|
|
432
424
|
std : std of the gaussian distribution
|
|
433
425
|
"""
|
|
@@ -462,7 +454,6 @@ class RandomHorizontalFlip(NestedObject):
|
|
|
462
454
|
>>> out = transfo(image, target)
|
|
463
455
|
|
|
464
456
|
Args:
|
|
465
|
-
----
|
|
466
457
|
p : probability of Horizontal Flip
|
|
467
458
|
"""
|
|
468
459
|
|
|
@@ -470,7 +461,7 @@ class RandomHorizontalFlip(NestedObject):
|
|
|
470
461
|
super().__init__()
|
|
471
462
|
self.p = p
|
|
472
463
|
|
|
473
|
-
def __call__(self, img:
|
|
464
|
+
def __call__(self, img: tf.Tensor | np.ndarray, target: np.ndarray) -> tuple[tf.Tensor, np.ndarray]:
|
|
474
465
|
if np.random.rand(1) <= self.p:
|
|
475
466
|
_img = tf.image.flip_left_right(img)
|
|
476
467
|
_target = target.copy()
|
|
@@ -492,11 +483,10 @@ class RandomShadow(NestedObject):
|
|
|
492
483
|
>>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
|
|
493
484
|
|
|
494
485
|
Args:
|
|
495
|
-
----
|
|
496
486
|
opacity_range : minimum and maximum opacity of the shade
|
|
497
487
|
"""
|
|
498
488
|
|
|
499
|
-
def __init__(self, opacity_range:
|
|
489
|
+
def __init__(self, opacity_range: tuple[float, float] | None = None) -> None:
|
|
500
490
|
super().__init__()
|
|
501
491
|
self.opacity_range = opacity_range if isinstance(opacity_range, tuple) else (0.2, 0.8)
|
|
502
492
|
|
|
@@ -527,20 +517,19 @@ class RandomResize(NestedObject):
|
|
|
527
517
|
>>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))
|
|
528
518
|
|
|
529
519
|
Args:
|
|
530
|
-
----
|
|
531
520
|
scale_range: range of the resizing factor for width and height (independently)
|
|
532
521
|
preserve_aspect_ratio: whether to preserve the aspect ratio of the image,
|
|
533
|
-
|
|
522
|
+
given a float value, the aspect ratio will be preserved with this probability
|
|
534
523
|
symmetric_pad: whether to symmetrically pad the image,
|
|
535
|
-
|
|
524
|
+
given a float value, the symmetric padding will be applied with this probability
|
|
536
525
|
p: probability to apply the transformation
|
|
537
526
|
"""
|
|
538
527
|
|
|
539
528
|
def __init__(
|
|
540
529
|
self,
|
|
541
|
-
scale_range:
|
|
542
|
-
preserve_aspect_ratio:
|
|
543
|
-
symmetric_pad:
|
|
530
|
+
scale_range: tuple[float, float] = (0.3, 0.9),
|
|
531
|
+
preserve_aspect_ratio: bool | float = False,
|
|
532
|
+
symmetric_pad: bool | float = False,
|
|
544
533
|
p: float = 0.5,
|
|
545
534
|
):
|
|
546
535
|
super().__init__()
|
|
@@ -550,7 +539,7 @@ class RandomResize(NestedObject):
|
|
|
550
539
|
self.p = p
|
|
551
540
|
self._resize = Resize
|
|
552
541
|
|
|
553
|
-
def __call__(self, img: tf.Tensor, target: np.ndarray) ->
|
|
542
|
+
def __call__(self, img: tf.Tensor, target: np.ndarray) -> tuple[tf.Tensor, np.ndarray]:
|
|
554
543
|
if np.random.rand(1) <= self.p:
|
|
555
544
|
scale_h = random.uniform(*self.scale_range)
|
|
556
545
|
scale_w = random.uniform(*self.scale_range)
|
doctr/utils/common_types.py
CHANGED
|
@@ -1,18 +1,17 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import List, Tuple, Union
|
|
8
7
|
|
|
9
8
|
__all__ = ["Point2D", "BoundingBox", "Polygon4P", "Polygon", "Bbox"]
|
|
10
9
|
|
|
11
10
|
|
|
12
|
-
Point2D = Tuple[float, float]
|
|
13
|
-
BoundingBox = Tuple[Point2D, Point2D]
|
|
14
|
-
Polygon4P = Tuple[Point2D, Point2D, Point2D, Point2D]
|
|
15
|
-
Polygon = List[Point2D]
|
|
16
|
-
AbstractPath = Union[str, Path]
|
|
17
|
-
AbstractFile = Union[AbstractPath, bytes]
|
|
18
|
-
Bbox = Tuple[float, float, float, float]
|
|
11
|
+
Point2D = tuple[float, float]
|
|
12
|
+
BoundingBox = tuple[Point2D, Point2D]
|
|
13
|
+
Polygon4P = tuple[Point2D, Point2D, Point2D, Point2D]
|
|
14
|
+
Polygon = list[Point2D]
|
|
15
|
+
AbstractPath = str | Path
|
|
16
|
+
AbstractFile = AbstractPath | bytes
|
|
17
|
+
Bbox = tuple[float, float, float, float]
|
doctr/utils/data.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -13,7 +13,6 @@ import urllib
|
|
|
13
13
|
import urllib.error
|
|
14
14
|
import urllib.request
|
|
15
15
|
from pathlib import Path
|
|
16
|
-
from typing import Optional, Union
|
|
17
16
|
|
|
18
17
|
from tqdm.auto import tqdm
|
|
19
18
|
|
|
@@ -25,7 +24,7 @@ HASH_REGEX = re.compile(r"-([a-f0-9]*)\.")
|
|
|
25
24
|
USER_AGENT = "mindee/doctr"
|
|
26
25
|
|
|
27
26
|
|
|
28
|
-
def _urlretrieve(url: str, filename: Union[Path, str], chunk_size: int = 1024) -> None:
|
|
27
|
+
def _urlretrieve(url: str, filename: Path | str, chunk_size: int = 1024) -> None:
|
|
29
28
|
with open(filename, "wb") as fh:
|
|
30
29
|
with urllib.request.urlopen(urllib.request.Request(url, headers={"User-Agent": USER_AGENT})) as response:
|
|
31
30
|
with tqdm(total=response.length) as pbar:
|
|
@@ -36,7 +35,7 @@ def _urlretrieve(url: str, filename: Union[Path, str], chunk_size: int = 1024) -
|
|
|
36
35
|
fh.write(chunk)
|
|
37
36
|
|
|
38
37
|
|
|
39
|
-
def _check_integrity(file_path: Union[str, Path], hash_prefix: str) -> bool:
|
|
38
|
+
def _check_integrity(file_path: str | Path, hash_prefix: str) -> bool:
|
|
40
39
|
with open(file_path, "rb") as f:
|
|
41
40
|
sha_hash = hashlib.sha256(f.read()).hexdigest()
|
|
42
41
|
|
|
@@ -45,10 +44,10 @@ def _check_integrity(file_path: Union[str, Path], hash_prefix: str) -> bool:
|
|
|
45
44
|
|
|
46
45
|
def download_from_url(
|
|
47
46
|
url: str,
|
|
48
|
-
file_name: Optional[str] = None,
|
|
49
|
-
hash_prefix: Optional[str] = None,
|
|
50
|
-
cache_dir: Optional[str] = None,
|
|
51
|
-
cache_subdir: Optional[str] = None,
|
|
47
|
+
file_name: str | None = None,
|
|
48
|
+
hash_prefix: str | None = None,
|
|
49
|
+
cache_dir: str | None = None,
|
|
50
|
+
cache_subdir: str | None = None,
|
|
52
51
|
) -> Path:
|
|
53
52
|
"""Download a file using its URL
|
|
54
53
|
|
|
@@ -56,7 +55,6 @@ def download_from_url(
|
|
|
56
55
|
>>> download_from_url("https://yoursource.com/yourcheckpoint-yourhash.zip")
|
|
57
56
|
|
|
58
57
|
Args:
|
|
59
|
-
----
|
|
60
58
|
url: the URL of the file to download
|
|
61
59
|
file_name: optional name of the file once downloaded
|
|
62
60
|
hash_prefix: optional expected SHA256 hash of the file
|
|
@@ -64,11 +62,9 @@ def download_from_url(
|
|
|
64
62
|
cache_subdir: subfolder to use in the cache
|
|
65
63
|
|
|
66
64
|
Returns:
|
|
67
|
-
-------
|
|
68
65
|
the location of the downloaded file
|
|
69
66
|
|
|
70
67
|
Note:
|
|
71
|
-
----
|
|
72
68
|
You can change cache directory location by using `DOCTR_CACHE_DIR` environment variable.
|
|
73
69
|
"""
|
|
74
70
|
if not isinstance(file_name, str):
|
|
@@ -112,7 +108,7 @@ def download_from_url(
|
|
|
112
108
|
except (urllib.error.URLError, IOError) as e:
|
|
113
109
|
if url[:5] == "https":
|
|
114
110
|
url = url.replace("https:", "http:")
|
|
115
|
-
print("Failed download. Trying https -> http instead.
|
|
111
|
+
print(f"Failed download. Trying https -> http instead. Downloading {url} to {file_path}")
|
|
116
112
|
_urlretrieve(url, file_path)
|
|
117
113
|
else:
|
|
118
114
|
raise e
|
doctr/utils/fonts.py
CHANGED
|
@@ -1,29 +1,24 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
7
|
import platform
|
|
8
|
-
from typing import Optional, Union
|
|
9
8
|
|
|
10
9
|
from PIL import ImageFont
|
|
11
10
|
|
|
12
11
|
__all__ = ["get_font"]
|
|
13
12
|
|
|
14
13
|
|
|
15
|
-
def get_font(
|
|
16
|
-
font_family: Optional[str] = None, font_size: int = 13
|
|
17
|
-
) -> Union[ImageFont.FreeTypeFont, ImageFont.ImageFont]:
|
|
14
|
+
def get_font(font_family: str | None = None, font_size: int = 13) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
|
|
18
15
|
"""Resolves a compatible ImageFont for the system
|
|
19
16
|
|
|
20
17
|
Args:
|
|
21
|
-
----
|
|
22
18
|
font_family: the font family to use
|
|
23
19
|
font_size: the size of the font upon rendering
|
|
24
20
|
|
|
25
21
|
Returns:
|
|
26
|
-
-------
|
|
27
22
|
the Pillow font
|
|
28
23
|
"""
|
|
29
24
|
# Font selection
|