python-doctr 0.12.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff compares the contents of two publicly available package versions as released to a supported registry. The information is provided for informational purposes only and reflects the packages as they appear in their public registry.
- doctr/__init__.py +0 -1
- doctr/contrib/artefacts.py +1 -1
- doctr/contrib/base.py +1 -1
- doctr/datasets/__init__.py +0 -5
- doctr/datasets/coco_text.py +1 -1
- doctr/datasets/cord.py +1 -1
- doctr/datasets/datasets/__init__.py +1 -6
- doctr/datasets/datasets/base.py +1 -1
- doctr/datasets/datasets/pytorch.py +3 -3
- doctr/datasets/detection.py +1 -1
- doctr/datasets/doc_artefacts.py +1 -1
- doctr/datasets/funsd.py +1 -1
- doctr/datasets/generator/__init__.py +1 -6
- doctr/datasets/generator/base.py +1 -1
- doctr/datasets/generator/pytorch.py +1 -1
- doctr/datasets/ic03.py +1 -1
- doctr/datasets/ic13.py +1 -1
- doctr/datasets/iiit5k.py +1 -1
- doctr/datasets/iiithws.py +1 -1
- doctr/datasets/imgur5k.py +1 -1
- doctr/datasets/mjsynth.py +1 -1
- doctr/datasets/ocr.py +1 -1
- doctr/datasets/orientation.py +1 -1
- doctr/datasets/recognition.py +1 -1
- doctr/datasets/sroie.py +1 -1
- doctr/datasets/svhn.py +1 -1
- doctr/datasets/svt.py +1 -1
- doctr/datasets/synthtext.py +1 -1
- doctr/datasets/utils.py +1 -1
- doctr/datasets/vocabs.py +1 -3
- doctr/datasets/wildreceipt.py +1 -1
- doctr/file_utils.py +3 -102
- doctr/io/elements.py +1 -1
- doctr/io/html.py +1 -1
- doctr/io/image/__init__.py +1 -7
- doctr/io/image/base.py +1 -1
- doctr/io/image/pytorch.py +2 -2
- doctr/io/pdf.py +1 -1
- doctr/io/reader.py +1 -1
- doctr/models/_utils.py +56 -18
- doctr/models/builder.py +1 -1
- doctr/models/classification/magc_resnet/__init__.py +1 -6
- doctr/models/classification/magc_resnet/pytorch.py +3 -3
- doctr/models/classification/mobilenet/__init__.py +1 -6
- doctr/models/classification/mobilenet/pytorch.py +1 -1
- doctr/models/classification/predictor/__init__.py +1 -6
- doctr/models/classification/predictor/pytorch.py +2 -2
- doctr/models/classification/resnet/__init__.py +1 -6
- doctr/models/classification/resnet/pytorch.py +1 -1
- doctr/models/classification/textnet/__init__.py +1 -6
- doctr/models/classification/textnet/pytorch.py +2 -2
- doctr/models/classification/vgg/__init__.py +1 -6
- doctr/models/classification/vgg/pytorch.py +1 -1
- doctr/models/classification/vip/__init__.py +1 -4
- doctr/models/classification/vip/layers/__init__.py +1 -4
- doctr/models/classification/vip/layers/pytorch.py +2 -2
- doctr/models/classification/vip/pytorch.py +1 -1
- doctr/models/classification/vit/__init__.py +1 -6
- doctr/models/classification/vit/pytorch.py +3 -3
- doctr/models/classification/zoo.py +7 -12
- doctr/models/core.py +1 -1
- doctr/models/detection/_utils/__init__.py +1 -6
- doctr/models/detection/_utils/base.py +1 -1
- doctr/models/detection/_utils/pytorch.py +1 -1
- doctr/models/detection/core.py +2 -2
- doctr/models/detection/differentiable_binarization/__init__.py +1 -6
- doctr/models/detection/differentiable_binarization/base.py +5 -13
- doctr/models/detection/differentiable_binarization/pytorch.py +4 -4
- doctr/models/detection/fast/__init__.py +1 -6
- doctr/models/detection/fast/base.py +5 -15
- doctr/models/detection/fast/pytorch.py +5 -5
- doctr/models/detection/linknet/__init__.py +1 -6
- doctr/models/detection/linknet/base.py +4 -13
- doctr/models/detection/linknet/pytorch.py +3 -3
- doctr/models/detection/predictor/__init__.py +1 -6
- doctr/models/detection/predictor/pytorch.py +2 -2
- doctr/models/detection/zoo.py +16 -33
- doctr/models/factory/hub.py +26 -34
- doctr/models/kie_predictor/__init__.py +1 -6
- doctr/models/kie_predictor/base.py +1 -1
- doctr/models/kie_predictor/pytorch.py +3 -7
- doctr/models/modules/layers/__init__.py +1 -6
- doctr/models/modules/layers/pytorch.py +4 -4
- doctr/models/modules/transformer/__init__.py +1 -6
- doctr/models/modules/transformer/pytorch.py +3 -3
- doctr/models/modules/vision_transformer/__init__.py +1 -6
- doctr/models/modules/vision_transformer/pytorch.py +1 -1
- doctr/models/predictor/__init__.py +1 -6
- doctr/models/predictor/base.py +4 -9
- doctr/models/predictor/pytorch.py +3 -6
- doctr/models/preprocessor/__init__.py +1 -6
- doctr/models/preprocessor/pytorch.py +28 -33
- doctr/models/recognition/core.py +1 -1
- doctr/models/recognition/crnn/__init__.py +1 -6
- doctr/models/recognition/crnn/pytorch.py +7 -7
- doctr/models/recognition/master/__init__.py +1 -6
- doctr/models/recognition/master/base.py +1 -1
- doctr/models/recognition/master/pytorch.py +6 -6
- doctr/models/recognition/parseq/__init__.py +1 -6
- doctr/models/recognition/parseq/base.py +1 -1
- doctr/models/recognition/parseq/pytorch.py +6 -6
- doctr/models/recognition/predictor/__init__.py +1 -6
- doctr/models/recognition/predictor/_utils.py +8 -17
- doctr/models/recognition/predictor/pytorch.py +2 -3
- doctr/models/recognition/sar/__init__.py +1 -6
- doctr/models/recognition/sar/pytorch.py +4 -4
- doctr/models/recognition/utils.py +1 -1
- doctr/models/recognition/viptr/__init__.py +1 -4
- doctr/models/recognition/viptr/pytorch.py +4 -4
- doctr/models/recognition/vitstr/__init__.py +1 -6
- doctr/models/recognition/vitstr/base.py +1 -1
- doctr/models/recognition/vitstr/pytorch.py +4 -4
- doctr/models/recognition/zoo.py +14 -14
- doctr/models/utils/__init__.py +1 -6
- doctr/models/utils/pytorch.py +3 -2
- doctr/models/zoo.py +1 -1
- doctr/transforms/functional/__init__.py +1 -6
- doctr/transforms/functional/base.py +3 -2
- doctr/transforms/functional/pytorch.py +5 -5
- doctr/transforms/modules/__init__.py +1 -7
- doctr/transforms/modules/base.py +28 -94
- doctr/transforms/modules/pytorch.py +29 -27
- doctr/utils/common_types.py +1 -1
- doctr/utils/data.py +1 -2
- doctr/utils/fonts.py +1 -1
- doctr/utils/geometry.py +7 -11
- doctr/utils/metrics.py +1 -1
- doctr/utils/multithreading.py +1 -1
- doctr/utils/reconstitution.py +1 -1
- doctr/utils/repr.py +1 -1
- doctr/utils/visualization.py +2 -2
- doctr/version.py +1 -1
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/METADATA +30 -80
- python_doctr-1.0.1.dist-info/RECORD +149 -0
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/WHEEL +1 -1
- doctr/datasets/datasets/tensorflow.py +0 -59
- doctr/datasets/generator/tensorflow.py +0 -58
- doctr/datasets/loader.py +0 -94
- doctr/io/image/tensorflow.py +0 -101
- doctr/models/classification/magc_resnet/tensorflow.py +0 -196
- doctr/models/classification/mobilenet/tensorflow.py +0 -442
- doctr/models/classification/predictor/tensorflow.py +0 -60
- doctr/models/classification/resnet/tensorflow.py +0 -418
- doctr/models/classification/textnet/tensorflow.py +0 -275
- doctr/models/classification/vgg/tensorflow.py +0 -125
- doctr/models/classification/vit/tensorflow.py +0 -201
- doctr/models/detection/_utils/tensorflow.py +0 -34
- doctr/models/detection/differentiable_binarization/tensorflow.py +0 -421
- doctr/models/detection/fast/tensorflow.py +0 -427
- doctr/models/detection/linknet/tensorflow.py +0 -377
- doctr/models/detection/predictor/tensorflow.py +0 -70
- doctr/models/kie_predictor/tensorflow.py +0 -187
- doctr/models/modules/layers/tensorflow.py +0 -171
- doctr/models/modules/transformer/tensorflow.py +0 -235
- doctr/models/modules/vision_transformer/tensorflow.py +0 -100
- doctr/models/predictor/tensorflow.py +0 -155
- doctr/models/preprocessor/tensorflow.py +0 -122
- doctr/models/recognition/crnn/tensorflow.py +0 -317
- doctr/models/recognition/master/tensorflow.py +0 -320
- doctr/models/recognition/parseq/tensorflow.py +0 -516
- doctr/models/recognition/predictor/tensorflow.py +0 -79
- doctr/models/recognition/sar/tensorflow.py +0 -423
- doctr/models/recognition/vitstr/tensorflow.py +0 -285
- doctr/models/utils/tensorflow.py +0 -189
- doctr/transforms/functional/tensorflow.py +0 -254
- doctr/transforms/modules/tensorflow.py +0 -562
- python_doctr-0.12.0.dist-info/RECORD +0 -180
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/top_level.txt +0 -0
- {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/zip-safe +0 -0
doctr/transforms/modules/pytorch.py CHANGED

@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2026, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -13,7 +13,7 @@ from torch.nn.functional import pad
 from torchvision.transforms import functional as F
 from torchvision.transforms import transforms as T
 
-from ..functional
+from ..functional import random_shadow
 
 __all__ = [
     "Resize",
@@ -27,7 +27,21 @@ __all__ = [
 
 
 class Resize(T.Resize):
-    """Resize the input image to the given size
+    """Resize the input image to the given size
+
+    >>> import torch
+    >>> from doctr.transforms import Resize
+    >>> transfo = Resize((64, 64), preserve_aspect_ratio=True, symmetric_pad=True)
+    >>> out = transfo(torch.rand((3, 64, 64)))
+
+    Args:
+        size: output size in pixels, either a tuple (height, width) or a single integer for square images
+        interpolation: interpolation mode to use for resizing, default is bilinear
+        preserve_aspect_ratio: whether to preserve the aspect ratio of the image,
+            if True, the image will be resized to fit within the target size while maintaining its aspect ratio
+        symmetric_pad: whether to symmetrically pad the image to the target size,
+            if True, the image will be padded equally on both sides to fit the target size
+    """
 
     def __init__(
         self,
@@ -36,25 +50,19 @@ class Resize(T.Resize):
         preserve_aspect_ratio: bool = False,
         symmetric_pad: bool = False,
     ) -> None:
-        super().__init__(size, interpolation, antialias=True)
+        super().__init__(size if isinstance(size, (list, tuple)) else (size, size), interpolation, antialias=True)
         self.preserve_aspect_ratio = preserve_aspect_ratio
         self.symmetric_pad = symmetric_pad
 
-        if not isinstance(self.size, (int, tuple, list)):
-            raise AssertionError("size should be either a tuple, a list or an int")
-
     def forward(
         self,
         img: torch.Tensor,
         target: np.ndarray | None = None,
     ) -> torch.Tensor | tuple[torch.Tensor, np.ndarray]:
-        if isinstance(self.size, int):
-            target_ratio = img.shape[-2] / img.shape[-1]
-        else:
-            target_ratio = self.size[0] / self.size[1]
+        target_ratio = self.size[0] / self.size[1]
         actual_ratio = img.shape[-2] / img.shape[-1]
 
-        if not self.preserve_aspect_ratio or (target_ratio == actual_ratio
+        if not self.preserve_aspect_ratio or (target_ratio == actual_ratio):
             # If we don't preserve the aspect ratio or the wanted aspect ratio is the same than the original one
             # We can use with the regular resize
             if target is not None:
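The constructor line is the crux of this hunk: `size` is normalized to a `(height, width)` tuple once, which is why the old `isinstance` guard and the int branch in `forward` could be dropped. A minimal standalone sketch of that normalization (plain Python, not doctr source):

```python
# Normalize a user-supplied size the way the new constructor call does:
# a bare int becomes a square (height, width) tuple up front, so all
# downstream code can rely on the size being a pair.
def normalize_size(size):
    return size if isinstance(size, (list, tuple)) else (size, size)

assert normalize_size(32) == (32, 32)
assert normalize_size((64, 128)) == (64, 128)
```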
@@ -62,16 +70,10 @@ class Resize(T.Resize):
             return super().forward(img)
         else:
             # Resize
-            if isinstance(self.size, (tuple, list)):
-                if actual_ratio > target_ratio:
-                    tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
-                else:
-                    tmp_size = (max(int(self.size[1] * actual_ratio), 1), self.size[1])
-            elif isinstance(self.size, int):  # self.size is the longest side, infer the other
-                if img.shape[-2] <= img.shape[-1]:
-                    tmp_size = (max(int(self.size * actual_ratio), 1), self.size)
-                else:
-                    tmp_size = (self.size, max(int(self.size / actual_ratio), 1))
+            if actual_ratio > target_ratio:
+                tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
+            else:
+                tmp_size = (max(int(self.size[1] * actual_ratio), 1), self.size[1])
 
             # Scale image
             img = F.resize(img, tmp_size, self.interpolation, antialias=True)
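A worked example of the simplified `tmp_size` computation, with assumed numbers (a 100×400 image resized into a 64×64 target while preserving aspect ratio):

```python
# Mirror of the new tmp_size branch above, as plain Python.
size = (64, 64)                   # (height, width), already normalized
h, w = 100, 400                   # input image shape
target_ratio = size[0] / size[1]  # 1.0
actual_ratio = h / w              # 0.25

if actual_ratio > target_ratio:   # image is taller than the target
    tmp_size = (size[0], max(int(size[0] / actual_ratio), 1))
else:                             # image is wider than the target
    tmp_size = (max(int(size[1] * actual_ratio), 1), size[1])

print(tmp_size)  # (16, 64): width fills the target, height keeps the ratio
```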
@@ -93,14 +95,14 @@ class Resize(T.Resize):
         if self.preserve_aspect_ratio:
             # Get absolute coords
             if target.shape[1:] == (4,):
-                if
+                if self.symmetric_pad:
                     target[:, [0, 2]] = offset[0] + target[:, [0, 2]] * raw_shape[-1] / img.shape[-1]
                     target[:, [1, 3]] = offset[1] + target[:, [1, 3]] * raw_shape[-2] / img.shape[-2]
                 else:
                     target[:, [0, 2]] *= raw_shape[-1] / img.shape[-1]
                     target[:, [1, 3]] *= raw_shape[-2] / img.shape[-2]
             elif target.shape[1:] == (4, 2):
-                if
+                if self.symmetric_pad:
                     target[..., 0] = offset[0] + target[..., 0] * raw_shape[-1] / img.shape[-1]
                     target[..., 1] = offset[1] + target[..., 1] * raw_shape[-2] / img.shape[-2]
                 else:
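For the target rescaling above, `raw_shape` is the image shape right after resizing and `offset` is the relative shift introduced by symmetric padding. A small numpy sketch with assumed values (not doctr source) for `(xmin, ymin, xmax, ymax)` boxes:

```python
import numpy as np

# One relative box on a 16x64 resized image padded symmetrically to 64x64.
boxes = np.array([[0.1, 0.2, 0.5, 0.8]])
raw_h, raw_w = 16, 64                      # content after resize
img_h, img_w = 64, 64                      # padded output
offset = (0.0, (1 - raw_h / img_h) / 2)    # content centred vertically

# Same arithmetic as the symmetric_pad branch: shrink coordinates with the
# content, then shift them by the padding offset.
boxes[:, [0, 2]] = offset[0] + boxes[:, [0, 2]] * raw_w / img_w
boxes[:, [1, 3]] = offset[1] + boxes[:, [1, 3]] * raw_h / img_h
print(boxes)  # [[0.1   0.425 0.5   0.575]]
```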
@@ -143,9 +145,9 @@ class GaussianNoise(torch.nn.Module):
         # Reshape the distribution
         noise = self.mean + 2 * self.std * torch.rand(x.shape, device=x.device) - self.std
         if x.dtype == torch.uint8:
-            return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)
+            return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)
         else:
-            return (x + noise.to(dtype=x.dtype)).clamp(0, 1)
+            return (x + noise.to(dtype=x.dtype)).clamp(0, 1)
 
     def extra_repr(self) -> str:
         return f"mean={self.mean}, std={self.std}"
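The two return paths are worth unpacking: despite the class name, the noise above is drawn uniformly from [mean − std, mean + std], and uint8 inputs are handled in the 0–255 range while float inputs stay in 0–1. A standalone sketch with assumed parameter values:

```python
import torch

mean, std = 0.0, 0.1
x_uint8 = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)
x_float = torch.rand(3, 32, 32)

# Uniform noise in [mean - std, mean + std], as in the hunk above.
noise = mean + 2 * std * torch.rand(x_float.shape) - std

# uint8 path: scale noise to 0-255, round, clamp, cast back.
out_uint8 = (x_uint8 + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)
# float path: add noise directly and clamp to the 0-1 range.
out_float = (x_float + noise).clamp(0, 1)
```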
@@ -233,7 +235,7 @@ class RandomShadow(torch.nn.Module):
         try:
             if x.dtype == torch.uint8:
                 return (
-                    (
+                    (
                         255
                         * random_shadow(
                             x.to(dtype=torch.float32) / 255,
doctr/utils/common_types.py CHANGED
doctr/utils/data.py CHANGED
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2026, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -9,7 +9,6 @@ import hashlib
 import logging
 import os
 import re
-import urllib
 import urllib.error
 import urllib.request
 from pathlib import Path
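The dropped bare `import urllib` was redundant: importing a package does not bind its submodules, so the explicit `urllib.error` and `urllib.request` imports that were already present are what actually make those names usable. A quick illustration:

```python
# Without these submodule imports, urllib.request would raise AttributeError
# even after a bare `import urllib`.
import urllib.error
import urllib.request

try:
    urllib.request.urlopen("https://example.com", timeout=5)
except urllib.error.URLError as exc:
    print(f"request failed: {exc.reason}")
```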
doctr/utils/fonts.py CHANGED
doctr/utils/geometry.py CHANGED
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2026, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -390,14 +390,13 @@ def convert_to_relative_coords(geoms: np.ndarray, img_shape: tuple[int, int]) ->
         raise ValueError(f"invalid format for arg `geoms`: {geoms.shape}")
 
 
-def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> list[np.ndarray]:
+def extract_crops(img: np.ndarray, boxes: np.ndarray) -> list[np.ndarray]:
     """Created cropped images from list of bounding boxes
 
     Args:
         img: input image
         boxes: bounding boxes of shape (N, 4) where N is the number of boxes, and the relative
             coordinates (xmin, ymin, xmax, ymax)
-        channels_last: whether the channel dimensions is the last one instead of the last one
 
     Returns:
         list of cropped images
@@ -409,21 +408,19 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True
 
     # Project relative coordinates
     _boxes = boxes.copy()
-    h, w = img.shape[:2] if channels_last else img.shape[-2:]
+    h, w = img.shape[:2]
     if not np.issubdtype(_boxes.dtype, np.integer):
         _boxes[:, [0, 2]] *= w
         _boxes[:, [1, 3]] *= h
     _boxes = _boxes.round().astype(int)
     # Add last index
     _boxes[2:] += 1
-    if channels_last:
-        return deepcopy([img[box[1] : box[3], box[0] : box[2]] for box in _boxes])
 
-    return deepcopy([img[:, box[1] : box[3], box[0] : box[2]] for box in _boxes])
+    return deepcopy([img[box[1] : box[3], box[0] : box[2]] for box in _boxes])
 
 
 def extract_rcrops(
-    img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True, assume_horizontal: bool = False
+    img: np.ndarray, polys: np.ndarray, dtype=np.float32, assume_horizontal: bool = False
 ) -> list[np.ndarray]:
     """Created cropped images from list of rotated bounding boxes
 
@@ -431,7 +428,6 @@ def extract_rcrops(
         img: input image
         polys: bounding boxes of shape (N, 4, 2)
         dtype: target data type of bounding boxes
-        channels_last: whether the channel dimensions is the last one instead of the last one
         assume_horizontal: whether the boxes are assumed to be only horizontally oriented
 
     Returns:
@@ -444,12 +440,12 @@ def extract_rcrops(
 
     # Project relative coordinates
     _boxes = polys.copy()
-    height, width = img.shape[:2] if channels_last else img.shape[-2:]
+    height, width = img.shape[:2]
     if not np.issubdtype(_boxes.dtype, np.integer):
         _boxes[:, :, 0] *= width
         _boxes[:, :, 1] *= height
 
-    src_img = img if channels_last else img.transpose(1, 2, 0)
+    src_img = img
 
     # Handle only horizontal oriented boxes
     if assume_horizontal:
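With `channels_last` gone from both helpers, crops always assume channels-last `(H, W, C)` images. A minimal numpy sketch of the `extract_crops` flow under that assumption (not doctr source):

```python
import numpy as np

img = np.zeros((100, 200, 3), dtype=np.uint8)  # (H, W, C)
boxes = np.array([[0.1, 0.2, 0.5, 0.8]])       # relative (xmin, ymin, xmax, ymax)

# Project relative coordinates to pixels, then slice rows/cols directly.
_boxes = boxes.copy()
h, w = img.shape[:2]
_boxes[:, [0, 2]] *= w
_boxes[:, [1, 3]] *= h
_boxes = _boxes.round().astype(int)

crops = [img[box[1]: box[3], box[0]: box[2]] for box in _boxes]
print(crops[0].shape)  # (60, 80, 3)
```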
doctr/utils/metrics.py CHANGED
doctr/utils/multithreading.py CHANGED
doctr/utils/reconstitution.py CHANGED
doctr/utils/repr.py CHANGED
doctr/utils/visualization.py CHANGED
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-
+# Copyright (C) 2021-2026, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -148,7 +148,7 @@ def get_colors(num_colors: int) -> list[tuple[float, float, float]]:
         hue = i / 360.0
         lightness = (50 + np.random.rand() * 10) / 100.0
         saturation = (90 + np.random.rand() * 10) / 100.0
-        colors.append(colorsys.hls_to_rgb(hue, lightness, saturation))
+        colors.append(colorsys.hls_to_rgb(hue, lightness, saturation))  # type: ignore[arg-type]
     return colors
 
 
doctr/version.py CHANGED
@@ -1 +1 @@
-__version__ = '
+__version__ = 'v1.0.1'
python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: python-doctr
-Version: 0.12.0
+Version: 1.0.1
 Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
 Author-email: Mindee <contact@mindee.com>
 Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -210,7 +210,7 @@ Project-URL: documentation, https://mindee.github.io/doctr
 Project-URL: repository, https://github.com/mindee/doctr
 Project-URL: tracker, https://github.com/mindee/doctr/issues
 Project-URL: changelog, https://mindee.github.io/doctr/changelog.html
-Keywords: OCR,deep learning,computer vision,
+Keywords: OCR,deep learning,computer vision,pytorch,text detection,text recognition
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Education
@@ -226,30 +226,24 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: <4,>=3.10.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: torch<3.0.0,>=2.0.0
+Requires-Dist: torchvision>=0.15.0
+Requires-Dist: onnx<3.0.0,>=1.12.0
 Requires-Dist: numpy<3.0.0,>=1.16.0
 Requires-Dist: scipy<2.0.0,>=1.4.0
 Requires-Dist: h5py<4.0.0,>=3.1.0
 Requires-Dist: opencv-python<5.0.0,>=4.5.0
-Requires-Dist: pypdfium2<
+Requires-Dist: pypdfium2<6.0.0,>=4.11.0
 Requires-Dist: pyclipper<2.0.0,>=1.2.0
 Requires-Dist: shapely<3.0.0,>=1.6.0
 Requires-Dist: langdetect<2.0.0,>=1.0.9
 Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
-Requires-Dist: huggingface-hub<
+Requires-Dist: huggingface-hub<2.0.0,>=0.20.0
 Requires-Dist: Pillow>=9.2.0
 Requires-Dist: defusedxml>=0.7.0
 Requires-Dist: anyascii>=0.3.2
 Requires-Dist: validators>=0.18.0
 Requires-Dist: tqdm>=4.30.0
-Provides-Extra: tf
-Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "tf"
-Requires-Dist: tensorflow<3.0.0,>=2.15.0; sys_platform != "linux" and extra == "tf"
-Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "tf"
-Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "tf"
-Provides-Extra: torch
-Requires-Dist: torch<3.0.0,>=2.0.0; extra == "torch"
-Requires-Dist: torchvision>=0.15.0; extra == "torch"
-Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "torch"
 Provides-Extra: html
 Requires-Dist: weasyprint>=55.0; extra == "html"
 Provides-Extra: viz
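The practical effect of this hunk: torch, torchvision, and onnx move from the old `torch` extra into unconditional requirements, while the `tf` extra disappears. One way to confirm against an installed 1.x wheel (assumes python-doctr is installed):

```python
from importlib.metadata import requires

# Unconditional requirements carry no '; extra == "..."' marker.
for req in requires("python-doctr") or []:
    if req.startswith(("torch", "onnx")):
        print(req)
```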
@@ -271,16 +265,12 @@ Provides-Extra: docs
 Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "docs"
 Requires-Dist: sphinxemoji>=0.1.8; extra == "docs"
 Requires-Dist: sphinx-copybutton>=0.3.1; extra == "docs"
-Requires-Dist: docutils<0.
+Requires-Dist: docutils<0.23; extra == "docs"
 Requires-Dist: recommonmark>=0.7.1; extra == "docs"
 Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
 Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
 Requires-Dist: furo>=2022.3.4; extra == "docs"
 Provides-Extra: dev
-Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "dev"
-Requires-Dist: tensorflow<3.0.0,>=2.15.0; sys_platform != "linux" and extra == "dev"
-Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "dev"
-Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
 Requires-Dist: torch<3.0.0,>=2.0.0; extra == "dev"
 Requires-Dist: torchvision>=0.15.0; extra == "dev"
 Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "dev"
@@ -298,7 +288,7 @@ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
 Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "dev"
 Requires-Dist: sphinxemoji>=0.1.8; extra == "dev"
 Requires-Dist: sphinx-copybutton>=0.3.1; extra == "dev"
-Requires-Dist: docutils<0.
+Requires-Dist: docutils<0.23; extra == "dev"
 Requires-Dist: recommonmark>=0.7.1; extra == "dev"
 Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
 Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
@@ -309,10 +299,10 @@ Dynamic: license-file
 <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>
 
-[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr)
+[](https://slack.mindee.com) [](LICENSE)  [](https://github.com/mindee/doctr/pkgs/container/doctr) [](https://codecov.io/gh/mindee/doctr) [](https://www.codefactor.io/repository/github/mindee/doctr) [](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [](https://mindee.github.io/doctr) [](https://pypi.org/project/python-doctr/) [](https://huggingface.co/spaces/mindee/doctr) [](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [](https://gurubase.io/g/doctr)
 
 
-**Optical Character Recognition made seamless & accessible to anyone, powered by
+**Optical Character Recognition made seamless & accessible to anyone, powered by PyTorch**
 
 What you can expect from this repository:
 
@@ -371,7 +361,7 @@ Should you use docTR on documents that include rotated pages, or pages with mult
 you have multiple options to handle it:
 
 - If you only use straight document pages with straight words (horizontal, same reading direction),
-  consider passing `
+  consider passing `assume_straight_pages=True` to the ocr_predictor. It will directly fit straight boxes
   on your page and return straight boxes, which makes it the fastest option.
 
 - If you want the predictor to output straight boxes (no matter the orientation of your pages, the final localizations
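For reference, a hedged usage sketch of that first option (standard docTR API; the input filename is a placeholder):

```python
from doctr.io import DocumentFile
from doctr.models import ocr_predictor

# Fastest path for straight pages with horizontally aligned words.
doc = DocumentFile.from_images(["page.jpg"])
predictor = ocr_predictor(pretrained=True, assume_straight_pages=True)
result = predictor(doc)
print(result.render())
```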
@@ -440,19 +430,6 @@ The KIE predictor results per page are in a dictionary format with each key repr
 
 ## Installation
 
-> [!WARNING]
-> **TensorFlow Backend Deprecation Notice**
->
-> Using docTR with TensorFlow as a backend is deprecated and will be removed in the next major release (v1.0.0).
-> We **recommend switching to the PyTorch backend**, which is more actively maintained and supports the latest features and models.
-> Alternatively, you can use [OnnxTR](https://github.com/felixdittrich92/OnnxTR), which does **not** require TensorFlow or PyTorch.
->
-> This decision was made based on several considerations:
->
-> - Allows better focus on improving the core library
-> - Frees up resources to develop new features faster
-> - Enables more targeted optimizations with PyTorch
-
 ### Prerequisites
 
 Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
@@ -465,24 +442,15 @@ You can then install the latest release of the package using [pypi](https://pypi
 pip install python-doctr
 ```
 
-
-
-We try to keep framework-specific dependencies to a minimum. You can install framework-specific builds as follows:
+We try to keep extra dependencies to a minimum. You can install specific builds as follows:
 
 ```shell
-# 
-pip install 
-# for PyTorch
-pip install "python-doctr[torch]"
+# standard build
+pip install python-doctr
 # optional dependencies for visualization, html, and contrib modules can be installed as follows:
-pip install "python-doctr[
+pip install "python-doctr[viz,html,contrib]"
 ```
 
-For MacBooks with M1 chip, you will need some additional packages or specific versions:
-
-- TensorFlow 2: [metal plugin](https://developer.apple.com/metal/tensorflow-plugin/)
-- PyTorch: [version >= 2.0.0](https://pytorch.org/get-started/locally/#start-locally)
-
 ### Developer mode
 
 Alternatively, you can install it from source, which will require you to install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git).
@@ -493,13 +461,10 @@ git clone https://github.com/mindee/doctr.git
 pip install -e doctr/.
 ```
 
-Again, if you prefer to avoid the risk of missing dependencies, you can install the
+Again, if you prefer to avoid the risk of missing dependencies, you can install the build:
 
 ```shell
-
-pip install -e doctr/.[tf]
-# for PyTorch
-pip install -e doctr/.[torch]
+pip install -e doctr/.
 ```
 
 ## Models architectures
@@ -542,20 +507,6 @@ Check it out [) that is required.
 
-##### Tensorflow version
-
-```shell
-pip install -r demo/tf-requirements.txt
-```
-
-Then run your app in your default browser with:
-
-```shell
-USE_TF=1 streamlit run demo/app.py
-```
-
-##### PyTorch version
-
 ```shell
 pip install -r demo/pt-requirements.txt
 ```
@@ -563,23 +514,16 @@ pip install -r demo/pt-requirements.txt
 Then run your app in your default browser with:
 
 ```shell
-USE_TORCH=1 streamlit run demo/app.py
+streamlit run demo/app.py
 ```
 
-#### TensorFlow.js
-
-Instead of having your demo actually running Python, you would prefer to run everything in your web browser?
-Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to get started!
-
-
-
 ### Docker container
 
 We offer Docker container support for easy testing and deployment. [Here are the available docker tags.](https://github.com/mindee/doctr/pkgs/container/doctr).
 
 #### Using GPU with docTR Docker Images
 
-The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch
+The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch won't be able to initialize the GPU.
 Please ensure that Docker is configured to use your GPU.
 
 To verify and configure GPU support for Docker, please follow the instructions provided in the [NVIDIA Container Toolkit Installation Guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).
@@ -594,7 +538,7 @@ docker run -it --gpus all ghcr.io/mindee/doctr:torch-py3.9.18-2024-10 bash
 
 The Docker images for docTR follow a specific tag nomenclature: `<deps>-py<python_version>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:
 
-- `<deps>`: `
+- `<deps>`: `torch`, `torch-viz-html-contrib`.
 - `<python_version>`: `3.9.18`, `3.10.13` or `3.11.8`.
 - `<doctr_version>`: a tag >= `v0.11.0`
 - `<YYYY-MM>`: e.g. `2014-10`
@@ -603,7 +547,6 @@ Here are examples of different image tags:
 
 | Tag | Description |
 |----------------------------|---------------------------------------------------|
-| `tf-py3.10.13-v0.11.0` | TensorFlow version `3.10.13` with docTR `v0.11.0`. |
 | `torch-viz-html-contrib-py3.11.8-2024-10` | Torch with extra dependencies version `3.11.8` from latest commit on `main` in `2024-10`. |
 | `torch-py3.11.8-2024-10`| PyTorch version `3.11.8` from latest commit on `main` in `2024-10`. |
 
@@ -615,10 +558,10 @@ You can also build docTR Docker images locally on your computer.
 docker build -t doctr .
 ```
 
-You can specify custom Python versions and docTR versions using build arguments. For example, to build a docTR image with
+You can specify custom Python versions and docTR versions using build arguments. For example, to build a docTR image with PyTorch, Python version `3.9.10`, and docTR version `v0.7.0`, run the following command:
 
 ```shell
-docker build -t doctr --build-arg FRAMEWORK=
+docker build -t doctr --build-arg FRAMEWORK=torch --build-arg PYTHON_VERSION=3.9.10 --build-arg DOCTR_VERSION=v0.7.0 .
 ```
 
 ### Example script
@@ -678,6 +621,13 @@ print(requests.post("http://localhost:8080/ocr", params=params, files=files).jso
 
 Looking for more illustrations of docTR features? You might want to check the [Jupyter notebooks](https://github.com/mindee/doctr/tree/main/notebooks) designed to give you a broader overview.
 
+## Supported By
+
+This project is supported by [t2k GmbH](https://www.text2knowledge.de/de),
+<p align="center">
+  <img src="https://github.com/mindee/doctr/raw/main/docs/images/t2k_logo.png" width="40%">
+</p>
+
 ## Citation
 
 If you wish to cite this project, feel free to use this [BibTeX](http://www.bibtex.org/) reference: