python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doctr/contrib/__init__.py +1 -0
- doctr/contrib/artefacts.py +7 -9
- doctr/contrib/base.py +8 -17
- doctr/datasets/cord.py +17 -7
- doctr/datasets/datasets/__init__.py +4 -4
- doctr/datasets/datasets/base.py +16 -16
- doctr/datasets/datasets/pytorch.py +12 -12
- doctr/datasets/datasets/tensorflow.py +10 -10
- doctr/datasets/detection.py +6 -9
- doctr/datasets/doc_artefacts.py +3 -4
- doctr/datasets/funsd.py +17 -6
- doctr/datasets/generator/__init__.py +4 -4
- doctr/datasets/generator/base.py +16 -17
- doctr/datasets/generator/pytorch.py +1 -3
- doctr/datasets/generator/tensorflow.py +1 -3
- doctr/datasets/ic03.py +14 -5
- doctr/datasets/ic13.py +13 -5
- doctr/datasets/iiit5k.py +31 -20
- doctr/datasets/iiithws.py +4 -5
- doctr/datasets/imgur5k.py +15 -5
- doctr/datasets/loader.py +4 -7
- doctr/datasets/mjsynth.py +6 -5
- doctr/datasets/ocr.py +3 -4
- doctr/datasets/orientation.py +3 -4
- doctr/datasets/recognition.py +3 -4
- doctr/datasets/sroie.py +16 -5
- doctr/datasets/svhn.py +16 -5
- doctr/datasets/svt.py +14 -5
- doctr/datasets/synthtext.py +14 -5
- doctr/datasets/utils.py +37 -27
- doctr/datasets/vocabs.py +21 -7
- doctr/datasets/wildreceipt.py +25 -10
- doctr/file_utils.py +18 -4
- doctr/io/elements.py +69 -81
- doctr/io/html.py +1 -3
- doctr/io/image/__init__.py +3 -3
- doctr/io/image/base.py +2 -5
- doctr/io/image/pytorch.py +3 -12
- doctr/io/image/tensorflow.py +2 -11
- doctr/io/pdf.py +5 -7
- doctr/io/reader.py +5 -11
- doctr/models/_utils.py +14 -22
- doctr/models/builder.py +32 -50
- doctr/models/classification/magc_resnet/__init__.py +3 -3
- doctr/models/classification/magc_resnet/pytorch.py +10 -13
- doctr/models/classification/magc_resnet/tensorflow.py +21 -17
- doctr/models/classification/mobilenet/__init__.py +3 -3
- doctr/models/classification/mobilenet/pytorch.py +7 -17
- doctr/models/classification/mobilenet/tensorflow.py +22 -29
- doctr/models/classification/predictor/__init__.py +4 -4
- doctr/models/classification/predictor/pytorch.py +13 -11
- doctr/models/classification/predictor/tensorflow.py +13 -11
- doctr/models/classification/resnet/__init__.py +4 -4
- doctr/models/classification/resnet/pytorch.py +21 -31
- doctr/models/classification/resnet/tensorflow.py +41 -39
- doctr/models/classification/textnet/__init__.py +3 -3
- doctr/models/classification/textnet/pytorch.py +10 -17
- doctr/models/classification/textnet/tensorflow.py +19 -20
- doctr/models/classification/vgg/__init__.py +3 -3
- doctr/models/classification/vgg/pytorch.py +5 -7
- doctr/models/classification/vgg/tensorflow.py +18 -15
- doctr/models/classification/vit/__init__.py +3 -3
- doctr/models/classification/vit/pytorch.py +8 -14
- doctr/models/classification/vit/tensorflow.py +16 -16
- doctr/models/classification/zoo.py +36 -19
- doctr/models/core.py +3 -3
- doctr/models/detection/_utils/__init__.py +4 -4
- doctr/models/detection/_utils/base.py +4 -7
- doctr/models/detection/_utils/pytorch.py +1 -5
- doctr/models/detection/_utils/tensorflow.py +1 -5
- doctr/models/detection/core.py +2 -8
- doctr/models/detection/differentiable_binarization/__init__.py +4 -4
- doctr/models/detection/differentiable_binarization/base.py +7 -17
- doctr/models/detection/differentiable_binarization/pytorch.py +27 -30
- doctr/models/detection/differentiable_binarization/tensorflow.py +49 -37
- doctr/models/detection/fast/__init__.py +4 -4
- doctr/models/detection/fast/base.py +6 -14
- doctr/models/detection/fast/pytorch.py +24 -31
- doctr/models/detection/fast/tensorflow.py +28 -37
- doctr/models/detection/linknet/__init__.py +4 -4
- doctr/models/detection/linknet/base.py +6 -15
- doctr/models/detection/linknet/pytorch.py +24 -27
- doctr/models/detection/linknet/tensorflow.py +36 -33
- doctr/models/detection/predictor/__init__.py +5 -5
- doctr/models/detection/predictor/pytorch.py +6 -7
- doctr/models/detection/predictor/tensorflow.py +7 -8
- doctr/models/detection/zoo.py +27 -7
- doctr/models/factory/hub.py +8 -13
- doctr/models/kie_predictor/__init__.py +5 -5
- doctr/models/kie_predictor/base.py +8 -5
- doctr/models/kie_predictor/pytorch.py +22 -19
- doctr/models/kie_predictor/tensorflow.py +21 -15
- doctr/models/modules/layers/__init__.py +3 -3
- doctr/models/modules/layers/pytorch.py +6 -9
- doctr/models/modules/layers/tensorflow.py +5 -7
- doctr/models/modules/transformer/__init__.py +3 -3
- doctr/models/modules/transformer/pytorch.py +12 -13
- doctr/models/modules/transformer/tensorflow.py +9 -12
- doctr/models/modules/vision_transformer/__init__.py +3 -3
- doctr/models/modules/vision_transformer/pytorch.py +3 -4
- doctr/models/modules/vision_transformer/tensorflow.py +4 -4
- doctr/models/predictor/__init__.py +5 -5
- doctr/models/predictor/base.py +52 -41
- doctr/models/predictor/pytorch.py +16 -13
- doctr/models/predictor/tensorflow.py +16 -10
- doctr/models/preprocessor/__init__.py +4 -4
- doctr/models/preprocessor/pytorch.py +13 -17
- doctr/models/preprocessor/tensorflow.py +11 -15
- doctr/models/recognition/core.py +3 -7
- doctr/models/recognition/crnn/__init__.py +4 -4
- doctr/models/recognition/crnn/pytorch.py +20 -28
- doctr/models/recognition/crnn/tensorflow.py +19 -29
- doctr/models/recognition/master/__init__.py +3 -3
- doctr/models/recognition/master/base.py +3 -7
- doctr/models/recognition/master/pytorch.py +22 -24
- doctr/models/recognition/master/tensorflow.py +21 -26
- doctr/models/recognition/parseq/__init__.py +3 -3
- doctr/models/recognition/parseq/base.py +3 -7
- doctr/models/recognition/parseq/pytorch.py +26 -26
- doctr/models/recognition/parseq/tensorflow.py +26 -30
- doctr/models/recognition/predictor/__init__.py +5 -5
- doctr/models/recognition/predictor/_utils.py +7 -10
- doctr/models/recognition/predictor/pytorch.py +6 -6
- doctr/models/recognition/predictor/tensorflow.py +5 -6
- doctr/models/recognition/sar/__init__.py +4 -4
- doctr/models/recognition/sar/pytorch.py +20 -21
- doctr/models/recognition/sar/tensorflow.py +19 -24
- doctr/models/recognition/utils.py +5 -10
- doctr/models/recognition/vitstr/__init__.py +4 -4
- doctr/models/recognition/vitstr/base.py +3 -7
- doctr/models/recognition/vitstr/pytorch.py +18 -20
- doctr/models/recognition/vitstr/tensorflow.py +21 -24
- doctr/models/recognition/zoo.py +22 -11
- doctr/models/utils/__init__.py +4 -4
- doctr/models/utils/pytorch.py +13 -16
- doctr/models/utils/tensorflow.py +31 -30
- doctr/models/zoo.py +1 -5
- doctr/transforms/functional/__init__.py +3 -3
- doctr/transforms/functional/base.py +4 -11
- doctr/transforms/functional/pytorch.py +21 -29
- doctr/transforms/functional/tensorflow.py +10 -22
- doctr/transforms/modules/__init__.py +4 -4
- doctr/transforms/modules/base.py +48 -55
- doctr/transforms/modules/pytorch.py +65 -28
- doctr/transforms/modules/tensorflow.py +33 -44
- doctr/utils/common_types.py +8 -9
- doctr/utils/data.py +8 -12
- doctr/utils/fonts.py +2 -7
- doctr/utils/geometry.py +120 -64
- doctr/utils/metrics.py +18 -38
- doctr/utils/multithreading.py +4 -6
- doctr/utils/reconstitution.py +157 -75
- doctr/utils/repr.py +2 -3
- doctr/utils/visualization.py +16 -29
- doctr/version.py +1 -1
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/METADATA +59 -57
- python_doctr-0.11.0.dist-info/RECORD +173 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/WHEEL +1 -1
- python_doctr-0.9.0.dist-info/RECORD +0 -173
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/LICENSE +0 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/top_level.txt +0 -0
- {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/zip-safe +0 -0
doctr/utils/multithreading.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
@@ -6,15 +6,16 @@
|
|
|
6
6
|
|
|
7
7
|
import multiprocessing as mp
|
|
8
8
|
import os
|
|
9
|
+
from collections.abc import Callable, Iterable, Iterator
|
|
9
10
|
from multiprocessing.pool import ThreadPool
|
|
10
|
-
from typing import Any, Callable, Iterable, Iterator, Optional
|
|
11
|
+
from typing import Any
|
|
11
12
|
|
|
12
13
|
from doctr.file_utils import ENV_VARS_TRUE_VALUES
|
|
13
14
|
|
|
14
15
|
__all__ = ["multithread_exec"]
|
|
15
16
|
|
|
16
17
|
|
|
17
|
-
def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: Optional[int] = None) -> Iterator[Any]:
|
|
18
|
+
def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: int | None = None) -> Iterator[Any]:
|
|
18
19
|
"""Execute a given function in parallel for each element of a given sequence
|
|
19
20
|
|
|
20
21
|
>>> from doctr.utils.multithreading import multithread_exec
|
|
@@ -22,17 +23,14 @@ def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: Op
|
|
|
22
23
|
>>> results = multithread_exec(lambda x: x ** 2, entries)
|
|
23
24
|
|
|
24
25
|
Args:
|
|
25
|
-
----
|
|
26
26
|
func: function to be executed on each element of the iterable
|
|
27
27
|
seq: iterable
|
|
28
28
|
threads: number of workers to be used for multiprocessing
|
|
29
29
|
|
|
30
30
|
Returns:
|
|
31
|
-
-------
|
|
32
31
|
iterator of the function's results using the iterable as inputs
|
|
33
32
|
|
|
34
33
|
Notes:
|
|
35
|
-
-----
|
|
36
34
|
This function uses ThreadPool from multiprocessing package, which uses `/dev/shm` directory for shared memory.
|
|
37
35
|
If you do not have write permissions for this directory (if you run `doctr` on AWS Lambda for instance),
|
|
38
36
|
you might want to disable multiprocessing. To achieve that, set 'DOCTR_MULTIPROCESSING_DISABLE' to 'TRUE'.
|
doctr/utils/reconstitution.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
|
-
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any
|
|
6
7
|
|
|
7
8
|
import numpy as np
|
|
8
9
|
from anyascii import anyascii
|
|
@@ -13,114 +14,195 @@ from .fonts import get_font
|
|
|
13
14
|
__all__ = ["synthesize_page", "synthesize_kie_page"]
|
|
14
15
|
|
|
15
16
|
|
|
17
|
+
# Global variable to avoid multiple warnings
|
|
18
|
+
ROTATION_WARNING = False
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _warn_rotation(entry: dict[str, Any]) -> None: # pragma: no cover
|
|
22
|
+
global ROTATION_WARNING
|
|
23
|
+
if not ROTATION_WARNING and len(entry["geometry"]) == 4:
|
|
24
|
+
logging.warning("Polygons with larger rotations will lead to inaccurate rendering")
|
|
25
|
+
ROTATION_WARNING = True
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _synthesize(
|
|
29
|
+
response: Image.Image,
|
|
30
|
+
entry: dict[str, Any],
|
|
31
|
+
w: int,
|
|
32
|
+
h: int,
|
|
33
|
+
draw_proba: bool = False,
|
|
34
|
+
font_family: str | None = None,
|
|
35
|
+
smoothing_factor: float = 0.75,
|
|
36
|
+
min_font_size: int = 6,
|
|
37
|
+
max_font_size: int = 50,
|
|
38
|
+
) -> Image.Image:
|
|
39
|
+
if len(entry["geometry"]) == 2:
|
|
40
|
+
(xmin, ymin), (xmax, ymax) = entry["geometry"]
|
|
41
|
+
polygon = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
|
|
42
|
+
else:
|
|
43
|
+
polygon = entry["geometry"]
|
|
44
|
+
|
|
45
|
+
# Calculate the bounding box of the word
|
|
46
|
+
x_coords, y_coords = zip(*polygon)
|
|
47
|
+
xmin, ymin, xmax, ymax = (
|
|
48
|
+
int(round(w * min(x_coords))),
|
|
49
|
+
int(round(h * min(y_coords))),
|
|
50
|
+
int(round(w * max(x_coords))),
|
|
51
|
+
int(round(h * max(y_coords))),
|
|
52
|
+
)
|
|
53
|
+
word_width = xmax - xmin
|
|
54
|
+
word_height = ymax - ymin
|
|
55
|
+
|
|
56
|
+
# If lines are provided instead of words, concatenate the word entries
|
|
57
|
+
if "words" in entry:
|
|
58
|
+
word_text = " ".join(word["value"] for word in entry["words"])
|
|
59
|
+
else:
|
|
60
|
+
word_text = entry["value"]
|
|
61
|
+
# Find the optimal font size
|
|
62
|
+
try:
|
|
63
|
+
font_size = min(word_height, max_font_size)
|
|
64
|
+
font = get_font(font_family, font_size)
|
|
65
|
+
text_width, text_height = font.getbbox(word_text)[2:4]
|
|
66
|
+
|
|
67
|
+
while (text_width > word_width or text_height > word_height) and font_size > min_font_size:
|
|
68
|
+
font_size = max(int(font_size * smoothing_factor), min_font_size)
|
|
69
|
+
font = get_font(font_family, font_size)
|
|
70
|
+
text_width, text_height = font.getbbox(word_text)[2:4]
|
|
71
|
+
except ValueError:
|
|
72
|
+
font = get_font(font_family, min_font_size)
|
|
73
|
+
|
|
74
|
+
# Create a mask for the word
|
|
75
|
+
mask = Image.new("L", (w, h), 0)
|
|
76
|
+
ImageDraw.Draw(mask).polygon([(int(round(w * x)), int(round(h * y))) for x, y in polygon], fill=255)
|
|
77
|
+
|
|
78
|
+
# Draw the word text
|
|
79
|
+
d = ImageDraw.Draw(response)
|
|
80
|
+
try:
|
|
81
|
+
try:
|
|
82
|
+
d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt")
|
|
83
|
+
except UnicodeEncodeError:
|
|
84
|
+
d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt")
|
|
85
|
+
# Catch generic exceptions to avoid crashing the whole rendering
|
|
86
|
+
except Exception: # pragma: no cover
|
|
87
|
+
logging.warning(f"Could not render word: {word_text}")
|
|
88
|
+
|
|
89
|
+
if draw_proba:
|
|
90
|
+
confidence = (
|
|
91
|
+
entry["confidence"]
|
|
92
|
+
if "confidence" in entry
|
|
93
|
+
else sum(w["confidence"] for w in entry["words"]) / len(entry["words"])
|
|
94
|
+
)
|
|
95
|
+
p = int(255 * confidence)
|
|
96
|
+
color = (255 - p, 0, p) # Red to blue gradient based on probability
|
|
97
|
+
d.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=2)
|
|
98
|
+
|
|
99
|
+
prob_font = get_font(font_family, 20)
|
|
100
|
+
prob_text = f"{confidence:.2f}"
|
|
101
|
+
prob_text_width, prob_text_height = prob_font.getbbox(prob_text)[2:4]
|
|
102
|
+
|
|
103
|
+
# Position the probability slightly above the bounding box
|
|
104
|
+
prob_x_offset = (word_width - prob_text_width) // 2
|
|
105
|
+
prob_y_offset = ymin - prob_text_height - 2
|
|
106
|
+
prob_y_offset = max(0, prob_y_offset)
|
|
107
|
+
|
|
108
|
+
d.text((xmin + prob_x_offset, prob_y_offset), prob_text, font=prob_font, fill=color, anchor="lt")
|
|
109
|
+
|
|
110
|
+
return response
|
|
111
|
+
|
|
112
|
+
|
|
16
113
|
def synthesize_page(
|
|
17
|
-
page:
|
|
114
|
+
page: dict[str, Any],
|
|
18
115
|
draw_proba: bool = False,
|
|
19
|
-
font_family:
|
|
116
|
+
font_family: str | None = None,
|
|
117
|
+
smoothing_factor: float = 0.95,
|
|
118
|
+
min_font_size: int = 8,
|
|
119
|
+
max_font_size: int = 50,
|
|
20
120
|
) -> np.ndarray:
|
|
21
121
|
"""Draw a the content of the element page (OCR response) on a blank page.
|
|
22
122
|
|
|
23
123
|
Args:
|
|
24
|
-
----
|
|
25
124
|
page: exported Page object to represent
|
|
26
125
|
draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
|
|
27
|
-
font_size: size of the font, default font = 13
|
|
28
126
|
font_family: family of the font
|
|
127
|
+
smoothing_factor: factor to smooth the font size
|
|
128
|
+
min_font_size: minimum font size
|
|
129
|
+
max_font_size: maximum font size
|
|
29
130
|
|
|
30
131
|
Returns:
|
|
31
|
-
-------
|
|
32
132
|
the synthesized page
|
|
33
133
|
"""
|
|
34
134
|
# Draw template
|
|
35
135
|
h, w = page["dimensions"]
|
|
36
|
-
response =
|
|
136
|
+
response = Image.new("RGB", (w, h), color=(255, 255, 255))
|
|
37
137
|
|
|
38
|
-
# Draw each word
|
|
39
138
|
for block in page["blocks"]:
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
(
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
#
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
139
|
+
# If lines are provided use these to get better rendering results
|
|
140
|
+
if len(block["lines"]) > 1:
|
|
141
|
+
for line in block["lines"]:
|
|
142
|
+
_warn_rotation(block) # pragma: no cover
|
|
143
|
+
response = _synthesize(
|
|
144
|
+
response=response,
|
|
145
|
+
entry=line,
|
|
146
|
+
w=w,
|
|
147
|
+
h=h,
|
|
148
|
+
draw_proba=draw_proba,
|
|
149
|
+
font_family=font_family,
|
|
150
|
+
smoothing_factor=smoothing_factor,
|
|
151
|
+
min_font_size=min_font_size,
|
|
152
|
+
max_font_size=max_font_size,
|
|
153
|
+
)
|
|
154
|
+
# Otherwise, draw each word
|
|
155
|
+
else:
|
|
156
|
+
for line in block["lines"]:
|
|
157
|
+
_warn_rotation(block) # pragma: no cover
|
|
158
|
+
for word in line["words"]:
|
|
159
|
+
response = _synthesize(
|
|
160
|
+
response=response,
|
|
161
|
+
entry=word,
|
|
162
|
+
w=w,
|
|
163
|
+
h=h,
|
|
164
|
+
draw_proba=draw_proba,
|
|
165
|
+
font_family=font_family,
|
|
166
|
+
smoothing_factor=smoothing_factor,
|
|
167
|
+
min_font_size=min_font_size,
|
|
168
|
+
max_font_size=max_font_size,
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
return np.array(response, dtype=np.uint8)
|
|
71
172
|
|
|
72
173
|
|
|
73
174
|
def synthesize_kie_page(
|
|
74
|
-
page:
|
|
175
|
+
page: dict[str, Any],
|
|
75
176
|
draw_proba: bool = False,
|
|
76
|
-
font_family:
|
|
177
|
+
font_family: str | None = None,
|
|
77
178
|
) -> np.ndarray:
|
|
78
179
|
"""Draw a the content of the element page (OCR response) on a blank page.
|
|
79
180
|
|
|
80
181
|
Args:
|
|
81
|
-
----
|
|
82
182
|
page: exported Page object to represent
|
|
83
183
|
draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
|
|
84
|
-
font_size: size of the font, default font = 13
|
|
85
184
|
font_family: family of the font
|
|
185
|
+
smoothing_factor: factor to smooth the font size
|
|
186
|
+
min_font_size: minimum font size
|
|
187
|
+
max_font_size: maximum font size
|
|
86
188
|
|
|
87
189
|
Returns:
|
|
88
|
-
-------
|
|
89
190
|
the synthesized page
|
|
90
191
|
"""
|
|
91
192
|
# Draw template
|
|
92
193
|
h, w = page["dimensions"]
|
|
93
|
-
response =
|
|
194
|
+
response = Image.new("RGB", (w, h), color=(255, 255, 255))
|
|
94
195
|
|
|
95
196
|
# Draw each word
|
|
96
197
|
for predictions in page["predictions"].values():
|
|
97
198
|
for prediction in predictions:
|
|
98
|
-
#
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
try:
|
|
109
|
-
d.text((0, 0), prediction["value"], font=font, fill=(0, 0, 0))
|
|
110
|
-
except UnicodeEncodeError:
|
|
111
|
-
# When character cannot be encoded, use its anyascii version
|
|
112
|
-
d.text((0, 0), anyascii(prediction["value"]), font=font, fill=(0, 0, 0))
|
|
113
|
-
|
|
114
|
-
# Colorize if draw_proba
|
|
115
|
-
if draw_proba:
|
|
116
|
-
p = int(255 * prediction["confidence"])
|
|
117
|
-
mask = np.where(np.array(img) == 0, 1, 0)
|
|
118
|
-
proba: np.ndarray = np.array([255 - p, 0, p])
|
|
119
|
-
color = mask * proba[np.newaxis, np.newaxis, :]
|
|
120
|
-
white_mask = 255 * (1 - mask)
|
|
121
|
-
img = color + white_mask
|
|
122
|
-
|
|
123
|
-
# Write to response page
|
|
124
|
-
response[ymin:ymax, xmin:xmax, :] = np.array(img)
|
|
125
|
-
|
|
126
|
-
return response
|
|
199
|
+
_warn_rotation(prediction) # pragma: no cover
|
|
200
|
+
response = _synthesize(
|
|
201
|
+
response=response,
|
|
202
|
+
entry=prediction,
|
|
203
|
+
w=w,
|
|
204
|
+
h=h,
|
|
205
|
+
draw_proba=draw_proba,
|
|
206
|
+
font_family=font_family,
|
|
207
|
+
)
|
|
208
|
+
return np.array(response, dtype=np.uint8)
|
doctr/utils/repr.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
# Adapted from https://github.com/pytorch/torch/blob/master/torch/nn/modules/module.py
|
|
7
7
|
|
|
8
|
-
from typing import List
|
|
9
8
|
|
|
10
9
|
__all__ = ["NestedObject"]
|
|
11
10
|
|
|
@@ -25,7 +24,7 @@ def _addindent(s_, num_spaces):
|
|
|
25
24
|
class NestedObject:
|
|
26
25
|
"""Base class for all nested objects in doctr"""
|
|
27
26
|
|
|
28
|
-
_children_names: List[str]
|
|
27
|
+
_children_names: list[str]
|
|
29
28
|
|
|
30
29
|
def extra_repr(self) -> str:
|
|
31
30
|
return ""
|
doctr/utils/visualization.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
# Copyright (C) 2021-2024, Mindee.
|
|
1
|
+
# Copyright (C) 2021-2025, Mindee.
|
|
2
2
|
|
|
3
3
|
# This program is licensed under the Apache License 2.0.
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
import colorsys
|
|
6
6
|
from copy import deepcopy
|
|
7
|
-
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
7
|
+
from typing import Any
|
|
8
8
|
|
|
9
9
|
import cv2
|
|
10
10
|
import matplotlib.patches as patches
|
|
@@ -19,9 +19,9 @@ __all__ = ["visualize_page", "visualize_kie_page", "draw_boxes"]
|
|
|
19
19
|
|
|
20
20
|
def rect_patch(
|
|
21
21
|
geometry: BoundingBox,
|
|
22
|
-
page_dimensions:
|
|
23
|
-
label:
|
|
24
|
-
color:
|
|
22
|
+
page_dimensions: tuple[int, int],
|
|
23
|
+
label: str | None = None,
|
|
24
|
+
color: tuple[float, float, float] = (0, 0, 0),
|
|
25
25
|
alpha: float = 0.3,
|
|
26
26
|
linewidth: int = 2,
|
|
27
27
|
fill: bool = True,
|
|
@@ -30,7 +30,6 @@ def rect_patch(
|
|
|
30
30
|
"""Create a matplotlib rectangular patch for the element
|
|
31
31
|
|
|
32
32
|
Args:
|
|
33
|
-
----
|
|
34
33
|
geometry: bounding box of the element
|
|
35
34
|
page_dimensions: dimensions of the Page in format (height, width)
|
|
36
35
|
label: label to display when hovered
|
|
@@ -41,7 +40,6 @@ def rect_patch(
|
|
|
41
40
|
preserve_aspect_ratio: pass True if you passed True to the predictor
|
|
42
41
|
|
|
43
42
|
Returns:
|
|
44
|
-
-------
|
|
45
43
|
a rectangular Patch
|
|
46
44
|
"""
|
|
47
45
|
if len(geometry) != 2 or any(not isinstance(elt, tuple) or len(elt) != 2 for elt in geometry):
|
|
@@ -70,9 +68,9 @@ def rect_patch(
|
|
|
70
68
|
|
|
71
69
|
def polygon_patch(
|
|
72
70
|
geometry: np.ndarray,
|
|
73
|
-
page_dimensions:
|
|
74
|
-
label:
|
|
75
|
-
color:
|
|
71
|
+
page_dimensions: tuple[int, int],
|
|
72
|
+
label: str | None = None,
|
|
73
|
+
color: tuple[float, float, float] = (0, 0, 0),
|
|
76
74
|
alpha: float = 0.3,
|
|
77
75
|
linewidth: int = 2,
|
|
78
76
|
fill: bool = True,
|
|
@@ -81,7 +79,6 @@ def polygon_patch(
|
|
|
81
79
|
"""Create a matplotlib polygon patch for the element
|
|
82
80
|
|
|
83
81
|
Args:
|
|
84
|
-
----
|
|
85
82
|
geometry: bounding box of the element
|
|
86
83
|
page_dimensions: dimensions of the Page in format (height, width)
|
|
87
84
|
label: label to display when hovered
|
|
@@ -92,7 +89,6 @@ def polygon_patch(
|
|
|
92
89
|
preserve_aspect_ratio: pass True if you passed True to the predictor
|
|
93
90
|
|
|
94
91
|
Returns:
|
|
95
|
-
-------
|
|
96
92
|
a polygon Patch
|
|
97
93
|
"""
|
|
98
94
|
if not geometry.shape == (4, 2):
|
|
@@ -114,20 +110,18 @@ def polygon_patch(
|
|
|
114
110
|
|
|
115
111
|
|
|
116
112
|
def create_obj_patch(
|
|
117
|
-
geometry:
|
|
118
|
-
page_dimensions:
|
|
113
|
+
geometry: BoundingBox | Polygon4P | np.ndarray,
|
|
114
|
+
page_dimensions: tuple[int, int],
|
|
119
115
|
**kwargs: Any,
|
|
120
116
|
) -> patches.Patch:
|
|
121
117
|
"""Create a matplotlib patch for the element
|
|
122
118
|
|
|
123
119
|
Args:
|
|
124
|
-
----
|
|
125
120
|
geometry: bounding box (straight or rotated) of the element
|
|
126
121
|
page_dimensions: dimensions of the page in format (height, width)
|
|
127
122
|
**kwargs: keyword arguments for the patch
|
|
128
123
|
|
|
129
124
|
Returns:
|
|
130
|
-
-------
|
|
131
125
|
a matplotlib Patch
|
|
132
126
|
"""
|
|
133
127
|
if isinstance(geometry, tuple):
|
|
@@ -140,15 +134,13 @@ def create_obj_patch(
|
|
|
140
134
|
raise ValueError("invalid geometry format")
|
|
141
135
|
|
|
142
136
|
|
|
143
|
-
def get_colors(num_colors: int) -> List[Tuple[float, float, float]]:
|
|
137
|
+
def get_colors(num_colors: int) -> list[tuple[float, float, float]]:
|
|
144
138
|
"""Generate num_colors color for matplotlib
|
|
145
139
|
|
|
146
140
|
Args:
|
|
147
|
-
----
|
|
148
141
|
num_colors: number of colors to generate
|
|
149
142
|
|
|
150
143
|
Returns:
|
|
151
|
-
-------
|
|
152
144
|
colors: list of generated colors
|
|
153
145
|
"""
|
|
154
146
|
colors = []
|
|
@@ -161,7 +153,7 @@ def get_colors(num_colors: int) -> List[Tuple[float, float, float]]:
|
|
|
161
153
|
|
|
162
154
|
|
|
163
155
|
def visualize_page(
|
|
164
|
-
page:
|
|
156
|
+
page: dict[str, Any],
|
|
165
157
|
image: np.ndarray,
|
|
166
158
|
words_only: bool = True,
|
|
167
159
|
display_artefacts: bool = True,
|
|
@@ -183,7 +175,6 @@ def visualize_page(
|
|
|
183
175
|
>>> plt.show()
|
|
184
176
|
|
|
185
177
|
Args:
|
|
186
|
-
----
|
|
187
178
|
page: the exported Page of a Document
|
|
188
179
|
image: np array of the page, needs to have the same shape than page['dimensions']
|
|
189
180
|
words_only: whether only words should be displayed
|
|
@@ -194,7 +185,6 @@ def visualize_page(
|
|
|
194
185
|
**kwargs: keyword arguments for the polygon patch
|
|
195
186
|
|
|
196
187
|
Returns:
|
|
197
|
-
-------
|
|
198
188
|
the matplotlib figure
|
|
199
189
|
"""
|
|
200
190
|
# Get proper scale and aspect ratio
|
|
@@ -207,7 +197,7 @@ def visualize_page(
|
|
|
207
197
|
ax.axis("off")
|
|
208
198
|
|
|
209
199
|
if interactive:
|
|
210
|
-
artists:
|
|
200
|
+
artists: list[patches.Patch] = [] # instantiate an empty list of patches (to be drawn on the page)
|
|
211
201
|
|
|
212
202
|
for block in page["blocks"]:
|
|
213
203
|
if not words_only:
|
|
@@ -287,7 +277,7 @@ def visualize_page(
|
|
|
287
277
|
|
|
288
278
|
|
|
289
279
|
def visualize_kie_page(
|
|
290
|
-
page:
|
|
280
|
+
page: dict[str, Any],
|
|
291
281
|
image: np.ndarray,
|
|
292
282
|
words_only: bool = False,
|
|
293
283
|
display_artefacts: bool = True,
|
|
@@ -309,7 +299,6 @@ def visualize_kie_page(
|
|
|
309
299
|
>>> plt.show()
|
|
310
300
|
|
|
311
301
|
Args:
|
|
312
|
-
----
|
|
313
302
|
page: the exported Page of a Document
|
|
314
303
|
image: np array of the page, needs to have the same shape than page['dimensions']
|
|
315
304
|
words_only: whether only words should be displayed
|
|
@@ -320,7 +309,6 @@ def visualize_kie_page(
|
|
|
320
309
|
**kwargs: keyword arguments for the polygon patch
|
|
321
310
|
|
|
322
311
|
Returns:
|
|
323
|
-
-------
|
|
324
312
|
the matplotlib figure
|
|
325
313
|
"""
|
|
326
314
|
# Get proper scale and aspect ratio
|
|
@@ -333,7 +321,7 @@ def visualize_kie_page(
|
|
|
333
321
|
ax.axis("off")
|
|
334
322
|
|
|
335
323
|
if interactive:
|
|
336
|
-
artists:
|
|
324
|
+
artists: list[patches.Patch] = [] # instantiate an empty list of patches (to be drawn on the page)
|
|
337
325
|
|
|
338
326
|
colors = {k: color for color, k in zip(get_colors(len(page["predictions"])), page["predictions"])}
|
|
339
327
|
for key, value in page["predictions"].items():
|
|
@@ -363,11 +351,10 @@ def visualize_kie_page(
|
|
|
363
351
|
return fig
|
|
364
352
|
|
|
365
353
|
|
|
366
|
-
def draw_boxes(boxes: np.ndarray, image: np.ndarray, color:
|
|
354
|
+
def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: tuple[int, int, int] | None = None, **kwargs) -> None:
|
|
367
355
|
"""Draw an array of relative straight boxes on an image
|
|
368
356
|
|
|
369
357
|
Args:
|
|
370
|
-
----
|
|
371
358
|
boxes: array of relative boxes, of shape (*, 4)
|
|
372
359
|
image: np array, float32 or uint8
|
|
373
360
|
color: color to use for bounding box edges
|
doctr/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = 'v0.9.0'
|
|
1
|
+
__version__ = 'v0.11.0'
|