python-doctr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. doctr/contrib/__init__.py +1 -0
  2. doctr/contrib/artefacts.py +7 -9
  3. doctr/contrib/base.py +8 -17
  4. doctr/datasets/cord.py +17 -7
  5. doctr/datasets/datasets/__init__.py +4 -4
  6. doctr/datasets/datasets/base.py +16 -16
  7. doctr/datasets/datasets/pytorch.py +12 -12
  8. doctr/datasets/datasets/tensorflow.py +10 -10
  9. doctr/datasets/detection.py +6 -9
  10. doctr/datasets/doc_artefacts.py +3 -4
  11. doctr/datasets/funsd.py +17 -6
  12. doctr/datasets/generator/__init__.py +4 -4
  13. doctr/datasets/generator/base.py +16 -17
  14. doctr/datasets/generator/pytorch.py +1 -3
  15. doctr/datasets/generator/tensorflow.py +1 -3
  16. doctr/datasets/ic03.py +14 -5
  17. doctr/datasets/ic13.py +13 -5
  18. doctr/datasets/iiit5k.py +31 -20
  19. doctr/datasets/iiithws.py +4 -5
  20. doctr/datasets/imgur5k.py +15 -5
  21. doctr/datasets/loader.py +4 -7
  22. doctr/datasets/mjsynth.py +6 -5
  23. doctr/datasets/ocr.py +3 -4
  24. doctr/datasets/orientation.py +3 -4
  25. doctr/datasets/recognition.py +3 -4
  26. doctr/datasets/sroie.py +16 -5
  27. doctr/datasets/svhn.py +16 -5
  28. doctr/datasets/svt.py +14 -5
  29. doctr/datasets/synthtext.py +14 -5
  30. doctr/datasets/utils.py +37 -27
  31. doctr/datasets/vocabs.py +21 -7
  32. doctr/datasets/wildreceipt.py +25 -10
  33. doctr/file_utils.py +18 -4
  34. doctr/io/elements.py +69 -81
  35. doctr/io/html.py +1 -3
  36. doctr/io/image/__init__.py +3 -3
  37. doctr/io/image/base.py +2 -5
  38. doctr/io/image/pytorch.py +3 -12
  39. doctr/io/image/tensorflow.py +2 -11
  40. doctr/io/pdf.py +5 -7
  41. doctr/io/reader.py +5 -11
  42. doctr/models/_utils.py +14 -22
  43. doctr/models/builder.py +32 -50
  44. doctr/models/classification/magc_resnet/__init__.py +3 -3
  45. doctr/models/classification/magc_resnet/pytorch.py +10 -13
  46. doctr/models/classification/magc_resnet/tensorflow.py +21 -17
  47. doctr/models/classification/mobilenet/__init__.py +3 -3
  48. doctr/models/classification/mobilenet/pytorch.py +7 -17
  49. doctr/models/classification/mobilenet/tensorflow.py +22 -29
  50. doctr/models/classification/predictor/__init__.py +4 -4
  51. doctr/models/classification/predictor/pytorch.py +13 -11
  52. doctr/models/classification/predictor/tensorflow.py +13 -11
  53. doctr/models/classification/resnet/__init__.py +4 -4
  54. doctr/models/classification/resnet/pytorch.py +21 -31
  55. doctr/models/classification/resnet/tensorflow.py +41 -39
  56. doctr/models/classification/textnet/__init__.py +3 -3
  57. doctr/models/classification/textnet/pytorch.py +10 -17
  58. doctr/models/classification/textnet/tensorflow.py +19 -20
  59. doctr/models/classification/vgg/__init__.py +3 -3
  60. doctr/models/classification/vgg/pytorch.py +5 -7
  61. doctr/models/classification/vgg/tensorflow.py +18 -15
  62. doctr/models/classification/vit/__init__.py +3 -3
  63. doctr/models/classification/vit/pytorch.py +8 -14
  64. doctr/models/classification/vit/tensorflow.py +16 -16
  65. doctr/models/classification/zoo.py +36 -19
  66. doctr/models/core.py +3 -3
  67. doctr/models/detection/_utils/__init__.py +4 -4
  68. doctr/models/detection/_utils/base.py +4 -7
  69. doctr/models/detection/_utils/pytorch.py +1 -5
  70. doctr/models/detection/_utils/tensorflow.py +1 -5
  71. doctr/models/detection/core.py +2 -8
  72. doctr/models/detection/differentiable_binarization/__init__.py +4 -4
  73. doctr/models/detection/differentiable_binarization/base.py +7 -17
  74. doctr/models/detection/differentiable_binarization/pytorch.py +27 -30
  75. doctr/models/detection/differentiable_binarization/tensorflow.py +49 -37
  76. doctr/models/detection/fast/__init__.py +4 -4
  77. doctr/models/detection/fast/base.py +6 -14
  78. doctr/models/detection/fast/pytorch.py +24 -31
  79. doctr/models/detection/fast/tensorflow.py +28 -37
  80. doctr/models/detection/linknet/__init__.py +4 -4
  81. doctr/models/detection/linknet/base.py +6 -15
  82. doctr/models/detection/linknet/pytorch.py +24 -27
  83. doctr/models/detection/linknet/tensorflow.py +36 -33
  84. doctr/models/detection/predictor/__init__.py +5 -5
  85. doctr/models/detection/predictor/pytorch.py +6 -7
  86. doctr/models/detection/predictor/tensorflow.py +7 -8
  87. doctr/models/detection/zoo.py +27 -7
  88. doctr/models/factory/hub.py +8 -13
  89. doctr/models/kie_predictor/__init__.py +5 -5
  90. doctr/models/kie_predictor/base.py +8 -5
  91. doctr/models/kie_predictor/pytorch.py +22 -19
  92. doctr/models/kie_predictor/tensorflow.py +21 -15
  93. doctr/models/modules/layers/__init__.py +3 -3
  94. doctr/models/modules/layers/pytorch.py +6 -9
  95. doctr/models/modules/layers/tensorflow.py +5 -7
  96. doctr/models/modules/transformer/__init__.py +3 -3
  97. doctr/models/modules/transformer/pytorch.py +12 -13
  98. doctr/models/modules/transformer/tensorflow.py +9 -12
  99. doctr/models/modules/vision_transformer/__init__.py +3 -3
  100. doctr/models/modules/vision_transformer/pytorch.py +3 -4
  101. doctr/models/modules/vision_transformer/tensorflow.py +4 -4
  102. doctr/models/predictor/__init__.py +5 -5
  103. doctr/models/predictor/base.py +52 -41
  104. doctr/models/predictor/pytorch.py +16 -13
  105. doctr/models/predictor/tensorflow.py +16 -10
  106. doctr/models/preprocessor/__init__.py +4 -4
  107. doctr/models/preprocessor/pytorch.py +13 -17
  108. doctr/models/preprocessor/tensorflow.py +11 -15
  109. doctr/models/recognition/core.py +3 -7
  110. doctr/models/recognition/crnn/__init__.py +4 -4
  111. doctr/models/recognition/crnn/pytorch.py +20 -28
  112. doctr/models/recognition/crnn/tensorflow.py +19 -29
  113. doctr/models/recognition/master/__init__.py +3 -3
  114. doctr/models/recognition/master/base.py +3 -7
  115. doctr/models/recognition/master/pytorch.py +22 -24
  116. doctr/models/recognition/master/tensorflow.py +21 -26
  117. doctr/models/recognition/parseq/__init__.py +3 -3
  118. doctr/models/recognition/parseq/base.py +3 -7
  119. doctr/models/recognition/parseq/pytorch.py +26 -26
  120. doctr/models/recognition/parseq/tensorflow.py +26 -30
  121. doctr/models/recognition/predictor/__init__.py +5 -5
  122. doctr/models/recognition/predictor/_utils.py +7 -10
  123. doctr/models/recognition/predictor/pytorch.py +6 -6
  124. doctr/models/recognition/predictor/tensorflow.py +5 -6
  125. doctr/models/recognition/sar/__init__.py +4 -4
  126. doctr/models/recognition/sar/pytorch.py +20 -21
  127. doctr/models/recognition/sar/tensorflow.py +19 -24
  128. doctr/models/recognition/utils.py +5 -10
  129. doctr/models/recognition/vitstr/__init__.py +4 -4
  130. doctr/models/recognition/vitstr/base.py +3 -7
  131. doctr/models/recognition/vitstr/pytorch.py +18 -20
  132. doctr/models/recognition/vitstr/tensorflow.py +21 -24
  133. doctr/models/recognition/zoo.py +22 -11
  134. doctr/models/utils/__init__.py +4 -4
  135. doctr/models/utils/pytorch.py +13 -16
  136. doctr/models/utils/tensorflow.py +31 -30
  137. doctr/models/zoo.py +1 -5
  138. doctr/transforms/functional/__init__.py +3 -3
  139. doctr/transforms/functional/base.py +4 -11
  140. doctr/transforms/functional/pytorch.py +21 -29
  141. doctr/transforms/functional/tensorflow.py +10 -22
  142. doctr/transforms/modules/__init__.py +4 -4
  143. doctr/transforms/modules/base.py +48 -55
  144. doctr/transforms/modules/pytorch.py +65 -28
  145. doctr/transforms/modules/tensorflow.py +33 -44
  146. doctr/utils/common_types.py +8 -9
  147. doctr/utils/data.py +8 -12
  148. doctr/utils/fonts.py +2 -7
  149. doctr/utils/geometry.py +120 -64
  150. doctr/utils/metrics.py +18 -38
  151. doctr/utils/multithreading.py +4 -6
  152. doctr/utils/reconstitution.py +157 -75
  153. doctr/utils/repr.py +2 -3
  154. doctr/utils/visualization.py +16 -29
  155. doctr/version.py +1 -1
  156. {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/METADATA +59 -57
  157. python_doctr-0.11.0.dist-info/RECORD +173 -0
  158. {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/WHEEL +1 -1
  159. python_doctr-0.9.0.dist-info/RECORD +0 -173
  160. {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/LICENSE +0 -0
  161. {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/top_level.txt +0 -0
  162. {python_doctr-0.9.0.dist-info → python_doctr-0.11.0.dist-info}/zip-safe +0 -0
doctr/utils/multithreading.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,15 +6,16 @@
6
6
 
7
7
  import multiprocessing as mp
8
8
  import os
9
+ from collections.abc import Callable, Iterable, Iterator
9
10
  from multiprocessing.pool import ThreadPool
10
- from typing import Any, Callable, Iterable, Iterator, Optional
11
+ from typing import Any
11
12
 
12
13
  from doctr.file_utils import ENV_VARS_TRUE_VALUES
13
14
 
14
15
  __all__ = ["multithread_exec"]
15
16
 
16
17
 
17
- def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: Optional[int] = None) -> Iterator[Any]:
18
+ def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: int | None = None) -> Iterator[Any]:
18
19
  """Execute a given function in parallel for each element of a given sequence
19
20
 
20
21
  >>> from doctr.utils.multithreading import multithread_exec
@@ -22,17 +23,14 @@ def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: Op
22
23
  >>> results = multithread_exec(lambda x: x ** 2, entries)
23
24
 
24
25
  Args:
25
- ----
26
26
  func: function to be executed on each element of the iterable
27
27
  seq: iterable
28
28
  threads: number of workers to be used for multiprocessing
29
29
 
30
30
  Returns:
31
- -------
32
31
  iterator of the function's results using the iterable as inputs
33
32
 
34
33
  Notes:
35
- -----
36
34
  This function uses ThreadPool from multiprocessing package, which uses `/dev/shm` directory for shared memory.
37
35
  If you do not have write permissions for this directory (if you run `doctr` on AWS Lambda for instance),
38
36
  you might want to disable multiprocessing. To achieve that, set 'DOCTR_MULTIPROCESSING_DISABLE' to 'TRUE'.
doctr/utils/reconstitution.py CHANGED
@@ -1,8 +1,9 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
- from typing import Any, Dict, Optional
5
+ import logging
6
+ from typing import Any
6
7
 
7
8
  import numpy as np
8
9
  from anyascii import anyascii
@@ -13,114 +14,195 @@ from .fonts import get_font
13
14
  __all__ = ["synthesize_page", "synthesize_kie_page"]
14
15
 
15
16
 
17
+ # Global variable to avoid multiple warnings
18
+ ROTATION_WARNING = False
19
+
20
+
21
+ def _warn_rotation(entry: dict[str, Any]) -> None: # pragma: no cover
22
+ global ROTATION_WARNING
23
+ if not ROTATION_WARNING and len(entry["geometry"]) == 4:
24
+ logging.warning("Polygons with larger rotations will lead to inaccurate rendering")
25
+ ROTATION_WARNING = True
26
+
27
+
28
+ def _synthesize(
29
+ response: Image.Image,
30
+ entry: dict[str, Any],
31
+ w: int,
32
+ h: int,
33
+ draw_proba: bool = False,
34
+ font_family: str | None = None,
35
+ smoothing_factor: float = 0.75,
36
+ min_font_size: int = 6,
37
+ max_font_size: int = 50,
38
+ ) -> Image.Image:
39
+ if len(entry["geometry"]) == 2:
40
+ (xmin, ymin), (xmax, ymax) = entry["geometry"]
41
+ polygon = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
42
+ else:
43
+ polygon = entry["geometry"]
44
+
45
+ # Calculate the bounding box of the word
46
+ x_coords, y_coords = zip(*polygon)
47
+ xmin, ymin, xmax, ymax = (
48
+ int(round(w * min(x_coords))),
49
+ int(round(h * min(y_coords))),
50
+ int(round(w * max(x_coords))),
51
+ int(round(h * max(y_coords))),
52
+ )
53
+ word_width = xmax - xmin
54
+ word_height = ymax - ymin
55
+
56
+ # If lines are provided instead of words, concatenate the word entries
57
+ if "words" in entry:
58
+ word_text = " ".join(word["value"] for word in entry["words"])
59
+ else:
60
+ word_text = entry["value"]
61
+ # Find the optimal font size
62
+ try:
63
+ font_size = min(word_height, max_font_size)
64
+ font = get_font(font_family, font_size)
65
+ text_width, text_height = font.getbbox(word_text)[2:4]
66
+
67
+ while (text_width > word_width or text_height > word_height) and font_size > min_font_size:
68
+ font_size = max(int(font_size * smoothing_factor), min_font_size)
69
+ font = get_font(font_family, font_size)
70
+ text_width, text_height = font.getbbox(word_text)[2:4]
71
+ except ValueError:
72
+ font = get_font(font_family, min_font_size)
73
+
74
+ # Create a mask for the word
75
+ mask = Image.new("L", (w, h), 0)
76
+ ImageDraw.Draw(mask).polygon([(int(round(w * x)), int(round(h * y))) for x, y in polygon], fill=255)
77
+
78
+ # Draw the word text
79
+ d = ImageDraw.Draw(response)
80
+ try:
81
+ try:
82
+ d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt")
83
+ except UnicodeEncodeError:
84
+ d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt")
85
+ # Catch generic exceptions to avoid crashing the whole rendering
86
+ except Exception: # pragma: no cover
87
+ logging.warning(f"Could not render word: {word_text}")
88
+
89
+ if draw_proba:
90
+ confidence = (
91
+ entry["confidence"]
92
+ if "confidence" in entry
93
+ else sum(w["confidence"] for w in entry["words"]) / len(entry["words"])
94
+ )
95
+ p = int(255 * confidence)
96
+ color = (255 - p, 0, p) # Red to blue gradient based on probability
97
+ d.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=2)
98
+
99
+ prob_font = get_font(font_family, 20)
100
+ prob_text = f"{confidence:.2f}"
101
+ prob_text_width, prob_text_height = prob_font.getbbox(prob_text)[2:4]
102
+
103
+ # Position the probability slightly above the bounding box
104
+ prob_x_offset = (word_width - prob_text_width) // 2
105
+ prob_y_offset = ymin - prob_text_height - 2
106
+ prob_y_offset = max(0, prob_y_offset)
107
+
108
+ d.text((xmin + prob_x_offset, prob_y_offset), prob_text, font=prob_font, fill=color, anchor="lt")
109
+
110
+ return response
111
+
112
+
16
113
  def synthesize_page(
17
- page: Dict[str, Any],
114
+ page: dict[str, Any],
18
115
  draw_proba: bool = False,
19
- font_family: Optional[str] = None,
116
+ font_family: str | None = None,
117
+ smoothing_factor: float = 0.95,
118
+ min_font_size: int = 8,
119
+ max_font_size: int = 50,
20
120
  ) -> np.ndarray:
21
121
  """Draw a the content of the element page (OCR response) on a blank page.
22
122
 
23
123
  Args:
24
- ----
25
124
  page: exported Page object to represent
26
125
  draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
27
- font_size: size of the font, default font = 13
28
126
  font_family: family of the font
127
+ smoothing_factor: factor to smooth the font size
128
+ min_font_size: minimum font size
129
+ max_font_size: maximum font size
29
130
 
30
131
  Returns:
31
- -------
32
132
  the synthesized page
33
133
  """
34
134
  # Draw template
35
135
  h, w = page["dimensions"]
36
- response = 255 * np.ones((h, w, 3), dtype=np.int32)
136
+ response = Image.new("RGB", (w, h), color=(255, 255, 255))
37
137
 
38
- # Draw each word
39
138
  for block in page["blocks"]:
40
- for line in block["lines"]:
41
- for word in line["words"]:
42
- # Get absolute word geometry
43
- (xmin, ymin), (xmax, ymax) = word["geometry"]
44
- xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
45
- ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
46
-
47
- # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
48
- font = get_font(font_family, int(0.75 * (ymax - ymin)))
49
- img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
50
- d = ImageDraw.Draw(img)
51
- # Draw in black the value of the word
52
- try:
53
- d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
54
- except UnicodeEncodeError:
55
- # When character cannot be encoded, use its anyascii version
56
- d.text((0, 0), anyascii(word["value"]), font=font, fill=(0, 0, 0))
57
-
58
- # Colorize if draw_proba
59
- if draw_proba:
60
- p = int(255 * word["confidence"])
61
- mask = np.where(np.array(img) == 0, 1, 0)
62
- proba: np.ndarray = np.array([255 - p, 0, p])
63
- color = mask * proba[np.newaxis, np.newaxis, :]
64
- white_mask = 255 * (1 - mask)
65
- img = color + white_mask
66
-
67
- # Write to response page
68
- response[ymin:ymax, xmin:xmax, :] = np.array(img)
69
-
70
- return response
139
+ # If lines are provided use these to get better rendering results
140
+ if len(block["lines"]) > 1:
141
+ for line in block["lines"]:
142
+ _warn_rotation(block) # pragma: no cover
143
+ response = _synthesize(
144
+ response=response,
145
+ entry=line,
146
+ w=w,
147
+ h=h,
148
+ draw_proba=draw_proba,
149
+ font_family=font_family,
150
+ smoothing_factor=smoothing_factor,
151
+ min_font_size=min_font_size,
152
+ max_font_size=max_font_size,
153
+ )
154
+ # Otherwise, draw each word
155
+ else:
156
+ for line in block["lines"]:
157
+ _warn_rotation(block) # pragma: no cover
158
+ for word in line["words"]:
159
+ response = _synthesize(
160
+ response=response,
161
+ entry=word,
162
+ w=w,
163
+ h=h,
164
+ draw_proba=draw_proba,
165
+ font_family=font_family,
166
+ smoothing_factor=smoothing_factor,
167
+ min_font_size=min_font_size,
168
+ max_font_size=max_font_size,
169
+ )
170
+
171
+ return np.array(response, dtype=np.uint8)
71
172
 
72
173
 
73
174
  def synthesize_kie_page(
74
- page: Dict[str, Any],
175
+ page: dict[str, Any],
75
176
  draw_proba: bool = False,
76
- font_family: Optional[str] = None,
177
+ font_family: str | None = None,
77
178
  ) -> np.ndarray:
78
179
  """Draw a the content of the element page (OCR response) on a blank page.
79
180
 
80
181
  Args:
81
- ----
82
182
  page: exported Page object to represent
83
183
  draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
84
- font_size: size of the font, default font = 13
85
184
  font_family: family of the font
185
+ smoothing_factor: factor to smooth the font size
186
+ min_font_size: minimum font size
187
+ max_font_size: maximum font size
86
188
 
87
189
  Returns:
88
- -------
89
190
  the synthesized page
90
191
  """
91
192
  # Draw template
92
193
  h, w = page["dimensions"]
93
- response = 255 * np.ones((h, w, 3), dtype=np.int32)
194
+ response = Image.new("RGB", (w, h), color=(255, 255, 255))
94
195
 
95
196
  # Draw each word
96
197
  for predictions in page["predictions"].values():
97
198
  for prediction in predictions:
98
- # Get aboslute word geometry
99
- (xmin, ymin), (xmax, ymax) = prediction["geometry"]
100
- xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
101
- ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
102
-
103
- # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
104
- font = get_font(font_family, int(0.75 * (ymax - ymin)))
105
- img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
106
- d = ImageDraw.Draw(img)
107
- # Draw in black the value of the word
108
- try:
109
- d.text((0, 0), prediction["value"], font=font, fill=(0, 0, 0))
110
- except UnicodeEncodeError:
111
- # When character cannot be encoded, use its anyascii version
112
- d.text((0, 0), anyascii(prediction["value"]), font=font, fill=(0, 0, 0))
113
-
114
- # Colorize if draw_proba
115
- if draw_proba:
116
- p = int(255 * prediction["confidence"])
117
- mask = np.where(np.array(img) == 0, 1, 0)
118
- proba: np.ndarray = np.array([255 - p, 0, p])
119
- color = mask * proba[np.newaxis, np.newaxis, :]
120
- white_mask = 255 * (1 - mask)
121
- img = color + white_mask
122
-
123
- # Write to response page
124
- response[ymin:ymax, xmin:xmax, :] = np.array(img)
125
-
126
- return response
199
+ _warn_rotation(prediction) # pragma: no cover
200
+ response = _synthesize(
201
+ response=response,
202
+ entry=prediction,
203
+ w=w,
204
+ h=h,
205
+ draw_proba=draw_proba,
206
+ font_family=font_family,
207
+ )
208
+ return np.array(response, dtype=np.uint8)
doctr/utils/repr.py CHANGED
@@ -1,11 +1,10 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
  # Adapted from https://github.com/pytorch/torch/blob/master/torch/nn/modules/module.py
7
7
 
8
- from typing import List
9
8
 
10
9
  __all__ = ["NestedObject"]
11
10
 
@@ -25,7 +24,7 @@ def _addindent(s_, num_spaces):
25
24
  class NestedObject:
26
25
  """Base class for all nested objects in doctr"""
27
26
 
28
- _children_names: List[str]
27
+ _children_names: list[str]
29
28
 
30
29
  def extra_repr(self) -> str:
31
30
  return ""
doctr/utils/visualization.py CHANGED
@@ -1,10 +1,10 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
  import colorsys
6
6
  from copy import deepcopy
7
- from typing import Any, Dict, List, Optional, Tuple, Union
7
+ from typing import Any
8
8
 
9
9
  import cv2
10
10
  import matplotlib.patches as patches
@@ -19,9 +19,9 @@ __all__ = ["visualize_page", "visualize_kie_page", "draw_boxes"]
19
19
 
20
20
  def rect_patch(
21
21
  geometry: BoundingBox,
22
- page_dimensions: Tuple[int, int],
23
- label: Optional[str] = None,
24
- color: Tuple[float, float, float] = (0, 0, 0),
22
+ page_dimensions: tuple[int, int],
23
+ label: str | None = None,
24
+ color: tuple[float, float, float] = (0, 0, 0),
25
25
  alpha: float = 0.3,
26
26
  linewidth: int = 2,
27
27
  fill: bool = True,
@@ -30,7 +30,6 @@ def rect_patch(
30
30
  """Create a matplotlib rectangular patch for the element
31
31
 
32
32
  Args:
33
- ----
34
33
  geometry: bounding box of the element
35
34
  page_dimensions: dimensions of the Page in format (height, width)
36
35
  label: label to display when hovered
@@ -41,7 +40,6 @@ def rect_patch(
41
40
  preserve_aspect_ratio: pass True if you passed True to the predictor
42
41
 
43
42
  Returns:
44
- -------
45
43
  a rectangular Patch
46
44
  """
47
45
  if len(geometry) != 2 or any(not isinstance(elt, tuple) or len(elt) != 2 for elt in geometry):
@@ -70,9 +68,9 @@ def rect_patch(
70
68
 
71
69
  def polygon_patch(
72
70
  geometry: np.ndarray,
73
- page_dimensions: Tuple[int, int],
74
- label: Optional[str] = None,
75
- color: Tuple[float, float, float] = (0, 0, 0),
71
+ page_dimensions: tuple[int, int],
72
+ label: str | None = None,
73
+ color: tuple[float, float, float] = (0, 0, 0),
76
74
  alpha: float = 0.3,
77
75
  linewidth: int = 2,
78
76
  fill: bool = True,
@@ -81,7 +79,6 @@ def polygon_patch(
81
79
  """Create a matplotlib polygon patch for the element
82
80
 
83
81
  Args:
84
- ----
85
82
  geometry: bounding box of the element
86
83
  page_dimensions: dimensions of the Page in format (height, width)
87
84
  label: label to display when hovered
@@ -92,7 +89,6 @@ def polygon_patch(
92
89
  preserve_aspect_ratio: pass True if you passed True to the predictor
93
90
 
94
91
  Returns:
95
- -------
96
92
  a polygon Patch
97
93
  """
98
94
  if not geometry.shape == (4, 2):
@@ -114,20 +110,18 @@ def polygon_patch(
114
110
 
115
111
 
116
112
  def create_obj_patch(
117
- geometry: Union[BoundingBox, Polygon4P, np.ndarray],
118
- page_dimensions: Tuple[int, int],
113
+ geometry: BoundingBox | Polygon4P | np.ndarray,
114
+ page_dimensions: tuple[int, int],
119
115
  **kwargs: Any,
120
116
  ) -> patches.Patch:
121
117
  """Create a matplotlib patch for the element
122
118
 
123
119
  Args:
124
- ----
125
120
  geometry: bounding box (straight or rotated) of the element
126
121
  page_dimensions: dimensions of the page in format (height, width)
127
122
  **kwargs: keyword arguments for the patch
128
123
 
129
124
  Returns:
130
- -------
131
125
  a matplotlib Patch
132
126
  """
133
127
  if isinstance(geometry, tuple):
@@ -140,15 +134,13 @@ def create_obj_patch(
140
134
  raise ValueError("invalid geometry format")
141
135
 
142
136
 
143
- def get_colors(num_colors: int) -> List[Tuple[float, float, float]]:
137
+ def get_colors(num_colors: int) -> list[tuple[float, float, float]]:
144
138
  """Generate num_colors color for matplotlib
145
139
 
146
140
  Args:
147
- ----
148
141
  num_colors: number of colors to generate
149
142
 
150
143
  Returns:
151
- -------
152
144
  colors: list of generated colors
153
145
  """
154
146
  colors = []
@@ -161,7 +153,7 @@ def get_colors(num_colors: int) -> List[Tuple[float, float, float]]:
161
153
 
162
154
 
163
155
  def visualize_page(
164
- page: Dict[str, Any],
156
+ page: dict[str, Any],
165
157
  image: np.ndarray,
166
158
  words_only: bool = True,
167
159
  display_artefacts: bool = True,
@@ -183,7 +175,6 @@ def visualize_page(
183
175
  >>> plt.show()
184
176
 
185
177
  Args:
186
- ----
187
178
  page: the exported Page of a Document
188
179
  image: np array of the page, needs to have the same shape than page['dimensions']
189
180
  words_only: whether only words should be displayed
@@ -194,7 +185,6 @@ def visualize_page(
194
185
  **kwargs: keyword arguments for the polygon patch
195
186
 
196
187
  Returns:
197
- -------
198
188
  the matplotlib figure
199
189
  """
200
190
  # Get proper scale and aspect ratio
@@ -207,7 +197,7 @@ def visualize_page(
207
197
  ax.axis("off")
208
198
 
209
199
  if interactive:
210
- artists: List[patches.Patch] = [] # instantiate an empty list of patches (to be drawn on the page)
200
+ artists: list[patches.Patch] = [] # instantiate an empty list of patches (to be drawn on the page)
211
201
 
212
202
  for block in page["blocks"]:
213
203
  if not words_only:
@@ -287,7 +277,7 @@ def visualize_page(
287
277
 
288
278
 
289
279
  def visualize_kie_page(
290
- page: Dict[str, Any],
280
+ page: dict[str, Any],
291
281
  image: np.ndarray,
292
282
  words_only: bool = False,
293
283
  display_artefacts: bool = True,
@@ -309,7 +299,6 @@ def visualize_kie_page(
309
299
  >>> plt.show()
310
300
 
311
301
  Args:
312
- ----
313
302
  page: the exported Page of a Document
314
303
  image: np array of the page, needs to have the same shape than page['dimensions']
315
304
  words_only: whether only words should be displayed
@@ -320,7 +309,6 @@ def visualize_kie_page(
320
309
  **kwargs: keyword arguments for the polygon patch
321
310
 
322
311
  Returns:
323
- -------
324
312
  the matplotlib figure
325
313
  """
326
314
  # Get proper scale and aspect ratio
@@ -333,7 +321,7 @@ def visualize_kie_page(
333
321
  ax.axis("off")
334
322
 
335
323
  if interactive:
336
- artists: List[patches.Patch] = [] # instantiate an empty list of patches (to be drawn on the page)
324
+ artists: list[patches.Patch] = [] # instantiate an empty list of patches (to be drawn on the page)
337
325
 
338
326
  colors = {k: color for color, k in zip(get_colors(len(page["predictions"])), page["predictions"])}
339
327
  for key, value in page["predictions"].items():
@@ -363,11 +351,10 @@ def visualize_kie_page(
363
351
  return fig
364
352
 
365
353
 
366
- def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: Optional[Tuple[int, int, int]] = None, **kwargs) -> None:
354
+ def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: tuple[int, int, int] | None = None, **kwargs) -> None:
367
355
  """Draw an array of relative straight boxes on an image
368
356
 
369
357
  Args:
370
- ----
371
358
  boxes: array of relative boxes, of shape (*, 4)
372
359
  image: np array, float32 or uint8
373
360
  color: color to use for bounding box edges
doctr/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = 'v0.9.0'
1
+ __version__ = 'v0.11.0'