python-doctr 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. doctr/contrib/__init__.py +1 -0
  2. doctr/contrib/artefacts.py +7 -9
  3. doctr/contrib/base.py +8 -17
  4. doctr/datasets/__init__.py +1 -0
  5. doctr/datasets/coco_text.py +139 -0
  6. doctr/datasets/cord.py +10 -8
  7. doctr/datasets/datasets/__init__.py +4 -4
  8. doctr/datasets/datasets/base.py +16 -16
  9. doctr/datasets/datasets/pytorch.py +12 -12
  10. doctr/datasets/datasets/tensorflow.py +10 -10
  11. doctr/datasets/detection.py +6 -9
  12. doctr/datasets/doc_artefacts.py +3 -4
  13. doctr/datasets/funsd.py +9 -8
  14. doctr/datasets/generator/__init__.py +4 -4
  15. doctr/datasets/generator/base.py +16 -17
  16. doctr/datasets/generator/pytorch.py +1 -3
  17. doctr/datasets/generator/tensorflow.py +1 -3
  18. doctr/datasets/ic03.py +5 -6
  19. doctr/datasets/ic13.py +6 -6
  20. doctr/datasets/iiit5k.py +10 -6
  21. doctr/datasets/iiithws.py +4 -5
  22. doctr/datasets/imgur5k.py +15 -7
  23. doctr/datasets/loader.py +4 -7
  24. doctr/datasets/mjsynth.py +6 -5
  25. doctr/datasets/ocr.py +3 -4
  26. doctr/datasets/orientation.py +3 -4
  27. doctr/datasets/recognition.py +4 -5
  28. doctr/datasets/sroie.py +6 -5
  29. doctr/datasets/svhn.py +7 -6
  30. doctr/datasets/svt.py +6 -7
  31. doctr/datasets/synthtext.py +19 -7
  32. doctr/datasets/utils.py +41 -35
  33. doctr/datasets/vocabs.py +1107 -49
  34. doctr/datasets/wildreceipt.py +14 -10
  35. doctr/file_utils.py +11 -7
  36. doctr/io/elements.py +96 -82
  37. doctr/io/html.py +1 -3
  38. doctr/io/image/__init__.py +3 -3
  39. doctr/io/image/base.py +2 -5
  40. doctr/io/image/pytorch.py +3 -12
  41. doctr/io/image/tensorflow.py +2 -11
  42. doctr/io/pdf.py +5 -7
  43. doctr/io/reader.py +5 -11
  44. doctr/models/_utils.py +15 -23
  45. doctr/models/builder.py +30 -48
  46. doctr/models/classification/__init__.py +1 -0
  47. doctr/models/classification/magc_resnet/__init__.py +3 -3
  48. doctr/models/classification/magc_resnet/pytorch.py +11 -15
  49. doctr/models/classification/magc_resnet/tensorflow.py +11 -14
  50. doctr/models/classification/mobilenet/__init__.py +3 -3
  51. doctr/models/classification/mobilenet/pytorch.py +20 -18
  52. doctr/models/classification/mobilenet/tensorflow.py +19 -23
  53. doctr/models/classification/predictor/__init__.py +4 -4
  54. doctr/models/classification/predictor/pytorch.py +7 -9
  55. doctr/models/classification/predictor/tensorflow.py +6 -8
  56. doctr/models/classification/resnet/__init__.py +4 -4
  57. doctr/models/classification/resnet/pytorch.py +47 -34
  58. doctr/models/classification/resnet/tensorflow.py +45 -35
  59. doctr/models/classification/textnet/__init__.py +3 -3
  60. doctr/models/classification/textnet/pytorch.py +20 -18
  61. doctr/models/classification/textnet/tensorflow.py +19 -17
  62. doctr/models/classification/vgg/__init__.py +3 -3
  63. doctr/models/classification/vgg/pytorch.py +21 -8
  64. doctr/models/classification/vgg/tensorflow.py +20 -14
  65. doctr/models/classification/vip/__init__.py +4 -0
  66. doctr/models/classification/vip/layers/__init__.py +4 -0
  67. doctr/models/classification/vip/layers/pytorch.py +615 -0
  68. doctr/models/classification/vip/pytorch.py +505 -0
  69. doctr/models/classification/vit/__init__.py +3 -3
  70. doctr/models/classification/vit/pytorch.py +18 -15
  71. doctr/models/classification/vit/tensorflow.py +15 -12
  72. doctr/models/classification/zoo.py +23 -14
  73. doctr/models/core.py +3 -3
  74. doctr/models/detection/_utils/__init__.py +4 -4
  75. doctr/models/detection/_utils/base.py +4 -7
  76. doctr/models/detection/_utils/pytorch.py +1 -5
  77. doctr/models/detection/_utils/tensorflow.py +1 -5
  78. doctr/models/detection/core.py +2 -8
  79. doctr/models/detection/differentiable_binarization/__init__.py +4 -4
  80. doctr/models/detection/differentiable_binarization/base.py +10 -21
  81. doctr/models/detection/differentiable_binarization/pytorch.py +37 -31
  82. doctr/models/detection/differentiable_binarization/tensorflow.py +26 -29
  83. doctr/models/detection/fast/__init__.py +4 -4
  84. doctr/models/detection/fast/base.py +8 -17
  85. doctr/models/detection/fast/pytorch.py +37 -35
  86. doctr/models/detection/fast/tensorflow.py +24 -28
  87. doctr/models/detection/linknet/__init__.py +4 -4
  88. doctr/models/detection/linknet/base.py +8 -18
  89. doctr/models/detection/linknet/pytorch.py +34 -28
  90. doctr/models/detection/linknet/tensorflow.py +24 -25
  91. doctr/models/detection/predictor/__init__.py +5 -5
  92. doctr/models/detection/predictor/pytorch.py +6 -7
  93. doctr/models/detection/predictor/tensorflow.py +5 -6
  94. doctr/models/detection/zoo.py +27 -7
  95. doctr/models/factory/hub.py +6 -10
  96. doctr/models/kie_predictor/__init__.py +5 -5
  97. doctr/models/kie_predictor/base.py +4 -5
  98. doctr/models/kie_predictor/pytorch.py +19 -20
  99. doctr/models/kie_predictor/tensorflow.py +14 -15
  100. doctr/models/modules/layers/__init__.py +3 -3
  101. doctr/models/modules/layers/pytorch.py +55 -10
  102. doctr/models/modules/layers/tensorflow.py +5 -7
  103. doctr/models/modules/transformer/__init__.py +3 -3
  104. doctr/models/modules/transformer/pytorch.py +12 -13
  105. doctr/models/modules/transformer/tensorflow.py +9 -10
  106. doctr/models/modules/vision_transformer/__init__.py +3 -3
  107. doctr/models/modules/vision_transformer/pytorch.py +2 -3
  108. doctr/models/modules/vision_transformer/tensorflow.py +3 -3
  109. doctr/models/predictor/__init__.py +5 -5
  110. doctr/models/predictor/base.py +28 -29
  111. doctr/models/predictor/pytorch.py +13 -14
  112. doctr/models/predictor/tensorflow.py +9 -10
  113. doctr/models/preprocessor/__init__.py +4 -4
  114. doctr/models/preprocessor/pytorch.py +13 -17
  115. doctr/models/preprocessor/tensorflow.py +10 -14
  116. doctr/models/recognition/__init__.py +1 -0
  117. doctr/models/recognition/core.py +3 -7
  118. doctr/models/recognition/crnn/__init__.py +4 -4
  119. doctr/models/recognition/crnn/pytorch.py +30 -29
  120. doctr/models/recognition/crnn/tensorflow.py +21 -24
  121. doctr/models/recognition/master/__init__.py +3 -3
  122. doctr/models/recognition/master/base.py +3 -7
  123. doctr/models/recognition/master/pytorch.py +32 -25
  124. doctr/models/recognition/master/tensorflow.py +22 -25
  125. doctr/models/recognition/parseq/__init__.py +3 -3
  126. doctr/models/recognition/parseq/base.py +3 -7
  127. doctr/models/recognition/parseq/pytorch.py +47 -29
  128. doctr/models/recognition/parseq/tensorflow.py +29 -27
  129. doctr/models/recognition/predictor/__init__.py +5 -5
  130. doctr/models/recognition/predictor/_utils.py +111 -52
  131. doctr/models/recognition/predictor/pytorch.py +9 -9
  132. doctr/models/recognition/predictor/tensorflow.py +8 -9
  133. doctr/models/recognition/sar/__init__.py +4 -4
  134. doctr/models/recognition/sar/pytorch.py +30 -22
  135. doctr/models/recognition/sar/tensorflow.py +22 -24
  136. doctr/models/recognition/utils.py +57 -53
  137. doctr/models/recognition/viptr/__init__.py +4 -0
  138. doctr/models/recognition/viptr/pytorch.py +277 -0
  139. doctr/models/recognition/vitstr/__init__.py +4 -4
  140. doctr/models/recognition/vitstr/base.py +3 -7
  141. doctr/models/recognition/vitstr/pytorch.py +28 -21
  142. doctr/models/recognition/vitstr/tensorflow.py +22 -23
  143. doctr/models/recognition/zoo.py +27 -11
  144. doctr/models/utils/__init__.py +4 -4
  145. doctr/models/utils/pytorch.py +41 -34
  146. doctr/models/utils/tensorflow.py +31 -23
  147. doctr/models/zoo.py +1 -5
  148. doctr/transforms/functional/__init__.py +3 -3
  149. doctr/transforms/functional/base.py +4 -11
  150. doctr/transforms/functional/pytorch.py +20 -28
  151. doctr/transforms/functional/tensorflow.py +10 -22
  152. doctr/transforms/modules/__init__.py +4 -4
  153. doctr/transforms/modules/base.py +48 -55
  154. doctr/transforms/modules/pytorch.py +58 -22
  155. doctr/transforms/modules/tensorflow.py +18 -32
  156. doctr/utils/common_types.py +8 -9
  157. doctr/utils/data.py +9 -13
  158. doctr/utils/fonts.py +2 -7
  159. doctr/utils/geometry.py +17 -48
  160. doctr/utils/metrics.py +17 -37
  161. doctr/utils/multithreading.py +4 -6
  162. doctr/utils/reconstitution.py +9 -13
  163. doctr/utils/repr.py +2 -3
  164. doctr/utils/visualization.py +16 -29
  165. doctr/version.py +1 -1
  166. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/METADATA +70 -52
  167. python_doctr-0.12.0.dist-info/RECORD +180 -0
  168. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/WHEEL +1 -1
  169. python_doctr-0.10.0.dist-info/RECORD +0 -173
  170. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info/licenses}/LICENSE +0 -0
  171. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/top_level.txt +0 -0
  172. {python_doctr-0.10.0.dist-info → python_doctr-0.12.0.dist-info}/zip-safe +0 -0
@@ -1,9 +1,9 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
  import logging
6
- from typing import Any, Dict, Optional
6
+ from typing import Any
7
7
 
8
8
  import numpy as np
9
9
  from anyascii import anyascii
@@ -18,7 +18,7 @@ __all__ = ["synthesize_page", "synthesize_kie_page"]
18
18
  ROTATION_WARNING = False
19
19
 
20
20
 
21
- def _warn_rotation(entry: Dict[str, Any]) -> None: # pragma: no cover
21
+ def _warn_rotation(entry: dict[str, Any]) -> None: # pragma: no cover
22
22
  global ROTATION_WARNING
23
23
  if not ROTATION_WARNING and len(entry["geometry"]) == 4:
24
24
  logging.warning("Polygons with larger rotations will lead to inaccurate rendering")
@@ -27,11 +27,11 @@ def _warn_rotation(entry: Dict[str, Any]) -> None: # pragma: no cover
27
27
 
28
28
  def _synthesize(
29
29
  response: Image.Image,
30
- entry: Dict[str, Any],
30
+ entry: dict[str, Any],
31
31
  w: int,
32
32
  h: int,
33
33
  draw_proba: bool = False,
34
- font_family: Optional[str] = None,
34
+ font_family: str | None = None,
35
35
  smoothing_factor: float = 0.75,
36
36
  min_font_size: int = 6,
37
37
  max_font_size: int = 50,
@@ -111,9 +111,9 @@ def _synthesize(
111
111
 
112
112
 
113
113
  def synthesize_page(
114
- page: Dict[str, Any],
114
+ page: dict[str, Any],
115
115
  draw_proba: bool = False,
116
- font_family: Optional[str] = None,
116
+ font_family: str | None = None,
117
117
  smoothing_factor: float = 0.95,
118
118
  min_font_size: int = 8,
119
119
  max_font_size: int = 50,
@@ -121,7 +121,6 @@ def synthesize_page(
121
121
  """Draw a the content of the element page (OCR response) on a blank page.
122
122
 
123
123
  Args:
124
- ----
125
124
  page: exported Page object to represent
126
125
  draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
127
126
  font_family: family of the font
@@ -130,7 +129,6 @@ def synthesize_page(
130
129
  max_font_size: maximum font size
131
130
 
132
131
  Returns:
133
- -------
134
132
  the synthesized page
135
133
  """
136
134
  # Draw template
@@ -174,14 +172,13 @@ def synthesize_page(
174
172
 
175
173
 
176
174
  def synthesize_kie_page(
177
- page: Dict[str, Any],
175
+ page: dict[str, Any],
178
176
  draw_proba: bool = False,
179
- font_family: Optional[str] = None,
177
+ font_family: str | None = None,
180
178
  ) -> np.ndarray:
181
179
  """Draw a the content of the element page (OCR response) on a blank page.
182
180
 
183
181
  Args:
184
- ----
185
182
  page: exported Page object to represent
186
183
  draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
187
184
  font_family: family of the font
@@ -190,7 +187,6 @@ def synthesize_kie_page(
190
187
  max_font_size: maximum font size
191
188
 
192
189
  Returns:
193
- -------
194
190
  the synthesized page
195
191
  """
196
192
  # Draw template
doctr/utils/repr.py CHANGED
@@ -1,11 +1,10 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
  # Adapted from https://github.com/pytorch/torch/blob/master/torch/nn/modules/module.py
7
7
 
8
- from typing import List
9
8
 
10
9
  __all__ = ["NestedObject"]
11
10
 
@@ -25,7 +24,7 @@ def _addindent(s_, num_spaces):
25
24
  class NestedObject:
26
25
  """Base class for all nested objects in doctr"""
27
26
 
28
- _children_names: List[str]
27
+ _children_names: list[str]
29
28
 
30
29
  def extra_repr(self) -> str:
31
30
  return ""
@@ -1,10 +1,10 @@
1
- # Copyright (C) 2021-2024, Mindee.
1
+ # Copyright (C) 2021-2025, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
  import colorsys
6
6
  from copy import deepcopy
7
- from typing import Any, Dict, List, Optional, Tuple, Union
7
+ from typing import Any
8
8
 
9
9
  import cv2
10
10
  import matplotlib.patches as patches
@@ -19,9 +19,9 @@ __all__ = ["visualize_page", "visualize_kie_page", "draw_boxes"]
19
19
 
20
20
  def rect_patch(
21
21
  geometry: BoundingBox,
22
- page_dimensions: Tuple[int, int],
23
- label: Optional[str] = None,
24
- color: Tuple[float, float, float] = (0, 0, 0),
22
+ page_dimensions: tuple[int, int],
23
+ label: str | None = None,
24
+ color: tuple[float, float, float] = (0, 0, 0),
25
25
  alpha: float = 0.3,
26
26
  linewidth: int = 2,
27
27
  fill: bool = True,
@@ -30,7 +30,6 @@ def rect_patch(
30
30
  """Create a matplotlib rectangular patch for the element
31
31
 
32
32
  Args:
33
- ----
34
33
  geometry: bounding box of the element
35
34
  page_dimensions: dimensions of the Page in format (height, width)
36
35
  label: label to display when hovered
@@ -41,7 +40,6 @@ def rect_patch(
41
40
  preserve_aspect_ratio: pass True if you passed True to the predictor
42
41
 
43
42
  Returns:
44
- -------
45
43
  a rectangular Patch
46
44
  """
47
45
  if len(geometry) != 2 or any(not isinstance(elt, tuple) or len(elt) != 2 for elt in geometry):
@@ -70,9 +68,9 @@ def rect_patch(
70
68
 
71
69
  def polygon_patch(
72
70
  geometry: np.ndarray,
73
- page_dimensions: Tuple[int, int],
74
- label: Optional[str] = None,
75
- color: Tuple[float, float, float] = (0, 0, 0),
71
+ page_dimensions: tuple[int, int],
72
+ label: str | None = None,
73
+ color: tuple[float, float, float] = (0, 0, 0),
76
74
  alpha: float = 0.3,
77
75
  linewidth: int = 2,
78
76
  fill: bool = True,
@@ -81,7 +79,6 @@ def polygon_patch(
81
79
  """Create a matplotlib polygon patch for the element
82
80
 
83
81
  Args:
84
- ----
85
82
  geometry: bounding box of the element
86
83
  page_dimensions: dimensions of the Page in format (height, width)
87
84
  label: label to display when hovered
@@ -92,7 +89,6 @@ def polygon_patch(
92
89
  preserve_aspect_ratio: pass True if you passed True to the predictor
93
90
 
94
91
  Returns:
95
- -------
96
92
  a polygon Patch
97
93
  """
98
94
  if not geometry.shape == (4, 2):
@@ -114,20 +110,18 @@ def polygon_patch(
114
110
 
115
111
 
116
112
  def create_obj_patch(
117
- geometry: Union[BoundingBox, Polygon4P, np.ndarray],
118
- page_dimensions: Tuple[int, int],
113
+ geometry: BoundingBox | Polygon4P | np.ndarray,
114
+ page_dimensions: tuple[int, int],
119
115
  **kwargs: Any,
120
116
  ) -> patches.Patch:
121
117
  """Create a matplotlib patch for the element
122
118
 
123
119
  Args:
124
- ----
125
120
  geometry: bounding box (straight or rotated) of the element
126
121
  page_dimensions: dimensions of the page in format (height, width)
127
122
  **kwargs: keyword arguments for the patch
128
123
 
129
124
  Returns:
130
- -------
131
125
  a matplotlib Patch
132
126
  """
133
127
  if isinstance(geometry, tuple):
@@ -140,15 +134,13 @@ def create_obj_patch(
140
134
  raise ValueError("invalid geometry format")
141
135
 
142
136
 
143
- def get_colors(num_colors: int) -> List[Tuple[float, float, float]]:
137
+ def get_colors(num_colors: int) -> list[tuple[float, float, float]]:
144
138
  """Generate num_colors color for matplotlib
145
139
 
146
140
  Args:
147
- ----
148
141
  num_colors: number of colors to generate
149
142
 
150
143
  Returns:
151
- -------
152
144
  colors: list of generated colors
153
145
  """
154
146
  colors = []
@@ -161,7 +153,7 @@ def get_colors(num_colors: int) -> List[Tuple[float, float, float]]:
161
153
 
162
154
 
163
155
  def visualize_page(
164
- page: Dict[str, Any],
156
+ page: dict[str, Any],
165
157
  image: np.ndarray,
166
158
  words_only: bool = True,
167
159
  display_artefacts: bool = True,
@@ -183,7 +175,6 @@ def visualize_page(
183
175
  >>> plt.show()
184
176
 
185
177
  Args:
186
- ----
187
178
  page: the exported Page of a Document
188
179
  image: np array of the page, needs to have the same shape than page['dimensions']
189
180
  words_only: whether only words should be displayed
@@ -194,7 +185,6 @@ def visualize_page(
194
185
  **kwargs: keyword arguments for the polygon patch
195
186
 
196
187
  Returns:
197
- -------
198
188
  the matplotlib figure
199
189
  """
200
190
  # Get proper scale and aspect ratio
@@ -207,7 +197,7 @@ def visualize_page(
207
197
  ax.axis("off")
208
198
 
209
199
  if interactive:
210
- artists: List[patches.Patch] = [] # instantiate an empty list of patches (to be drawn on the page)
200
+ artists: list[patches.Patch] = [] # instantiate an empty list of patches (to be drawn on the page)
211
201
 
212
202
  for block in page["blocks"]:
213
203
  if not words_only:
@@ -287,7 +277,7 @@ def visualize_page(
287
277
 
288
278
 
289
279
  def visualize_kie_page(
290
- page: Dict[str, Any],
280
+ page: dict[str, Any],
291
281
  image: np.ndarray,
292
282
  words_only: bool = False,
293
283
  display_artefacts: bool = True,
@@ -309,7 +299,6 @@ def visualize_kie_page(
309
299
  >>> plt.show()
310
300
 
311
301
  Args:
312
- ----
313
302
  page: the exported Page of a Document
314
303
  image: np array of the page, needs to have the same shape than page['dimensions']
315
304
  words_only: whether only words should be displayed
@@ -320,7 +309,6 @@ def visualize_kie_page(
320
309
  **kwargs: keyword arguments for the polygon patch
321
310
 
322
311
  Returns:
323
- -------
324
312
  the matplotlib figure
325
313
  """
326
314
  # Get proper scale and aspect ratio
@@ -333,7 +321,7 @@ def visualize_kie_page(
333
321
  ax.axis("off")
334
322
 
335
323
  if interactive:
336
- artists: List[patches.Patch] = [] # instantiate an empty list of patches (to be drawn on the page)
324
+ artists: list[patches.Patch] = [] # instantiate an empty list of patches (to be drawn on the page)
337
325
 
338
326
  colors = {k: color for color, k in zip(get_colors(len(page["predictions"])), page["predictions"])}
339
327
  for key, value in page["predictions"].items():
@@ -363,11 +351,10 @@ def visualize_kie_page(
363
351
  return fig
364
352
 
365
353
 
366
- def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: Optional[Tuple[int, int, int]] = None, **kwargs) -> None:
354
+ def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: tuple[int, int, int] | None = None, **kwargs) -> None:
367
355
  """Draw an array of relative straight boxes on an image
368
356
 
369
357
  Args:
370
- ----
371
358
  boxes: array of relative boxes, of shape (*, 4)
372
359
  image: np array, float32 or uint8
373
360
  color: color to use for bounding box edges
doctr/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = 'v0.10.0'
1
+ __version__ = 'v0.12.0'
@@ -1,10 +1,10 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: python-doctr
3
- Version: 0.10.0
3
+ Version: 0.12.0
4
4
  Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
5
5
  Author-email: Mindee <contact@mindee.com>
6
6
  Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
7
- License: Apache License
7
+ License: Apache License
8
8
  Version 2.0, January 2004
9
9
  http://www.apache.org/licenses/
10
10
 
@@ -219,11 +219,11 @@ Classifier: License :: OSI Approved :: Apache Software License
219
219
  Classifier: Natural Language :: English
220
220
  Classifier: Operating System :: OS Independent
221
221
  Classifier: Programming Language :: Python :: 3
222
- Classifier: Programming Language :: Python :: 3.9
223
222
  Classifier: Programming Language :: Python :: 3.10
224
223
  Classifier: Programming Language :: Python :: 3.11
224
+ Classifier: Programming Language :: Python :: 3.12
225
225
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
226
- Requires-Python: <4,>=3.9.0
226
+ Requires-Python: <4,>=3.10.0
227
227
  Description-Content-Type: text/markdown
228
228
  License-File: LICENSE
229
229
  Requires-Dist: numpy<3.0.0,>=1.16.0
@@ -239,11 +239,46 @@ Requires-Dist: huggingface-hub<1.0.0,>=0.20.0
239
239
  Requires-Dist: Pillow>=9.2.0
240
240
  Requires-Dist: defusedxml>=0.7.0
241
241
  Requires-Dist: anyascii>=0.3.2
242
+ Requires-Dist: validators>=0.18.0
242
243
  Requires-Dist: tqdm>=4.30.0
244
+ Provides-Extra: tf
245
+ Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "tf"
246
+ Requires-Dist: tensorflow<3.0.0,>=2.15.0; sys_platform != "linux" and extra == "tf"
247
+ Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "tf"
248
+ Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "tf"
249
+ Provides-Extra: torch
250
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == "torch"
251
+ Requires-Dist: torchvision>=0.15.0; extra == "torch"
252
+ Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "torch"
253
+ Provides-Extra: html
254
+ Requires-Dist: weasyprint>=55.0; extra == "html"
255
+ Provides-Extra: viz
256
+ Requires-Dist: matplotlib>=3.1.0; extra == "viz"
257
+ Requires-Dist: mplcursors>=0.3; extra == "viz"
243
258
  Provides-Extra: contrib
244
259
  Requires-Dist: onnxruntime>=1.11.0; extra == "contrib"
260
+ Provides-Extra: testing
261
+ Requires-Dist: pytest>=5.3.2; extra == "testing"
262
+ Requires-Dist: coverage[toml]>=4.5.4; extra == "testing"
263
+ Requires-Dist: onnxruntime>=1.11.0; extra == "testing"
264
+ Requires-Dist: requests>=2.20.0; extra == "testing"
265
+ Requires-Dist: psutil>=5.9.5; extra == "testing"
266
+ Provides-Extra: quality
267
+ Requires-Dist: ruff>=0.1.5; extra == "quality"
268
+ Requires-Dist: mypy>=0.812; extra == "quality"
269
+ Requires-Dist: pre-commit>=2.17.0; extra == "quality"
270
+ Provides-Extra: docs
271
+ Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "docs"
272
+ Requires-Dist: sphinxemoji>=0.1.8; extra == "docs"
273
+ Requires-Dist: sphinx-copybutton>=0.3.1; extra == "docs"
274
+ Requires-Dist: docutils<0.22; extra == "docs"
275
+ Requires-Dist: recommonmark>=0.7.1; extra == "docs"
276
+ Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
277
+ Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
278
+ Requires-Dist: furo>=2022.3.4; extra == "docs"
245
279
  Provides-Extra: dev
246
- Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "dev"
280
+ Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "dev"
281
+ Requires-Dist: tensorflow<3.0.0,>=2.15.0; sys_platform != "linux" and extra == "dev"
247
282
  Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "dev"
248
283
  Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
249
284
  Requires-Dist: torch<3.0.0,>=2.0.0; extra == "dev"
@@ -268,44 +303,13 @@ Requires-Dist: recommonmark>=0.7.1; extra == "dev"
268
303
  Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
269
304
  Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
270
305
  Requires-Dist: furo>=2022.3.4; extra == "dev"
271
- Provides-Extra: docs
272
- Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "docs"
273
- Requires-Dist: sphinxemoji>=0.1.8; extra == "docs"
274
- Requires-Dist: sphinx-copybutton>=0.3.1; extra == "docs"
275
- Requires-Dist: docutils<0.22; extra == "docs"
276
- Requires-Dist: recommonmark>=0.7.1; extra == "docs"
277
- Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
278
- Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
279
- Requires-Dist: furo>=2022.3.4; extra == "docs"
280
- Provides-Extra: html
281
- Requires-Dist: weasyprint>=55.0; extra == "html"
282
- Provides-Extra: quality
283
- Requires-Dist: ruff>=0.1.5; extra == "quality"
284
- Requires-Dist: mypy>=0.812; extra == "quality"
285
- Requires-Dist: pre-commit>=2.17.0; extra == "quality"
286
- Provides-Extra: testing
287
- Requires-Dist: pytest>=5.3.2; extra == "testing"
288
- Requires-Dist: coverage[toml]>=4.5.4; extra == "testing"
289
- Requires-Dist: onnxruntime>=1.11.0; extra == "testing"
290
- Requires-Dist: requests>=2.20.0; extra == "testing"
291
- Requires-Dist: psutil>=5.9.5; extra == "testing"
292
- Provides-Extra: tf
293
- Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "tf"
294
- Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "tf"
295
- Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "tf"
296
- Provides-Extra: torch
297
- Requires-Dist: torch<3.0.0,>=2.0.0; extra == "torch"
298
- Requires-Dist: torchvision>=0.15.0; extra == "torch"
299
- Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "torch"
300
- Provides-Extra: viz
301
- Requires-Dist: matplotlib>=3.1.0; extra == "viz"
302
- Requires-Dist: mplcursors>=0.3; extra == "viz"
306
+ Dynamic: license-file
303
307
 
304
308
  <p align="center">
305
309
  <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
306
310
  </p>
307
311
 
308
- [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.9.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
312
+ [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.12.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20docTR%20Guru-006BFF)](https://gurubase.io/g/doctr)
309
313
 
310
314
 
311
315
  **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -436,9 +440,22 @@ The KIE predictor results per page are in a dictionary format with each key repr
436
440
 
437
441
  ## Installation
438
442
 
443
+ > [!WARNING]
444
+ > **TensorFlow Backend Deprecation Notice**
445
+ >
446
+ > Using docTR with TensorFlow as a backend is deprecated and will be removed in the next major release (v1.0.0).
447
+ > We **recommend switching to the PyTorch backend**, which is more actively maintained and supports the latest features and models.
448
+ > Alternatively, you can use [OnnxTR](https://github.com/felixdittrich92/OnnxTR), which does **not** require TensorFlow or PyTorch.
449
+ >
450
+ > This decision was made based on several considerations:
451
+ >
452
+ > - Allows better focus on improving the core library
453
+ > - Frees up resources to develop new features faster
454
+ > - Enables more targeted optimizations with PyTorch
455
+
439
456
  ### Prerequisites
440
457
 
441
- Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
458
+ Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
442
459
 
443
460
  ### Latest release
444
461
 
@@ -502,6 +519,7 @@ Credits where it's due: this repository is implementing, among others, architect
502
519
  - MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
503
520
  - ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
504
521
  - PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
522
+ - VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).
505
523
 
506
524
  ## More goodies
507
525
 
@@ -557,37 +575,37 @@ Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to
557
575
 
558
576
  ### Docker container
559
577
 
560
- [We offer Docker container support for easy testing and deployment](https://github.com/mindee/doctr/pkgs/container/doctr).
578
+ We offer Docker container support for easy testing and deployment. [Here are the available Docker tags](https://github.com/mindee/doctr/pkgs/container/doctr).
561
579
 
562
580
  #### Using GPU with docTR Docker Images
563
581
 
564
- The docTR Docker images are GPU-ready and based on CUDA `11.8`.
565
- However, to use GPU support with these Docker images, please ensure that Docker is configured to use your GPU.
582
+ The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host's CUDA version is **at least `12.2`**, otherwise Torch or TensorFlow won't be able to initialize the GPU.
583
+ Please ensure that Docker is configured to use your GPU.
566
584
 
567
585
  To verify and configure GPU support for Docker, please follow the instructions provided in the [NVIDIA Container Toolkit Installation Guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).
568
586
 
569
587
  Once Docker is configured to use GPUs, you can run docTR Docker containers with GPU support:
570
588
 
571
589
  ```shell
572
- docker run -it --gpus all ghcr.io/mindee/doctr:tf-py3.8.18-gpu-2023-09 bash
590
+ docker run -it --gpus all ghcr.io/mindee/doctr:torch-py3.9.18-2024-10 bash
573
591
  ```
574
592
 
575
593
  #### Available Tags
576
594
 
577
- The Docker images for docTR follow a specific tag nomenclature: `<framework>-py<python_version>-<system>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:
595
+ The Docker images for docTR follow a specific tag nomenclature: `<deps>-py<python_version>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:
578
596
 
579
- - `<framework>`: `tf` (TensorFlow) or `torch` (PyTorch).
580
- - `<python_version>`: `3.8.18`, `3.9.18`, or `3.10.13`.
581
- - `<system>`: `cpu` or `gpu`
582
- - `<doctr_version>`: a tag >= `v0.7.1`
583
- - `<YYYY-MM>`: e.g. `2023-09`
597
+ - `<deps>`: `tf`, `torch`, `tf-viz-html-contrib` or `torch-viz-html-contrib`.
598
+ - `<python_version>`: `3.9.18`, `3.10.13` or `3.11.8`.
599
+ - `<doctr_version>`: a tag >= `v0.11.0`
600
+ - `<YYYY-MM>`: e.g. `2024-10`
584
601
 
585
602
  Here are examples of different image tags:
586
603
 
587
604
  | Tag | Description |
588
605
  |----------------------------|---------------------------------------------------|
589
- | `tf-py3.8.18-cpu-v0.7.1` | TensorFlow version `3.8.18` with docTR `v0.7.1`. |
590
- | `torch-py3.9.18-gpu-2023-09`| PyTorch version `3.9.18` with GPU support and a monthly build from `2023-09`. |
606
+ | `tf-py3.10.13-v0.11.0` | TensorFlow with Python `3.10.13` and docTR `v0.11.0`. |
607
+ | `torch-viz-html-contrib-py3.11.8-2024-10` | PyTorch with extra dependencies and Python `3.11.8`, built from the latest commit on `main` in `2024-10`. |
608
+ | `torch-py3.11.8-2024-10`| PyTorch with Python `3.11.8`, built from the latest commit on `main` in `2024-10`. |
591
609
 
592
610
  #### Building Docker Images Locally
593
611