python-doctr 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. doctr/__init__.py +1 -1
  2. doctr/contrib/__init__.py +0 -0
  3. doctr/contrib/artefacts.py +131 -0
  4. doctr/contrib/base.py +105 -0
  5. doctr/datasets/cord.py +10 -1
  6. doctr/datasets/datasets/pytorch.py +2 -2
  7. doctr/datasets/funsd.py +11 -1
  8. doctr/datasets/generator/base.py +6 -5
  9. doctr/datasets/ic03.py +11 -1
  10. doctr/datasets/ic13.py +10 -1
  11. doctr/datasets/iiit5k.py +26 -16
  12. doctr/datasets/imgur5k.py +11 -2
  13. doctr/datasets/loader.py +1 -6
  14. doctr/datasets/sroie.py +11 -1
  15. doctr/datasets/svhn.py +11 -1
  16. doctr/datasets/svt.py +11 -1
  17. doctr/datasets/synthtext.py +11 -1
  18. doctr/datasets/utils.py +9 -3
  19. doctr/datasets/vocabs.py +15 -4
  20. doctr/datasets/wildreceipt.py +12 -1
  21. doctr/file_utils.py +45 -12
  22. doctr/io/elements.py +52 -10
  23. doctr/io/html.py +2 -2
  24. doctr/io/image/pytorch.py +6 -8
  25. doctr/io/image/tensorflow.py +1 -1
  26. doctr/io/pdf.py +5 -2
  27. doctr/io/reader.py +6 -0
  28. doctr/models/__init__.py +0 -1
  29. doctr/models/_utils.py +57 -20
  30. doctr/models/builder.py +73 -15
  31. doctr/models/classification/magc_resnet/tensorflow.py +13 -6
  32. doctr/models/classification/mobilenet/pytorch.py +47 -9
  33. doctr/models/classification/mobilenet/tensorflow.py +51 -14
  34. doctr/models/classification/predictor/pytorch.py +28 -17
  35. doctr/models/classification/predictor/tensorflow.py +26 -16
  36. doctr/models/classification/resnet/tensorflow.py +21 -8
  37. doctr/models/classification/textnet/pytorch.py +3 -3
  38. doctr/models/classification/textnet/tensorflow.py +11 -5
  39. doctr/models/classification/vgg/tensorflow.py +9 -3
  40. doctr/models/classification/vit/tensorflow.py +10 -4
  41. doctr/models/classification/zoo.py +55 -19
  42. doctr/models/detection/_utils/__init__.py +1 -0
  43. doctr/models/detection/_utils/base.py +66 -0
  44. doctr/models/detection/differentiable_binarization/base.py +4 -3
  45. doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
  46. doctr/models/detection/differentiable_binarization/tensorflow.py +34 -12
  47. doctr/models/detection/fast/base.py +6 -5
  48. doctr/models/detection/fast/pytorch.py +4 -4
  49. doctr/models/detection/fast/tensorflow.py +15 -12
  50. doctr/models/detection/linknet/base.py +4 -3
  51. doctr/models/detection/linknet/tensorflow.py +23 -11
  52. doctr/models/detection/predictor/pytorch.py +15 -1
  53. doctr/models/detection/predictor/tensorflow.py +17 -3
  54. doctr/models/detection/zoo.py +7 -2
  55. doctr/models/factory/hub.py +8 -18
  56. doctr/models/kie_predictor/base.py +13 -3
  57. doctr/models/kie_predictor/pytorch.py +45 -20
  58. doctr/models/kie_predictor/tensorflow.py +44 -17
  59. doctr/models/modules/layers/pytorch.py +2 -3
  60. doctr/models/modules/layers/tensorflow.py +6 -8
  61. doctr/models/modules/transformer/pytorch.py +2 -2
  62. doctr/models/modules/transformer/tensorflow.py +0 -2
  63. doctr/models/modules/vision_transformer/pytorch.py +1 -1
  64. doctr/models/modules/vision_transformer/tensorflow.py +1 -1
  65. doctr/models/predictor/base.py +97 -58
  66. doctr/models/predictor/pytorch.py +35 -20
  67. doctr/models/predictor/tensorflow.py +35 -18
  68. doctr/models/preprocessor/pytorch.py +4 -4
  69. doctr/models/preprocessor/tensorflow.py +3 -2
  70. doctr/models/recognition/crnn/tensorflow.py +8 -6
  71. doctr/models/recognition/master/pytorch.py +2 -2
  72. doctr/models/recognition/master/tensorflow.py +9 -4
  73. doctr/models/recognition/parseq/pytorch.py +4 -3
  74. doctr/models/recognition/parseq/tensorflow.py +14 -11
  75. doctr/models/recognition/sar/pytorch.py +7 -6
  76. doctr/models/recognition/sar/tensorflow.py +10 -12
  77. doctr/models/recognition/vitstr/pytorch.py +1 -1
  78. doctr/models/recognition/vitstr/tensorflow.py +9 -4
  79. doctr/models/recognition/zoo.py +1 -1
  80. doctr/models/utils/pytorch.py +1 -1
  81. doctr/models/utils/tensorflow.py +15 -15
  82. doctr/models/zoo.py +2 -2
  83. doctr/py.typed +0 -0
  84. doctr/transforms/functional/base.py +1 -1
  85. doctr/transforms/functional/pytorch.py +5 -5
  86. doctr/transforms/modules/base.py +37 -15
  87. doctr/transforms/modules/pytorch.py +73 -14
  88. doctr/transforms/modules/tensorflow.py +78 -19
  89. doctr/utils/fonts.py +7 -5
  90. doctr/utils/geometry.py +141 -31
  91. doctr/utils/metrics.py +34 -175
  92. doctr/utils/reconstitution.py +212 -0
  93. doctr/utils/visualization.py +5 -118
  94. doctr/version.py +1 -1
  95. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/METADATA +85 -81
  96. python_doctr-0.10.0.dist-info/RECORD +173 -0
  97. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/WHEEL +1 -1
  98. doctr/models/artefacts/__init__.py +0 -2
  99. doctr/models/artefacts/barcode.py +0 -74
  100. doctr/models/artefacts/face.py +0 -63
  101. doctr/models/obj_detection/__init__.py +0 -1
  102. doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
  103. doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
  104. python_doctr-0.8.1.dist-info/RECORD +0 -173
  105. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/LICENSE +0 -0
  106. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/top_level.txt +0 -0
  107. {python_doctr-0.8.1.dist-info → python_doctr-0.10.0.dist-info}/zip-safe +0 -0
@@ -0,0 +1,212 @@
1
+ # Copyright (C) 2021-2024, Mindee.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+ import logging
6
+ from typing import Any, Dict, Optional
7
+
8
+ import numpy as np
9
+ from anyascii import anyascii
10
+ from PIL import Image, ImageDraw
11
+
12
+ from .fonts import get_font
13
+
14
+ __all__ = ["synthesize_page", "synthesize_kie_page"]
15
+
16
+
17
+ # Global variable to avoid multiple warnings
18
+ ROTATION_WARNING = False
19
+
20
+
21
+ def _warn_rotation(entry: Dict[str, Any]) -> None: # pragma: no cover
22
+ global ROTATION_WARNING
23
+ if not ROTATION_WARNING and len(entry["geometry"]) == 4:
24
+ logging.warning("Polygons with larger rotations will lead to inaccurate rendering")
25
+ ROTATION_WARNING = True
26
+
27
+
28
+ def _synthesize(
29
+ response: Image.Image,
30
+ entry: Dict[str, Any],
31
+ w: int,
32
+ h: int,
33
+ draw_proba: bool = False,
34
+ font_family: Optional[str] = None,
35
+ smoothing_factor: float = 0.75,
36
+ min_font_size: int = 6,
37
+ max_font_size: int = 50,
38
+ ) -> Image.Image:
39
+ if len(entry["geometry"]) == 2:
40
+ (xmin, ymin), (xmax, ymax) = entry["geometry"]
41
+ polygon = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
42
+ else:
43
+ polygon = entry["geometry"]
44
+
45
+ # Calculate the bounding box of the word
46
+ x_coords, y_coords = zip(*polygon)
47
+ xmin, ymin, xmax, ymax = (
48
+ int(round(w * min(x_coords))),
49
+ int(round(h * min(y_coords))),
50
+ int(round(w * max(x_coords))),
51
+ int(round(h * max(y_coords))),
52
+ )
53
+ word_width = xmax - xmin
54
+ word_height = ymax - ymin
55
+
56
+ # If lines are provided instead of words, concatenate the word entries
57
+ if "words" in entry:
58
+ word_text = " ".join(word["value"] for word in entry["words"])
59
+ else:
60
+ word_text = entry["value"]
61
+ # Find the optimal font size
62
+ try:
63
+ font_size = min(word_height, max_font_size)
64
+ font = get_font(font_family, font_size)
65
+ text_width, text_height = font.getbbox(word_text)[2:4]
66
+
67
+ while (text_width > word_width or text_height > word_height) and font_size > min_font_size:
68
+ font_size = max(int(font_size * smoothing_factor), min_font_size)
69
+ font = get_font(font_family, font_size)
70
+ text_width, text_height = font.getbbox(word_text)[2:4]
71
+ except ValueError:
72
+ font = get_font(font_family, min_font_size)
73
+
74
+ # Create a mask for the word
75
+ mask = Image.new("L", (w, h), 0)
76
+ ImageDraw.Draw(mask).polygon([(int(round(w * x)), int(round(h * y))) for x, y in polygon], fill=255)
77
+
78
+ # Draw the word text
79
+ d = ImageDraw.Draw(response)
80
+ try:
81
+ try:
82
+ d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt")
83
+ except UnicodeEncodeError:
84
+ d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt")
85
+ # Catch generic exceptions to avoid crashing the whole rendering
86
+ except Exception: # pragma: no cover
87
+ logging.warning(f"Could not render word: {word_text}")
88
+
89
+ if draw_proba:
90
+ confidence = (
91
+ entry["confidence"]
92
+ if "confidence" in entry
93
+ else sum(w["confidence"] for w in entry["words"]) / len(entry["words"])
94
+ )
95
+ p = int(255 * confidence)
96
+ color = (255 - p, 0, p) # Red to blue gradient based on probability
97
+ d.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=2)
98
+
99
+ prob_font = get_font(font_family, 20)
100
+ prob_text = f"{confidence:.2f}"
101
+ prob_text_width, prob_text_height = prob_font.getbbox(prob_text)[2:4]
102
+
103
+ # Position the probability slightly above the bounding box
104
+ prob_x_offset = (word_width - prob_text_width) // 2
105
+ prob_y_offset = ymin - prob_text_height - 2
106
+ prob_y_offset = max(0, prob_y_offset)
107
+
108
+ d.text((xmin + prob_x_offset, prob_y_offset), prob_text, font=prob_font, fill=color, anchor="lt")
109
+
110
+ return response
111
+
112
+
113
+ def synthesize_page(
114
+ page: Dict[str, Any],
115
+ draw_proba: bool = False,
116
+ font_family: Optional[str] = None,
117
+ smoothing_factor: float = 0.95,
118
+ min_font_size: int = 8,
119
+ max_font_size: int = 50,
120
+ ) -> np.ndarray:
121
+ """Draw a the content of the element page (OCR response) on a blank page.
122
+
123
+ Args:
124
+ ----
125
+ page: exported Page object to represent
126
+ draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
127
+ font_family: family of the font
128
+ smoothing_factor: factor to smooth the font size
129
+ min_font_size: minimum font size
130
+ max_font_size: maximum font size
131
+
132
+ Returns:
133
+ -------
134
+ the synthesized page
135
+ """
136
+ # Draw template
137
+ h, w = page["dimensions"]
138
+ response = Image.new("RGB", (w, h), color=(255, 255, 255))
139
+
140
+ for block in page["blocks"]:
141
+ # If lines are provided use these to get better rendering results
142
+ if len(block["lines"]) > 1:
143
+ for line in block["lines"]:
144
+ _warn_rotation(block) # pragma: no cover
145
+ response = _synthesize(
146
+ response=response,
147
+ entry=line,
148
+ w=w,
149
+ h=h,
150
+ draw_proba=draw_proba,
151
+ font_family=font_family,
152
+ smoothing_factor=smoothing_factor,
153
+ min_font_size=min_font_size,
154
+ max_font_size=max_font_size,
155
+ )
156
+ # Otherwise, draw each word
157
+ else:
158
+ for line in block["lines"]:
159
+ _warn_rotation(block) # pragma: no cover
160
+ for word in line["words"]:
161
+ response = _synthesize(
162
+ response=response,
163
+ entry=word,
164
+ w=w,
165
+ h=h,
166
+ draw_proba=draw_proba,
167
+ font_family=font_family,
168
+ smoothing_factor=smoothing_factor,
169
+ min_font_size=min_font_size,
170
+ max_font_size=max_font_size,
171
+ )
172
+
173
+ return np.array(response, dtype=np.uint8)
174
+
175
+
176
+ def synthesize_kie_page(
177
+ page: Dict[str, Any],
178
+ draw_proba: bool = False,
179
+ font_family: Optional[str] = None,
180
+ ) -> np.ndarray:
181
+ """Draw a the content of the element page (OCR response) on a blank page.
182
+
183
+ Args:
184
+ ----
185
+ page: exported Page object to represent
186
+ draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
187
+ font_family: family of the font
191
+
192
+ Returns:
193
+ -------
194
+ the synthesized page
195
+ """
196
+ # Draw template
197
+ h, w = page["dimensions"]
198
+ response = Image.new("RGB", (w, h), color=(255, 255, 255))
199
+
200
+ # Draw each word
201
+ for predictions in page["predictions"].values():
202
+ for prediction in predictions:
203
+ _warn_rotation(prediction) # pragma: no cover
204
+ response = _synthesize(
205
+ response=response,
206
+ entry=prediction,
207
+ w=w,
208
+ h=h,
209
+ draw_proba=draw_proba,
210
+ font_family=font_family,
211
+ )
212
+ return np.array(response, dtype=np.uint8)
@@ -9,16 +9,12 @@ from typing import Any, Dict, List, Optional, Tuple, Union
9
9
  import cv2
10
10
  import matplotlib.patches as patches
11
11
  import matplotlib.pyplot as plt
12
- import mplcursors
13
12
  import numpy as np
14
13
  from matplotlib.figure import Figure
15
- from PIL import Image, ImageDraw
16
- from unidecode import unidecode
17
14
 
18
15
  from .common_types import BoundingBox, Polygon4P
19
- from .fonts import get_font
20
16
 
21
- __all__ = ["visualize_page", "synthesize_page", "visualize_kie_page", "synthesize_kie_page", "draw_boxes"]
17
+ __all__ = ["visualize_page", "visualize_kie_page", "draw_boxes"]
22
18
 
23
19
 
24
20
  def rect_patch(
@@ -281,6 +277,8 @@ def visualize_page(
281
277
  artists.append(rect)
282
278
 
283
279
  if interactive:
280
+ import mplcursors
281
+
284
282
  # Create mlp Cursor to hover patches in artists
285
283
  mplcursors.Cursor(artists, hover=2).connect("add", lambda sel: sel.annotation.set_text(sel.artist.get_label()))
286
284
  fig.tight_layout(pad=0.0)
@@ -288,63 +286,6 @@ def visualize_page(
288
286
  return fig
289
287
 
290
288
 
291
- def synthesize_page(
292
- page: Dict[str, Any],
293
- draw_proba: bool = False,
294
- font_family: Optional[str] = None,
295
- ) -> np.ndarray:
296
- """Draw a the content of the element page (OCR response) on a blank page.
297
-
298
- Args:
299
- ----
300
- page: exported Page object to represent
301
- draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
302
- font_size: size of the font, default font = 13
303
- font_family: family of the font
304
-
305
- Returns:
306
- -------
307
- the synthesized page
308
- """
309
- # Draw template
310
- h, w = page["dimensions"]
311
- response = 255 * np.ones((h, w, 3), dtype=np.int32)
312
-
313
- # Draw each word
314
- for block in page["blocks"]:
315
- for line in block["lines"]:
316
- for word in line["words"]:
317
- # Get aboslute word geometry
318
- (xmin, ymin), (xmax, ymax) = word["geometry"]
319
- xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
320
- ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
321
-
322
- # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
323
- font = get_font(font_family, int(0.75 * (ymax - ymin)))
324
- img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
325
- d = ImageDraw.Draw(img)
326
- # Draw in black the value of the word
327
- try:
328
- d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
329
- except UnicodeEncodeError:
330
- # When character cannot be encoded, use its unidecode version
331
- d.text((0, 0), unidecode(word["value"]), font=font, fill=(0, 0, 0))
332
-
333
- # Colorize if draw_proba
334
- if draw_proba:
335
- p = int(255 * word["confidence"])
336
- mask = np.where(np.array(img) == 0, 1, 0)
337
- proba: np.ndarray = np.array([255 - p, 0, p])
338
- color = mask * proba[np.newaxis, np.newaxis, :]
339
- white_mask = 255 * (1 - mask)
340
- img = color + white_mask
341
-
342
- # Write to response page
343
- response[ymin:ymax, xmin:xmax, :] = np.array(img)
344
-
345
- return response
346
-
347
-
348
289
  def visualize_kie_page(
349
290
  page: Dict[str, Any],
350
291
  image: np.ndarray,
@@ -413,6 +354,8 @@ def visualize_kie_page(
413
354
  artists.append(rect)
414
355
 
415
356
  if interactive:
357
+ import mplcursors
358
+
416
359
  # Create mlp Cursor to hover patches in artists
417
360
  mplcursors.Cursor(artists, hover=2).connect("add", lambda sel: sel.annotation.set_text(sel.artist.get_label()))
418
361
  fig.tight_layout(pad=0.0)
@@ -420,62 +363,6 @@ def visualize_kie_page(
420
363
  return fig
421
364
 
422
365
 
423
- def synthesize_kie_page(
424
- page: Dict[str, Any],
425
- draw_proba: bool = False,
426
- font_family: Optional[str] = None,
427
- ) -> np.ndarray:
428
- """Draw a the content of the element page (OCR response) on a blank page.
429
-
430
- Args:
431
- ----
432
- page: exported Page object to represent
433
- draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
434
- font_size: size of the font, default font = 13
435
- font_family: family of the font
436
-
437
- Returns:
438
- -------
439
- the synthesized page
440
- """
441
- # Draw template
442
- h, w = page["dimensions"]
443
- response = 255 * np.ones((h, w, 3), dtype=np.int32)
444
-
445
- # Draw each word
446
- for predictions in page["predictions"].values():
447
- for prediction in predictions:
448
- # Get aboslute word geometry
449
- (xmin, ymin), (xmax, ymax) = prediction["geometry"]
450
- xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
451
- ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
452
-
453
- # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
454
- font = get_font(font_family, int(0.75 * (ymax - ymin)))
455
- img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
456
- d = ImageDraw.Draw(img)
457
- # Draw in black the value of the word
458
- try:
459
- d.text((0, 0), prediction["value"], font=font, fill=(0, 0, 0))
460
- except UnicodeEncodeError:
461
- # When character cannot be encoded, use its unidecode version
462
- d.text((0, 0), unidecode(prediction["value"]), font=font, fill=(0, 0, 0))
463
-
464
- # Colorize if draw_proba
465
- if draw_proba:
466
- p = int(255 * prediction["confidence"])
467
- mask = np.where(np.array(img) == 0, 1, 0)
468
- proba: np.ndarray = np.array([255 - p, 0, p])
469
- color = mask * proba[np.newaxis, np.newaxis, :]
470
- white_mask = 255 * (1 - mask)
471
- img = color + white_mask
472
-
473
- # Write to response page
474
- response[ymin:ymax, xmin:xmax, :] = np.array(img)
475
-
476
- return response
477
-
478
-
479
366
  def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: Optional[Tuple[int, int, int]] = None, **kwargs) -> None:
480
367
  """Draw an array of relative straight boxes on an image
481
368
 
doctr/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = 'v0.8.1'
1
+ __version__ = 'v0.10.0'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: python-doctr
3
- Version: 0.8.1
3
+ Version: 0.10.0
4
4
  Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
5
5
  Author-email: Mindee <contact@mindee.com>
6
6
  Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -219,87 +219,93 @@ Classifier: License :: OSI Approved :: Apache Software License
219
219
  Classifier: Natural Language :: English
220
220
  Classifier: Operating System :: OS Independent
221
221
  Classifier: Programming Language :: Python :: 3
222
- Classifier: Programming Language :: Python :: 3.8
223
222
  Classifier: Programming Language :: Python :: 3.9
224
223
  Classifier: Programming Language :: Python :: 3.10
224
+ Classifier: Programming Language :: Python :: 3.11
225
225
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
226
- Requires-Python: <4,>=3.8.0
226
+ Requires-Python: <4,>=3.9.0
227
227
  Description-Content-Type: text/markdown
228
228
  License-File: LICENSE
229
- Requires-Dist: importlib-metadata
230
- Requires-Dist: numpy <2.0.0,>=1.16.0
231
- Requires-Dist: scipy <2.0.0,>=1.4.0
232
- Requires-Dist: h5py <4.0.0,>=3.1.0
233
- Requires-Dist: opencv-python <5.0.0,>=4.5.0
234
- Requires-Dist: pypdfium2 <5.0.0,>=4.0.0
235
- Requires-Dist: pyclipper <2.0.0,>=1.2.0
236
- Requires-Dist: shapely <3.0.0,>=1.6.0
237
- Requires-Dist: langdetect <2.0.0,>=1.0.9
238
- Requires-Dist: rapidfuzz <4.0.0,>=3.0.0
239
- Requires-Dist: huggingface-hub <1.0.0,>=0.20.0
240
- Requires-Dist: matplotlib >=3.1.0
241
- Requires-Dist: weasyprint >=55.0
242
- Requires-Dist: Pillow >=9.2.0
243
- Requires-Dist: defusedxml >=0.7.0
244
- Requires-Dist: mplcursors >=0.3
245
- Requires-Dist: unidecode >=1.0.0
246
- Requires-Dist: tqdm >=4.30.0
229
+ Requires-Dist: numpy<3.0.0,>=1.16.0
230
+ Requires-Dist: scipy<2.0.0,>=1.4.0
231
+ Requires-Dist: h5py<4.0.0,>=3.1.0
232
+ Requires-Dist: opencv-python<5.0.0,>=4.5.0
233
+ Requires-Dist: pypdfium2<5.0.0,>=4.11.0
234
+ Requires-Dist: pyclipper<2.0.0,>=1.2.0
235
+ Requires-Dist: shapely<3.0.0,>=1.6.0
236
+ Requires-Dist: langdetect<2.0.0,>=1.0.9
237
+ Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
238
+ Requires-Dist: huggingface-hub<1.0.0,>=0.20.0
239
+ Requires-Dist: Pillow>=9.2.0
240
+ Requires-Dist: defusedxml>=0.7.0
241
+ Requires-Dist: anyascii>=0.3.2
242
+ Requires-Dist: tqdm>=4.30.0
243
+ Provides-Extra: contrib
244
+ Requires-Dist: onnxruntime>=1.11.0; extra == "contrib"
247
245
  Provides-Extra: dev
248
- Requires-Dist: tensorflow <2.16.0,>=2.11.0 ; extra == 'dev'
249
- Requires-Dist: tf2onnx <2.0.0,>=1.16.0 ; extra == 'dev'
250
- Requires-Dist: torch <3.0.0,>=1.12.0 ; extra == 'dev'
251
- Requires-Dist: torchvision >=0.13.0 ; extra == 'dev'
252
- Requires-Dist: onnx <3.0.0,>=1.12.0 ; extra == 'dev'
253
- Requires-Dist: pytest >=5.3.2 ; extra == 'dev'
254
- Requires-Dist: coverage[toml] >=4.5.4 ; extra == 'dev'
255
- Requires-Dist: hdf5storage >=0.1.18 ; extra == 'dev'
256
- Requires-Dist: onnxruntime >=1.11.0 ; extra == 'dev'
257
- Requires-Dist: requests >=2.20.0 ; extra == 'dev'
258
- Requires-Dist: psutil >=5.9.5 ; extra == 'dev'
259
- Requires-Dist: ruff >=0.1.5 ; extra == 'dev'
260
- Requires-Dist: mypy >=0.812 ; extra == 'dev'
261
- Requires-Dist: pre-commit >=2.17.0 ; extra == 'dev'
262
- Requires-Dist: sphinx !=3.5.0,>=3.0.0 ; extra == 'dev'
263
- Requires-Dist: sphinxemoji >=0.1.8 ; extra == 'dev'
264
- Requires-Dist: sphinx-copybutton >=0.3.1 ; extra == 'dev'
265
- Requires-Dist: docutils <0.21 ; extra == 'dev'
266
- Requires-Dist: recommonmark >=0.7.1 ; extra == 'dev'
267
- Requires-Dist: sphinx-markdown-tables >=0.0.15 ; extra == 'dev'
268
- Requires-Dist: sphinx-tabs >=3.3.0 ; extra == 'dev'
269
- Requires-Dist: furo >=2022.3.4 ; extra == 'dev'
246
+ Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "dev"
247
+ Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "dev"
248
+ Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
249
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == "dev"
250
+ Requires-Dist: torchvision>=0.15.0; extra == "dev"
251
+ Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "dev"
252
+ Requires-Dist: weasyprint>=55.0; extra == "dev"
253
+ Requires-Dist: matplotlib>=3.1.0; extra == "dev"
254
+ Requires-Dist: mplcursors>=0.3; extra == "dev"
255
+ Requires-Dist: pytest>=5.3.2; extra == "dev"
256
+ Requires-Dist: coverage[toml]>=4.5.4; extra == "dev"
257
+ Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
258
+ Requires-Dist: requests>=2.20.0; extra == "dev"
259
+ Requires-Dist: psutil>=5.9.5; extra == "dev"
260
+ Requires-Dist: ruff>=0.3.0; extra == "dev"
261
+ Requires-Dist: mypy>=1.0; extra == "dev"
262
+ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
263
+ Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "dev"
264
+ Requires-Dist: sphinxemoji>=0.1.8; extra == "dev"
265
+ Requires-Dist: sphinx-copybutton>=0.3.1; extra == "dev"
266
+ Requires-Dist: docutils<0.22; extra == "dev"
267
+ Requires-Dist: recommonmark>=0.7.1; extra == "dev"
268
+ Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
269
+ Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
270
+ Requires-Dist: furo>=2022.3.4; extra == "dev"
270
271
  Provides-Extra: docs
271
- Requires-Dist: sphinx !=3.5.0,>=3.0.0 ; extra == 'docs'
272
- Requires-Dist: sphinxemoji >=0.1.8 ; extra == 'docs'
273
- Requires-Dist: sphinx-copybutton >=0.3.1 ; extra == 'docs'
274
- Requires-Dist: docutils <0.21 ; extra == 'docs'
275
- Requires-Dist: recommonmark >=0.7.1 ; extra == 'docs'
276
- Requires-Dist: sphinx-markdown-tables >=0.0.15 ; extra == 'docs'
277
- Requires-Dist: sphinx-tabs >=3.3.0 ; extra == 'docs'
278
- Requires-Dist: furo >=2022.3.4 ; extra == 'docs'
272
+ Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "docs"
273
+ Requires-Dist: sphinxemoji>=0.1.8; extra == "docs"
274
+ Requires-Dist: sphinx-copybutton>=0.3.1; extra == "docs"
275
+ Requires-Dist: docutils<0.22; extra == "docs"
276
+ Requires-Dist: recommonmark>=0.7.1; extra == "docs"
277
+ Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
278
+ Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
279
+ Requires-Dist: furo>=2022.3.4; extra == "docs"
280
+ Provides-Extra: html
281
+ Requires-Dist: weasyprint>=55.0; extra == "html"
279
282
  Provides-Extra: quality
280
- Requires-Dist: ruff >=0.1.5 ; extra == 'quality'
281
- Requires-Dist: mypy >=0.812 ; extra == 'quality'
282
- Requires-Dist: pre-commit >=2.17.0 ; extra == 'quality'
283
+ Requires-Dist: ruff>=0.1.5; extra == "quality"
284
+ Requires-Dist: mypy>=0.812; extra == "quality"
285
+ Requires-Dist: pre-commit>=2.17.0; extra == "quality"
283
286
  Provides-Extra: testing
284
- Requires-Dist: pytest >=5.3.2 ; extra == 'testing'
285
- Requires-Dist: coverage[toml] >=4.5.4 ; extra == 'testing'
286
- Requires-Dist: hdf5storage >=0.1.18 ; extra == 'testing'
287
- Requires-Dist: onnxruntime >=1.11.0 ; extra == 'testing'
288
- Requires-Dist: requests >=2.20.0 ; extra == 'testing'
289
- Requires-Dist: psutil >=5.9.5 ; extra == 'testing'
287
+ Requires-Dist: pytest>=5.3.2; extra == "testing"
288
+ Requires-Dist: coverage[toml]>=4.5.4; extra == "testing"
289
+ Requires-Dist: onnxruntime>=1.11.0; extra == "testing"
290
+ Requires-Dist: requests>=2.20.0; extra == "testing"
291
+ Requires-Dist: psutil>=5.9.5; extra == "testing"
290
292
  Provides-Extra: tf
291
- Requires-Dist: tensorflow <2.16.0,>=2.11.0 ; extra == 'tf'
292
- Requires-Dist: tf2onnx <2.0.0,>=1.16.0 ; extra == 'tf'
293
+ Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "tf"
294
+ Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "tf"
295
+ Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "tf"
293
296
  Provides-Extra: torch
294
- Requires-Dist: torch <3.0.0,>=1.12.0 ; extra == 'torch'
295
- Requires-Dist: torchvision >=0.13.0 ; extra == 'torch'
296
- Requires-Dist: onnx <3.0.0,>=1.12.0 ; extra == 'torch'
297
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == "torch"
298
+ Requires-Dist: torchvision>=0.15.0; extra == "torch"
299
+ Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "torch"
300
+ Provides-Extra: viz
301
+ Requires-Dist: matplotlib>=3.1.0; extra == "viz"
302
+ Requires-Dist: mplcursors>=0.3; extra == "viz"
297
303
 
298
304
  <p align="center">
299
305
  <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
300
306
  </p>
301
307
 
302
- [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.8.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
308
+ [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.9.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
303
309
 
304
310
 
305
311
  **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -334,7 +340,7 @@ from doctr.io import DocumentFile
334
340
  pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
335
341
  # Image
336
342
  single_img_doc = DocumentFile.from_images("path/to/your/img.jpg")
337
- # Webpage
343
+ # Webpage (requires `weasyprint` to be installed)
338
344
  webpage_doc = DocumentFile.from_url("https://www.yoursite.com")
339
345
  # Multiple page images
340
346
  multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"])
@@ -372,6 +378,7 @@ If both options are set to False, the predictor will always fit and return rotat
372
378
  To interpret your model's predictions, you can visualize them interactively as follows:
373
379
 
374
380
  ```python
381
+ # Display the result (requires matplotlib & mplcursors to be installed)
375
382
  result.show()
376
383
  ```
377
384
 
@@ -431,17 +438,7 @@ The KIE predictor results per page are in a dictionary format with each key repr
431
438
 
432
439
  ### Prerequisites
433
440
 
434
- Python 3.8 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
435
-
436
- Since we use [weasyprint](https://weasyprint.org/), you will need extra dependencies if you are not running Linux.
437
-
438
- For MacOS users, you can install them as follows:
439
-
440
- ```shell
441
- brew install cairo pango gdk-pixbuf libffi
442
- ```
443
-
444
- For Windows users, those dependencies are included in GTK. You can find the latest installer over [here](https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer/releases).
441
+ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
445
442
 
446
443
  ### Latest release
447
444
 
@@ -460,12 +457,14 @@ We try to keep framework-specific dependencies to a minimum. You can install fra
460
457
  pip install "python-doctr[tf]"
461
458
  # for PyTorch
462
459
  pip install "python-doctr[torch]"
460
+ # optional dependencies for visualization, html, and contrib modules can be installed as follows:
461
+ pip install "python-doctr[torch,viz,html,contib]"
463
462
  ```
464
463
 
465
464
  For MacBooks with M1 chip, you will need some additional packages or specific versions:
466
465
 
467
466
  - TensorFlow 2: [metal plugin](https://developer.apple.com/metal/tensorflow-plugin/)
468
- - PyTorch: [version >= 1.12.0](https://pytorch.org/get-started/locally/#start-locally)
467
+ - PyTorch: [version >= 2.0.0](https://pytorch.org/get-started/locally/#start-locally)
469
468
 
470
469
  ### Developer mode
471
470
 
@@ -647,9 +646,14 @@ Your API should now be running locally on your port 8002. Access your automatica
647
646
 
648
647
  ```python
649
648
  import requests
649
+
650
+ params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"}
651
+
650
652
  with open('/path/to/your/doc.jpg', 'rb') as f:
651
- data = f.read()
652
- response = requests.post("http://localhost:8002/ocr", files={'file': data}).json()
653
+ files = [ # application/pdf, image/jpeg, image/png supported
654
+ ("files", ("doc.jpg", f.read(), "image/jpeg")),
655
+ ]
656
+ print(requests.post("http://localhost:8080/ocr", params=params, files=files).json())
653
657
  ```
654
658
 
655
659
  ### Example notebooks