python-doctr 0.12.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. doctr/__init__.py +0 -1
  2. doctr/contrib/artefacts.py +1 -1
  3. doctr/contrib/base.py +1 -1
  4. doctr/datasets/__init__.py +0 -5
  5. doctr/datasets/coco_text.py +1 -1
  6. doctr/datasets/cord.py +1 -1
  7. doctr/datasets/datasets/__init__.py +1 -6
  8. doctr/datasets/datasets/base.py +1 -1
  9. doctr/datasets/datasets/pytorch.py +3 -3
  10. doctr/datasets/detection.py +1 -1
  11. doctr/datasets/doc_artefacts.py +1 -1
  12. doctr/datasets/funsd.py +1 -1
  13. doctr/datasets/generator/__init__.py +1 -6
  14. doctr/datasets/generator/base.py +1 -1
  15. doctr/datasets/generator/pytorch.py +1 -1
  16. doctr/datasets/ic03.py +1 -1
  17. doctr/datasets/ic13.py +1 -1
  18. doctr/datasets/iiit5k.py +1 -1
  19. doctr/datasets/iiithws.py +1 -1
  20. doctr/datasets/imgur5k.py +1 -1
  21. doctr/datasets/mjsynth.py +1 -1
  22. doctr/datasets/ocr.py +1 -1
  23. doctr/datasets/orientation.py +1 -1
  24. doctr/datasets/recognition.py +1 -1
  25. doctr/datasets/sroie.py +1 -1
  26. doctr/datasets/svhn.py +1 -1
  27. doctr/datasets/svt.py +1 -1
  28. doctr/datasets/synthtext.py +1 -1
  29. doctr/datasets/utils.py +1 -1
  30. doctr/datasets/vocabs.py +1 -3
  31. doctr/datasets/wildreceipt.py +1 -1
  32. doctr/file_utils.py +3 -102
  33. doctr/io/elements.py +1 -1
  34. doctr/io/html.py +1 -1
  35. doctr/io/image/__init__.py +1 -7
  36. doctr/io/image/base.py +1 -1
  37. doctr/io/image/pytorch.py +2 -2
  38. doctr/io/pdf.py +1 -1
  39. doctr/io/reader.py +1 -1
  40. doctr/models/_utils.py +56 -18
  41. doctr/models/builder.py +1 -1
  42. doctr/models/classification/magc_resnet/__init__.py +1 -6
  43. doctr/models/classification/magc_resnet/pytorch.py +3 -3
  44. doctr/models/classification/mobilenet/__init__.py +1 -6
  45. doctr/models/classification/mobilenet/pytorch.py +1 -1
  46. doctr/models/classification/predictor/__init__.py +1 -6
  47. doctr/models/classification/predictor/pytorch.py +2 -2
  48. doctr/models/classification/resnet/__init__.py +1 -6
  49. doctr/models/classification/resnet/pytorch.py +1 -1
  50. doctr/models/classification/textnet/__init__.py +1 -6
  51. doctr/models/classification/textnet/pytorch.py +2 -2
  52. doctr/models/classification/vgg/__init__.py +1 -6
  53. doctr/models/classification/vgg/pytorch.py +1 -1
  54. doctr/models/classification/vip/__init__.py +1 -4
  55. doctr/models/classification/vip/layers/__init__.py +1 -4
  56. doctr/models/classification/vip/layers/pytorch.py +2 -2
  57. doctr/models/classification/vip/pytorch.py +1 -1
  58. doctr/models/classification/vit/__init__.py +1 -6
  59. doctr/models/classification/vit/pytorch.py +3 -3
  60. doctr/models/classification/zoo.py +7 -12
  61. doctr/models/core.py +1 -1
  62. doctr/models/detection/_utils/__init__.py +1 -6
  63. doctr/models/detection/_utils/base.py +1 -1
  64. doctr/models/detection/_utils/pytorch.py +1 -1
  65. doctr/models/detection/core.py +2 -2
  66. doctr/models/detection/differentiable_binarization/__init__.py +1 -6
  67. doctr/models/detection/differentiable_binarization/base.py +5 -13
  68. doctr/models/detection/differentiable_binarization/pytorch.py +4 -4
  69. doctr/models/detection/fast/__init__.py +1 -6
  70. doctr/models/detection/fast/base.py +5 -15
  71. doctr/models/detection/fast/pytorch.py +5 -5
  72. doctr/models/detection/linknet/__init__.py +1 -6
  73. doctr/models/detection/linknet/base.py +4 -13
  74. doctr/models/detection/linknet/pytorch.py +3 -3
  75. doctr/models/detection/predictor/__init__.py +1 -6
  76. doctr/models/detection/predictor/pytorch.py +2 -2
  77. doctr/models/detection/zoo.py +16 -33
  78. doctr/models/factory/hub.py +26 -34
  79. doctr/models/kie_predictor/__init__.py +1 -6
  80. doctr/models/kie_predictor/base.py +1 -1
  81. doctr/models/kie_predictor/pytorch.py +3 -7
  82. doctr/models/modules/layers/__init__.py +1 -6
  83. doctr/models/modules/layers/pytorch.py +4 -4
  84. doctr/models/modules/transformer/__init__.py +1 -6
  85. doctr/models/modules/transformer/pytorch.py +3 -3
  86. doctr/models/modules/vision_transformer/__init__.py +1 -6
  87. doctr/models/modules/vision_transformer/pytorch.py +1 -1
  88. doctr/models/predictor/__init__.py +1 -6
  89. doctr/models/predictor/base.py +4 -9
  90. doctr/models/predictor/pytorch.py +3 -6
  91. doctr/models/preprocessor/__init__.py +1 -6
  92. doctr/models/preprocessor/pytorch.py +28 -33
  93. doctr/models/recognition/core.py +1 -1
  94. doctr/models/recognition/crnn/__init__.py +1 -6
  95. doctr/models/recognition/crnn/pytorch.py +7 -7
  96. doctr/models/recognition/master/__init__.py +1 -6
  97. doctr/models/recognition/master/base.py +1 -1
  98. doctr/models/recognition/master/pytorch.py +6 -6
  99. doctr/models/recognition/parseq/__init__.py +1 -6
  100. doctr/models/recognition/parseq/base.py +1 -1
  101. doctr/models/recognition/parseq/pytorch.py +6 -6
  102. doctr/models/recognition/predictor/__init__.py +1 -6
  103. doctr/models/recognition/predictor/_utils.py +8 -17
  104. doctr/models/recognition/predictor/pytorch.py +2 -3
  105. doctr/models/recognition/sar/__init__.py +1 -6
  106. doctr/models/recognition/sar/pytorch.py +4 -4
  107. doctr/models/recognition/utils.py +1 -1
  108. doctr/models/recognition/viptr/__init__.py +1 -4
  109. doctr/models/recognition/viptr/pytorch.py +4 -4
  110. doctr/models/recognition/vitstr/__init__.py +1 -6
  111. doctr/models/recognition/vitstr/base.py +1 -1
  112. doctr/models/recognition/vitstr/pytorch.py +4 -4
  113. doctr/models/recognition/zoo.py +14 -14
  114. doctr/models/utils/__init__.py +1 -6
  115. doctr/models/utils/pytorch.py +3 -2
  116. doctr/models/zoo.py +1 -1
  117. doctr/transforms/functional/__init__.py +1 -6
  118. doctr/transforms/functional/base.py +3 -2
  119. doctr/transforms/functional/pytorch.py +5 -5
  120. doctr/transforms/modules/__init__.py +1 -7
  121. doctr/transforms/modules/base.py +28 -94
  122. doctr/transforms/modules/pytorch.py +29 -27
  123. doctr/utils/common_types.py +1 -1
  124. doctr/utils/data.py +1 -2
  125. doctr/utils/fonts.py +1 -1
  126. doctr/utils/geometry.py +7 -11
  127. doctr/utils/metrics.py +1 -1
  128. doctr/utils/multithreading.py +1 -1
  129. doctr/utils/reconstitution.py +1 -1
  130. doctr/utils/repr.py +1 -1
  131. doctr/utils/visualization.py +2 -2
  132. doctr/version.py +1 -1
  133. {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/METADATA +30 -80
  134. python_doctr-1.0.1.dist-info/RECORD +149 -0
  135. {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/WHEEL +1 -1
  136. doctr/datasets/datasets/tensorflow.py +0 -59
  137. doctr/datasets/generator/tensorflow.py +0 -58
  138. doctr/datasets/loader.py +0 -94
  139. doctr/io/image/tensorflow.py +0 -101
  140. doctr/models/classification/magc_resnet/tensorflow.py +0 -196
  141. doctr/models/classification/mobilenet/tensorflow.py +0 -442
  142. doctr/models/classification/predictor/tensorflow.py +0 -60
  143. doctr/models/classification/resnet/tensorflow.py +0 -418
  144. doctr/models/classification/textnet/tensorflow.py +0 -275
  145. doctr/models/classification/vgg/tensorflow.py +0 -125
  146. doctr/models/classification/vit/tensorflow.py +0 -201
  147. doctr/models/detection/_utils/tensorflow.py +0 -34
  148. doctr/models/detection/differentiable_binarization/tensorflow.py +0 -421
  149. doctr/models/detection/fast/tensorflow.py +0 -427
  150. doctr/models/detection/linknet/tensorflow.py +0 -377
  151. doctr/models/detection/predictor/tensorflow.py +0 -70
  152. doctr/models/kie_predictor/tensorflow.py +0 -187
  153. doctr/models/modules/layers/tensorflow.py +0 -171
  154. doctr/models/modules/transformer/tensorflow.py +0 -235
  155. doctr/models/modules/vision_transformer/tensorflow.py +0 -100
  156. doctr/models/predictor/tensorflow.py +0 -155
  157. doctr/models/preprocessor/tensorflow.py +0 -122
  158. doctr/models/recognition/crnn/tensorflow.py +0 -317
  159. doctr/models/recognition/master/tensorflow.py +0 -320
  160. doctr/models/recognition/parseq/tensorflow.py +0 -516
  161. doctr/models/recognition/predictor/tensorflow.py +0 -79
  162. doctr/models/recognition/sar/tensorflow.py +0 -423
  163. doctr/models/recognition/vitstr/tensorflow.py +0 -285
  164. doctr/models/utils/tensorflow.py +0 -189
  165. doctr/transforms/functional/tensorflow.py +0 -254
  166. doctr/transforms/modules/tensorflow.py +0 -562
  167. python_doctr-0.12.0.dist-info/RECORD +0 -180
  168. {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/licenses/LICENSE +0 -0
  169. {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/top_level.txt +0 -0
  170. {python_doctr-0.12.0.dist-info → python_doctr-1.0.1.dist-info}/zip-safe +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2025, Mindee.
1
+ # Copyright (C) 2021-2026, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -13,7 +13,7 @@ from torch.nn.functional import pad
13
13
  from torchvision.transforms import functional as F
14
14
  from torchvision.transforms import transforms as T
15
15
 
16
- from ..functional.pytorch import random_shadow
16
+ from ..functional import random_shadow
17
17
 
18
18
  __all__ = [
19
19
  "Resize",
@@ -27,7 +27,21 @@ __all__ = [
27
27
 
28
28
 
29
29
  class Resize(T.Resize):
30
- """Resize the input image to the given size"""
30
+ """Resize the input image to the given size
31
+
32
+ >>> import torch
33
+ >>> from doctr.transforms import Resize
34
+ >>> transfo = Resize((64, 64), preserve_aspect_ratio=True, symmetric_pad=True)
35
+ >>> out = transfo(torch.rand((3, 64, 64)))
36
+
37
+ Args:
38
+ size: output size in pixels, either a tuple (height, width) or a single integer for square images
39
+ interpolation: interpolation mode to use for resizing, default is bilinear
40
+ preserve_aspect_ratio: whether to preserve the aspect ratio of the image,
41
+ if True, the image will be resized to fit within the target size while maintaining its aspect ratio
42
+ symmetric_pad: whether to symmetrically pad the image to the target size,
43
+ if True, the image will be padded equally on both sides to fit the target size
44
+ """
31
45
 
32
46
  def __init__(
33
47
  self,
@@ -36,25 +50,19 @@ class Resize(T.Resize):
36
50
  preserve_aspect_ratio: bool = False,
37
51
  symmetric_pad: bool = False,
38
52
  ) -> None:
39
- super().__init__(size, interpolation, antialias=True)
53
+ super().__init__(size if isinstance(size, (list, tuple)) else (size, size), interpolation, antialias=True)
40
54
  self.preserve_aspect_ratio = preserve_aspect_ratio
41
55
  self.symmetric_pad = symmetric_pad
42
56
 
43
- if not isinstance(self.size, (int, tuple, list)):
44
- raise AssertionError("size should be either a tuple, a list or an int")
45
-
46
57
  def forward(
47
58
  self,
48
59
  img: torch.Tensor,
49
60
  target: np.ndarray | None = None,
50
61
  ) -> torch.Tensor | tuple[torch.Tensor, np.ndarray]:
51
- if isinstance(self.size, int):
52
- target_ratio = img.shape[-2] / img.shape[-1]
53
- else:
54
- target_ratio = self.size[0] / self.size[1]
62
+ target_ratio = self.size[0] / self.size[1]
55
63
  actual_ratio = img.shape[-2] / img.shape[-1]
56
64
 
57
- if not self.preserve_aspect_ratio or (target_ratio == actual_ratio and (isinstance(self.size, (tuple, list)))):
65
+ if not self.preserve_aspect_ratio or (target_ratio == actual_ratio):
58
66
  # If we don't preserve the aspect ratio or the wanted aspect ratio is the same as the original one
59
67
  # We can use the regular resize
60
68
  if target is not None:
@@ -62,16 +70,10 @@ class Resize(T.Resize):
62
70
  return super().forward(img)
63
71
  else:
64
72
  # Resize
65
- if isinstance(self.size, (tuple, list)):
66
- if actual_ratio > target_ratio:
67
- tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
68
- else:
69
- tmp_size = (max(int(self.size[1] * actual_ratio), 1), self.size[1])
70
- elif isinstance(self.size, int): # self.size is the longest side, infer the other
71
- if img.shape[-2] <= img.shape[-1]:
72
- tmp_size = (max(int(self.size * actual_ratio), 1), self.size)
73
- else:
74
- tmp_size = (self.size, max(int(self.size / actual_ratio), 1))
73
+ if actual_ratio > target_ratio:
74
+ tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1))
75
+ else:
76
+ tmp_size = (max(int(self.size[1] * actual_ratio), 1), self.size[1])
75
77
 
76
78
  # Scale image
77
79
  img = F.resize(img, tmp_size, self.interpolation, antialias=True)
@@ -93,14 +95,14 @@ class Resize(T.Resize):
93
95
  if self.preserve_aspect_ratio:
94
96
  # Get absolute coords
95
97
  if target.shape[1:] == (4,):
96
- if isinstance(self.size, (tuple, list)) and self.symmetric_pad:
98
+ if self.symmetric_pad:
97
99
  target[:, [0, 2]] = offset[0] + target[:, [0, 2]] * raw_shape[-1] / img.shape[-1]
98
100
  target[:, [1, 3]] = offset[1] + target[:, [1, 3]] * raw_shape[-2] / img.shape[-2]
99
101
  else:
100
102
  target[:, [0, 2]] *= raw_shape[-1] / img.shape[-1]
101
103
  target[:, [1, 3]] *= raw_shape[-2] / img.shape[-2]
102
104
  elif target.shape[1:] == (4, 2):
103
- if isinstance(self.size, (tuple, list)) and self.symmetric_pad:
105
+ if self.symmetric_pad:
104
106
  target[..., 0] = offset[0] + target[..., 0] * raw_shape[-1] / img.shape[-1]
105
107
  target[..., 1] = offset[1] + target[..., 1] * raw_shape[-2] / img.shape[-2]
106
108
  else:
@@ -143,9 +145,9 @@ class GaussianNoise(torch.nn.Module):
143
145
  # Reshape the distribution
144
146
  noise = self.mean + 2 * self.std * torch.rand(x.shape, device=x.device) - self.std
145
147
  if x.dtype == torch.uint8:
146
- return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8) # type: ignore[attr-defined]
148
+ return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8)
147
149
  else:
148
- return (x + noise.to(dtype=x.dtype)).clamp(0, 1) # type: ignore[attr-defined]
150
+ return (x + noise.to(dtype=x.dtype)).clamp(0, 1)
149
151
 
150
152
  def extra_repr(self) -> str:
151
153
  return f"mean={self.mean}, std={self.std}"
@@ -233,7 +235,7 @@ class RandomShadow(torch.nn.Module):
233
235
  try:
234
236
  if x.dtype == torch.uint8:
235
237
  return (
236
- ( # type: ignore[attr-defined]
238
+ (
237
239
  255
238
240
  * random_shadow(
239
241
  x.to(dtype=torch.float32) / 255,
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2025, Mindee.
1
+ # Copyright (C) 2021-2026, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
doctr/utils/data.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2025, Mindee.
1
+ # Copyright (C) 2021-2026, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -9,7 +9,6 @@ import hashlib
9
9
  import logging
10
10
  import os
11
11
  import re
12
- import urllib
13
12
  import urllib.error
14
13
  import urllib.request
15
14
  from pathlib import Path
doctr/utils/fonts.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2025, Mindee.
1
+ # Copyright (C) 2021-2026, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
doctr/utils/geometry.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2025, Mindee.
1
+ # Copyright (C) 2021-2026, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -390,14 +390,13 @@ def convert_to_relative_coords(geoms: np.ndarray, img_shape: tuple[int, int]) ->
390
390
  raise ValueError(f"invalid format for arg `geoms`: {geoms.shape}")
391
391
 
392
392
 
393
- def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> list[np.ndarray]:
393
+ def extract_crops(img: np.ndarray, boxes: np.ndarray) -> list[np.ndarray]:
394
394
  """Create cropped images from a list of bounding boxes
395
395
 
396
396
  Args:
397
397
  img: input image
398
398
  boxes: bounding boxes of shape (N, 4) where N is the number of boxes, and the relative
399
399
  coordinates (xmin, ymin, xmax, ymax)
400
- channels_last: whether the channel dimensions is the last one instead of the last one
401
400
 
402
401
  Returns:
403
402
  list of cropped images
@@ -409,21 +408,19 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True
409
408
 
410
409
  # Project relative coordinates
411
410
  _boxes = boxes.copy()
412
- h, w = img.shape[:2] if channels_last else img.shape[-2:]
411
+ h, w = img.shape[:2]
413
412
  if not np.issubdtype(_boxes.dtype, np.integer):
414
413
  _boxes[:, [0, 2]] *= w
415
414
  _boxes[:, [1, 3]] *= h
416
415
  _boxes = _boxes.round().astype(int)
417
416
  # Add last index
418
417
  _boxes[2:] += 1
419
- if channels_last:
420
- return deepcopy([img[box[1] : box[3], box[0] : box[2]] for box in _boxes])
421
418
 
422
- return deepcopy([img[:, box[1] : box[3], box[0] : box[2]] for box in _boxes])
419
+ return deepcopy([img[box[1] : box[3], box[0] : box[2]] for box in _boxes])
423
420
 
424
421
 
425
422
  def extract_rcrops(
426
- img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True, assume_horizontal: bool = False
423
+ img: np.ndarray, polys: np.ndarray, dtype=np.float32, assume_horizontal: bool = False
427
424
  ) -> list[np.ndarray]:
428
425
  """Create cropped images from a list of rotated bounding boxes
429
426
 
@@ -431,7 +428,6 @@ def extract_rcrops(
431
428
  img: input image
432
429
  polys: bounding boxes of shape (N, 4, 2)
433
430
  dtype: target data type of bounding boxes
434
- channels_last: whether the channel dimensions is the last one instead of the last one
435
431
  assume_horizontal: whether the boxes are assumed to be only horizontally oriented
436
432
 
437
433
  Returns:
@@ -444,12 +440,12 @@ def extract_rcrops(
444
440
 
445
441
  # Project relative coordinates
446
442
  _boxes = polys.copy()
447
- height, width = img.shape[:2] if channels_last else img.shape[-2:]
443
+ height, width = img.shape[:2]
448
444
  if not np.issubdtype(_boxes.dtype, np.integer):
449
445
  _boxes[:, :, 0] *= width
450
446
  _boxes[:, :, 1] *= height
451
447
 
452
- src_img = img if channels_last else img.transpose(1, 2, 0)
448
+ src_img = img
453
449
 
454
450
  # Handle only horizontal oriented boxes
455
451
  if assume_horizontal:
doctr/utils/metrics.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2025, Mindee.
1
+ # Copyright (C) 2021-2026, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2025, Mindee.
1
+ # Copyright (C) 2021-2026, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2025, Mindee.
1
+ # Copyright (C) 2021-2026, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
doctr/utils/repr.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2025, Mindee.
1
+ # Copyright (C) 2021-2026, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2025, Mindee.
1
+ # Copyright (C) 2021-2026, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -148,7 +148,7 @@ def get_colors(num_colors: int) -> list[tuple[float, float, float]]:
148
148
  hue = i / 360.0
149
149
  lightness = (50 + np.random.rand() * 10) / 100.0
150
150
  saturation = (90 + np.random.rand() * 10) / 100.0
151
- colors.append(colorsys.hls_to_rgb(hue, lightness, saturation))
151
+ colors.append(colorsys.hls_to_rgb(hue, lightness, saturation)) # type: ignore[arg-type]
152
152
  return colors
153
153
 
154
154
 
doctr/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = 'v0.12.0'
1
+ __version__ = 'v1.0.1'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-doctr
3
- Version: 0.12.0
3
+ Version: 1.0.1
4
4
  Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
5
5
  Author-email: Mindee <contact@mindee.com>
6
6
  Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -210,7 +210,7 @@ Project-URL: documentation, https://mindee.github.io/doctr
210
210
  Project-URL: repository, https://github.com/mindee/doctr
211
211
  Project-URL: tracker, https://github.com/mindee/doctr/issues
212
212
  Project-URL: changelog, https://mindee.github.io/doctr/changelog.html
213
- Keywords: OCR,deep learning,computer vision,tensorflow,pytorch,text detection,text recognition
213
+ Keywords: OCR,deep learning,computer vision,pytorch,text detection,text recognition
214
214
  Classifier: Development Status :: 4 - Beta
215
215
  Classifier: Intended Audience :: Developers
216
216
  Classifier: Intended Audience :: Education
@@ -226,30 +226,24 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
226
226
  Requires-Python: <4,>=3.10.0
227
227
  Description-Content-Type: text/markdown
228
228
  License-File: LICENSE
229
+ Requires-Dist: torch<3.0.0,>=2.0.0
230
+ Requires-Dist: torchvision>=0.15.0
231
+ Requires-Dist: onnx<3.0.0,>=1.12.0
229
232
  Requires-Dist: numpy<3.0.0,>=1.16.0
230
233
  Requires-Dist: scipy<2.0.0,>=1.4.0
231
234
  Requires-Dist: h5py<4.0.0,>=3.1.0
232
235
  Requires-Dist: opencv-python<5.0.0,>=4.5.0
233
- Requires-Dist: pypdfium2<5.0.0,>=4.11.0
236
+ Requires-Dist: pypdfium2<6.0.0,>=4.11.0
234
237
  Requires-Dist: pyclipper<2.0.0,>=1.2.0
235
238
  Requires-Dist: shapely<3.0.0,>=1.6.0
236
239
  Requires-Dist: langdetect<2.0.0,>=1.0.9
237
240
  Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
238
- Requires-Dist: huggingface-hub<1.0.0,>=0.20.0
241
+ Requires-Dist: huggingface-hub<2.0.0,>=0.20.0
239
242
  Requires-Dist: Pillow>=9.2.0
240
243
  Requires-Dist: defusedxml>=0.7.0
241
244
  Requires-Dist: anyascii>=0.3.2
242
245
  Requires-Dist: validators>=0.18.0
243
246
  Requires-Dist: tqdm>=4.30.0
244
- Provides-Extra: tf
245
- Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "tf"
246
- Requires-Dist: tensorflow<3.0.0,>=2.15.0; sys_platform != "linux" and extra == "tf"
247
- Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "tf"
248
- Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "tf"
249
- Provides-Extra: torch
250
- Requires-Dist: torch<3.0.0,>=2.0.0; extra == "torch"
251
- Requires-Dist: torchvision>=0.15.0; extra == "torch"
252
- Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "torch"
253
247
  Provides-Extra: html
254
248
  Requires-Dist: weasyprint>=55.0; extra == "html"
255
249
  Provides-Extra: viz
@@ -271,16 +265,12 @@ Provides-Extra: docs
271
265
  Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "docs"
272
266
  Requires-Dist: sphinxemoji>=0.1.8; extra == "docs"
273
267
  Requires-Dist: sphinx-copybutton>=0.3.1; extra == "docs"
274
- Requires-Dist: docutils<0.22; extra == "docs"
268
+ Requires-Dist: docutils<0.23; extra == "docs"
275
269
  Requires-Dist: recommonmark>=0.7.1; extra == "docs"
276
270
  Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
277
271
  Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
278
272
  Requires-Dist: furo>=2022.3.4; extra == "docs"
279
273
  Provides-Extra: dev
280
- Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "dev"
281
- Requires-Dist: tensorflow<3.0.0,>=2.15.0; sys_platform != "linux" and extra == "dev"
282
- Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "dev"
283
- Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
284
274
  Requires-Dist: torch<3.0.0,>=2.0.0; extra == "dev"
285
275
  Requires-Dist: torchvision>=0.15.0; extra == "dev"
286
276
  Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "dev"
@@ -298,7 +288,7 @@ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
298
288
  Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "dev"
299
289
  Requires-Dist: sphinxemoji>=0.1.8; extra == "dev"
300
290
  Requires-Dist: sphinx-copybutton>=0.3.1; extra == "dev"
301
- Requires-Dist: docutils<0.22; extra == "dev"
291
+ Requires-Dist: docutils<0.23; extra == "dev"
302
292
  Requires-Dist: recommonmark>=0.7.1; extra == "dev"
303
293
  Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
304
294
  Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
@@ -309,10 +299,10 @@ Dynamic: license-file
309
299
  <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
310
300
  </p>
311
301
 
312
- [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.12.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20docTR%20Guru-006BFF)](https://gurubase.io/g/doctr)
302
+ [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v1.0.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20docTR%20Guru-006BFF)](https://gurubase.io/g/doctr)
313
303
 
314
304
 
315
- **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
305
+ **Optical Character Recognition made seamless & accessible to anyone, powered by PyTorch**
316
306
 
317
307
  What you can expect from this repository:
318
308
 
@@ -371,7 +361,7 @@ Should you use docTR on documents that include rotated pages, or pages with mult
371
361
  you have multiple options to handle it:
372
362
 
373
363
  - If you only use straight document pages with straight words (horizontal, same reading direction),
374
- consider passing `assume_straight_boxes=True` to the ocr_predictor. It will directly fit straight boxes
364
+ consider passing `assume_straight_pages=True` to the ocr_predictor. It will directly fit straight boxes
375
365
  on your page and return straight boxes, which makes it the fastest option.
376
366
 
377
367
  - If you want the predictor to output straight boxes (no matter the orientation of your pages, the final localizations
@@ -440,19 +430,6 @@ The KIE predictor results per page are in a dictionary format with each key repr
440
430
 
441
431
  ## Installation
442
432
 
443
- > [!WARNING]
444
- > **TensorFlow Backend Deprecation Notice**
445
- >
446
- > Using docTR with TensorFlow as a backend is deprecated and will be removed in the next major release (v1.0.0).
447
- > We **recommend switching to the PyTorch backend**, which is more actively maintained and supports the latest features and models.
448
- > Alternatively, you can use [OnnxTR](https://github.com/felixdittrich92/OnnxTR), which does **not** require TensorFlow or PyTorch.
449
- >
450
- > This decision was made based on several considerations:
451
- >
452
- > - Allows better focus on improving the core library
453
- > - Frees up resources to develop new features faster
454
- > - Enables more targeted optimizations with PyTorch
455
-
456
433
  ### Prerequisites
457
434
 
458
435
  Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
@@ -465,24 +442,15 @@ You can then install the latest release of the package using [pypi](https://pypi
465
442
  pip install python-doctr
466
443
  ```
467
444
 
468
- > :warning: Please note that the basic installation is not standalone, as it does not provide a deep learning framework, which is required for the package to run.
469
-
470
- We try to keep framework-specific dependencies to a minimum. You can install framework-specific builds as follows:
445
+ We try to keep extra dependencies to a minimum. You can install specific builds as follows:
471
446
 
472
447
  ```shell
473
- # for TensorFlow
474
- pip install "python-doctr[tf]"
475
- # for PyTorch
476
- pip install "python-doctr[torch]"
448
+ # standard build
449
+ pip install python-doctr
477
450
  # optional dependencies for visualization, html, and contrib modules can be installed as follows:
478
- pip install "python-doctr[torch,viz,html,contib]"
451
+ pip install "python-doctr[viz,html,contrib]"
479
452
  ```
480
453
 
481
- For MacBooks with M1 chip, you will need some additional packages or specific versions:
482
-
483
- - TensorFlow 2: [metal plugin](https://developer.apple.com/metal/tensorflow-plugin/)
484
- - PyTorch: [version >= 2.0.0](https://pytorch.org/get-started/locally/#start-locally)
485
-
486
454
  ### Developer mode
487
455
 
488
456
  Alternatively, you can install it from source, which will require you to install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git).
@@ -493,13 +461,10 @@ git clone https://github.com/mindee/doctr.git
493
461
  pip install -e doctr/.
494
462
  ```
495
463
 
496
- Again, if you prefer to avoid the risk of missing dependencies, you can install the TensorFlow or the PyTorch build:
464
+ Again, if you prefer to avoid the risk of missing dependencies, you can install the build:
497
465
 
498
466
  ```shell
499
- # for TensorFlow
500
- pip install -e doctr/.[tf]
501
- # for PyTorch
502
- pip install -e doctr/.[torch]
467
+ pip install -e doctr/.
503
468
  ```
504
469
 
505
470
  ## Models architectures
@@ -542,20 +507,6 @@ Check it out [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%2
542
507
 
543
508
  If you prefer to use it locally, there is an extra dependency ([Streamlit](https://streamlit.io/)) that is required.
544
509
 
545
- ##### Tensorflow version
546
-
547
- ```shell
548
- pip install -r demo/tf-requirements.txt
549
- ```
550
-
551
- Then run your app in your default browser with:
552
-
553
- ```shell
554
- USE_TF=1 streamlit run demo/app.py
555
- ```
556
-
557
- ##### PyTorch version
558
-
559
510
  ```shell
560
511
  pip install -r demo/pt-requirements.txt
561
512
  ```
@@ -563,23 +514,16 @@ pip install -r demo/pt-requirements.txt
563
514
  Then run your app in your default browser with:
564
515
 
565
516
  ```shell
566
- USE_TORCH=1 streamlit run demo/app.py
517
+ streamlit run demo/app.py
567
518
  ```
568
519
 
569
- #### TensorFlow.js
570
-
571
- Instead of having your demo actually running Python, you would prefer to run everything in your web browser?
572
- Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to get started!
573
-
574
- ![TFJS demo](https://github.com/mindee/doctr/raw/main/docs/images/demo_illustration_mini.png)
575
-
576
520
  ### Docker container
577
521
 
578
522
  We offer Docker container support for easy testing and deployment. [Here are the available Docker tags](https://github.com/mindee/doctr/pkgs/container/doctr).
579
523
 
580
524
  #### Using GPU with docTR Docker Images
581
525
 
582
- The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch or TensorFlow won't be able to initialize the GPU.
526
+ The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure the CUDA version supported by your host is **at least `12.2`**, otherwise Torch won't be able to initialize the GPU.
583
527
  Please ensure that Docker is configured to use your GPU.
584
528
 
585
529
  To verify and configure GPU support for Docker, please follow the instructions provided in the [NVIDIA Container Toolkit Installation Guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).
@@ -594,7 +538,7 @@ docker run -it --gpus all ghcr.io/mindee/doctr:torch-py3.9.18-2024-10 bash
594
538
 
595
539
  The Docker images for docTR follow a specific tag nomenclature: `<deps>-py<python_version>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:
596
540
 
597
- - `<deps>`: `tf`, `torch`, `tf-viz-html-contrib` or `torch-viz-html-contrib`.
541
+ - `<deps>`: `torch` or `torch-viz-html-contrib`.
598
542
  - `<python_version>`: `3.9.18`, `3.10.13` or `3.11.8`.
599
543
  - `<doctr_version>`: a tag >= `v0.11.0`
600
544
  - `<YYYY-MM>`: e.g. `2024-10`
@@ -603,7 +547,6 @@ Here are examples of different image tags:
603
547
 
604
548
  | Tag | Description |
605
549
  |----------------------------|---------------------------------------------------|
606
- | `tf-py3.10.13-v0.11.0` | TensorFlow version `3.10.13` with docTR `v0.11.0`. |
607
550
  | `torch-viz-html-contrib-py3.11.8-2024-10` | PyTorch with extra dependencies, Python version `3.11.8`, built from the latest commit on `main` in `2024-10`. |
608
551
  | `torch-py3.11.8-2024-10`| PyTorch, Python version `3.11.8`, built from the latest commit on `main` in `2024-10`. |
609
552
 
@@ -615,10 +558,10 @@ You can also build docTR Docker images locally on your computer.
615
558
  docker build -t doctr .
616
559
  ```
617
560
 
618
- You can specify custom Python versions and docTR versions using build arguments. For example, to build a docTR image with TensorFlow, Python version `3.9.10`, and docTR version `v0.7.0`, run the following command:
561
+ You can specify custom Python versions and docTR versions using build arguments. For example, to build a docTR image with PyTorch, Python version `3.9.10`, and docTR version `v0.7.0`, run the following command:
619
562
 
620
563
  ```shell
621
- docker build -t doctr --build-arg FRAMEWORK=tf --build-arg PYTHON_VERSION=3.9.10 --build-arg DOCTR_VERSION=v0.7.0 .
564
+ docker build -t doctr --build-arg FRAMEWORK=torch --build-arg PYTHON_VERSION=3.9.10 --build-arg DOCTR_VERSION=v0.7.0 .
622
565
  ```
623
566
 
624
567
  ### Example script
@@ -678,6 +621,13 @@ print(requests.post("http://localhost:8080/ocr", params=params, files=files).jso
678
621
 
679
622
  Looking for more illustrations of docTR features? You might want to check the [Jupyter notebooks](https://github.com/mindee/doctr/tree/main/notebooks) designed to give you a broader overview.
680
623
 
624
+ ## Supported By
625
+
626
+ This project is supported by [t2k GmbH](https://www.text2knowledge.de/de).
627
+ <p align="center">
628
+ <img src="https://github.com/mindee/doctr/raw/main/docs/images/t2k_logo.png" width="40%">
629
+ </p>
630
+
681
631
  ## Citation
682
632
 
683
633
  If you wish to cite this project, feel free to use this [BibTeX](http://www.bibtex.org/) reference: