python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. doctr/datasets/__init__.py +2 -0
  2. doctr/datasets/cord.py +6 -4
  3. doctr/datasets/datasets/base.py +3 -2
  4. doctr/datasets/datasets/pytorch.py +4 -2
  5. doctr/datasets/datasets/tensorflow.py +4 -2
  6. doctr/datasets/detection.py +6 -3
  7. doctr/datasets/doc_artefacts.py +2 -1
  8. doctr/datasets/funsd.py +7 -8
  9. doctr/datasets/generator/base.py +3 -2
  10. doctr/datasets/generator/pytorch.py +3 -1
  11. doctr/datasets/generator/tensorflow.py +3 -1
  12. doctr/datasets/ic03.py +3 -2
  13. doctr/datasets/ic13.py +2 -1
  14. doctr/datasets/iiit5k.py +6 -4
  15. doctr/datasets/iiithws.py +2 -1
  16. doctr/datasets/imgur5k.py +3 -2
  17. doctr/datasets/loader.py +4 -2
  18. doctr/datasets/mjsynth.py +2 -1
  19. doctr/datasets/ocr.py +2 -1
  20. doctr/datasets/orientation.py +40 -0
  21. doctr/datasets/recognition.py +3 -2
  22. doctr/datasets/sroie.py +2 -1
  23. doctr/datasets/svhn.py +2 -1
  24. doctr/datasets/svt.py +3 -2
  25. doctr/datasets/synthtext.py +2 -1
  26. doctr/datasets/utils.py +27 -11
  27. doctr/datasets/vocabs.py +26 -1
  28. doctr/datasets/wildreceipt.py +111 -0
  29. doctr/file_utils.py +3 -1
  30. doctr/io/elements.py +52 -35
  31. doctr/io/html.py +5 -3
  32. doctr/io/image/base.py +5 -4
  33. doctr/io/image/pytorch.py +12 -7
  34. doctr/io/image/tensorflow.py +11 -6
  35. doctr/io/pdf.py +5 -4
  36. doctr/io/reader.py +13 -5
  37. doctr/models/_utils.py +30 -53
  38. doctr/models/artefacts/barcode.py +4 -3
  39. doctr/models/artefacts/face.py +4 -2
  40. doctr/models/builder.py +58 -43
  41. doctr/models/classification/__init__.py +1 -0
  42. doctr/models/classification/magc_resnet/pytorch.py +5 -2
  43. doctr/models/classification/magc_resnet/tensorflow.py +5 -2
  44. doctr/models/classification/mobilenet/pytorch.py +16 -4
  45. doctr/models/classification/mobilenet/tensorflow.py +29 -20
  46. doctr/models/classification/predictor/pytorch.py +3 -2
  47. doctr/models/classification/predictor/tensorflow.py +2 -1
  48. doctr/models/classification/resnet/pytorch.py +23 -13
  49. doctr/models/classification/resnet/tensorflow.py +33 -26
  50. doctr/models/classification/textnet/__init__.py +6 -0
  51. doctr/models/classification/textnet/pytorch.py +275 -0
  52. doctr/models/classification/textnet/tensorflow.py +267 -0
  53. doctr/models/classification/vgg/pytorch.py +4 -2
  54. doctr/models/classification/vgg/tensorflow.py +5 -2
  55. doctr/models/classification/vit/pytorch.py +9 -3
  56. doctr/models/classification/vit/tensorflow.py +9 -3
  57. doctr/models/classification/zoo.py +7 -2
  58. doctr/models/core.py +1 -1
  59. doctr/models/detection/__init__.py +1 -0
  60. doctr/models/detection/_utils/pytorch.py +7 -1
  61. doctr/models/detection/_utils/tensorflow.py +7 -3
  62. doctr/models/detection/core.py +9 -3
  63. doctr/models/detection/differentiable_binarization/base.py +37 -25
  64. doctr/models/detection/differentiable_binarization/pytorch.py +80 -104
  65. doctr/models/detection/differentiable_binarization/tensorflow.py +74 -55
  66. doctr/models/detection/fast/__init__.py +6 -0
  67. doctr/models/detection/fast/base.py +256 -0
  68. doctr/models/detection/fast/pytorch.py +442 -0
  69. doctr/models/detection/fast/tensorflow.py +428 -0
  70. doctr/models/detection/linknet/base.py +12 -5
  71. doctr/models/detection/linknet/pytorch.py +28 -15
  72. doctr/models/detection/linknet/tensorflow.py +68 -88
  73. doctr/models/detection/predictor/pytorch.py +16 -6
  74. doctr/models/detection/predictor/tensorflow.py +13 -5
  75. doctr/models/detection/zoo.py +19 -16
  76. doctr/models/factory/hub.py +20 -10
  77. doctr/models/kie_predictor/base.py +2 -1
  78. doctr/models/kie_predictor/pytorch.py +28 -36
  79. doctr/models/kie_predictor/tensorflow.py +27 -27
  80. doctr/models/modules/__init__.py +1 -0
  81. doctr/models/modules/layers/__init__.py +6 -0
  82. doctr/models/modules/layers/pytorch.py +166 -0
  83. doctr/models/modules/layers/tensorflow.py +175 -0
  84. doctr/models/modules/transformer/pytorch.py +24 -22
  85. doctr/models/modules/transformer/tensorflow.py +6 -4
  86. doctr/models/modules/vision_transformer/pytorch.py +2 -4
  87. doctr/models/modules/vision_transformer/tensorflow.py +2 -4
  88. doctr/models/obj_detection/faster_rcnn/pytorch.py +4 -2
  89. doctr/models/predictor/base.py +14 -3
  90. doctr/models/predictor/pytorch.py +26 -29
  91. doctr/models/predictor/tensorflow.py +25 -22
  92. doctr/models/preprocessor/pytorch.py +14 -9
  93. doctr/models/preprocessor/tensorflow.py +10 -5
  94. doctr/models/recognition/core.py +4 -1
  95. doctr/models/recognition/crnn/pytorch.py +23 -16
  96. doctr/models/recognition/crnn/tensorflow.py +25 -17
  97. doctr/models/recognition/master/base.py +4 -1
  98. doctr/models/recognition/master/pytorch.py +20 -9
  99. doctr/models/recognition/master/tensorflow.py +20 -8
  100. doctr/models/recognition/parseq/base.py +4 -1
  101. doctr/models/recognition/parseq/pytorch.py +28 -22
  102. doctr/models/recognition/parseq/tensorflow.py +22 -11
  103. doctr/models/recognition/predictor/_utils.py +3 -2
  104. doctr/models/recognition/predictor/pytorch.py +3 -2
  105. doctr/models/recognition/predictor/tensorflow.py +2 -1
  106. doctr/models/recognition/sar/pytorch.py +14 -7
  107. doctr/models/recognition/sar/tensorflow.py +23 -14
  108. doctr/models/recognition/utils.py +5 -1
  109. doctr/models/recognition/vitstr/base.py +4 -1
  110. doctr/models/recognition/vitstr/pytorch.py +22 -13
  111. doctr/models/recognition/vitstr/tensorflow.py +21 -10
  112. doctr/models/recognition/zoo.py +4 -2
  113. doctr/models/utils/pytorch.py +24 -6
  114. doctr/models/utils/tensorflow.py +22 -3
  115. doctr/models/zoo.py +21 -3
  116. doctr/transforms/functional/base.py +8 -3
  117. doctr/transforms/functional/pytorch.py +23 -6
  118. doctr/transforms/functional/tensorflow.py +25 -5
  119. doctr/transforms/modules/base.py +12 -5
  120. doctr/transforms/modules/pytorch.py +10 -12
  121. doctr/transforms/modules/tensorflow.py +17 -9
  122. doctr/utils/common_types.py +1 -1
  123. doctr/utils/data.py +4 -2
  124. doctr/utils/fonts.py +3 -2
  125. doctr/utils/geometry.py +95 -26
  126. doctr/utils/metrics.py +36 -22
  127. doctr/utils/multithreading.py +5 -3
  128. doctr/utils/repr.py +3 -1
  129. doctr/utils/visualization.py +31 -8
  130. doctr/version.py +1 -1
  131. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/METADATA +67 -31
  132. python_doctr-0.8.1.dist-info/RECORD +173 -0
  133. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/WHEEL +1 -1
  134. python_doctr-0.7.0.dist-info/RECORD +0 -161
  135. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/LICENSE +0 -0
  136. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/top_level.txt +0 -0
  137. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/zip-safe +0 -0
doctr/datasets/__init__.py CHANGED
@@ -13,12 +13,14 @@ from .imgur5k import *
  from .mjsynth import *
  from .ocr import *
  from .recognition import *
+ from .orientation import *
  from .sroie import *
  from .svhn import *
  from .svt import *
  from .synthtext import *
  from .utils import *
  from .vocabs import *
+ from .wildreceipt import *
 
  if is_tf_available():
      from .loader import *
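The two additions above expose the newly added modules from the file summary (doctr/datasets/orientation.py and doctr/datasets/wildreceipt.py) as public imports. A minimal usage sketch, based on the OrientationDataset docstring further down and assuming WildReceipt follows the same img_folder/label_path constructor pattern as the other annotated datasets in this package (its exact signature is not shown in this diff):

>>> from doctr.datasets import OrientationDataset, WildReceipt  # new exports in 0.8.1
>>> orientation_set = OrientationDataset(img_folder="/path/to/images")  # every target is a zero rotation label
>>> receipt_set = WildReceipt(img_folder="/path/to/images", label_path="/path/to/labels.json", train=True)  # argument names assumed
>>> img, target = orientation_set[0]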
doctr/datasets/cord.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -29,6 +29,7 @@ class CORD(VisionDataset):
  >>> img, target = train_set[0]
 
  Args:
+ ----
  train: whether the subset should be the training one
  use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
  recognition_task: whether the dataset should be used for recognition task
@@ -109,9 +110,10 @@ class CORD(VisionDataset):
  for crop, label in zip(crops, list(text_targets)):
  self.data.append((crop, label))
  else:
- self.data.append(
- (img_path, dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)))
- )
+ self.data.append((
+ img_path,
+ dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)),
+ ))
 
  self.root = tmp_root
 
doctr/datasets/datasets/base.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -59,7 +59,7 @@ class _AbstractDataset:
  # Conditions to assess it is detection model with multiple classes and avoid confusion with other tasks.
  if (
  isinstance(target, dict)
- and all([isinstance(item, np.ndarray) for item in target.values()])
+ and all(isinstance(item, np.ndarray) for item in target.values())
  and set(target.keys()) != {"boxes", "labels"} # avoid confusion with obj detection target
  ):
  img_transformed = _copy_tensor(img)
@@ -82,6 +82,7 @@ class _VisionDataset(_AbstractDataset):
  """Implements an abstract dataset
 
  Args:
+ ----
  url: URL of the dataset
  file_name: name of the file once downloaded
  file_hash: expected SHA256 of the file
doctr/datasets/datasets/pytorch.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -18,6 +18,8 @@ __all__ = ["AbstractDataset", "VisionDataset"]
 
 
  class AbstractDataset(_AbstractDataset):
+ """Abstract class for all datasets"""
+
  def _read_sample(self, index: int) -> Tuple[torch.Tensor, Any]:
  img_name, target = self.data[index]
 
@@ -53,5 +55,5 @@ class AbstractDataset(_AbstractDataset):
  return images, list(targets)
 
 
- class VisionDataset(AbstractDataset, _VisionDataset):
+ class VisionDataset(AbstractDataset, _VisionDataset): # noqa: D101
  pass
doctr/datasets/datasets/tensorflow.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -18,6 +18,8 @@ __all__ = ["AbstractDataset", "VisionDataset"]
 
 
  class AbstractDataset(_AbstractDataset):
+ """Abstract class for all datasets"""
+
  def _read_sample(self, index: int) -> Tuple[tf.Tensor, Any]:
  img_name, target = self.data[index]
 
@@ -53,5 +55,5 @@ class AbstractDataset(_AbstractDataset):
  return images, list(targets)
 
 
- class VisionDataset(AbstractDataset, _VisionDataset):
+ class VisionDataset(AbstractDataset, _VisionDataset): # noqa: D101
  pass
doctr/datasets/detection.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -26,6 +26,7 @@ class DetectionDataset(AbstractDataset):
  >>> img, target = train_set[0]
 
  Args:
+ ----
  img_folder: folder with all the images of the dataset
  label_path: path to the annotations of each image
  use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
@@ -66,14 +67,16 @@ class DetectionDataset(AbstractDataset):
  def format_polygons(
  self, polygons: Union[List, Dict], use_polygons: bool, np_dtype: Type
  ) -> Tuple[np.ndarray, List[str]]:
- """format polygons into an array
+ """Format polygons into an array
 
  Args:
+ ----
  polygons: the bounding boxes
  use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
  np_dtype: dtype of array
 
  Returns:
+ -------
  geoms: bounding boxes as np array
  polygons_classes: list of classes for each bounding box
  """
@@ -92,4 +95,4 @@ class DetectionDataset(AbstractDataset):
 
  @property
  def class_names(self):
- return sorted(list(set(self._class_names)))
+ return sorted(set(self._class_names))
doctr/datasets/doc_artefacts.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -26,6 +26,7 @@ class DocArtefacts(VisionDataset):
  >>> img, target = train_set[0]
 
  Args:
+ ----
  train: whether the subset should be the training one
  use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
  **kwargs: keyword arguments from `VisionDataset`.
doctr/datasets/funsd.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -29,6 +29,7 @@ class FUNSD(VisionDataset):
  >>> img, target = train_set[0]
 
  Args:
+ ----
  train: whether the subset should be the training one
  use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
  recognition_task: whether the dataset should be used for recognition task
@@ -81,7 +82,7 @@ class FUNSD(VisionDataset):
  text_targets, box_targets = zip(*_targets)
  if use_polygons:
  # xmin, ymin, xmax, ymax -> (x, y) coordinates of top left, top right, bottom right, bottom left corners
- box_targets = [
+ box_targets = [ # type: ignore[assignment]
  [
  [box[0], box[1]],
  [box[2], box[1]],
@@ -100,12 +101,10 @@ class FUNSD(VisionDataset):
  if not any(char in label for char in ["☑", "☐", "\uf703", "\uf702"]):
  self.data.append((crop, label))
  else:
- self.data.append(
- (
- img_path,
- dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=list(text_targets)),
- )
- )
+ self.data.append((
+ img_path,
+ dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=list(text_targets)),
+ ))
 
  self.root = tmp_root
 
doctr/datasets/generator/base.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -24,6 +24,7 @@ def synthesize_text_img(
  """Generate a synthetic text image
 
  Args:
+ ----
  text: the text to render as an image
  font_size: the size of the font
  font_family: the font family (has to be installed on your system)
@@ -31,9 +32,9 @@ def synthesize_text_img(
  text_color: text color on the final image
 
  Returns:
+ -------
  PIL image of the text
  """
-
  background_color = (0, 0, 0) if background_color is None else background_color
  text_color = (255, 255, 255) if text_color is None else text_color
 
doctr/datasets/generator/pytorch.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -18,6 +18,7 @@ class CharacterGenerator(_CharacterGenerator):
  >>> img, target = ds[0]
 
  Args:
+ ----
  vocab: vocabulary to take the character from
  num_samples: number of samples that will be generated iterating over the dataset
  cache_samples: whether generated images should be cached firsthand
@@ -39,6 +40,7 @@ class WordGenerator(_WordGenerator):
  >>> img, target = ds[0]
 
  Args:
+ ----
  vocab: vocabulary to take the character from
  min_chars: minimum number of characters in a word
  max_chars: maximum number of characters in a word
doctr/datasets/generator/tensorflow.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -18,6 +18,7 @@ class CharacterGenerator(_CharacterGenerator):
  >>> img, target = ds[0]
 
  Args:
+ ----
  vocab: vocabulary to take the character from
  num_samples: number of samples that will be generated iterating over the dataset
  cache_samples: whether generated images should be cached firsthand
@@ -45,6 +46,7 @@ class WordGenerator(_WordGenerator):
  >>> img, target = ds[0]
 
  Args:
+ ----
  vocab: vocabulary to take the character from
  min_chars: minimum number of characters in a word
  max_chars: maximum number of characters in a word
doctr/datasets/ic03.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -28,6 +28,7 @@ class IC03(VisionDataset):
  >>> img, target = train_set[0]
 
  Args:
+ ----
  train: whether the subset should be the training one
  use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
  recognition_task: whether the dataset should be used for recognition task
@@ -73,7 +74,7 @@ class IC03(VisionDataset):
  xml_root = xml_tree.getroot()
 
  for image in tqdm(iterable=xml_root, desc="Unpacking IC03", total=len(xml_root)):
- name, resolution, rectangles = image
+ name, _resolution, rectangles = image
 
  # File existence check
  if not os.path.exists(os.path.join(tmp_root, name.text)):
doctr/datasets/ic13.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -33,6 +33,7 @@ class IC13(AbstractDataset):
  >>> img, target = test_set[0]
 
  Args:
+ ----
  img_folder: folder with all the images of the dataset
  label_folder: folder with all annotation files for the images
  use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
doctr/datasets/iiit5k.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -30,6 +30,7 @@ class IIIT5K(VisionDataset):
  >>> img, target = train_set[0]
 
  Args:
+ ----
  train: whether the subset should be the training one
  use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
  recognition_task: whether the dataset should be used for recognition task
@@ -91,9 +92,10 @@ class IIIT5K(VisionDataset):
  box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]
 
  # label are casted to list where each char corresponds to the character's bounding box
- self.data.append(
- (_raw_path, dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=list(_raw_label)))
- )
+ self.data.append((
+ _raw_path,
+ dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=list(_raw_label)),
+ ))
 
  self.root = tmp_root
 
doctr/datasets/iiithws.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -32,6 +32,7 @@ class IIITHWS(AbstractDataset):
  >>> img, target = test_set[0]
 
  Args:
+ ----
  img_folder: folder with all the images of the dataset
  label_path: path to the file with the labels
  train: whether the subset should be the training one
doctr/datasets/imgur5k.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -40,6 +40,7 @@ class IMGUR5K(AbstractDataset):
  >>> img, target = test_set[0]
 
  Args:
+ ----
  img_folder: folder with all the images of the dataset
  label_path: path to the annotations file of the dataset
  train: whether the subset should be the training one
@@ -111,7 +112,7 @@ class IMGUR5K(AbstractDataset):
  if ann["word"] != "."
  ]
  # (x, y) coordinates of top left, top right, bottom right, bottom left corners
- box_targets = [cv2.boxPoints(((box[0], box[1]), (box[2], box[3]), box[4])) for box in _boxes]
+ box_targets = [cv2.boxPoints(((box[0], box[1]), (box[2], box[3]), box[4])) for box in _boxes] # type: ignore[arg-type]
 
  if not use_polygons:
  # xmin, ymin, xmax, ymax
doctr/datasets/loader.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -18,12 +18,13 @@ def default_collate(samples):
  """Collate multiple elements into batches
 
  Args:
+ ----
  samples: list of N tuples containing M elements
 
  Returns:
+ -------
  Tuple of M sequences contianing N elements each
  """
-
  batch_data = zip(*samples)
 
  tf_data = tuple(tf.stack(elt, axis=0) for elt in batch_data)
@@ -41,6 +42,7 @@ class DataLoader:
  >>> images, targets = next(train_iter)
 
  Args:
+ ----
  dataset: the dataset
  shuffle: whether the samples should be shuffled before passing it to the iterator
  batch_size: number of elements in each batch
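Beyond the docstring touch-ups, the collate and loader behaviour is unchanged: default_collate stacks the N samples element-wise into batched tensors, and DataLoader iterates over a dataset in batches. A minimal sketch for the TensorFlow back end, using only the arguments documented above (the dataset variable is assumed to be any dataset from this package):

>>> from doctr.datasets import DataLoader
>>> loader = DataLoader(train_set, shuffle=True, batch_size=2)
>>> train_iter = iter(loader)
>>> images, targets = next(train_iter)  # images is a stacked tf.Tensor batch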
doctr/datasets/mjsynth.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -30,6 +30,7 @@ class MJSynth(AbstractDataset):
  >>> img, target = test_set[0]
 
  Args:
+ ----
  img_folder: folder with all the images of the dataset
  label_path: path to the file with the labels
  train: whether the subset should be the training one
doctr/datasets/ocr.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -24,6 +24,7 @@ class OCRDataset(AbstractDataset):
  >>> img, target = train_set[0]
 
  Args:
+ ----
  img_folder: local path to image folder (all jpg at the root)
  label_file: local path to the label file
  use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
doctr/datasets/orientation.py ADDED
@@ -0,0 +1,40 @@
+ # Copyright (C) 2021-2024, Mindee.
+
+ # This program is licensed under the Apache License 2.0.
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+ import os
+ from typing import Any, List, Tuple
+
+ import numpy as np
+
+ from .datasets import AbstractDataset
+
+ __all__ = ["OrientationDataset"]
+
+
+ class OrientationDataset(AbstractDataset):
+     """Implements a basic image dataset where targets are filled with zeros.
+
+     >>> from doctr.datasets import OrientationDataset
+     >>> train_set = OrientationDataset(img_folder="/path/to/images")
+     >>> img, target = train_set[0]
+
+     Args:
+     ----
+     img_folder: folder with all the images of the dataset
+     **kwargs: keyword arguments from `AbstractDataset`.
+     """
+
+     def __init__(
+         self,
+         img_folder: str,
+         **kwargs: Any,
+     ) -> None:
+         super().__init__(
+             img_folder,
+             **kwargs,
+         )
+
+         # initialize dataset with 0 degree rotation targets
+         self.data: List[Tuple[str, np.ndarray]] = [(img_name, np.array([0])) for img_name in os.listdir(self.root)]
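As the new file shows, OrientationDataset simply pairs every file found in img_folder with a zero target, presumably so that real rotation targets can be generated on the fly when training orientation classifiers. What indexing it returns, based only on the code above:

>>> from doctr.datasets import OrientationDataset
>>> ds = OrientationDataset(img_folder="/path/to/images")
>>> len(ds)  # one sample per file in img_folder
>>> img, target = ds[0]
>>> target  # np.array([0]) until real rotation labels are generated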
doctr/datasets/recognition.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -22,6 +22,7 @@ class RecognitionDataset(AbstractDataset):
  >>> img, target = train_set[0]
 
  Args:
+ ----
  img_folder: path to the images folder
  labels_path: pathe to the json file containing all labels (character sequences)
  **kwargs: keyword arguments from `AbstractDataset`.
@@ -36,7 +37,7 @@ class RecognitionDataset(AbstractDataset):
  super().__init__(img_folder, **kwargs)
 
  self.data: List[Tuple[str, str]] = []
- with open(labels_path) as f:
+ with open(labels_path, encoding="utf-8") as f:
  labels = json.load(f)
 
  for img_name, label in labels.items():
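The functional change here is the explicit encoding="utf-8" when reading the labels file, so non-ASCII character sequences load the same way on every platform. A sketch of the JSON layout this loop implies, a mapping from image name to label string (file names and labels are illustrative):

>>> import json
>>> labels = {"img_1.jpg": "café", "img_2.jpg": "35€"}  # hypothetical labels file content
>>> with open("labels.json", "w", encoding="utf-8") as f:
...     json.dump(labels, f, ensure_ascii=False)
>>> from doctr.datasets import RecognitionDataset
>>> train_set = RecognitionDataset(img_folder="/path/to/images", labels_path="labels.json")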
doctr/datasets/sroie.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -29,6 +29,7 @@ class SROIE(VisionDataset):
  >>> img, target = train_set[0]
 
  Args:
+ ----
  train: whether the subset should be the training one
  use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
  recognition_task: whether the dataset should be used for recognition task
doctr/datasets/svhn.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -28,6 +28,7 @@ class SVHN(VisionDataset):
  >>> img, target = train_set[0]
 
  Args:
+ ----
  train: whether the subset should be the training one
  use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
  recognition_task: whether the dataset should be used for recognition task
doctr/datasets/svt.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -28,6 +28,7 @@ class SVT(VisionDataset):
  >>> img, target = train_set[0]
 
  Args:
+ ----
  train: whether the subset should be the training one
  use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
  recognition_task: whether the dataset should be used for recognition task
@@ -66,7 +67,7 @@ class SVT(VisionDataset):
  xml_root = xml_tree.getroot()
 
  for image in tqdm(iterable=xml_root, desc="Unpacking SVT", total=len(xml_root)):
- name, _, _, resolution, rectangles = image
+ name, _, _, _resolution, rectangles = image
 
  # File existence check
  if not os.path.exists(os.path.join(tmp_root, name.text)):
doctr/datasets/synthtext.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright (C) 2021-2023, Mindee.
+ # Copyright (C) 2021-2024, Mindee.
 
  # This program is licensed under the Apache License 2.0.
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -31,6 +31,7 @@ class SynthText(VisionDataset):
  >>> img, target = train_set[0]
 
  Args:
+ ----
  train: whether the subset should be the training one
  use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
  recognition_task: whether the dataset should be used for recognition task