python-doctr 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. doctr/datasets/__init__.py +2 -0
  2. doctr/datasets/cord.py +6 -4
  3. doctr/datasets/datasets/base.py +3 -2
  4. doctr/datasets/datasets/pytorch.py +4 -2
  5. doctr/datasets/datasets/tensorflow.py +4 -2
  6. doctr/datasets/detection.py +6 -3
  7. doctr/datasets/doc_artefacts.py +2 -1
  8. doctr/datasets/funsd.py +7 -8
  9. doctr/datasets/generator/base.py +3 -2
  10. doctr/datasets/generator/pytorch.py +3 -1
  11. doctr/datasets/generator/tensorflow.py +3 -1
  12. doctr/datasets/ic03.py +3 -2
  13. doctr/datasets/ic13.py +2 -1
  14. doctr/datasets/iiit5k.py +6 -4
  15. doctr/datasets/iiithws.py +2 -1
  16. doctr/datasets/imgur5k.py +3 -2
  17. doctr/datasets/loader.py +4 -2
  18. doctr/datasets/mjsynth.py +2 -1
  19. doctr/datasets/ocr.py +2 -1
  20. doctr/datasets/orientation.py +40 -0
  21. doctr/datasets/recognition.py +3 -2
  22. doctr/datasets/sroie.py +2 -1
  23. doctr/datasets/svhn.py +2 -1
  24. doctr/datasets/svt.py +3 -2
  25. doctr/datasets/synthtext.py +2 -1
  26. doctr/datasets/utils.py +27 -11
  27. doctr/datasets/vocabs.py +26 -1
  28. doctr/datasets/wildreceipt.py +111 -0
  29. doctr/file_utils.py +3 -1
  30. doctr/io/elements.py +52 -35
  31. doctr/io/html.py +5 -3
  32. doctr/io/image/base.py +5 -4
  33. doctr/io/image/pytorch.py +12 -7
  34. doctr/io/image/tensorflow.py +11 -6
  35. doctr/io/pdf.py +5 -4
  36. doctr/io/reader.py +13 -5
  37. doctr/models/_utils.py +30 -53
  38. doctr/models/artefacts/barcode.py +4 -3
  39. doctr/models/artefacts/face.py +4 -2
  40. doctr/models/builder.py +58 -43
  41. doctr/models/classification/__init__.py +1 -0
  42. doctr/models/classification/magc_resnet/pytorch.py +5 -2
  43. doctr/models/classification/magc_resnet/tensorflow.py +5 -2
  44. doctr/models/classification/mobilenet/pytorch.py +16 -4
  45. doctr/models/classification/mobilenet/tensorflow.py +29 -20
  46. doctr/models/classification/predictor/pytorch.py +3 -2
  47. doctr/models/classification/predictor/tensorflow.py +2 -1
  48. doctr/models/classification/resnet/pytorch.py +23 -13
  49. doctr/models/classification/resnet/tensorflow.py +33 -26
  50. doctr/models/classification/textnet/__init__.py +6 -0
  51. doctr/models/classification/textnet/pytorch.py +275 -0
  52. doctr/models/classification/textnet/tensorflow.py +267 -0
  53. doctr/models/classification/vgg/pytorch.py +4 -2
  54. doctr/models/classification/vgg/tensorflow.py +5 -2
  55. doctr/models/classification/vit/pytorch.py +9 -3
  56. doctr/models/classification/vit/tensorflow.py +9 -3
  57. doctr/models/classification/zoo.py +7 -2
  58. doctr/models/core.py +1 -1
  59. doctr/models/detection/__init__.py +1 -0
  60. doctr/models/detection/_utils/pytorch.py +7 -1
  61. doctr/models/detection/_utils/tensorflow.py +7 -3
  62. doctr/models/detection/core.py +9 -3
  63. doctr/models/detection/differentiable_binarization/base.py +37 -25
  64. doctr/models/detection/differentiable_binarization/pytorch.py +80 -104
  65. doctr/models/detection/differentiable_binarization/tensorflow.py +74 -55
  66. doctr/models/detection/fast/__init__.py +6 -0
  67. doctr/models/detection/fast/base.py +256 -0
  68. doctr/models/detection/fast/pytorch.py +442 -0
  69. doctr/models/detection/fast/tensorflow.py +428 -0
  70. doctr/models/detection/linknet/base.py +12 -5
  71. doctr/models/detection/linknet/pytorch.py +28 -15
  72. doctr/models/detection/linknet/tensorflow.py +68 -88
  73. doctr/models/detection/predictor/pytorch.py +16 -6
  74. doctr/models/detection/predictor/tensorflow.py +13 -5
  75. doctr/models/detection/zoo.py +19 -16
  76. doctr/models/factory/hub.py +20 -10
  77. doctr/models/kie_predictor/base.py +2 -1
  78. doctr/models/kie_predictor/pytorch.py +28 -36
  79. doctr/models/kie_predictor/tensorflow.py +27 -27
  80. doctr/models/modules/__init__.py +1 -0
  81. doctr/models/modules/layers/__init__.py +6 -0
  82. doctr/models/modules/layers/pytorch.py +166 -0
  83. doctr/models/modules/layers/tensorflow.py +175 -0
  84. doctr/models/modules/transformer/pytorch.py +24 -22
  85. doctr/models/modules/transformer/tensorflow.py +6 -4
  86. doctr/models/modules/vision_transformer/pytorch.py +2 -4
  87. doctr/models/modules/vision_transformer/tensorflow.py +2 -4
  88. doctr/models/obj_detection/faster_rcnn/pytorch.py +4 -2
  89. doctr/models/predictor/base.py +14 -3
  90. doctr/models/predictor/pytorch.py +26 -29
  91. doctr/models/predictor/tensorflow.py +25 -22
  92. doctr/models/preprocessor/pytorch.py +14 -9
  93. doctr/models/preprocessor/tensorflow.py +10 -5
  94. doctr/models/recognition/core.py +4 -1
  95. doctr/models/recognition/crnn/pytorch.py +23 -16
  96. doctr/models/recognition/crnn/tensorflow.py +25 -17
  97. doctr/models/recognition/master/base.py +4 -1
  98. doctr/models/recognition/master/pytorch.py +20 -9
  99. doctr/models/recognition/master/tensorflow.py +20 -8
  100. doctr/models/recognition/parseq/base.py +4 -1
  101. doctr/models/recognition/parseq/pytorch.py +28 -22
  102. doctr/models/recognition/parseq/tensorflow.py +22 -11
  103. doctr/models/recognition/predictor/_utils.py +3 -2
  104. doctr/models/recognition/predictor/pytorch.py +3 -2
  105. doctr/models/recognition/predictor/tensorflow.py +2 -1
  106. doctr/models/recognition/sar/pytorch.py +14 -7
  107. doctr/models/recognition/sar/tensorflow.py +23 -14
  108. doctr/models/recognition/utils.py +5 -1
  109. doctr/models/recognition/vitstr/base.py +4 -1
  110. doctr/models/recognition/vitstr/pytorch.py +22 -13
  111. doctr/models/recognition/vitstr/tensorflow.py +21 -10
  112. doctr/models/recognition/zoo.py +4 -2
  113. doctr/models/utils/pytorch.py +24 -6
  114. doctr/models/utils/tensorflow.py +22 -3
  115. doctr/models/zoo.py +21 -3
  116. doctr/transforms/functional/base.py +8 -3
  117. doctr/transforms/functional/pytorch.py +23 -6
  118. doctr/transforms/functional/tensorflow.py +25 -5
  119. doctr/transforms/modules/base.py +12 -5
  120. doctr/transforms/modules/pytorch.py +10 -12
  121. doctr/transforms/modules/tensorflow.py +17 -9
  122. doctr/utils/common_types.py +1 -1
  123. doctr/utils/data.py +4 -2
  124. doctr/utils/fonts.py +3 -2
  125. doctr/utils/geometry.py +95 -26
  126. doctr/utils/metrics.py +36 -22
  127. doctr/utils/multithreading.py +5 -3
  128. doctr/utils/repr.py +3 -1
  129. doctr/utils/visualization.py +31 -8
  130. doctr/version.py +1 -1
  131. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/METADATA +67 -31
  132. python_doctr-0.8.1.dist-info/RECORD +173 -0
  133. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/WHEEL +1 -1
  134. python_doctr-0.7.0.dist-info/RECORD +0 -161
  135. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/LICENSE +0 -0
  136. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/top_level.txt +0 -0
  137. {python_doctr-0.7.0.dist-info → python_doctr-0.8.1.dist-info}/zip-safe +0 -0
doctr/utils/geometry.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -29,10 +29,30 @@ __all__ = [
29
29
 
30
30
 
31
31
  def bbox_to_polygon(bbox: BoundingBox) -> Polygon4P:
32
+ """Convert a bounding box to a polygon
33
+
34
+ Args:
35
+ ----
36
+ bbox: a bounding box
37
+
38
+ Returns:
39
+ -------
40
+ a polygon
41
+ """
32
42
  return bbox[0], (bbox[1][0], bbox[0][1]), (bbox[0][0], bbox[1][1]), bbox[1]
33
43
 
34
44
 
35
45
  def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox:
46
+ """Convert a polygon to a bounding box
47
+
48
+ Args:
49
+ ----
50
+ polygon: a polygon
51
+
52
+ Returns:
53
+ -------
54
+ a bounding box
55
+ """
36
56
  x, y = zip(*polygon)
37
57
  return (min(x), min(y)), (max(x), max(y))
38
58
 
@@ -40,12 +60,18 @@ def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox:
40
60
  def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Union[BoundingBox, np.ndarray]:
41
61
  """Compute enclosing bbox either from:
42
62
 
43
- - an array of boxes: (*, 5), where boxes have this shape:
44
- (xmin, ymin, xmax, ymax, score)
63
+ Args:
64
+ ----
65
+ bboxes: boxes in one of the following formats:
66
+
67
+ - an array of boxes: (*, 5), where boxes have this shape:
68
+ (xmin, ymin, xmax, ymax, score)
45
69
 
46
- - a list of BoundingBox
70
+ - a list of BoundingBox
47
71
 
48
- Return a (1, 5) array (enclosing boxarray), or a BoundingBox
72
+ Returns:
73
+ -------
74
+ a (1, 5) array (enclosing boxarray), or a BoundingBox
49
75
  """
50
76
  if isinstance(bboxes, np.ndarray):
51
77
  xmin, ymin, xmax, ymax, score = np.split(bboxes, 5, axis=1)
@@ -56,18 +82,41 @@ def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Unio
56
82
 
57
83
 
58
84
  def resolve_enclosing_rbbox(rbboxes: List[np.ndarray], intermed_size: int = 1024) -> np.ndarray:
85
+ """Compute enclosing rotated bbox either from:
86
+
87
+ Args:
88
+ ----
89
+ rbboxes: boxes in one of the following formats:
90
+
91
+ - an array of boxes: (*, 5), where boxes have this shape:
92
+ (xmin, ymin, xmax, ymax, score)
93
+
94
+ - a list of BoundingBox
95
+ intermed_size: size of the intermediate image
96
+
97
+ Returns:
98
+ -------
99
+ a (1, 5) array (enclosing boxarray), or a BoundingBox
100
+ """
59
101
  cloud: np.ndarray = np.concatenate(rbboxes, axis=0)
60
102
  # Convert to absolute for minAreaRect
61
103
  cloud *= intermed_size
62
104
  rect = cv2.minAreaRect(cloud.astype(np.int32))
63
- return cv2.boxPoints(rect) / intermed_size
105
+ return cv2.boxPoints(rect) / intermed_size # type: ignore[operator]
64
106
 
65
107
 
66
108
  def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
67
109
  """Rotate points counter-clockwise.
68
- Points: array of size (N, 2)
69
- """
70
110
 
111
+ Args:
112
+ ----
113
+ points: array of size (N, 2)
114
+ angle: angle between -90 and +90 degrees
115
+
116
+ Returns:
117
+ -------
118
+ Rotated points
119
+ """
71
120
  angle_rad = angle * np.pi / 180.0 # compute radian angle for np functions
72
121
  rotation_mat = np.array(
73
122
  [[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=points.dtype
@@ -79,19 +128,18 @@ def compute_expanded_shape(img_shape: Tuple[int, int], angle: float) -> Tuple[in
79
128
  """Compute the shape of an expanded rotated image
80
129
 
81
130
  Args:
131
+ ----
82
132
  img_shape: the height and width of the image
83
133
  angle: angle between -90 and +90 degrees
84
134
 
85
135
  Returns:
136
+ -------
86
137
  the height and width of the rotated image
87
138
  """
88
-
89
- points: np.ndarray = np.array(
90
- [
91
- [img_shape[1] / 2, img_shape[0] / 2],
92
- [-img_shape[1] / 2, img_shape[0] / 2],
93
- ]
94
- )
139
+ points: np.ndarray = np.array([
140
+ [img_shape[1] / 2, img_shape[0] / 2],
141
+ [-img_shape[1] / 2, img_shape[0] / 2],
142
+ ])
95
143
 
96
144
  rotated_points = rotate_abs_points(points, angle)
97
145
 
@@ -109,15 +157,16 @@ def rotate_abs_geoms(
109
157
  image center.
110
158
 
111
159
  Args:
112
- boxes: (N, 4) or (N, 4, 2) array of ABSOLUTE coordinate boxes
160
+ ----
161
+ geoms: (N, 4) or (N, 4, 2) array of ABSOLUTE coordinate boxes
113
162
  angle: anti-clockwise rotation angle in degrees
114
163
  img_shape: the height and width of the image
115
164
  expand: whether the image should be padded to avoid information loss
116
165
 
117
166
  Returns:
167
+ -------
118
168
  A batch of rotated polygons (N, 4, 2)
119
169
  """
120
-
121
170
  # Switch to polygons
122
171
  polys = (
123
172
  np.stack([geoms[:, [0, 1]], geoms[:, [2, 1]], geoms[:, [2, 3]], geoms[:, [0, 3]]], axis=1)
@@ -147,14 +196,15 @@ def remap_boxes(loc_preds: np.ndarray, orig_shape: Tuple[int, int], dest_shape:
147
196
  coordinates after a resizing of the image.
148
197
 
149
198
  Args:
199
+ ----
150
200
  loc_preds: (N, 4, 2) array of RELATIVE loc_preds
151
201
  orig_shape: shape of the origin image
152
202
  dest_shape: shape of the destination image
153
203
 
154
204
  Returns:
205
+ -------
155
206
 A batch of rotated loc_preds (N, 4, 2) expressed in the destination reference frame
156
207
  """
157
-
158
208
  if len(dest_shape) != 2:
159
209
  raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}")
160
210
  if len(orig_shape) != 2:
@@ -181,15 +231,17 @@ def rotate_boxes(
181
231
  is done to remove the padding that is created by rotate_page(expand=True)
182
232
 
183
233
  Args:
234
+ ----
184
235
  loc_preds: (N, 5) or (N, 4, 2) array of RELATIVE boxes
185
236
  angle: angle between -90 and +90 degrees
186
237
  orig_shape: shape of the origin image
187
238
  min_angle: minimum angle to rotate boxes
239
+ target_shape: shape of the destination image
188
240
 
189
241
  Returns:
242
+ -------
190
243
  A batch of rotated boxes (N, 4, 2): or a batch of straight bounding boxes
191
244
  """
192
-
193
245
  # Change format of the boxes to rotated boxes
194
246
  _boxes = loc_preds.copy()
195
247
  if _boxes.ndim == 2:
@@ -234,21 +286,23 @@ def rotate_image(
234
286
 """Rotate an image counterclockwise by a given angle.
235
287
 
236
288
  Args:
289
+ ----
237
290
  image: numpy tensor to rotate
238
291
  angle: rotation angle in degrees, between -90 and +90
239
292
  expand: whether the image should be padded before the rotation
240
293
  preserve_origin_shape: if expand is set to True, resizes the final output to the original image size
241
294
 
242
295
  Returns:
296
+ -------
243
297
  Rotated array, padded by 0 by default.
244
298
  """
245
-
246
299
  # Compute the expanded padding
247
300
  exp_img: np.ndarray
248
301
  if expand:
249
302
  exp_shape = compute_expanded_shape(image.shape[:2], angle) # type: ignore[arg-type]
250
- h_pad, w_pad = int(max(0, ceil(exp_shape[0] - image.shape[0]))), int(
251
- max(0, ceil(exp_shape[1] - image.shape[1]))
303
+ h_pad, w_pad = (
304
+ int(max(0, ceil(exp_shape[0] - image.shape[0]))),
305
+ int(max(0, ceil(exp_shape[1] - image.shape[1]))),
252
306
  )
253
307
  exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
254
308
  else:
@@ -283,20 +337,27 @@ def estimate_page_angle(polys: np.ndarray) -> float:
283
337
  yleft = polys[:, 0, 1] + polys[:, 3, 1]
284
338
  xright = polys[:, 1, 0] + polys[:, 2, 0]
285
339
  yright = polys[:, 1, 1] + polys[:, 2, 1]
286
- return float(np.median(np.arctan((yleft - yright) / (xright - xleft))) * 180 / np.pi) # Y axis from top to bottom!
340
+ with np.errstate(divide="raise", invalid="raise"):
341
+ try:
342
+ return float(
343
+ np.median(np.arctan((yleft - yright) / (xright - xleft)) * 180 / np.pi) # Y axis from top to bottom!
344
+ )
345
+ except FloatingPointError:
346
+ return 0.0
287
347
 
288
348
 
289
349
  def convert_to_relative_coords(geoms: np.ndarray, img_shape: Tuple[int, int]) -> np.ndarray:
290
350
  """Convert a geometry to relative coordinates
291
351
 
292
352
  Args:
353
+ ----
293
354
  geoms: a set of polygons of shape (N, 4, 2) or of straight boxes of shape (N, 4)
294
355
  img_shape: the height and width of the image
295
356
 
296
357
  Returns:
358
+ -------
297
359
  the updated geometry
298
360
  """
299
-
300
361
  # Polygon
301
362
  if geoms.ndim == 3 and geoms.shape[1:] == (4, 2):
302
363
  polygons: np.ndarray = np.empty(geoms.shape, dtype=np.float32)
@@ -314,12 +375,16 @@ def convert_to_relative_coords(geoms: np.ndarray, img_shape: Tuple[int, int]) ->
314
375
 
315
376
  def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> List[np.ndarray]:
316
377
 """Create cropped images from a list of bounding boxes
378
+
317
379
  Args:
380
+ ----
318
381
  img: input image
319
382
  boxes: bounding boxes of shape (N, 4) where N is the number of boxes, and the relative
320
383
  coordinates (xmin, ymin, xmax, ymax)
321
384
 channels_last: whether the channel dimension is the last one instead of the first one
385
+
322
386
  Returns:
387
+ -------
323
388
  list of cropped images
324
389
  """
325
390
  if boxes.shape[0] == 0:
@@ -330,7 +395,7 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True
330
395
  # Project relative coordinates
331
396
  _boxes = boxes.copy()
332
397
  h, w = img.shape[:2] if channels_last else img.shape[-2:]
333
- if _boxes.dtype != int:
398
+ if not np.issubdtype(_boxes.dtype, np.integer):
334
399
  _boxes[:, [0, 2]] *= w
335
400
  _boxes[:, [1, 3]] *= h
336
401
  _boxes = _boxes.round().astype(int)
@@ -346,12 +411,16 @@ def extract_rcrops(
346
411
  img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True
347
412
  ) -> List[np.ndarray]:
348
413
 """Create cropped images from a list of rotated bounding boxes
414
+
349
415
  Args:
416
+ ----
350
417
  img: input image
351
418
  polys: bounding boxes of shape (N, 4, 2)
352
419
  dtype: target data type of bounding boxes
353
420
 channels_last: whether the channel dimension is the last one instead of the first one
421
+
354
422
  Returns:
423
+ -------
355
424
  list of cropped images
356
425
  """
357
426
  if polys.shape[0] == 0:
@@ -362,7 +431,7 @@ def extract_rcrops(
362
431
  # Project relative coordinates
363
432
  _boxes = polys.copy()
364
433
  height, width = img.shape[:2] if channels_last else img.shape[-2:]
365
- if _boxes.dtype != int:
434
+ if not np.issubdtype(_boxes.dtype, np.integer):
366
435
  _boxes[:, :, 0] *= width
367
436
  _boxes[:, :, 1] *= height
368
437
 
doctr/utils/metrics.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -27,10 +27,12 @@ def string_match(word1: str, word2: str) -> Tuple[bool, bool, bool, bool]:
27
27
  """Performs string comparison with multiple levels of tolerance
28
28
 
29
29
  Args:
30
+ ----
30
31
  word1: a string
31
32
  word2: another string
32
33
 
33
34
  Returns:
35
+ -------
34
36
  a tuple with booleans specifying respectively whether the raw strings, their lower-case counterparts, their
35
37
  unidecode counterparts and their lower-case unidecode counterparts match
36
38
  """
@@ -84,10 +86,10 @@ class TextMatch:
84
86
  """Update the state of the metric with new predictions
85
87
 
86
88
  Args:
89
+ ----
87
90
 gt: list of ground-truth character sequences
88
91
  pred: list of predicted character sequences
89
92
  """
90
-
91
93
  if len(gt) != len(pred):
92
94
  raise AssertionError("prediction size does not match with ground-truth labels size")
93
95
 
@@ -103,7 +105,8 @@ class TextMatch:
103
105
  def summary(self) -> Dict[str, float]:
104
106
  """Computes the aggregated metrics
105
107
 
106
- Returns:
108
+ Returns
109
+ -------
107
110
  a dictionary with the exact match score for the raw data, its lower-case counterpart, its unidecode
108
111
  counterpart and its lower-case unidecode counterpart
109
112
  """
@@ -129,13 +132,14 @@ def box_iou(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
129
132
  """Computes the IoU between two sets of bounding boxes
130
133
 
131
134
  Args:
135
+ ----
132
136
  boxes_1: bounding boxes of shape (N, 4) in format (xmin, ymin, xmax, ymax)
133
137
  boxes_2: bounding boxes of shape (M, 4) in format (xmin, ymin, xmax, ymax)
134
138
 
135
139
  Returns:
140
+ -------
136
141
  the IoU matrix of shape (N, M)
137
142
  """
138
-
139
143
  iou_mat: np.ndarray = np.zeros((boxes_1.shape[0], boxes_2.shape[0]), dtype=np.float32)
140
144
 
141
145
  if boxes_1.shape[0] > 0 and boxes_2.shape[0] > 0:
@@ -159,13 +163,14 @@ def box_ioa(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
159
163
  ioa(i, j) = inter(i, j) / area(i)
160
164
 
161
165
  Args:
166
+ ----
162
167
  boxes_1: bounding boxes of shape (N, 4) in format (xmin, ymin, xmax, ymax)
163
168
  boxes_2: bounding boxes of shape (M, 4) in format (xmin, ymin, xmax, ymax)
164
169
 
165
170
  Returns:
171
+ -------
166
172
  the IoA matrix of shape (N, M)
167
173
  """
168
-
169
174
  ioa_mat: np.ndarray = np.zeros((boxes_1.shape[0], boxes_2.shape[0]), dtype=np.float32)
170
175
 
171
176
  if boxes_1.shape[0] > 0 and boxes_2.shape[0] > 0:
@@ -188,13 +193,14 @@ def mask_iou(masks_1: np.ndarray, masks_2: np.ndarray) -> np.ndarray:
188
193
  """Computes the IoU between two sets of boolean masks
189
194
 
190
195
  Args:
196
+ ----
191
197
  masks_1: boolean masks of shape (N, H, W)
192
198
  masks_2: boolean masks of shape (M, H, W)
193
199
 
194
200
  Returns:
201
+ -------
195
202
  the IoU matrix of shape (N, M)
196
203
  """
197
-
198
204
  if masks_1.shape[1:] != masks_2.shape[1:]:
199
205
  raise AssertionError("both boolean masks should have the same spatial shape")
200
206
 
@@ -215,15 +221,16 @@ def polygon_iou(
215
221
  """Computes the IoU between two sets of rotated bounding boxes
216
222
 
217
223
  Args:
224
+ ----
218
225
  polys_1: rotated bounding boxes of shape (N, 4, 2)
219
226
  polys_2: rotated bounding boxes of shape (M, 4, 2)
220
227
  mask_shape: spatial shape of the intermediate masks
221
228
  use_broadcasting: if set to True, leverage broadcasting speedup by consuming more memory
222
229
 
223
230
  Returns:
231
+ -------
224
232
  the IoU matrix of shape (N, M)
225
233
  """
226
-
227
234
  if polys_1.ndim != 3 or polys_2.ndim != 3:
228
235
  raise AssertionError("expects boxes to be in format (N, 4, 2)")
229
236
 
@@ -249,16 +256,17 @@ def _rbox_to_mask(box: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
249
256
  """Converts a rotated bounding box to a boolean mask
250
257
 
251
258
  Args:
259
+ ----
252
260
  box: rotated bounding box of shape (4, 2)
253
261
  shape: spatial shapes of the output masks
254
262
 
255
263
  Returns:
264
+ -------
256
265
  the boolean mask of the specified shape
257
266
  """
258
-
259
267
  mask: np.ndarray = np.zeros(shape, dtype=np.uint8)
260
268
  # Get absolute coords
261
- if box.dtype != int:
269
+ if not np.issubdtype(box.dtype, np.integer):
262
270
  abs_box = box.copy()
263
271
  abs_box[:, 0] = abs_box[:, 0] * shape[1]
264
272
  abs_box[:, 1] = abs_box[:, 1] * shape[0]
@@ -266,7 +274,7 @@ def _rbox_to_mask(box: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
266
274
  else:
267
275
  abs_box = box
268
276
  abs_box[2:] = abs_box[2:] + 1
269
- cv2.fillPoly(mask, [abs_box - 1], 1)
277
+ cv2.fillPoly(mask, [abs_box - 1], 1.0) # type: ignore[call-overload]
270
278
 
271
279
  return mask.astype(bool)
272
280
 
@@ -275,18 +283,19 @@ def rbox_to_mask(boxes: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
275
283
  """Converts rotated bounding boxes to boolean masks
276
284
 
277
285
  Args:
286
+ ----
278
287
  boxes: rotated bounding boxes of shape (N, 4, 2)
279
288
  shape: spatial shapes of the output masks
280
289
 
281
290
  Returns:
291
+ -------
282
292
  the boolean masks of shape (N, H, W)
283
293
  """
284
-
285
294
  masks: np.ndarray = np.zeros((boxes.shape[0], *shape), dtype=np.uint8)
286
295
 
287
296
  if boxes.shape[0] > 0:
288
297
  # Get absolute coordinates
289
- if boxes.dtype != int:
298
+ if not np.issubdtype(boxes.dtype, np.integer):
290
299
  abs_boxes = boxes.copy()
291
300
  abs_boxes[:, :, 0] = abs_boxes[:, :, 0] * shape[1]
292
301
  abs_boxes[:, :, 1] = abs_boxes[:, :, 1] * shape[0]
@@ -297,7 +306,7 @@ def rbox_to_mask(boxes: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
297
306
 
298
307
  # TODO: optimize slicing to improve vectorization
299
308
  for idx, _box in enumerate(abs_boxes):
300
- cv2.fillPoly(masks[idx], [_box - 1], 1)
309
+ cv2.fillPoly(masks[idx], [_box - 1], 1.0) # type: ignore[call-overload]
301
310
  return masks.astype(bool)
302
311
 
303
312
 
@@ -305,10 +314,12 @@ def nms(boxes: np.ndarray, thresh: float = 0.5) -> List[int]:
305
314
 """Perform non-max suppression, borrowed from `<https://github.com/rbgirshick/fast-rcnn>`_.
306
315
 
307
316
  Args:
317
+ ----
308
318
  boxes: np array of straight boxes: (*, 5), (xmin, ymin, xmax, ymax, score)
309
319
  thresh: iou threshold to perform box suppression.
310
320
 
311
321
  Returns:
322
+ -------
312
323
  A list of box indexes to keep
313
324
  """
314
325
  x1 = boxes[:, 0]
@@ -372,6 +383,7 @@ class LocalizationConfusion:
372
383
  >>> metric.summary()
373
384
 
374
385
  Args:
386
+ ----
375
387
  iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
376
388
  use_polygons: if set to True, predictions and targets will be expected to have rotated format
377
389
  mask_shape: if use_polygons is True, describes the spatial shape of the image used
@@ -395,10 +407,10 @@ class LocalizationConfusion:
395
407
  """Updates the metric
396
408
 
397
409
  Args:
410
+ ----
398
411
  gts: a set of relative bounding boxes either of shape (N, 4) or (N, 5) if they are rotated ones
399
412
  preds: a set of relative bounding boxes either of shape (M, 4) or (M, 5) if they are rotated ones
400
413
  """
401
-
402
414
  if preds.shape[0] > 0:
403
415
  # Compute IoU
404
416
  if self.use_polygons:
@@ -418,10 +430,10 @@ class LocalizationConfusion:
418
430
  def summary(self) -> Tuple[Optional[float], Optional[float], Optional[float]]:
419
431
  """Computes the aggregated metrics
420
432
 
421
- Returns:
433
+ Returns
434
+ -------
422
435
  a tuple with the recall, precision and meanIoU scores
423
436
  """
424
-
425
437
  # Recall
426
438
  recall = self.matches / self.num_gts if self.num_gts > 0 else None
427
439
 
@@ -477,6 +489,7 @@ class OCRMetric:
477
489
  >>> metric.summary()
478
490
 
479
491
  Args:
492
+ ----
480
493
  iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
481
494
  use_polygons: if set to True, predictions and targets will be expected to have rotated format
482
495
  mask_shape: if use_polygons is True, describes the spatial shape of the image used
@@ -506,12 +519,12 @@ class OCRMetric:
506
519
  """Updates the metric
507
520
 
508
521
  Args:
522
+ ----
509
523
  gt_boxes: a set of relative bounding boxes either of shape (N, 4) or (N, 5) if they are rotated ones
510
524
  pred_boxes: a set of relative bounding boxes either of shape (M, 4) or (M, 5) if they are rotated ones
511
525
  gt_labels: a list of N string labels
512
526
  pred_labels: a list of M string labels
513
527
  """
514
-
515
528
  if gt_boxes.shape[0] != len(gt_labels) or pred_boxes.shape[0] != len(pred_labels):
516
529
  raise AssertionError(
517
530
  "there should be the same number of boxes and string both for the ground truth " "and the predictions"
@@ -543,10 +556,10 @@ class OCRMetric:
543
556
  def summary(self) -> Tuple[Dict[str, Optional[float]], Dict[str, Optional[float]], Optional[float]]:
544
557
  """Computes the aggregated metrics
545
558
 
546
- Returns:
559
+ Returns
560
+ -------
547
561
  a tuple with the recall & precision for each string comparison and the mean IoU
548
562
  """
549
-
550
563
  # Recall
551
564
  recall = dict(
552
565
  raw=self.raw_matches / self.num_gts if self.num_gts > 0 else None,
@@ -615,6 +628,7 @@ class DetectionMetric:
615
628
  >>> metric.summary()
616
629
 
617
630
  Args:
631
+ ----
618
632
  iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
619
633
  use_polygons: if set to True, predictions and targets will be expected to have rotated format
620
634
  mask_shape: if use_polygons is True, describes the spatial shape of the image used
@@ -644,12 +658,12 @@ class DetectionMetric:
644
658
  """Updates the metric
645
659
 
646
660
  Args:
661
+ ----
647
662
  gt_boxes: a set of relative bounding boxes either of shape (N, 4) or (N, 5) if they are rotated ones
648
663
  pred_boxes: a set of relative bounding boxes either of shape (M, 4) or (M, 5) if they are rotated ones
649
664
  gt_labels: an array of class indices of shape (N,)
650
665
  pred_labels: an array of class indices of shape (M,)
651
666
  """
652
-
653
667
  if gt_boxes.shape[0] != gt_labels.shape[0] or pred_boxes.shape[0] != pred_labels.shape[0]:
654
668
  raise AssertionError(
655
669
  "there should be the same number of boxes and string both for the ground truth " "and the predictions"
@@ -676,10 +690,10 @@ class DetectionMetric:
676
690
  def summary(self) -> Tuple[Optional[float], Optional[float], Optional[float]]:
677
691
  """Computes the aggregated metrics
678
692
 
679
- Returns:
693
+ Returns
694
+ -------
680
695
  a tuple with the recall & precision for each class prediction and the mean IoU
681
696
  """
682
-
683
697
  # Recall
684
698
  recall = self.num_matches / self.num_gts if self.num_gts > 0 else None
685
699
 
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -22,19 +22,21 @@ def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: Op
22
22
  >>> results = multithread_exec(lambda x: x ** 2, entries)
23
23
 
24
24
  Args:
25
+ ----
25
26
  func: function to be executed on each element of the iterable
26
27
  seq: iterable
27
28
  threads: number of workers to be used for multiprocessing
28
29
 
29
30
  Returns:
31
+ -------
30
32
  iterator of the function's results using the iterable as inputs
31
33
 
32
34
  Notes:
35
+ -----
33
36
  This function uses ThreadPool from multiprocessing package, which uses `/dev/shm` directory for shared memory.
34
37
  If you do not have write permissions for this directory (if you run `doctr` on AWS Lambda for instance),
35
38
  you might want to disable multiprocessing. To achieve that, set 'DOCTR_MULTIPROCESSING_DISABLE' to 'TRUE'.
36
39
  """
37
-
38
40
  threads = threads if isinstance(threads, int) else min(16, mp.cpu_count())
39
41
  # Single-thread
40
42
  if threads < 2 or os.environ.get("DOCTR_MULTIPROCESSING_DISABLE", "").upper() in ENV_VARS_TRUE_VALUES:
@@ -44,5 +46,5 @@ def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: Op
44
46
  with ThreadPool(threads) as tp:
45
47
  # ThreadPool's map function returns a list, but seq could be of a different type
46
48
  # That's why wrapping result in map to return iterator
47
- results = map(lambda x: x, tp.map(func, seq))
49
+ results = map(lambda x: x, tp.map(func, seq)) # noqa: C417
48
50
  return results
doctr/utils/repr.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2021-2023, Mindee.
1
+ # Copyright (C) 2021-2024, Mindee.
2
2
 
3
3
  # This program is licensed under the Apache License 2.0.
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -23,6 +23,8 @@ def _addindent(s_, num_spaces):
23
23
 
24
24
 
25
25
  class NestedObject:
26
+ """Base class for all nested objects in doctr"""
27
+
26
28
  _children_names: List[str]
27
29
 
28
30
  def extra_repr(self) -> str: