python-doctr 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. doctr/__init__.py +1 -1
  2. doctr/contrib/__init__.py +0 -0
  3. doctr/contrib/artefacts.py +131 -0
  4. doctr/contrib/base.py +105 -0
  5. doctr/datasets/datasets/pytorch.py +2 -2
  6. doctr/datasets/generator/base.py +6 -5
  7. doctr/datasets/imgur5k.py +1 -1
  8. doctr/datasets/loader.py +1 -6
  9. doctr/datasets/utils.py +2 -1
  10. doctr/datasets/vocabs.py +9 -2
  11. doctr/file_utils.py +26 -12
  12. doctr/io/elements.py +40 -6
  13. doctr/io/html.py +2 -2
  14. doctr/io/image/pytorch.py +6 -8
  15. doctr/io/image/tensorflow.py +1 -1
  16. doctr/io/pdf.py +5 -2
  17. doctr/io/reader.py +6 -0
  18. doctr/models/__init__.py +0 -1
  19. doctr/models/_utils.py +57 -20
  20. doctr/models/builder.py +71 -13
  21. doctr/models/classification/mobilenet/pytorch.py +45 -9
  22. doctr/models/classification/mobilenet/tensorflow.py +38 -7
  23. doctr/models/classification/predictor/pytorch.py +18 -11
  24. doctr/models/classification/predictor/tensorflow.py +16 -10
  25. doctr/models/classification/textnet/pytorch.py +3 -3
  26. doctr/models/classification/textnet/tensorflow.py +3 -3
  27. doctr/models/classification/zoo.py +39 -15
  28. doctr/models/detection/__init__.py +1 -0
  29. doctr/models/detection/_utils/__init__.py +1 -0
  30. doctr/models/detection/_utils/base.py +66 -0
  31. doctr/models/detection/differentiable_binarization/base.py +4 -3
  32. doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
  33. doctr/models/detection/differentiable_binarization/tensorflow.py +14 -18
  34. doctr/models/detection/fast/__init__.py +6 -0
  35. doctr/models/detection/fast/base.py +257 -0
  36. doctr/models/detection/fast/pytorch.py +442 -0
  37. doctr/models/detection/fast/tensorflow.py +428 -0
  38. doctr/models/detection/linknet/base.py +4 -3
  39. doctr/models/detection/predictor/pytorch.py +15 -1
  40. doctr/models/detection/predictor/tensorflow.py +15 -1
  41. doctr/models/detection/zoo.py +21 -4
  42. doctr/models/factory/hub.py +3 -12
  43. doctr/models/kie_predictor/base.py +9 -3
  44. doctr/models/kie_predictor/pytorch.py +41 -20
  45. doctr/models/kie_predictor/tensorflow.py +36 -16
  46. doctr/models/modules/layers/pytorch.py +89 -10
  47. doctr/models/modules/layers/tensorflow.py +88 -10
  48. doctr/models/modules/transformer/pytorch.py +2 -2
  49. doctr/models/predictor/base.py +77 -50
  50. doctr/models/predictor/pytorch.py +31 -20
  51. doctr/models/predictor/tensorflow.py +27 -17
  52. doctr/models/preprocessor/pytorch.py +4 -4
  53. doctr/models/preprocessor/tensorflow.py +3 -2
  54. doctr/models/recognition/master/pytorch.py +2 -2
  55. doctr/models/recognition/parseq/pytorch.py +4 -3
  56. doctr/models/recognition/parseq/tensorflow.py +4 -3
  57. doctr/models/recognition/sar/pytorch.py +7 -6
  58. doctr/models/recognition/sar/tensorflow.py +3 -9
  59. doctr/models/recognition/vitstr/pytorch.py +1 -1
  60. doctr/models/recognition/zoo.py +1 -1
  61. doctr/models/zoo.py +2 -2
  62. doctr/py.typed +0 -0
  63. doctr/transforms/functional/base.py +1 -1
  64. doctr/transforms/functional/pytorch.py +4 -4
  65. doctr/transforms/modules/base.py +37 -15
  66. doctr/transforms/modules/pytorch.py +66 -8
  67. doctr/transforms/modules/tensorflow.py +63 -7
  68. doctr/utils/fonts.py +7 -5
  69. doctr/utils/geometry.py +35 -12
  70. doctr/utils/metrics.py +33 -174
  71. doctr/utils/reconstitution.py +126 -0
  72. doctr/utils/visualization.py +5 -118
  73. doctr/version.py +1 -1
  74. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/METADATA +96 -91
  75. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/RECORD +79 -75
  76. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/WHEEL +1 -1
  77. doctr/models/artefacts/__init__.py +0 -2
  78. doctr/models/artefacts/barcode.py +0 -74
  79. doctr/models/artefacts/face.py +0 -63
  80. doctr/models/obj_detection/__init__.py +0 -1
  81. doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
  82. doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
  83. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/LICENSE +0 -0
  84. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/top_level.txt +0 -0
  85. {python_doctr-0.8.0.dist-info → python_doctr-0.9.0.dist-info}/zip-safe +0 -0
doctr/utils/metrics.py CHANGED
@@ -5,16 +5,14 @@
5
5
 
6
6
  from typing import Dict, List, Optional, Tuple
7
7
 
8
- import cv2
9
8
  import numpy as np
9
+ from anyascii import anyascii
10
10
  from scipy.optimize import linear_sum_assignment
11
- from unidecode import unidecode
11
+ from shapely.geometry import Polygon
12
12
 
13
13
  __all__ = [
14
14
  "TextMatch",
15
15
  "box_iou",
16
- "box_ioa",
17
- "mask_iou",
18
16
  "polygon_iou",
19
17
  "nms",
20
18
  "LocalizationConfusion",
@@ -34,16 +32,16 @@ def string_match(word1: str, word2: str) -> Tuple[bool, bool, bool, bool]:
34
32
  Returns:
35
33
  -------
36
34
  a tuple with booleans specifying respectively whether the raw strings, their lower-case counterparts, their
37
- unidecode counterparts and their lower-case unidecode counterparts match
35
+ anyascii counterparts and their lower-case anyascii counterparts match
38
36
  """
39
37
  raw_match = word1 == word2
40
38
  caseless_match = word1.lower() == word2.lower()
41
- unidecode_match = unidecode(word1) == unidecode(word2)
39
+ anyascii_match = anyascii(word1) == anyascii(word2)
42
40
 
43
41
  # Warning: the order is important here otherwise the pair ("EUR", "€") cannot be matched
44
- unicase_match = unidecode(word1).lower() == unidecode(word2).lower()
42
+ unicase_match = anyascii(word1).lower() == anyascii(word2).lower()
45
43
 
46
- return raw_match, caseless_match, unidecode_match, unicase_match
44
+ return raw_match, caseless_match, anyascii_match, unicase_match
47
45
 
48
46
 
49
47
  class TextMatch:
@@ -94,10 +92,10 @@ class TextMatch:
94
92
  raise AssertionError("prediction size does not match with ground-truth labels size")
95
93
 
96
94
  for gt_word, pred_word in zip(gt, pred):
97
- _raw, _caseless, _unidecode, _unicase = string_match(gt_word, pred_word)
95
+ _raw, _caseless, _anyascii, _unicase = string_match(gt_word, pred_word)
98
96
  self.raw += int(_raw)
99
97
  self.caseless += int(_caseless)
100
- self.unidecode += int(_unidecode)
98
+ self.anyascii += int(_anyascii)
101
99
  self.unicase += int(_unicase)
102
100
 
103
101
  self.total += len(gt)
@@ -107,8 +105,8 @@ class TextMatch:
107
105
 
108
106
  Returns
109
107
  -------
110
- a dictionary with the exact match score for the raw data, its lower-case counterpart, its unidecode
111
- counterpart and its lower-case unidecode counterpart
108
+ a dictionary with the exact match score for the raw data, its lower-case counterpart, its anyascii
109
+ counterpart and its lower-case anyascii counterpart
112
110
  """
113
111
  if self.total == 0:
114
112
  raise AssertionError("you need to update the metric before getting the summary")
@@ -116,14 +114,14 @@ class TextMatch:
116
114
  return dict(
117
115
  raw=self.raw / self.total,
118
116
  caseless=self.caseless / self.total,
119
- unidecode=self.unidecode / self.total,
117
+ anyascii=self.anyascii / self.total,
120
118
  unicase=self.unicase / self.total,
121
119
  )
122
120
 
123
121
  def reset(self) -> None:
124
122
  self.raw = 0
125
123
  self.caseless = 0
126
- self.unidecode = 0
124
+ self.anyascii = 0
127
125
  self.unicase = 0
128
126
  self.total = 0
129
127
 
@@ -158,66 +156,7 @@ def box_iou(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
158
156
  return iou_mat
159
157
 
160
158
 
161
- def box_ioa(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray:
162
- """Computes the IoA (intersection over area) between two sets of bounding boxes:
163
- ioa(i, j) = inter(i, j) / area(i)
164
-
165
- Args:
166
- ----
167
- boxes_1: bounding boxes of shape (N, 4) in format (xmin, ymin, xmax, ymax)
168
- boxes_2: bounding boxes of shape (M, 4) in format (xmin, ymin, xmax, ymax)
169
-
170
- Returns:
171
- -------
172
- the IoA matrix of shape (N, M)
173
- """
174
- ioa_mat: np.ndarray = np.zeros((boxes_1.shape[0], boxes_2.shape[0]), dtype=np.float32)
175
-
176
- if boxes_1.shape[0] > 0 and boxes_2.shape[0] > 0:
177
- l1, t1, r1, b1 = np.split(boxes_1, 4, axis=1)
178
- l2, t2, r2, b2 = np.split(boxes_2, 4, axis=1)
179
-
180
- left = np.maximum(l1, l2.T)
181
- top = np.maximum(t1, t2.T)
182
- right = np.minimum(r1, r2.T)
183
- bot = np.minimum(b1, b2.T)
184
-
185
- intersection = np.clip(right - left, 0, np.Inf) * np.clip(bot - top, 0, np.Inf)
186
- area = (r1 - l1) * (b1 - t1)
187
- ioa_mat = intersection / area
188
-
189
- return ioa_mat
190
-
191
-
192
- def mask_iou(masks_1: np.ndarray, masks_2: np.ndarray) -> np.ndarray:
193
- """Computes the IoU between two sets of boolean masks
194
-
195
- Args:
196
- ----
197
- masks_1: boolean masks of shape (N, H, W)
198
- masks_2: boolean masks of shape (M, H, W)
199
-
200
- Returns:
201
- -------
202
- the IoU matrix of shape (N, M)
203
- """
204
- if masks_1.shape[1:] != masks_2.shape[1:]:
205
- raise AssertionError("both boolean masks should have the same spatial shape")
206
-
207
- iou_mat: np.ndarray = np.zeros((masks_1.shape[0], masks_2.shape[0]), dtype=np.float32)
208
-
209
- if masks_1.shape[0] > 0 and masks_2.shape[0] > 0:
210
- axes = tuple(range(2, masks_1.ndim + 1))
211
- intersection = np.logical_and(masks_1[:, None, ...], masks_2[None, ...]).sum(axis=axes)
212
- union = np.logical_or(masks_1[:, None, ...], masks_2[None, ...]).sum(axis=axes)
213
- iou_mat = intersection / union
214
-
215
- return iou_mat
216
-
217
-
218
- def polygon_iou(
219
- polys_1: np.ndarray, polys_2: np.ndarray, mask_shape: Tuple[int, int], use_broadcasting: bool = False
220
- ) -> np.ndarray:
159
+ def polygon_iou(polys_1: np.ndarray, polys_2: np.ndarray) -> np.ndarray:
221
160
  """Computes the IoU between two sets of rotated bounding boxes
222
161
 
223
162
  Args:
@@ -234,80 +173,18 @@ def polygon_iou(
234
173
  if polys_1.ndim != 3 or polys_2.ndim != 3:
235
174
  raise AssertionError("expects boxes to be in format (N, 4, 2)")
236
175
 
237
- iou_mat: np.ndarray = np.zeros((polys_1.shape[0], polys_2.shape[0]), dtype=np.float32)
238
-
239
- if polys_1.shape[0] > 0 and polys_2.shape[0] > 0:
240
- if use_broadcasting:
241
- masks_1 = rbox_to_mask(polys_1, shape=mask_shape)
242
- masks_2 = rbox_to_mask(polys_2, shape=mask_shape)
243
- iou_mat = mask_iou(masks_1, masks_2)
244
- else:
245
- # Save memory by doing the computation for each pair
246
- for idx, b1 in enumerate(polys_1):
247
- m1 = _rbox_to_mask(b1, mask_shape)
248
- for _idx, b2 in enumerate(polys_2):
249
- m2 = _rbox_to_mask(b2, mask_shape)
250
- iou_mat[idx, _idx] = np.logical_and(m1, m2).sum() / np.logical_or(m1, m2).sum()
251
-
252
- return iou_mat
176
+ iou_mat = np.zeros((polys_1.shape[0], polys_2.shape[0]), dtype=np.float32)
253
177
 
178
+ shapely_polys_1 = [Polygon(poly) for poly in polys_1]
179
+ shapely_polys_2 = [Polygon(poly) for poly in polys_2]
254
180
 
255
- def _rbox_to_mask(box: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
256
- """Converts a rotated bounding box to a boolean mask
181
+ for i, poly1 in enumerate(shapely_polys_1):
182
+ for j, poly2 in enumerate(shapely_polys_2):
183
+ intersection_area = poly1.intersection(poly2).area
184
+ union_area = poly1.area + poly2.area - intersection_area
185
+ iou_mat[i, j] = intersection_area / union_area
257
186
 
258
- Args:
259
- ----
260
- box: rotated bounding box of shape (4, 2)
261
- shape: spatial shapes of the output masks
262
-
263
- Returns:
264
- -------
265
- the boolean mask of the specified shape
266
- """
267
- mask: np.ndarray = np.zeros(shape, dtype=np.uint8)
268
- # Get absolute coords
269
- if not np.issubdtype(box.dtype, np.integer):
270
- abs_box = box.copy()
271
- abs_box[:, 0] = abs_box[:, 0] * shape[1]
272
- abs_box[:, 1] = abs_box[:, 1] * shape[0]
273
- abs_box = abs_box.round().astype(int)
274
- else:
275
- abs_box = box
276
- abs_box[2:] = abs_box[2:] + 1
277
- cv2.fillPoly(mask, [abs_box - 1], 1.0) # type: ignore[call-overload]
278
-
279
- return mask.astype(bool)
280
-
281
-
282
- def rbox_to_mask(boxes: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
283
- """Converts rotated bounding boxes to boolean masks
284
-
285
- Args:
286
- ----
287
- boxes: rotated bounding boxes of shape (N, 4, 2)
288
- shape: spatial shapes of the output masks
289
-
290
- Returns:
291
- -------
292
- the boolean masks of shape (N, H, W)
293
- """
294
- masks: np.ndarray = np.zeros((boxes.shape[0], *shape), dtype=np.uint8)
295
-
296
- if boxes.shape[0] > 0:
297
- # Get absolute coordinates
298
- if not np.issubdtype(boxes.dtype, np.integer):
299
- abs_boxes = boxes.copy()
300
- abs_boxes[:, :, 0] = abs_boxes[:, :, 0] * shape[1]
301
- abs_boxes[:, :, 1] = abs_boxes[:, :, 1] * shape[0]
302
- abs_boxes = abs_boxes.round().astype(int)
303
- else:
304
- abs_boxes = boxes
305
- abs_boxes[:, 2:] = abs_boxes[:, 2:] + 1
306
-
307
- # TODO: optimize slicing to improve vectorization
308
- for idx, _box in enumerate(abs_boxes):
309
- cv2.fillPoly(masks[idx], [_box - 1], 1.0) # type: ignore[call-overload]
310
- return masks.astype(bool)
187
+ return iou_mat
311
188
 
312
189
 
313
190
  def nms(boxes: np.ndarray, thresh: float = 0.5) -> List[int]:
@@ -386,21 +263,15 @@ class LocalizationConfusion:
386
263
  ----
387
264
  iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
388
265
  use_polygons: if set to True, predictions and targets will be expected to have rotated format
389
- mask_shape: if use_polygons is True, describes the spatial shape of the image used
390
- use_broadcasting: if use_polygons is True, use broadcasting for IoU computation by consuming more memory
391
266
  """
392
267
 
393
268
  def __init__(
394
269
  self,
395
270
  iou_thresh: float = 0.5,
396
271
  use_polygons: bool = False,
397
- mask_shape: Tuple[int, int] = (1024, 1024),
398
- use_broadcasting: bool = True,
399
272
  ) -> None:
400
273
  self.iou_thresh = iou_thresh
401
274
  self.use_polygons = use_polygons
402
- self.mask_shape = mask_shape
403
- self.use_broadcasting = use_broadcasting
404
275
  self.reset()
405
276
 
406
277
  def update(self, gts: np.ndarray, preds: np.ndarray) -> None:
@@ -414,7 +285,7 @@ class LocalizationConfusion:
414
285
  if preds.shape[0] > 0:
415
286
  # Compute IoU
416
287
  if self.use_polygons:
417
- iou_mat = polygon_iou(gts, preds, self.mask_shape, self.use_broadcasting)
288
+ iou_mat = polygon_iou(gts, preds)
418
289
  else:
419
290
  iou_mat = box_iou(gts, preds)
420
291
  self.tot_iou += float(iou_mat.max(axis=0).sum())
@@ -441,7 +312,7 @@ class LocalizationConfusion:
441
312
  precision = self.matches / self.num_preds if self.num_preds > 0 else None
442
313
 
443
314
  # mean IoU
444
- mean_iou = self.tot_iou / self.num_preds if self.num_preds > 0 else None
315
+ mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None
445
316
 
446
317
  return recall, precision, mean_iou
447
318
 
@@ -492,21 +363,15 @@ class OCRMetric:
492
363
  ----
493
364
  iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
494
365
  use_polygons: if set to True, predictions and targets will be expected to have rotated format
495
- mask_shape: if use_polygons is True, describes the spatial shape of the image used
496
- use_broadcasting: if use_polygons is True, use broadcasting for IoU computation by consuming more memory
497
366
  """
498
367
 
499
368
  def __init__(
500
369
  self,
501
370
  iou_thresh: float = 0.5,
502
371
  use_polygons: bool = False,
503
- mask_shape: Tuple[int, int] = (1024, 1024),
504
- use_broadcasting: bool = True,
505
372
  ) -> None:
506
373
  self.iou_thresh = iou_thresh
507
374
  self.use_polygons = use_polygons
508
- self.mask_shape = mask_shape
509
- self.use_broadcasting = use_broadcasting
510
375
  self.reset()
511
376
 
512
377
  def update(
@@ -533,7 +398,7 @@ class OCRMetric:
533
398
  # Compute IoU
534
399
  if pred_boxes.shape[0] > 0:
535
400
  if self.use_polygons:
536
- iou_mat = polygon_iou(gt_boxes, pred_boxes, self.mask_shape, self.use_broadcasting)
401
+ iou_mat = polygon_iou(gt_boxes, pred_boxes)
537
402
  else:
538
403
  iou_mat = box_iou(gt_boxes, pred_boxes)
539
404
 
@@ -544,10 +409,10 @@ class OCRMetric:
544
409
  is_kept = iou_mat[gt_indices, pred_indices] >= self.iou_thresh
545
410
  # String comparison
546
411
  for gt_idx, pred_idx in zip(gt_indices[is_kept], pred_indices[is_kept]):
547
- _raw, _caseless, _unidecode, _unicase = string_match(gt_labels[gt_idx], pred_labels[pred_idx])
412
+ _raw, _caseless, _anyascii, _unicase = string_match(gt_labels[gt_idx], pred_labels[pred_idx])
548
413
  self.raw_matches += int(_raw)
549
414
  self.caseless_matches += int(_caseless)
550
- self.unidecode_matches += int(_unidecode)
415
+ self.anyascii_matches += int(_anyascii)
551
416
  self.unicase_matches += int(_unicase)
552
417
 
553
418
  self.num_gts += gt_boxes.shape[0]
@@ -564,7 +429,7 @@ class OCRMetric:
564
429
  recall = dict(
565
430
  raw=self.raw_matches / self.num_gts if self.num_gts > 0 else None,
566
431
  caseless=self.caseless_matches / self.num_gts if self.num_gts > 0 else None,
567
- unidecode=self.unidecode_matches / self.num_gts if self.num_gts > 0 else None,
432
+ anyascii=self.anyascii_matches / self.num_gts if self.num_gts > 0 else None,
568
433
  unicase=self.unicase_matches / self.num_gts if self.num_gts > 0 else None,
569
434
  )
570
435
 
@@ -572,12 +437,12 @@ class OCRMetric:
572
437
  precision = dict(
573
438
  raw=self.raw_matches / self.num_preds if self.num_preds > 0 else None,
574
439
  caseless=self.caseless_matches / self.num_preds if self.num_preds > 0 else None,
575
- unidecode=self.unidecode_matches / self.num_preds if self.num_preds > 0 else None,
440
+ anyascii=self.anyascii_matches / self.num_preds if self.num_preds > 0 else None,
576
441
  unicase=self.unicase_matches / self.num_preds if self.num_preds > 0 else None,
577
442
  )
578
443
 
579
444
  # mean IoU (overall detected boxes)
580
- mean_iou = self.tot_iou / self.num_preds if self.num_preds > 0 else None
445
+ mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None
581
446
 
582
447
  return recall, precision, mean_iou
583
448
 
@@ -587,7 +452,7 @@ class OCRMetric:
587
452
  self.tot_iou = 0.0
588
453
  self.raw_matches = 0
589
454
  self.caseless_matches = 0
590
- self.unidecode_matches = 0
455
+ self.anyascii_matches = 0
591
456
  self.unicase_matches = 0
592
457
 
593
458
 
@@ -631,21 +496,15 @@ class DetectionMetric:
631
496
  ----
632
497
  iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match
633
498
  use_polygons: if set to True, predictions and targets will be expected to have rotated format
634
- mask_shape: if use_polygons is True, describes the spatial shape of the image used
635
- use_broadcasting: if use_polygons is True, use broadcasting for IoU computation by consuming more memory
636
499
  """
637
500
 
638
501
  def __init__(
639
502
  self,
640
503
  iou_thresh: float = 0.5,
641
504
  use_polygons: bool = False,
642
- mask_shape: Tuple[int, int] = (1024, 1024),
643
- use_broadcasting: bool = True,
644
505
  ) -> None:
645
506
  self.iou_thresh = iou_thresh
646
507
  self.use_polygons = use_polygons
647
- self.mask_shape = mask_shape
648
- self.use_broadcasting = use_broadcasting
649
508
  self.reset()
650
509
 
651
510
  def update(
@@ -672,7 +531,7 @@ class DetectionMetric:
672
531
  # Compute IoU
673
532
  if pred_boxes.shape[0] > 0:
674
533
  if self.use_polygons:
675
- iou_mat = polygon_iou(gt_boxes, pred_boxes, self.mask_shape, self.use_broadcasting)
534
+ iou_mat = polygon_iou(gt_boxes, pred_boxes)
676
535
  else:
677
536
  iou_mat = box_iou(gt_boxes, pred_boxes)
678
537
 
@@ -701,7 +560,7 @@ class DetectionMetric:
701
560
  precision = self.num_matches / self.num_preds if self.num_preds > 0 else None
702
561
 
703
562
  # mean IoU (overall detected boxes)
704
- mean_iou = self.tot_iou / self.num_preds if self.num_preds > 0 else None
563
+ mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None
705
564
 
706
565
  return recall, precision, mean_iou
707
566
 
doctr/utils/reconstitution.py ADDED
@@ -0,0 +1,126 @@
1
+ # Copyright (C) 2021-2024, Mindee.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+ from typing import Any, Dict, Optional
6
+
7
+ import numpy as np
8
+ from anyascii import anyascii
9
+ from PIL import Image, ImageDraw
10
+
11
+ from .fonts import get_font
12
+
13
+ __all__ = ["synthesize_page", "synthesize_kie_page"]
14
+
15
+
16
+ def synthesize_page(
17
+ page: Dict[str, Any],
18
+ draw_proba: bool = False,
19
+ font_family: Optional[str] = None,
20
+ ) -> np.ndarray:
21
+ """Draw a the content of the element page (OCR response) on a blank page.
22
+
23
+ Args:
24
+ ----
25
+ page: exported Page object to represent
26
+ draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
27
+ font_size: size of the font, default font = 13
28
+ font_family: family of the font
29
+
30
+ Returns:
31
+ -------
32
+ the synthesized page
33
+ """
34
+ # Draw template
35
+ h, w = page["dimensions"]
36
+ response = 255 * np.ones((h, w, 3), dtype=np.int32)
37
+
38
+ # Draw each word
39
+ for block in page["blocks"]:
40
+ for line in block["lines"]:
41
+ for word in line["words"]:
42
+ # Get absolute word geometry
43
+ (xmin, ymin), (xmax, ymax) = word["geometry"]
44
+ xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
45
+ ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
46
+
47
+ # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
48
+ font = get_font(font_family, int(0.75 * (ymax - ymin)))
49
+ img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
50
+ d = ImageDraw.Draw(img)
51
+ # Draw in black the value of the word
52
+ try:
53
+ d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
54
+ except UnicodeEncodeError:
55
+ # When character cannot be encoded, use its anyascii version
56
+ d.text((0, 0), anyascii(word["value"]), font=font, fill=(0, 0, 0))
57
+
58
+ # Colorize if draw_proba
59
+ if draw_proba:
60
+ p = int(255 * word["confidence"])
61
+ mask = np.where(np.array(img) == 0, 1, 0)
62
+ proba: np.ndarray = np.array([255 - p, 0, p])
63
+ color = mask * proba[np.newaxis, np.newaxis, :]
64
+ white_mask = 255 * (1 - mask)
65
+ img = color + white_mask
66
+
67
+ # Write to response page
68
+ response[ymin:ymax, xmin:xmax, :] = np.array(img)
69
+
70
+ return response
71
+
72
+
73
+ def synthesize_kie_page(
74
+ page: Dict[str, Any],
75
+ draw_proba: bool = False,
76
+ font_family: Optional[str] = None,
77
+ ) -> np.ndarray:
78
+ """Draw a the content of the element page (OCR response) on a blank page.
79
+
80
+ Args:
81
+ ----
82
+ page: exported Page object to represent
83
+ draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
84
+ font_size: size of the font, default font = 13
85
+ font_family: family of the font
86
+
87
+ Returns:
88
+ -------
89
+ the synthesized page
90
+ """
91
+ # Draw template
92
+ h, w = page["dimensions"]
93
+ response = 255 * np.ones((h, w, 3), dtype=np.int32)
94
+
95
+ # Draw each word
96
+ for predictions in page["predictions"].values():
97
+ for prediction in predictions:
98
+ # Get aboslute word geometry
99
+ (xmin, ymin), (xmax, ymax) = prediction["geometry"]
100
+ xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
101
+ ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
102
+
103
+ # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
104
+ font = get_font(font_family, int(0.75 * (ymax - ymin)))
105
+ img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
106
+ d = ImageDraw.Draw(img)
107
+ # Draw in black the value of the word
108
+ try:
109
+ d.text((0, 0), prediction["value"], font=font, fill=(0, 0, 0))
110
+ except UnicodeEncodeError:
111
+ # When character cannot be encoded, use its anyascii version
112
+ d.text((0, 0), anyascii(prediction["value"]), font=font, fill=(0, 0, 0))
113
+
114
+ # Colorize if draw_proba
115
+ if draw_proba:
116
+ p = int(255 * prediction["confidence"])
117
+ mask = np.where(np.array(img) == 0, 1, 0)
118
+ proba: np.ndarray = np.array([255 - p, 0, p])
119
+ color = mask * proba[np.newaxis, np.newaxis, :]
120
+ white_mask = 255 * (1 - mask)
121
+ img = color + white_mask
122
+
123
+ # Write to response page
124
+ response[ymin:ymax, xmin:xmax, :] = np.array(img)
125
+
126
+ return response
doctr/utils/visualization.py CHANGED
@@ -9,16 +9,12 @@ from typing import Any, Dict, List, Optional, Tuple, Union
9
9
  import cv2
10
10
  import matplotlib.patches as patches
11
11
  import matplotlib.pyplot as plt
12
- import mplcursors
13
12
  import numpy as np
14
13
  from matplotlib.figure import Figure
15
- from PIL import Image, ImageDraw
16
- from unidecode import unidecode
17
14
 
18
15
  from .common_types import BoundingBox, Polygon4P
19
- from .fonts import get_font
20
16
 
21
- __all__ = ["visualize_page", "synthesize_page", "visualize_kie_page", "synthesize_kie_page", "draw_boxes"]
17
+ __all__ = ["visualize_page", "visualize_kie_page", "draw_boxes"]
22
18
 
23
19
 
24
20
  def rect_patch(
@@ -281,6 +277,8 @@ def visualize_page(
281
277
  artists.append(rect)
282
278
 
283
279
  if interactive:
280
+ import mplcursors
281
+
284
282
  # Create mlp Cursor to hover patches in artists
285
283
  mplcursors.Cursor(artists, hover=2).connect("add", lambda sel: sel.annotation.set_text(sel.artist.get_label()))
286
284
  fig.tight_layout(pad=0.0)
@@ -288,63 +286,6 @@ def visualize_page(
288
286
  return fig
289
287
 
290
288
 
291
- def synthesize_page(
292
- page: Dict[str, Any],
293
- draw_proba: bool = False,
294
- font_family: Optional[str] = None,
295
- ) -> np.ndarray:
296
- """Draw a the content of the element page (OCR response) on a blank page.
297
-
298
- Args:
299
- ----
300
- page: exported Page object to represent
301
- draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
302
- font_size: size of the font, default font = 13
303
- font_family: family of the font
304
-
305
- Returns:
306
- -------
307
- the synthesized page
308
- """
309
- # Draw template
310
- h, w = page["dimensions"]
311
- response = 255 * np.ones((h, w, 3), dtype=np.int32)
312
-
313
- # Draw each word
314
- for block in page["blocks"]:
315
- for line in block["lines"]:
316
- for word in line["words"]:
317
- # Get aboslute word geometry
318
- (xmin, ymin), (xmax, ymax) = word["geometry"]
319
- xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
320
- ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
321
-
322
- # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
323
- font = get_font(font_family, int(0.75 * (ymax - ymin)))
324
- img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
325
- d = ImageDraw.Draw(img)
326
- # Draw in black the value of the word
327
- try:
328
- d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
329
- except UnicodeEncodeError:
330
- # When character cannot be encoded, use its unidecode version
331
- d.text((0, 0), unidecode(word["value"]), font=font, fill=(0, 0, 0))
332
-
333
- # Colorize if draw_proba
334
- if draw_proba:
335
- p = int(255 * word["confidence"])
336
- mask = np.where(np.array(img) == 0, 1, 0)
337
- proba: np.ndarray = np.array([255 - p, 0, p])
338
- color = mask * proba[np.newaxis, np.newaxis, :]
339
- white_mask = 255 * (1 - mask)
340
- img = color + white_mask
341
-
342
- # Write to response page
343
- response[ymin:ymax, xmin:xmax, :] = np.array(img)
344
-
345
- return response
346
-
347
-
348
289
  def visualize_kie_page(
349
290
  page: Dict[str, Any],
350
291
  image: np.ndarray,
@@ -413,6 +354,8 @@ def visualize_kie_page(
413
354
  artists.append(rect)
414
355
 
415
356
  if interactive:
357
+ import mplcursors
358
+
416
359
  # Create mlp Cursor to hover patches in artists
417
360
  mplcursors.Cursor(artists, hover=2).connect("add", lambda sel: sel.annotation.set_text(sel.artist.get_label()))
418
361
  fig.tight_layout(pad=0.0)
@@ -420,62 +363,6 @@ def visualize_kie_page(
420
363
  return fig
421
364
 
422
365
 
423
- def synthesize_kie_page(
424
- page: Dict[str, Any],
425
- draw_proba: bool = False,
426
- font_family: Optional[str] = None,
427
- ) -> np.ndarray:
428
- """Draw a the content of the element page (OCR response) on a blank page.
429
-
430
- Args:
431
- ----
432
- page: exported Page object to represent
433
- draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
434
- font_size: size of the font, default font = 13
435
- font_family: family of the font
436
-
437
- Returns:
438
- -------
439
- the synthesized page
440
- """
441
- # Draw template
442
- h, w = page["dimensions"]
443
- response = 255 * np.ones((h, w, 3), dtype=np.int32)
444
-
445
- # Draw each word
446
- for predictions in page["predictions"].values():
447
- for prediction in predictions:
448
- # Get aboslute word geometry
449
- (xmin, ymin), (xmax, ymax) = prediction["geometry"]
450
- xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
451
- ymin, ymax = int(round(h * ymin)), int(round(h * ymax))
452
-
453
- # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
454
- font = get_font(font_family, int(0.75 * (ymax - ymin)))
455
- img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
456
- d = ImageDraw.Draw(img)
457
- # Draw in black the value of the word
458
- try:
459
- d.text((0, 0), prediction["value"], font=font, fill=(0, 0, 0))
460
- except UnicodeEncodeError:
461
- # When character cannot be encoded, use its unidecode version
462
- d.text((0, 0), unidecode(prediction["value"]), font=font, fill=(0, 0, 0))
463
-
464
- # Colorize if draw_proba
465
- if draw_proba:
466
- p = int(255 * prediction["confidence"])
467
- mask = np.where(np.array(img) == 0, 1, 0)
468
- proba: np.ndarray = np.array([255 - p, 0, p])
469
- color = mask * proba[np.newaxis, np.newaxis, :]
470
- white_mask = 255 * (1 - mask)
471
- img = color + white_mask
472
-
473
- # Write to response page
474
- response[ymin:ymax, xmin:xmax, :] = np.array(img)
475
-
476
- return response
477
-
478
-
479
366
  def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: Optional[Tuple[int, int, int]] = None, **kwargs) -> None:
480
367
  """Draw an array of relative straight boxes on an image
481
368
 
doctr/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = 'v0.8.0'
1
+ __version__ = 'v0.9.0'