onnxtr 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70):
  1. onnxtr/__init__.py +2 -0
  2. onnxtr/contrib/__init__.py +0 -0
  3. onnxtr/contrib/artefacts.py +131 -0
  4. onnxtr/contrib/base.py +105 -0
  5. onnxtr/file_utils.py +33 -0
  6. onnxtr/io/__init__.py +5 -0
  7. onnxtr/io/elements.py +455 -0
  8. onnxtr/io/html.py +28 -0
  9. onnxtr/io/image.py +56 -0
  10. onnxtr/io/pdf.py +42 -0
  11. onnxtr/io/reader.py +85 -0
  12. onnxtr/models/__init__.py +4 -0
  13. onnxtr/models/_utils.py +141 -0
  14. onnxtr/models/builder.py +355 -0
  15. onnxtr/models/classification/__init__.py +2 -0
  16. onnxtr/models/classification/models/__init__.py +1 -0
  17. onnxtr/models/classification/models/mobilenet.py +120 -0
  18. onnxtr/models/classification/predictor/__init__.py +1 -0
  19. onnxtr/models/classification/predictor/base.py +57 -0
  20. onnxtr/models/classification/zoo.py +76 -0
  21. onnxtr/models/detection/__init__.py +2 -0
  22. onnxtr/models/detection/core.py +101 -0
  23. onnxtr/models/detection/models/__init__.py +3 -0
  24. onnxtr/models/detection/models/differentiable_binarization.py +159 -0
  25. onnxtr/models/detection/models/fast.py +160 -0
  26. onnxtr/models/detection/models/linknet.py +160 -0
  27. onnxtr/models/detection/postprocessor/__init__.py +0 -0
  28. onnxtr/models/detection/postprocessor/base.py +144 -0
  29. onnxtr/models/detection/predictor/__init__.py +1 -0
  30. onnxtr/models/detection/predictor/base.py +54 -0
  31. onnxtr/models/detection/zoo.py +73 -0
  32. onnxtr/models/engine.py +50 -0
  33. onnxtr/models/predictor/__init__.py +1 -0
  34. onnxtr/models/predictor/base.py +175 -0
  35. onnxtr/models/predictor/predictor.py +145 -0
  36. onnxtr/models/preprocessor/__init__.py +1 -0
  37. onnxtr/models/preprocessor/base.py +118 -0
  38. onnxtr/models/recognition/__init__.py +2 -0
  39. onnxtr/models/recognition/core.py +28 -0
  40. onnxtr/models/recognition/models/__init__.py +5 -0
  41. onnxtr/models/recognition/models/crnn.py +226 -0
  42. onnxtr/models/recognition/models/master.py +145 -0
  43. onnxtr/models/recognition/models/parseq.py +134 -0
  44. onnxtr/models/recognition/models/sar.py +134 -0
  45. onnxtr/models/recognition/models/vitstr.py +166 -0
  46. onnxtr/models/recognition/predictor/__init__.py +1 -0
  47. onnxtr/models/recognition/predictor/_utils.py +86 -0
  48. onnxtr/models/recognition/predictor/base.py +79 -0
  49. onnxtr/models/recognition/utils.py +89 -0
  50. onnxtr/models/recognition/zoo.py +69 -0
  51. onnxtr/models/zoo.py +114 -0
  52. onnxtr/transforms/__init__.py +1 -0
  53. onnxtr/transforms/base.py +112 -0
  54. onnxtr/utils/__init__.py +4 -0
  55. onnxtr/utils/common_types.py +18 -0
  56. onnxtr/utils/data.py +126 -0
  57. onnxtr/utils/fonts.py +41 -0
  58. onnxtr/utils/geometry.py +498 -0
  59. onnxtr/utils/multithreading.py +50 -0
  60. onnxtr/utils/reconstitution.py +70 -0
  61. onnxtr/utils/repr.py +64 -0
  62. onnxtr/utils/visualization.py +291 -0
  63. onnxtr/utils/vocabs.py +71 -0
  64. onnxtr/version.py +1 -0
  65. onnxtr-0.1.0.dist-info/LICENSE +201 -0
  66. onnxtr-0.1.0.dist-info/METADATA +481 -0
  67. onnxtr-0.1.0.dist-info/RECORD +70 -0
  68. onnxtr-0.1.0.dist-info/WHEEL +5 -0
  69. onnxtr-0.1.0.dist-info/top_level.txt +2 -0
  70. onnxtr-0.1.0.dist-info/zip-safe +1 -0
@@ -0,0 +1,498 @@
1
+ # Copyright (C) 2021-2024, Mindee | Felix Dittrich.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+
6
+ from copy import deepcopy
7
+ from math import ceil
8
+ from typing import List, Optional, Tuple, Union
9
+
10
+ import cv2
11
+ import numpy as np
12
+
13
+ from .common_types import BoundingBox, Polygon4P
14
+
15
+ __all__ = [
16
+ "bbox_to_polygon",
17
+ "polygon_to_bbox",
18
+ "resolve_enclosing_bbox",
19
+ "resolve_enclosing_rbbox",
20
+ "rotate_boxes",
21
+ "compute_expanded_shape",
22
+ "rotate_image",
23
+ "estimate_page_angle",
24
+ "convert_to_relative_coords",
25
+ "rotate_abs_geoms",
26
+ "extract_crops",
27
+ "extract_rcrops",
28
+ "shape_translate",
29
+ ]
30
+
31
+
32
def bbox_to_polygon(bbox: "BoundingBox") -> "Polygon4P":
    """Expand a 2-point bounding box into its 4 corner points.

    Args:
    ----
        bbox: a bounding box ((xmin, ymin), (xmax, ymax))

    Returns:
    -------
        the 4 corners: top-left, top-right, bottom-left, bottom-right
    """
    (xmin, ymin), (xmax, ymax) = bbox
    return (xmin, ymin), (xmax, ymin), (xmin, ymax), (xmax, ymax)
44
+
45
+
46
def polygon_to_bbox(polygon: "Polygon4P") -> "BoundingBox":
    """Collapse a 4-point polygon into its axis-aligned bounding box.

    Args:
    ----
        polygon: the 4 corner points

    Returns:
    -------
        ((xmin, ymin), (xmax, ymax))
    """
    xs = [pt[0] for pt in polygon]
    ys = [pt[1] for pt in polygon]
    return (min(xs), min(ys)), (max(xs), max(ys))
59
+
60
+
61
def shape_translate(data: np.ndarray, format: str) -> np.ndarray:
    """Move the channel axis of ``data`` so that it matches the requested layout.

    Args:
    ----
        data: input array in (B, C, H, W), (B, H, W, C), (C, H, W) or (H, W, C) layout
        format: target layout ('BCHW', 'BHWC', 'CHW' or 'HWC')

    Returns:
    -------
        the array with its channel axis at the position implied by ``format``
        (returned unchanged when the rank does not match the format length,
        or when the channel axis already has size 1 or 3)
    """
    # target channel axis and source axis to move it from, for each supported layout
    layouts = {"BCHW": (1, -1), "BHWC": (-1, 1), "CHW": (0, -1), "HWC": (-1, 0)}

    # Rank mismatch or unknown layout: hand the data back untouched
    if data.ndim != len(format) or format not in layouts:
        return data

    channel_axis, source_axis = layouts[format]
    # A size of 1 or 3 on the target axis means the layout is already correct
    if data.shape[channel_axis] in (1, 3):
        return data
    return np.moveaxis(data, source_axis, channel_axis)
100
+
101
+
102
def resolve_enclosing_bbox(bboxes: "Union[List[BoundingBox], np.ndarray]") -> "Union[BoundingBox, np.ndarray]":
    """Compute the bbox enclosing every given box.

    Args:
    ----
        bboxes: boxes in one of the following formats:

            - an array of boxes (*, 5), each row being (xmin, ymin, xmax, ymax, score)
            - a list of BoundingBox tuples

    Returns:
    -------
        a (5,) array (enclosing box with the mean score) for array input,
        a BoundingBox for list input
    """
    if isinstance(bboxes, np.ndarray):
        xmin, ymin, xmax, ymax, score = np.split(bboxes, 5, axis=1)
        return np.array([xmin.min(), ymin.min(), xmax.max(), ymax.max(), score.mean()])
    # Flatten every corner of every box into one point list
    points = [pt for box in bboxes for pt in box]
    xs = [pt[0] for pt in points]
    ys = [pt[1] for pt in points]
    return (min(xs), min(ys)), (max(xs), max(ys))
124
+
125
+
126
def resolve_enclosing_rbbox(rbboxes: List[np.ndarray], intermed_size: int = 1024) -> np.ndarray:
    """Compute the minimum-area rotated box enclosing all given rotated boxes.

    Args:
    ----
        rbboxes: list of (4, 2) arrays of RELATIVE corner coordinates
        intermed_size: size of the intermediate image used for the absolute projection

    Returns:
    -------
        (4, 2) array of the enclosing rotated box corners, back in relative coordinates
    """
    # Gather every corner into one point cloud, projected to absolute pixels
    # since cv2.minAreaRect operates on integer coordinates
    points: np.ndarray = np.concatenate(rbboxes, axis=0) * intermed_size
    enclosing = cv2.minAreaRect(points.astype(np.int32))
    # Back to the 4-corner representation, rescaled to relative coordinates
    return cv2.boxPoints(enclosing) / intermed_size  # type: ignore[operator]
148
+
149
+
150
def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
    """Rotate 2D points counter-clockwise around the origin.

    Args:
    ----
        points: array of size (N, 2)
        angle: rotation angle, in degrees, between -90 and +90

    Returns:
    -------
        (N, 2) array of rotated points
    """
    theta = np.deg2rad(angle)  # trigonometric helpers expect radians
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    # Standard 2D rotation matrix, built with the dtype of the inputs
    rot_mat = np.array([[cos_t, -sin_t], [sin_t, cos_t]], dtype=points.dtype)
    return points @ rot_mat.T
167
+
168
+
169
def compute_expanded_shape(img_shape: Tuple[int, int], angle: float) -> Tuple[int, int]:
    """Compute the (height, width) needed to hold an image rotated without cropping.

    Args:
    ----
        img_shape: the height and width of the image
        angle: rotation angle, in degrees, between -90 and +90

    Returns:
    -------
        the height and width of the expanded (rotated) image
    """
    half_w, half_h = img_shape[1] / 2, img_shape[0] / 2
    # Two opposite-x corners are enough: the other two are their mirror images
    corners = np.array([[half_w, half_h], [-half_w, half_h]])

    theta = np.deg2rad(angle)
    rot_mat = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]], dtype=corners.dtype)
    rotated = corners @ rot_mat.T

    # The enclosing extent is twice the largest absolute coordinate on each axis
    width, height = 2 * np.abs(rotated).max(axis=0)
    return height, width
190
+
191
+
192
def rotate_abs_geoms(
    geoms: np.ndarray,
    angle: float,
    img_shape: Tuple[int, int],
    expand: bool = True,
) -> np.ndarray:
    """Rotate a batch of bounding boxes or polygons by an angle around the
    image center.

    Args:
    ----
        geoms: (N, 4) or (N, 4, 2) array of ABSOLUTE coordinate boxes
        angle: anti-clockwise rotation angle in degrees
        img_shape: the height and width of the image
        expand: whether the image should be padded to avoid information loss

    Returns:
    -------
        A batch of rotated polygons (N, 4, 2), clipped to the target canvas
    """
    # Switch to polygons: (xmin, ymin, xmax, ymax) -> 4 corners (TL, TR, BR, BL)
    polys = (
        np.stack([geoms[:, [0, 1]], geoms[:, [2, 1]], geoms[:, [2, 3]], geoms[:, [0, 3]]], axis=1)
        if geoms.ndim == 2
        else geoms
    )
    polys = polys.astype(np.float32)

    # Switch to image center as referential (note the y axis is flipped so it points up,
    # matching the mathematical convention used by rotate_abs_points)
    polys[..., 0] -= img_shape[1] / 2
    polys[..., 1] = img_shape[0] / 2 - polys[..., 1]

    # Rotated them around image center
    rotated_polys = rotate_abs_points(polys.reshape(-1, 2), angle).reshape(-1, 4, 2)
    # Switch back to top-left corner as referential; when expanding, the target
    # canvas is enlarged so that no rotated point would be lost
    target_shape = compute_expanded_shape(img_shape, angle) if expand else img_shape
    # Clip coords to fit since there is no expansion
    rotated_polys[..., 0] = (rotated_polys[..., 0] + target_shape[1] / 2).clip(0, target_shape[1])
    rotated_polys[..., 1] = (target_shape[0] / 2 - rotated_polys[..., 1]).clip(0, target_shape[0])

    return rotated_polys
233
+
234
+
235
def remap_boxes(loc_preds: np.ndarray, orig_shape: Tuple[int, int], dest_shape: Tuple[int, int]) -> np.ndarray:
    """Re-express relative rotated boxes from one canvas shape to another.

    The absolute position of the boxes is preserved: only the relative
    coordinates change, e.g. after the image was padded or resized.

    Args:
    ----
        loc_preds: (N, 4, 2) array of RELATIVE loc_preds
        orig_shape: shape of the origin image
        dest_shape: shape of the destination image

    Returns:
    -------
        (N, 4, 2) array of loc_preds expressed in the destination referential

    Raises:
    ------
        ValueError: if either shape is not a (height, width) pair
    """
    if len(dest_shape) != 2:
        raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}")
    if len(orig_shape) != 2:
        raise ValueError(f"Image_shape length should be 2, was found at: {len(orig_shape)}")
    orig_height, orig_width = orig_shape
    dest_height, dest_width = dest_shape
    remapped = loc_preds.copy()
    # Project to absolute pixels, re-center on the destination canvas, then normalize back
    remapped[:, :, 0] = ((loc_preds[:, :, 0] * orig_width) + (dest_width - orig_width) / 2) / dest_width
    remapped[:, :, 1] = ((loc_preds[:, :, 1] * orig_height) + (dest_height - orig_height) / 2) / dest_height

    return remapped
261
+
262
+
263
def rotate_boxes(
    loc_preds: np.ndarray,
    angle: float,
    orig_shape: Tuple[int, int],
    min_angle: float = 1.0,
    target_shape: Optional[Tuple[int, int]] = None,
) -> np.ndarray:
    """Rotate relative boxes around the page center when the angle is meaningful.

    Straight boxes (xmin, ymin, xmax, ymax, c) are first converted to rotated
    boxes (4, 2). If ``target_shape`` is specified, the boxes are remapped to it
    after the rotation, removing the padding created by rotate_page(expand=True).

    Args:
    ----
        loc_preds: (N, 5) or (N, 4, 2) array of RELATIVE boxes
        angle: angle between -90 and +90 degrees
        orig_shape: shape of the origin image
        min_angle: angles closer than this to 0 or +/-90 degrees leave boxes untouched
        target_shape: shape of the destination image

    Returns:
    -------
        A batch of rotated boxes (N, 4, 2)
    """
    polys = loc_preds.copy()
    # Straight boxes -> 4-corner polygons (TL, TR, BR, BL)
    if polys.ndim == 2:
        corners = [polys[:, [0, 1]], polys[:, [2, 1]], polys[:, [2, 3]], polys[:, [0, 3]]]
        polys = np.stack(corners, axis=1)

    # Negligible (or near-vertical) rotation: return the converted boxes as-is
    if abs(angle) < min_angle or abs(angle) > 90 - min_angle:
        return polys

    theta = angle * np.pi / 180.0  # degrees -> radians for np trig functions
    rot_mat = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]], dtype=polys.dtype)

    # Rotate in absolute coordinates around the image center
    abs_points: np.ndarray = np.stack((polys[:, :, 0] * orig_shape[1], polys[:, :, 1] * orig_shape[0]), axis=-1)
    center = (orig_shape[1] / 2, orig_shape[0] / 2)
    rotated_abs = center + np.matmul(abs_points - center, rot_mat)
    # Back to relative coordinates
    rotated_boxes: np.ndarray = np.stack(
        (rotated_abs[:, :, 0] / orig_shape[1], rotated_abs[:, :, 1] / orig_shape[0]), axis=-1
    )

    # Remove the expansion padding if requested
    if target_shape is not None:
        rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape)

    return rotated_boxes
320
+
321
+
322
def rotate_image(
    image: np.ndarray,
    angle: float,
    expand: bool = False,
    preserve_origin_shape: bool = False,
) -> np.ndarray:
    """Rotate an image counterclockwise by an given angle.

    Args:
    ----
        image: numpy tensor to rotate, (H, W, C) channels-last
        angle: rotation angle in degrees, between -90 and +90
        expand: whether the image should be padded before the rotation
        preserve_origin_shape: if expand is set to True, resizes the final output to the original image size

    Returns:
    -------
        Rotated array, padded by 0 by default.
    """
    # Compute the expanded padding
    exp_img: np.ndarray
    if expand:
        # Pad the image symmetrically so the rotated content will not be cropped
        exp_shape = compute_expanded_shape(image.shape[:2], angle)  # type: ignore[arg-type]
        h_pad, w_pad = (
            int(max(0, ceil(exp_shape[0] - image.shape[0]))),
            int(max(0, ceil(exp_shape[1] - image.shape[1]))),
        )
        exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
    else:
        exp_img = image

    height, width = exp_img.shape[:2]
    # Rotate around the canvas center; areas uncovered by the source are filled with 0
    rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0)
    rot_img = cv2.warpAffine(exp_img, rot_mat, (width, height))
    if expand:
        # Pad to get the same aspect ratio
        if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]):
            # Pad width
            if (rot_img.shape[0] / rot_img.shape[1]) > (image.shape[0] / image.shape[1]):
                h_pad, w_pad = 0, int(rot_img.shape[0] * image.shape[1] / image.shape[0] - rot_img.shape[1])
            # Pad height
            else:
                h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0
            rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
        if preserve_origin_shape:
            # rescale back to the original (H, W) -- cv2.resize expects (W, H), hence the [::-1]
            rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR)

    return rot_img
371
+
372
+
373
def estimate_page_angle(polys: np.ndarray) -> float:
    """Estimate the ccw page skew angle, in degrees, from oriented polygons.

    Takes a batch of rotated, previously ORIENTED polys (N, 4, 2) (rectified by
    the classifier) and returns the median angle of their reading direction.
    Returns 0.0 when the angle cannot be computed (zero horizontal extent).
    """
    # Mean left / right points with respect to the reading direction (oriented polygon)
    lx = polys[:, 0, 0] + polys[:, 3, 0]
    ly = polys[:, 0, 1] + polys[:, 3, 1]
    rx = polys[:, 1, 0] + polys[:, 2, 0]
    ry = polys[:, 1, 1] + polys[:, 2, 1]
    with np.errstate(divide="raise", invalid="raise"):
        try:
            # Y axis goes from top to bottom, hence the sign flip on the slope
            slopes = np.arctan((ly - ry) / (rx - lx))
            return float(np.median(slopes) * 180 / np.pi)
        except FloatingPointError:
            # Degenerate (vertical) reading direction: fall back to no skew
            return 0.0
389
+
390
+
391
def convert_to_relative_coords(geoms: np.ndarray, img_shape: Tuple[int, int]) -> np.ndarray:
    """Normalize absolute geometries by the image dimensions.

    Args:
    ----
        geoms: a set of polygons of shape (N, 4, 2) or of straight boxes of shape (N, 4)
        img_shape: the height and width of the image

    Returns:
    -------
        the same geometries with float32 coordinates clipped to [0, 1]

    Raises:
    ------
        ValueError: if ``geoms`` is neither (N, 4, 2) nor (N, 4)
    """
    height, width = img_shape
    # Polygons: last axis is (x, y)
    if geoms.ndim == 3 and geoms.shape[1:] == (4, 2):
        rel_polys: np.ndarray = np.empty(geoms.shape, dtype=np.float32)
        rel_polys[..., 0] = geoms[..., 0] / width
        rel_polys[..., 1] = geoms[..., 1] / height
        return rel_polys.clip(0, 1)
    # Straight boxes: columns are (xmin, ymin, xmax, ymax)
    if geoms.ndim == 2 and geoms.shape[1] == 4:
        rel_boxes: np.ndarray = np.empty(geoms.shape, dtype=np.float32)
        rel_boxes[:, ::2] = geoms[:, ::2] / width
        rel_boxes[:, 1::2] = geoms[:, 1::2] / height
        return rel_boxes.clip(0, 1)

    raise ValueError(f"invalid format for arg `geoms`: {geoms.shape}")
416
+
417
+
418
def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> List[np.ndarray]:
    """Create cropped images from a list of straight bounding boxes.

    Args:
    ----
        img: input image
        boxes: bounding boxes of shape (N, 4) where N is the number of boxes, with relative
            (xmin, ymin, xmax, ymax) coordinates (absolute pixel indices if integer-typed)
        channels_last: whether the channel dimension is the last one (H, W, C) instead of (C, H, W)

    Returns:
    -------
        list of cropped images

    Raises:
    ------
        AssertionError: if ``boxes`` does not have 4 columns
    """
    if boxes.shape[0] == 0:
        return []
    if boxes.shape[1] != 4:
        raise AssertionError("boxes are expected to be relative and in order (xmin, ymin, xmax, ymax)")

    # Project relative coordinates to absolute pixel indices
    _boxes = boxes.copy()
    h, w = img.shape[:2] if channels_last else img.shape[-2:]
    if not np.issubdtype(_boxes.dtype, np.integer):
        _boxes[:, [0, 2]] *= w
        _boxes[:, [1, 3]] *= h
        _boxes = _boxes.round().astype(int)
        # Make (xmax, ymax) inclusive by extending the slice ends by one pixel.
        # BUGFIX: was `_boxes[2:] += 1`, which incremented every coordinate of the
        # boxes from row index 2 onward instead of the max-coordinate columns.
        _boxes[:, 2:] += 1
    if channels_last:
        return deepcopy([img[box[1] : box[3], box[0] : box[2]] for box in _boxes])

    return deepcopy([img[:, box[1] : box[3], box[0] : box[2]] for box in _boxes])
450
+
451
+
452
def extract_rcrops(
    img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True
) -> List[np.ndarray]:
    """Created cropped images from list of rotated bounding boxes

    Args:
    ----
        img: input image
        polys: bounding boxes of shape (N, 4, 2), relative coordinates
            (absolute pixel coordinates if integer-typed)
        dtype: target data type of bounding boxes
        channels_last: whether the channel dimension is the last one (H, W, C) instead of (C, H, W)

    Returns:
    -------
        list of cropped images, each rectified to an axis-aligned rectangle

    Raises:
    ------
        AssertionError: if ``polys`` is not of shape (N, 4, 2)
    """
    if polys.shape[0] == 0:
        return []
    if polys.shape[1:] != (4, 2):
        raise AssertionError("polys are expected to be quadrilateral, of shape (N, 4, 2)")

    # Project relative coordinates to absolute pixels
    _boxes = polys.copy()
    height, width = img.shape[:2] if channels_last else img.shape[-2:]
    if not np.issubdtype(_boxes.dtype, np.integer):
        _boxes[:, :, 0] *= width
        _boxes[:, :, 1] *= height

    # Three corners fully determine the affine mapping
    # NOTE(review): presumably corners are ordered (top-left, top-right, bottom-right, ...) -- verify with callers
    src_pts = _boxes[:, :3].astype(np.float32)
    # Preserve size: d1/d2 are the first two edge lengths of each box,
    # used as the output crop's width/height
    d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
    d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1)
    # Destination triangle in the crop referential: (0, 0), (d1-1, 0), (d1-1, d2-1) -- (N, 3, 2)
    dst_pts = np.zeros((_boxes.shape[0], 3, 2), dtype=dtype)
    dst_pts[:, 1, 0] = dst_pts[:, 2, 0] = d1 - 1
    dst_pts[:, 2, 1] = d2 - 1
    # Use a warp transformation to extract the crop
    crops = [
        cv2.warpAffine(
            img if channels_last else img.transpose(1, 2, 0),
            # Transformation matrix
            cv2.getAffineTransform(src_pts[idx], dst_pts[idx]),
            (int(d1[idx]), int(d2[idx])),
        )
        for idx in range(_boxes.shape[0])
    ]
    return crops
@@ -0,0 +1,50 @@
1
+ # Copyright (C) 2021-2024, Mindee | Felix Dittrich.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+
6
+
7
+ import multiprocessing as mp
8
+ import os
9
+ from multiprocessing.pool import ThreadPool
10
+ from typing import Any, Callable, Iterable, Iterator, Optional
11
+
12
+ from onnxtr.file_utils import ENV_VARS_TRUE_VALUES
13
+
14
+ __all__ = ["multithread_exec"]
15
+
16
+
17
def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: Optional[int] = None) -> Iterator[Any]:
    """Execute a given function in parallel for each element of a given sequence

    >>> from onnxtr.utils.multithreading import multithread_exec
    >>> entries = [1, 4, 8]
    >>> results = multithread_exec(lambda x: x ** 2, entries)

    Args:
    ----
        func: function to be executed on each element of the iterable
        seq: iterable
        threads: number of workers to be used for multiprocessing

    Returns:
    -------
        iterator of the function's results using the iterable as inputs

    Notes:
    -----
        This function uses ThreadPool from multiprocessing package, which uses `/dev/shm` directory for shared memory.
        If you do not have write permissions for this directory (if you run `onnxtr` on AWS Lambda for instance),
        you might want to disable multiprocessing. To achieve that, set 'ONNXTR_MULTIPROCESSING_DISABLE' to 'TRUE'.
    """
    if not isinstance(threads, int):
        # Default worker count: one per CPU, capped at 16
        threads = min(16, mp.cpu_count())
    disabled = os.environ.get("ONNXTR_MULTIPROCESSING_DISABLE", "").upper() in ENV_VARS_TRUE_VALUES
    # Single-thread fallback when parallelism is pointless or explicitly disabled
    if threads < 2 or disabled:
        return map(func, seq)
    # Multi-threading
    with ThreadPool(threads) as tp:
        # ThreadPool's map function returns a list, but seq could be of a different type
        # That's why wrapping result in map to return iterator
        return map(lambda x: x, tp.map(func, seq))  # noqa: C417
@@ -0,0 +1,70 @@
1
+ # Copyright (C) 2021-2024, Mindee | Felix Dittrich.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+ from typing import Any, Dict, Optional
6
+
7
+ import numpy as np
8
+ from anyascii import anyascii
9
+ from PIL import Image, ImageDraw
10
+
11
+ from .fonts import get_font
12
+
13
+ __all__ = ["synthesize_page"]
14
+
15
+
16
def synthesize_page(
    page: Dict[str, Any],
    draw_proba: bool = False,
    font_family: Optional[str] = None,
) -> np.ndarray:
    """Draw a the content of the element page (OCR response) on a blank page.

    Args:
    ----
        page: exported Page object to represent; words carry RELATIVE geometries
        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
        font_family: family of the font (passed through to get_font; presumably a
            default font is used when None -- verify in utils.fonts)

    Returns:
    -------
        the synthesized page as a (H, W, 3) int32 array
    """
    # Draw template
    h, w = page["dimensions"]
    response = 255 * np.ones((h, w, 3), dtype=np.int32)

    # Draw each word
    for block in page["blocks"]:
        for line in block["lines"]:
            for word in line["words"]:
                # Get absolute word geometry (relative coords scaled by the page size)
                (xmin, ymin), (xmax, ymax) = word["geometry"]
                xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
                ymin, ymax = int(round(h * ymin)), int(round(h * ymax))

                # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
                font = get_font(font_family, int(0.75 * (ymax - ymin)))
                img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
                d = ImageDraw.Draw(img)
                # Draw in black the value of the word
                try:
                    d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
                except UnicodeEncodeError:
                    # When character cannot be encoded, use its anyascii version
                    d.text((0, 0), anyascii(word["value"]), font=font, fill=(0, 0, 0))

                # Colorize if draw_proba: recolor the black glyph pixels along a
                # red (p=0) -> blue (p=1) gradient, keeping the background white
                if draw_proba:
                    p = int(255 * word["confidence"])
                    mask = np.where(np.array(img) == 0, 1, 0)
                    proba: np.ndarray = np.array([255 - p, 0, p])
                    color = mask * proba[np.newaxis, np.newaxis, :]
                    white_mask = 255 * (1 - mask)
                    img = color + white_mask

                # Write to response page
                response[ymin:ymax, xmin:xmax, :] = np.array(img)

    return response
onnxtr/utils/repr.py ADDED
@@ -0,0 +1,64 @@
1
+ # Copyright (C) 2021-2024, Mindee | Felix Dittrich.
2
+
3
+ # This program is licensed under the Apache License 2.0.
4
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
+
6
+ # Adapted from https://github.com/pytorch/torch/blob/master/torch/nn/modules/module.py
7
+
8
+ from typing import List
9
+
10
+ __all__ = ["NestedObject"]
11
+
12
+
13
+ def _addindent(s_, num_spaces):
14
+ s = s_.split("\n")
15
+ # don't do anything for single-line stuff
16
+ if len(s) == 1:
17
+ return s_
18
+ first = s.pop(0)
19
+ s = [(num_spaces * " ") + line for line in s]
20
+ s = "\n".join(s)
21
+ s = first + "\n" + s
22
+ return s
23
+
24
+
25
class NestedObject:
    """Base class for all nested objects in onnxtr.

    Provides a torch-Module-like ``__repr__`` that renders the children listed
    in ``_children_names`` alongside this object's ``extra_repr``.
    """

    # Names of the attributes rendered as sub-objects in __repr__
    _children_names: List[str]

    def extra_repr(self) -> str:
        """Extra information shown inside the parentheses of the repr."""
        return ""

    def __repr__(self):
        # We treat the extra repr like the sub-objects: one item per line
        extra_repr = self.extra_repr()
        # An empty string would otherwise split into [''], so guard it
        extra_lines = extra_repr.split("\n") if extra_repr else []

        child_lines = []
        for key in getattr(self, "_children_names", []):
            child = getattr(self, key)
            if isinstance(child, list) and len(child) > 0:
                child_str = ",\n".join(repr(subchild) for subchild in child)
                if len(child) > 1:
                    child_str = _addindent(f"\n{child_str},", 2) + "\n"
                child_str = f"[{child_str}]"
            else:
                child_str = repr(child)
            child_str = _addindent(child_str, 2)
            child_lines.append(f"({key}): {child_str}")

        lines = extra_lines + child_lines
        main_str = self.__class__.__name__ + "("
        if lines:
            # Simple one-liner info, which most builtin Modules will use
            if len(extra_lines) == 1 and not child_lines:
                main_str += extra_lines[0]
            else:
                main_str += "\n  " + "\n  ".join(lines) + "\n"
        return main_str + ")"