edgefirst-validator 4.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. deepview/modelpack/utils/argmax.py +16 -0
  2. edgefirst/validator/__init__.py +1 -0
  3. edgefirst/validator/__main__.py +375 -0
  4. edgefirst/validator/datasets/__init__.py +118 -0
  5. edgefirst/validator/datasets/cache.py +296 -0
  6. edgefirst/validator/datasets/core.py +250 -0
  7. edgefirst/validator/datasets/darknet.py +446 -0
  8. edgefirst/validator/datasets/database.py +1067 -0
  9. edgefirst/validator/datasets/instance/__init__.py +4 -0
  10. edgefirst/validator/datasets/instance/core.py +222 -0
  11. edgefirst/validator/datasets/instance/detection.py +145 -0
  12. edgefirst/validator/datasets/instance/multitask.py +80 -0
  13. edgefirst/validator/datasets/instance/segmentation.py +120 -0
  14. edgefirst/validator/datasets/utils/fetch.py +682 -0
  15. edgefirst/validator/datasets/utils/readers.py +425 -0
  16. edgefirst/validator/datasets/utils/transformations.py +1695 -0
  17. edgefirst/validator/evaluators/__init__.py +17 -0
  18. edgefirst/validator/evaluators/callbacks/__init__.py +3 -0
  19. edgefirst/validator/evaluators/callbacks/core.py +192 -0
  20. edgefirst/validator/evaluators/callbacks/plots.py +900 -0
  21. edgefirst/validator/evaluators/callbacks/studio.py +234 -0
  22. edgefirst/validator/evaluators/core.py +257 -0
  23. edgefirst/validator/evaluators/detection.py +749 -0
  24. edgefirst/validator/evaluators/multitask.py +270 -0
  25. edgefirst/validator/evaluators/parameters/__init__.py +53 -0
  26. edgefirst/validator/evaluators/parameters/core.py +554 -0
  27. edgefirst/validator/evaluators/parameters/dataset.py +239 -0
  28. edgefirst/validator/evaluators/parameters/model.py +338 -0
  29. edgefirst/validator/evaluators/parameters/validation.py +528 -0
  30. edgefirst/validator/evaluators/segmentation.py +729 -0
  31. edgefirst/validator/evaluators/utils/__init__.py +3 -0
  32. edgefirst/validator/evaluators/utils/classify.py +292 -0
  33. edgefirst/validator/evaluators/utils/match.py +262 -0
  34. edgefirst/validator/evaluators/utils/timer.py +132 -0
  35. edgefirst/validator/metrics/__init__.py +9 -0
  36. edgefirst/validator/metrics/data/__init__.py +7 -0
  37. edgefirst/validator/metrics/data/label.py +668 -0
  38. edgefirst/validator/metrics/data/metrics.py +759 -0
  39. edgefirst/validator/metrics/data/plots.py +476 -0
  40. edgefirst/validator/metrics/data/stats.py +507 -0
  41. edgefirst/validator/metrics/detection.py +595 -0
  42. edgefirst/validator/metrics/segmentation.py +173 -0
  43. edgefirst/validator/metrics/utils/math.py +717 -0
  44. edgefirst/validator/publishers/__init__.py +3 -0
  45. edgefirst/validator/publishers/console.py +147 -0
  46. edgefirst/validator/publishers/studio.py +128 -0
  47. edgefirst/validator/publishers/tensorboard.py +119 -0
  48. edgefirst/validator/publishers/utils/logger.py +111 -0
  49. edgefirst/validator/publishers/utils/table.py +403 -0
  50. edgefirst/validator/runners/__init__.py +8 -0
  51. edgefirst/validator/runners/core.py +727 -0
  52. edgefirst/validator/runners/deepviewrt.py +177 -0
  53. edgefirst/validator/runners/hailo.py +263 -0
  54. edgefirst/validator/runners/keras.py +150 -0
  55. edgefirst/validator/runners/kinara.py +265 -0
  56. edgefirst/validator/runners/offline.py +228 -0
  57. edgefirst/validator/runners/onnx.py +241 -0
  58. edgefirst/validator/runners/processing/decode.py +320 -0
  59. edgefirst/validator/runners/processing/dvapi.py +4192 -0
  60. edgefirst/validator/runners/processing/nms.py +637 -0
  61. edgefirst/validator/runners/processing/outputs.py +507 -0
  62. edgefirst/validator/runners/tensorrt.py +321 -0
  63. edgefirst/validator/runners/tflite.py +221 -0
  64. edgefirst/validator/validate.py +843 -0
  65. edgefirst/validator/visualize/__init__.py +3 -0
  66. edgefirst/validator/visualize/detection.py +623 -0
  67. edgefirst/validator/visualize/segmentation.py +281 -0
  68. edgefirst/validator/visualize/utils/plots.py +635 -0
  69. edgefirst_validator-4.2.1.dist-info/METADATA +111 -0
  70. edgefirst_validator-4.2.1.dist-info/RECORD +73 -0
  71. edgefirst_validator-4.2.1.dist-info/WHEEL +5 -0
  72. edgefirst_validator-4.2.1.dist-info/entry_points.txt +2 -0
  73. edgefirst_validator-4.2.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1695 @@
1
+ """
2
+ This module contains functions for transforming dataset artifacts.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import math
8
+ import numbers
9
+ from io import BytesIO
10
+ from typing import TYPE_CHECKING, Union, Tuple, Any, List, Callable
11
+
12
+ import numpy as np
13
+ from PIL import Image, ImageDraw, ExifTags
14
+
15
# Label synonyms mapped onto a single common spelling: each key is an
# alternative name and each value is the representation it is replaced with.
COCO_LABEL_SYNC = dict(
    motorbike="motorcycle",
    aeroplane="airplane",
    sofa="couch",
    pottedplant="potted plant",
    diningtable="dining table",
    tvmonitor="tv",
)
24
+
25
+ try:
26
+ import edgefirst_python # type: ignore
27
+ CONVERTER = edgefirst_python.ImageConverter()
28
+ except ImportError:
29
+ CONVERTER = None
30
+
31
+ if TYPE_CHECKING:
32
+ from edgefirst_python import TensorImage # type: ignore
33
+
34
+ # Functions for Sensor Transformations
35
+
36
+
37
def bgr2rgb(image: np.ndarray) -> np.ndarray:
    """
    Swap the channel order of a BGR image so that it reads as RGB.

    Parameters
    ----------
    image: (height, width, 3) np.ndarray
        The BGR image NumPy array.

    Returns
    -------
    np.ndarray
        A view of the input with the third axis reversed (RGB order).
    """
    # Reversing axis 2 turns (B, G, R) into (R, G, B) without copying.
    return np.flip(image, axis=2)
52
+
53
+
54
def rgb2bgr(image: np.ndarray) -> np.ndarray:
    """
    Swap the channel order of an RGB image so that it reads as BGR.

    Parameters
    ----------
    image: (height, width, 3) np.ndarray
        The RGB image NumPy array.

    Returns
    -------
    np.ndarray
        A view of the input with the third axis reversed (BGR order).
    """
    # RGB -> BGR is the same channel reversal as BGR -> RGB.
    return image[:, :, ::-1]
69
+
70
+
71
def rgb2yuyv(image: np.ndarray, backend: str = "hal") -> np.ndarray:
    """
    Convert an RGB image into the YUYV format.

    Parameters
    ----------
    image: np.ndarray
        The 3-channel RGB image NumPy array.
    backend: str
        "hal" converts through the EdgeFirst Tensor API. Any other
        value converts with OpenCV.

    Returns
    -------
    np.ndarray
        The 2-channel YUYV image array.

    Raises
    ------
    ImportError
        If the library required by the selected backend is not installed.
    """
    # Anything that is not "hal" is handled by OpenCV.
    if backend != "hal":
        try:
            import cv2
        except ImportError:
            raise ImportError(
                "OpenCV is needed to perform RGB to YUYV conversion.")
        return cv2.cvtColor(image, cv2.COLOR_RGB2YUV_YUY2)

    try:
        import edgefirst_python as hal  # type: ignore
    except ImportError:
        raise ImportError(
            "EdgeFirst HAL is needed to perform RGB to YUYV conversion.")

    rows, cols, _ = image.shape

    # Load the RGB pixels into a source tensor.
    rgb_tensor = hal.TensorImage(cols, rows, fourcc=hal.FourCC.RGB)
    rgb_tensor.copy_from_numpy(image)

    # Convert into a YUYV tensor of the same resolution.
    yuyv_tensor = hal.TensorImage(cols, rows, fourcc=hal.FourCC.YUYV)
    CONVERTER.convert(rgb_tensor, yuyv_tensor)

    # Copy the converted tensor back out into a NumPy array.
    converted = np.zeros(
        (yuyv_tensor.height, yuyv_tensor.width, 2), dtype=np.uint8)
    yuyv_tensor.normalize_to_numpy(converted)
    return converted
114
+
115
+
116
def yuyv2rgb(image: np.ndarray, backend: str = "hal") -> np.ndarray:
    """
    Convert a YUYV image into the RGB format.

    Parameters
    ----------
    image: np.ndarray
        The input 2-channel YUYV image.
    backend: str
        "hal" converts through the EdgeFirst Tensor API. Any other
        value converts with OpenCV.

    Returns
    -------
    np.ndarray
        The output 3-channel RGB image.

    Raises
    ------
    ImportError
        If the library required by the selected backend is not installed.
    """
    # Anything that is not "hal" is handled by OpenCV.
    if backend != "hal":
        try:
            import cv2
        except ImportError:
            raise ImportError(
                "OpenCV is needed to perform YUYV to RGB conversion.")
        return cv2.cvtColor(image, cv2.COLOR_YUV2RGB_YUY2)

    try:
        import edgefirst_python as hal  # type: ignore
    except ImportError:
        raise ImportError(
            "EdgeFirst HAL is needed to perform YUYV to RGB conversion.")

    rows, cols, _ = image.shape

    # Load the YUYV pixels into a source tensor.
    yuyv_tensor = hal.TensorImage(cols, rows, fourcc=hal.FourCC.YUYV)
    yuyv_tensor.copy_from_numpy(image)

    # Convert into an RGB tensor of the same resolution.
    rgb_tensor = hal.TensorImage(cols, rows, fourcc=hal.FourCC.RGB)
    CONVERTER.convert(yuyv_tensor, rgb_tensor)

    # Copy the converted tensor back out into a NumPy array.
    converted = np.zeros(
        (rgb_tensor.height, rgb_tensor.width, 3), dtype=np.uint8)
    rgb_tensor.normalize_to_numpy(converted)
    return converted
159
+
160
+
161
def rgb2rgba(image: np.ndarray, backend: str = "hal") -> np.ndarray:
    """
    Convert a 3-channel RGB image to 4-channel RGBA image.

    Parameters
    ----------
    image: np.ndarray
        The 3-channel RGB image array, either channel-last
        (height, width, 3) or channel-first (3, height, width).
        When both readings are possible, channel-last wins.
    backend: str
        "hal" converts through the EdgeFirst Tensor API. Any other
        value appends the alpha plane with NumPy.

    Returns
    -------
    np.ndarray
        The 4-channel RGBA image array with the alpha value set to 255.
        Arrays that are not 3-dimensional, or that do not carry exactly
        three channels, are returned unchanged.

    Raises
    ------
    ImportError
        If backend is "hal" and the EdgeFirst HAL is not installed.
    """
    # Only 3-dimensional arrays can be RGB images; leave the rest alone
    # instead of failing on the shape unpacking below.
    if image.ndim != 3:
        return image

    # Check the channel-last layout first so that an image whose height
    # happens to be 3 is not mistaken for a channel-first array.
    if image.shape[-1] == 3:
        channels_first = False
        height, width, _ = image.shape
    elif image.shape[0] == 3:
        channels_first = True
        _, height, width = image.shape
    else:
        return image

    if backend == "hal":
        try:
            import edgefirst_python  # type: ignore
        except ImportError:
            raise ImportError(
                "EdgeFirst HAL is needed to perform RGB to RGBA conversion.")

        src = edgefirst_python.TensorImage(
            width, height, fourcc=edgefirst_python.FourCC.RGB)
        src.copy_from_numpy(image)

        dst = edgefirst_python.TensorImage(
            width, height, fourcc=edgefirst_python.FourCC.RGBA)
        CONVERTER.convert(src, dst)

        im = np.zeros((dst.height, dst.width, 4), dtype=np.uint8)
        dst.normalize_to_numpy(im)
        return im

    # The alpha plane must be stacked on whichever axis holds the channels.
    if channels_first:
        alpha_channel = np.full((1, height, width), 255, dtype=np.uint8)
        return np.concatenate((image, alpha_channel), axis=0)

    alpha_channel = np.full((height, width, 1), 255, dtype=np.uint8)
    return np.concatenate((image, alpha_channel), axis=-1)
206
+
207
+
208
def imagenet(image: np.ndarray) -> np.ndarray:
    """
    Normalize the image with imagenet normalization.

    Every channel is scaled to [0, 1] by dividing by 255 and then
    standardized with the per-channel mean and standard deviation.
    The input array is never modified.

    Parameters
    ----------
    image: np.ndarray
        The image RGB array with shape
        (3, height, width) or (height, width, 3).

    Returns
    -------
    np.ndarray
        A new array holding the image with imagenet normalization.
        Floating point inputs keep their dtype; any other dtype is
        returned as float32, because the normalized values are fractional
        and cannot be stored in an integer array.
    """
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    source = np.asarray(image)
    if np.issubdtype(source.dtype, np.floating):
        work_dtype = source.dtype
    else:
        work_dtype = np.float32
    # astype() copies, so the caller's array is left untouched and the
    # per-channel assignments below always land in a floating point buffer.
    normalized = source.astype(work_dtype)

    if normalized.shape[0] == 3:
        # Channel-first layout: (3, height, width).
        for channel in range(normalized.shape[0]):
            normalized[channel, :, :] = (
                normalized[channel, :, :] / 255 - mean[channel]
            ) / std[channel]
    else:
        # Channel-last layout: (height, width, channels).
        for channel in range(normalized.shape[2]):
            normalized[:, :, channel] = (
                normalized[:, :, channel] / 255 - mean[channel]
            ) / std[channel]
    return normalized
235
+
236
+
237
def image_normalization(
    image: np.ndarray,
    normalization: str,
    input_type: np.dtype = np.float32
):
    """
    Normalizes the image pixels and casts them to the requested datatype.

    Parameters
    ----------
    image: np.ndarray
        The image to perform normalization.
    normalization: str
        The type of normalization to perform (case-insensitive).
        "signed" maps to image / 127.5 - 1, "unsigned" maps to
        image / 255, and "imagenet" applies the imagenet normalization.
        Any other value (such as "raw") leaves the pixel values as they are.
    input_type: np.dtype
        The NumPy datatype of the returned array. Ex. np.uint8

    Returns
    -------
    np.ndarray
        The normalized image cast to `input_type`.
    """
    mode = normalization.lower()

    if mode == 'signed':
        scaled = (image.astype(np.float32) / 127.5) - 1.0
        return scaled.astype(input_type)

    if mode == 'unsigned':
        scaled = image.astype(np.float32) / 255.0
        return scaled.astype(input_type)

    if mode == 'imagenet':
        standardized = imagenet(image.astype(np.float32))
        return standardized.astype(input_type)

    # No normalization requested: only the datatype is changed.
    return image.astype(input_type)
269
+
270
+
271
def crop_image(image: np.ndarray, box: Union[list, np.ndarray]) -> np.ndarray:
    """
    Returns the region of the image that is covered by the box provided.
    This is primarily used in pose validation.

    Parameters
    ----------
    image: np.ndarray
        The frame to crop before feeding to the model.
    box: Union[list, np.ndarray]
        This contains non-normalized [xmin, ymin, xmax, ymax].

    Returns
    -------
    np.ndarray
        The image cropped to the area of the bounding box.
    """
    xmin, ymin, xmax, ymax = box
    # Rows are indexed by y and columns by x; remaining axes are kept whole.
    rows = slice(ymin, ymax)
    columns = slice(xmin, xmax)
    return image[rows, columns, ...]
291
+
292
+
293
def rotate_image(data: Union[bytes, str]) -> Image.Image:
    """
    Opens an image and rotates it according to its EXIF orientation tag.

    Parameters
    ----------
    data: Union[bytes, str]
        Read image file as a bytes object or a string path
        to the image file.

    Returns
    -------
    Image.Image
        The pillow Image with rotation applied. When the EXIF data or
        the orientation tag cannot be read, the image is reopened and
        returned converted to RGB instead.
    """
    # Raw bytes are wrapped so that pillow can read them like a file.
    source = BytesIO(data) if isinstance(data, bytes) else data

    try:
        image = Image.open(source)

        # Find the numeric EXIF tag id that is named 'Orientation'.
        for orientation, tag_name in ExifTags.TAGS.items():
            if tag_name == 'Orientation':
                break

        exif = dict(image._getexif().items())
        orientation_value = exif[orientation]

        if orientation_value == 3:
            image = image.transpose(Image.ROTATE_180)
        elif orientation_value == 6:
            image = image.transpose(Image.ROTATE_270)
        elif orientation_value == 8:
            image = image.transpose(Image.ROTATE_90)
    except (AttributeError, KeyError, IndexError):
        # cases: image don't have getexif
        image = Image.open(source).convert('RGB')
    return image
327
+
328
+
329
def resize(
    image: Union[TensorImage, np.ndarray],
    size: tuple = None,
    backend: str = "hal"
) -> np.ndarray:
    """
    Resizes the image to the specified dimensions. The original aspect
    ratio is not maintained. Image needs to be uint8.

    Parameters
    ----------
    image: Union[edgefirst_python.TensorImage, np.ndarray]
        The image (RGB, RGBA, Gray) tensor with uint8 dtype.
    size: tuple
        Specify the (width, height) size of the new image. When this
        is None, the image is returned untouched.
    backend: str
        Specify the backend library for resizing the image from the options
        "hal", "opencv", "pillow". Any unrecognized value uses pillow.

    Returns
    -------
    np.ndarray
        Resized image.

    Raises
    ------
    ImportError
        If the library required by the selected backend is not installed.
    """
    if size is None:
        return image

    if backend == "opencv":
        try:
            import cv2  # type: ignore
        except ImportError:
            raise ImportError("OpenCV is needed to resize using opencv.")

        return cv2.resize(image, size, interpolation=cv2.INTER_LINEAR)

    if backend != "hal":
        # Fallback: resize with pillow.
        pil_image = Image.fromarray(np.uint8(image))
        pil_image = pil_image.resize(size)
        return np.array(pil_image)

    try:
        import edgefirst_python as hal  # type: ignore
    except ImportError:
        raise ImportError(
            "EdgeFirst HAL is needed to resize using hal.")

    if isinstance(image, np.ndarray):
        # Array without any channels is assumed to be grey.
        if image.ndim == 2:
            pixels = np.expand_dims(image, axis=-1)
        else:
            pixels = image

        depth = pixels.shape[-1]
        if depth == 1:
            src_format = hal.FourCC.GREY
            dst_format = hal.FourCC.GREY
            channels = 1
        elif depth == 4:
            src_format = hal.FourCC.RGBA
            dst_format = hal.FourCC.RGBA
            channels = 4
        else:
            # Currently OpenGL in x86_64 only supports RGBA.
            src_format = hal.FourCC.RGB
            dst_format = hal.FourCC.RGBA
            channels = 4

        rows, cols, _ = pixels.shape
        src = hal.TensorImage(cols, rows, fourcc=src_format)
        src.copy_from_numpy(pixels)
    else:
        src = image
        # Currently OpenGL in x86_64 only supports RGBA.
        if src.format == hal.FourCC.RGB:
            dst_format = hal.FourCC.RGBA
        else:
            dst_format = src.format
        channels = 1 if dst_format == hal.FourCC.GREY else 4

    dst = hal.TensorImage(size[0], size[1], fourcc=dst_format)
    CONVERTER.convert(src, dst)

    resized = np.zeros((dst.height, dst.width, channels), dtype=np.uint8)
    dst.normalize_to_numpy(resized)

    # Return the same number of channels that the source provided.
    if src.format == hal.FourCC.GREY:
        return resized.squeeze()
    if src.format == hal.FourCC.RGB:
        return resized[:, :, 0:3]
    return resized
416
+
417
+
418
def pad(
    image: np.ndarray,
    input_size: tuple,
    backend: str = "hal"
) -> Tuple[np.ndarray, list]:
    """
    Performs image padding based on the implementation provided in YOLOx:
    https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/data/data_augment.py#L142

    The image is resized with its aspect ratio preserved, placed in the
    top-left corner, and always padded on the right and at the bottom
    portions with the value 114.

    Parameters
    ----------
    image: np.ndarray
        This is the input image to pad. Either a 3-channel image with
        shape (height, width, 3) or a 2-dimensional (height, width) image.
    input_size: tuple
        This is the model input size (generally) or the output image
        resolution after padding in the order (height, width).
    backend: str
        Specify the backend library for resizing the image from the options
        "hal", "opencv", "pillow".

    Returns
    -------
    image: np.ndarray
        This is the padded image. For 3-channel inputs the channel order
        is reversed (RGB to BGR) as in the YOLOx implementation.
    shapes: list
        This is used to scale the bounding boxes of the ground
        truth and the model detections based on the padding
        transformation.
        [[pad image height, pad image width],
        [[scale_y, scale_x], [pad x, pad y]]].
        The scales are the fraction of the padded image that is covered
        by the resized image and the pads are in pixels.
    """
    height, width = image.shape[:2]  # current shape [height, width]
    has_channels = len(image.shape) == 3
    if has_channels:
        padded_image = np.full(
            (input_size[0], input_size[1], 3), 114, dtype=np.uint8)
    else:
        padded_image = np.full(input_size, 114, dtype=np.uint8)

    r = min(input_size[0] / height, input_size[1] / width)
    resized_image = resize(
        image, (int(width * r), int(height * r)), backend=backend
    )

    # Use the dimensions that were actually produced by the resize so the
    # placement and the reported padding can never disagree by a pixel.
    resized_height, resized_width = resized_image.shape[:2]
    padded_image[:resized_height, :resized_width] = resized_image

    # Only images with a channel axis can have their channels swapped;
    # a 2-dimensional image has no third axis to reverse.
    if has_channels:
        padded_image = rgb2bgr(padded_image)  # RGB2BGR
    padded_image = np.ascontiguousarray(padded_image)

    # The padding (in pixels) added on the right and at the bottom.
    dw = padded_image.shape[1] - resized_width
    dh = padded_image.shape[0] - resized_height

    shapes = [
        # imgsz (model input shape) [height, width]
        [padded_image.shape[0], padded_image.shape[1]],
        [[resized_height / input_size[0],
          resized_width / input_size[1]],
         [dw, dh]]  # ratio_pad [[scale y, scale x], [pad w, pad h]]
    ]
    return padded_image, shapes
482
+
483
+
484
def letterbox_native(
    image: np.ndarray,
    new_shape: tuple = (640, 640),
    constant: int = 114,
    backend: str = "hal"
) -> Tuple[np.ndarray, list]:
    """
    Applies the letterbox image transformations based in YOLOv5 and YOLOv7.

    The image is resized with its aspect ratio preserved and then padded
    equally on opposite sides until it reaches the target shape.

    Parameters
    ----------
    image : np.ndarray
        Input image array in HWC format, or a 2-dimensional (height, width)
        image.
    new_shape : tuple, optional
        Target shape (height, width) for output image, by default (640, 640).
    constant : int, optional
        Padding pixel value (0–255), by default 114 (gray).
    backend: str
        Specify the backend library used for resizing from the options
        "hal", "opencv", "pillow". The border is added with OpenCV when
        the backend is "opencv" and with NumPy otherwise.

    Returns
    -------
    image: np.ndarray
        The resized and padded image. With the NumPy padding path this is
        a uint8 array holding at most the first three channels of the input.
    shapes: list
        This is used to scale the bounding boxes of the ground
        truth and the model detections based on the letterbox
        transformation. List containing padded image size, scale ratio,
        and padding offsets.
        [[pad image height, pad image width],
        [[scale_y, scale_x], [pad x, pad y]]].

    Raises
    ------
    ImportError
        If backend is "opencv" and OpenCV is not installed.
    """
    height, width = image.shape[:2]
    scale = min(new_shape[1] / width, new_shape[0] / height)
    new_width = int(round(width * scale))
    new_height = int(round(height * scale))

    if scale != 1.0:
        image = resize(image, (new_width, new_height), backend=backend)

    # Compute padding
    dw, dh = new_shape[1] - new_width, new_shape[0] - new_height  # wh padding
    top = round(dh / 2)
    bottom = dh - top
    left = round(dw / 2)
    right = dw - left

    if backend == "opencv":
        try:
            import cv2  # type: ignore
        except ImportError:
            raise ImportError("OpenCV is needed for letterbox.")

        padded_image = cv2.copyMakeBorder(
            image, top, bottom, left, right, cv2.BORDER_CONSTANT,
            value=(constant, constant, constant))  # add border
    else:
        border = [(top, bottom), (left, right)]
        if image.ndim == 3:
            # Keep at most the first three channels and never pad the
            # channel axis itself.
            image = image[:, :, :3]
            border.append((0, 0))
        # A single pad call handles both grayscale and color images.
        padded_image = np.pad(
            image, border, mode='constant', constant_values=constant
        ).astype(np.uint8, copy=False)

    shapes = [
        # imgsz (model input shape) [height, width]
        [padded_image.shape[0], padded_image.shape[1]],
        # ratio_pad [[scale y, scale x], [pad w, pad h]]
        [[scale, scale], [left, top]]
    ]
    return padded_image, shapes
558
+
559
+
560
def letterbox_hal(
    image: TensorImage,
    dst: TensorImage,
) -> list:
    """
    Applies the letterbox image transformations using HAL.

    The source image is scaled with its aspect ratio preserved, centered
    inside the destination tensor, and the uncovered area is filled
    with the color [114, 114, 114, 255].

    Parameters
    ----------
    image: TensorImage
        An RGBA tensor image loaded using the HAL.
    dst: TensorImage
        The destination tensor image that receives the letterbox
        transformation. Its dimensions define the output size.

    Returns
    -------
    shapes: list
        This is used to scale the bounding boxes of the ground
        truth and the model detections based on the letterbox
        transformation. List containing padded image size, scale ratio,
        and padding offsets.
        [[pad image height, pad image width],
        [[scale_y, scale_x], [pad x, pad y]]].

    Raises
    ------
    ImportError
        If the EdgeFirst HAL is not installed.
    """
    try:
        import edgefirst_python as hal  # type: ignore
    except ImportError:
        raise ImportError(
            "EdgeFirst HAL is needed to perform letterbox using hal.")

    # The largest scale that keeps the whole image inside the destination.
    scale = min(dst.height / image.height, dst.width / image.width)
    scaled_height = image.height * scale
    scaled_width = image.width * scale

    # Offsets are computed from the unrounded dimensions.
    pad_top = round((dst.height - scaled_height) / 2)
    pad_left = round((dst.width - scaled_width) / 2)

    region = hal.Rect(
        pad_left, pad_top, round(scaled_width), round(scaled_height))
    CONVERTER.convert(image, dst,
                      dst_crop=region,
                      dst_color=[114, 114, 114, 255])

    return [
        # imgsz (model input shape) [height, width]
        [dst.height, dst.width],
        # ratio_pad [[scale y, scale x], [pad w, pad h]]
        [[scale, scale], [pad_left, pad_top]]
    ]
613
+
614
+
615
def preprocess_hal(
    image: TensorImage,
    shape: tuple,
    input_type: np.dtype,
    dst: TensorImage,
    transpose: bool = False,
    input_tensor: Callable = None,
    preprocessing: str = "letterbox",
    normalization: str = "unsigned",
    quantization: tuple = None,
    visualize: bool = False
) -> Tuple[np.ndarray, np.ndarray, list, tuple]:
    """
    Optimized input preprocessing using the HAL.

    Parameters
    ----------
    image: TensorImage
        The image input to preprocess.
    shape: tuple
        The model input shape. This can either be formatted as
        (batch size, channels, height, width) or
        (batch size, height, width, channels).
    input_type: np.dtype
        The input datatype of the model.
    dst: TensorImage
        Destination tensor for placing the image transformations.
    transpose: bool
        Condition of whether to transpose the image or not. This
        is True for input shapes with channels first. Otherwise it is False.
    input_tensor: Callable
        Callable function for retrieving the input view tensor
        from the model for directly copying the input tensor
        into the model such as the case for TFLite.
    preprocessing: str
        The type of image preprocessing to apply. By default 'letterbox'
        is used. 'pad' is not implemented with the HAL and any other
        value performs a plain resize into the destination tensor.
    normalization: str
        The type of image normalization to apply. Default is set to
        'unsigned'. 'signed' and 'raw' are also accepted, while
        'imagenet' is not implemented with the HAL. The normalization is
        only applied for float16 and float32 input datatypes.
    quantization: tuple
        The quantization parameters of the input containing
        the (scale, zero point) values. Only used for int8 inputs.
    visualize: bool
        When visualizing the model outputs, this requires a second
        copy of the transformed image. By default,
        visualization is set to False.

    Returns
    -------
    image: np.ndarray
        The image input after being preprocessed, with a leading
        batch dimension.
    visual_image: np.ndarray
        The image that is used for visualization post
        letterbox, padding, resize transformations. This is None when
        `visualize` is False.
    shapes: list
        This is used to scale the bounding boxes of the ground
        truth and the model detections based on the letterbox/padding
        transformation.

        .. code-block:: python

            [[input_height, input_width],
             [[scale_y, scale_x], [pad_w, pad_h]]]
    image_shape: tuple
        The original image dimensions as (height, width).

    Raises
    ------
    ImportError
        If the EdgeFirst HAL is not installed.
    NotImplementedError
        If 'pad' preprocessing is requested, or if 'imagenet'
        normalization is requested for a float input datatype.
    """
    try:
        import edgefirst_python as hal  # type: ignore
    except ImportError:
        raise ImportError(
            "EdgeFirst HAL is needed to perform preprocessing using hal.")

    # Fetch only (height, width) from the shape.
    # A trailing 2, 3 or 4 is the channel count for YUYV, RGB, and RGBA.
    if shape[-1] in [2, 3, 4]:
        channels, shape = shape[-1], shape[1:3]
    else:
        channels, shape = shape[1], shape[2:4]

    height, width = image.height, image.width

    # Default for a plain resize: independent scales and no padding.
    shapes = [
        # imgsz (model input shape) [height, width]
        [int(shape[0]), int(shape[1])],
        # ratio_pad [image_scale, [pad w, pad h]]
        [[float(shape[0] / height), float(shape[1] / width)], [0.0, 0.0]]
    ]

    if preprocessing == "pad":
        raise NotImplementedError("Padding with HAL is not yet implemented.")
    if preprocessing == "letterbox":
        shapes = letterbox_hal(image, dst)
    else:
        CONVERTER.convert(image, dst)

    # Allocate the array that receives the model input.
    if transpose:
        layout = [channels, dst.height, dst.width]
    else:
        layout = [dst.height, dst.width, channels]
    tensor = np.zeros(layout, dtype=input_type)

    # Normalization only applies to floating point inputs.
    is_float_input = input_type in [np.float16, np.float32]
    if is_float_input and normalization == "imagenet":
        raise NotImplementedError(
            "ImageNet normalization is currently not implemented in HAL.")
    if is_float_input and normalization == "unsigned":
        hal_normalization = hal.Normalization.UNSIGNED
    elif is_float_input and normalization == "signed":
        hal_normalization = hal.Normalization.SIGNED
    elif is_float_input and normalization == "raw":
        hal_normalization = hal.Normalization.RAW
    else:
        hal_normalization = hal.Normalization.DEFAULT

    zero_point = None
    if quantization is not None and input_type == np.int8:
        zero_point = abs(quantization[-1])

    if input_tensor is not None:
        # Directly copy the input tensor into the model for TFLite.
        target = input_tensor()[0, :, :, :]
    elif transpose and channels == 4:
        # NOTE: PLANAR_RGBA is not yet supported in HAL.
        target = tensor[0:3, :, :]
    else:
        target = tensor
    dst.normalize_to_numpy(target, normalization=hal_normalization,
                           zero_point=zero_point)

    visual_image = None
    if visualize:
        if transpose:
            visual_layout = [3, dst.height, dst.width]
        else:
            visual_layout = [dst.height, dst.width, 3]
        visual_image = np.zeros(visual_layout, dtype=np.uint8)
        dst.normalize_to_numpy(visual_image)
        if transpose:
            # Visualization expects the channels last.
            visual_image = np.transpose(visual_image, axes=[1, 2, 0])

    # Add the batch dimension.
    return tensor[None], visual_image, shapes, (height, width)
763
+
764
+
765
def preprocess_native(
    image: np.ndarray,
    shape: tuple,
    input_type: np.dtype,
    transpose: bool = False,
    input_tensor: Callable = None,
    preprocessing: str = "letterbox",
    normalization: str = "unsigned",
    quantization: tuple = None,
    backend: str = "hal",
) -> Tuple[np.ndarray, np.ndarray, list, tuple]:
    """
    Standard preprocessing method. Default parameters are based on
    Ultralytics defaults.

    Parameters
    ----------
    image: np.ndarray
        The image input to preprocess.
    shape: tuple
        The model input shape. This can either be formatted as
        (batch size, channels, height, width) or
        (batch size, height, width, channels).
    input_type: np.dtype
        The input datatype of the model.
    transpose: bool
        Condition of whether to transpose the image or not. This
        is True for input shapes with channels first. Otherwise it is False.
    input_tensor: Callable
        Callable function for retrieving the input view tensor
        from the model for directly copying the input tensor
        into the model such as the case for TFLite.
    preprocessing: str
        The type of image preprocessing to apply. By default 'letterbox'
        is used. 'pad' applies padding and any other value performs
        a plain resize.
    normalization: str
        The type of image normalization to apply. Default is set to
        'unsigned'. However 'signed', 'raw', and 'imagenet' are possible
        values. The normalization is only applied for float16 and float32
        input datatypes.
    quantization: tuple
        The quantization parameters of the input containing
        the (scale, zero point) values. Only used for int8 inputs.
    backend: str
        The backend library that is forwarded to the resize, letterbox,
        padding and image format conversion functions. With "opencv" the
        input image is additionally converted from BGR to RGB first.

    Returns
    -------
    image: np.ndarray
        The image input after being preprocessed, with a leading
        batch dimension.
    visual_image: np.ndarray
        The image that is used for visualization post
        letterbox, padding, resize transformations.
    shapes: list
        This is used to scale the bounding boxes of the ground
        truth and the model detections based on the letterbox/padding
        transformation.

        .. code-block:: python

            [[input_height, input_width],
             [[scale_y, scale_x], [pad_w, pad_h]]]
    image_shape: tuple
        The original image dimensions as (height, width).
    """
    # Fetch only (height, width) from the shape.
    # A trailing 2, 3 or 4 is the channel count for YUYV, RGB, and RGBA.
    if shape[-1] in [2, 3, 4]:
        channel, shape = shape[-1], shape[1:3]
    else:
        channel, shape = shape[1], shape[2:4]

    # Function that transforms image formats: 2 channels means YUYV,
    # 4 channels means RGBA, anything else stays RGB.
    transformer = {2: rgb2yuyv, 4: rgb2rgba}.get(channel)

    height, width = image.shape[0:2]

    if backend == "opencv":
        # OpenCV reads images into BGR by default.
        image = bgr2rgb(image)

    if preprocessing == "letterbox":
        image, shapes = letterbox_native(
            image, new_shape=shape, backend=backend)
        visual_image = image
    elif preprocessing == "pad":
        image, shapes = pad(image, shape, backend=backend)
        # Padding returns BGR, so swap the channels for visualization.
        visual_image = bgr2rgb(image)
    else:
        # Plain resize: independent scales and no padding.
        shapes = [
            shape,  # imgsz (model input shape) [height, width]
            # ratio_pad [image_scale, [pad w, pad h]]
            [[shape[0] / height, shape[1] / width], [0.0, 0.0]]
        ]
        image = resize(image, (shape[1], shape[0]), backend=backend)
        visual_image = image

    # Convert image format to either YUYV, RGBA or keep as RGB.
    if transformer is not None:
        image = transformer(image, backend=backend)

    # Expects batch size, channel, height, width.
    if transpose:
        image = np.transpose(image, axes=[2, 0, 1])

    # Handle full/half precision input types.
    if input_type in [np.float16, np.float32]:
        image = image_normalization(image, normalization, input_type)

    # For quantized models, run input quantization parameters.
    if quantization is not None and input_type == np.int8:
        zero_point = abs(quantization[-1])
        # Widen to int16 so the subtraction cannot wrap around.
        image = (image.astype(np.int16) - zero_point).astype(np.int8)

    # Add the batch dimension.
    image = image[None]

    # Directly copy the input tensor into the model for TFLite.
    if input_tensor is not None:
        np.copyto(input_tensor(), image)

    return image, visual_image, shapes, (height, width)
894
+
895
+
896
+ # Functions for Annotation Transformations
897
+
898
def clamp(
    value: Union[float, int],
    min: Union[float, int] = 0,
    max: Union[float, int] = 1
) -> Union[float, int]:
    """
    Restrict a value to the closed interval [min, max].

    Values already inside the interval are returned untouched; values
    outside of it are replaced by the nearest bound.

    Parameters
    ----------
    value: Union[float, int]
        The value to restrict.
    min: Union[float, int]
        Lower bound of the interval. Defaults to 0.
    max: Union[float, int]
        Upper bound of the interval. Defaults to 1.

    Returns
    -------
    Union[float, int]
        ``min`` when the value is below it, ``max`` when the value is
        above it, otherwise the value itself.
    """
    # The lower bound is tested first, mirroring the original precedence.
    if value < min:
        return min
    if value > max:
        return max
    return value
923
+
924
+
925
def standardize_coco_labels(labels: Union[list, np.ndarray]) -> list:
    """
    Map COCO label synonyms onto their standard COCO names.

    Each label is compared against every key of the module-level
    "COCO_LABEL_SYNC" mapping; on a match it is replaced by the mapped
    value. Labels without a matching key pass through unchanged.

    Parameters
    ----------
    labels: Union[list, np.ndarray]
        The string labels to standardize.

    Returns
    -------
    list
        The labels with known synonyms replaced by standard COCO labels.
    """
    synced_labels = []
    for label in labels:
        standard = label
        # The scan deliberately continues after a hit: a replaced label is
        # still compared against the remaining keys, as in the original.
        for synonym in COCO_LABEL_SYNC:
            if standard == synonym:
                standard = COCO_LABEL_SYNC[synonym]
        synced_labels.append(standard)
    return synced_labels
949
+
950
+
951
def labels2string(
    int_labels: Union[list, np.ndarray],
    string_labels: Union[list, np.ndarray]
) -> list:
    """
    Translate label indices into their string names.

    Entries that are numbers (or NumPy arrays) are used as indices into
    ``string_labels``; any other entry (e.g. already a string) is kept
    as it is.

    Parameters
    ----------
    int_labels: Union[list, np.ndarray]
        The labels to translate, typically integer indices.
    string_labels: Union[list, np.ndarray]
        The unique string labels, indexed by label index.

    Returns
    -------
    list
        The labels with every numeric entry replaced by its string name.
    """
    index_types = (numbers.Number, np.ndarray)
    return [
        string_labels[int(label)] if isinstance(label, index_types) else label
        for label in int_labels
    ]
976
+
977
+
978
def normalize(boxes: np.ndarray, shape: tuple = None) -> np.ndarray:
    """
    Normalizes the boxes to the width and height
    of the image or model input resolution.

    Floating point arrays are normalized in place and the same array is
    returned. Integer (or boolean) input, such as pixel coordinates, is
    first promoted to float64 because an in-place true division cannot
    be stored back into an integer array.

    Parameters
    ----------
    boxes: np.ndarray
        Contains bounding boxes to normalize [[boxes1], [boxes2]].
        Lists are converted into NumPy arrays.
    shape: tuple
        The (height, width) shape of the image to normalize the annotations.
        If None, the boxes are returned unchanged.

    Returns
    -------
    np.ndarray
        new x-coordinate = old x-coordinate / width
        new y-coordinate = old y-coordinate / height
    """
    if shape is None:
        return boxes

    boxes = np.asarray(boxes)
    # `/=` on an integer array raises a casting error, so promote first.
    if boxes.dtype.kind in "biu":
        boxes = boxes.astype(np.float64)

    height, width = shape[0], shape[1]
    boxes[..., 0:1] /= width
    boxes[..., 1:2] /= height
    boxes[..., 2:3] /= width
    boxes[..., 3:4] /= height
    return boxes
1006
+
1007
+
1008
def denormalize(boxes: np.ndarray, shape: tuple = None) -> np.ndarray:
    """
    Denormalizes the boxes by the width and height of the image
    or model input resolution to get the pixel values of the boxes.

    The scaling is performed on a copy, so the array passed in keeps its
    normalized values and the function can safely be called repeatedly
    on the same input.

    Parameters
    ----------
    boxes: np.ndarray
        Contains bounding boxes to denormalize [[boxes1], [boxes2]].
    shape: tuple
        The (height, width) shape of the image to denormalize the annotations.
        If None, the boxes are returned unchanged.

    Returns
    -------
    np.ndarray
        Denormalized set of bounding boxes in pixels values, cast to
        int32 (fractions are truncated).
    """
    if shape is None:
        return boxes

    # np.array copies and keeps the dtype, so the arithmetic (and therefore
    # the truncated result) matches scaling the original array directly.
    scaled = np.array(boxes)
    height, width = shape[0], shape[1]
    scaled[..., 0:1] *= width
    scaled[..., 1:2] *= height
    scaled[..., 2:3] *= width
    scaled[..., 3:4] *= height
    return scaled.astype(np.int32)
1035
+
1036
+
1037
def normalize_polygon(vertex: Union[list, np.ndarray], shape: tuple) -> list:
    """
    Normalize a single polygon vertex by the image dimensions.

    Parameters
    ----------
    vertex: Union[list, np.ndarray]
        The [x, y] coordinate of the vertex.
    shape: tuple
        The (height, width) shape of the image used for normalization.

    Returns
    -------
    list
        The normalized [x / width, y / height] coordinate.
    """
    height, width = shape[0], shape[1]
    x_coord = float(vertex[0])
    y_coord = float(vertex[1])
    return [x_coord / width, y_coord / height]
1054
+
1055
+
1056
def denormalize_polygon(vertex: Union[list, np.ndarray], shape: tuple) -> list:
    """
    Convert a normalized polygon vertex into pixel coordinates.

    Parameters
    ----------
    vertex: Union[list, np.ndarray]
        The normalized [x, y] coordinate of the vertex.
    shape: tuple
        The (height, width) shape of the image used for denormalization.

    Returns
    -------
    list
        The [x * width, y * height] coordinate truncated to integers.
    """
    height, width = shape[0], shape[1]
    x_pixel = int(float(vertex[0]) * width)
    y_pixel = int(float(vertex[1]) * height)
    return [x_pixel, y_pixel]
1073
+
1074
+
1075
def xcycwh2xyxy(boxes: np.ndarray) -> np.ndarray:
    """
    Converts YOLO (xcycwh) format into PascalVOC (xyxy) format.

    The conversion operates on the last axis, so a single box of shape
    (4,), a batch of shape (n, 4), and an empty array are all accepted,
    consistent with the other box converters in this module. A new
    array is returned; the input is not modified.

    Parameters
    ----------
    boxes: np.ndarray
        Contains lists for each boxes in YOLO format [[boxes1], [boxes2]].

    Returns
    -------
    np.ndarray
        Contains list for each boxes in PascalVOC format.
    """
    boxes = np.asarray(boxes)
    centers = boxes[..., 0:2]
    half_sizes = boxes[..., 2:4] / 2
    # Top-left corner is center - half size, bottom-right is center + half.
    return np.concatenate([centers - half_sizes, centers + half_sizes],
                          axis=-1)
1093
+
1094
+
1095
def xyxy2xcycwh(boxes: np.ndarray) -> np.ndarray:
    """
    Convert PascalVOC (xyxy) boxes into YOLO (xcycwh) boxes.

    The conversion is written back into the array that was passed in,
    and that same array is returned.

    Parameters
    ----------
    boxes: np.ndarray
        The boxes in PascalVOC format [[boxes1], [boxes2]].

    Returns
    -------
    np.ndarray
        The input array, now holding the boxes in YOLO format.
    """
    # Single-column views into the caller's array.
    x_min = boxes[..., 0:1]
    y_min = boxes[..., 1:2]
    x_max = boxes[..., 2:3]
    y_max = boxes[..., 3:4]

    # Sizes are computed up front, before any column is overwritten.
    box_w = x_max - x_min
    box_h = y_max - y_min

    x_min[...] = x_min + box_w / 2
    y_min[...] = y_min + box_h / 2
    x_max[...] = box_w
    y_max[...] = box_h
    return boxes
1116
+
1117
+
1118
def xywh2xyxy(boxes: np.ndarray) -> np.ndarray:
    """
    Convert COCO (xywh) boxes into PascalVOC (xyxy) boxes.

    The width and height columns are replaced in place by the
    bottom-right corner, and the same array is returned.

    Parameters
    ----------
    boxes: np.ndarray
        The boxes in COCO format [[boxes1], [boxes2]].

    Returns
    -------
    np.ndarray
        The input array, now holding the boxes in PascalVOC format.
    """
    # (size column, origin column): x_max = w + x_min, y_max = h + y_min.
    for size_col, origin_col in ((2, 0), (3, 1)):
        boxes[..., size_col:size_col + 1] += \
            boxes[..., origin_col:origin_col + 1]
    return boxes
1135
+
1136
+
1137
def xyxy2xywh(boxes: np.ndarray) -> np.ndarray:
    """
    Convert PascalVOC (xyxy) boxes into COCO (xywh) boxes.

    The bottom-right corner columns are replaced in place by the width
    and height, and the same array is returned.

    Parameters
    ----------
    boxes: np.ndarray
        The boxes in PascalVOC format [[boxes1], [boxes2]].

    Returns
    -------
    np.ndarray
        The input array, now holding the boxes in COCO format.
    """
    # (corner column, origin column): w = x_max - x_min, h = y_max - y_min.
    for corner_col, origin_col in ((2, 0), (3, 1)):
        boxes[..., corner_col:corner_col + 1] -= \
            boxes[..., origin_col:origin_col + 1]
    return boxes
1154
+
1155
+
1156
def scale(
    boxes: np.ndarray,
    w: int = 640,
    h: int = 640,
    padw: int = 0,
    padh: int = 0,
) -> np.ndarray:
    """
    Scale and offset xyxy boxes so that they line up with an image
    that went through a letterbox transformation.

    Parameters
    ----------
    boxes: np.ndarray (nx4)
        The boxes, already in xyxy format.
    w: int
        The width of the image before any letterbox transformation.
    h: int
        The height of the image before any letterbox transformation.
    padw: int
        The width padding in relation to the letterbox.
    padh: int
        The height padding in relation to the letterbox.

    Returns
    -------
    np.ndarray
        A copy of the boxes where x coordinates are ``w * x + padw`` and
        y coordinates are ``h * y + padh``.
    """
    rescaled = np.copy(boxes)
    # Column order is x_min, y_min, x_max, y_max.
    transforms = ((w, padw), (h, padh), (w, padw), (h, padh))
    for column, (factor, offset) in enumerate(transforms):
        rescaled[..., column] = factor * boxes[..., column] + offset
    return rescaled
1194
+
1195
+
1196
def clamp_boxes(boxes: np.ndarray, clamp: int,
                shape: tuple = None) -> np.ndarray:
    """
    Resize bounding boxes that are smaller than a minimum size.

    A box is selected when its width or its height, measured in pixels,
    is below ``clamp``. For every selected box both the bottom-right x
    and y are rewritten so that the box measures ``clamp`` pixels along
    each dimension from its top-left corner. The update is done in
    place on the array passed in.

    Parameters
    ----------
    boxes: np.ndarray
        The bounding boxes to clamp in xyxy order.
    clamp: int
        The minimum dimension, in pixels, allowed for the width and
        height of a bounding box.
    shape: tuple
        If None (default), the boxes are taken to be in pixels.
        Otherwise the boxes are taken to be normalized and the
        (height, width) provided is used to express their sizes in
        pixels for the comparison against ``clamp``.

    Returns
    -------
    np.ndarray
        The bounding boxes where the small boxes have been resized.
    """
    if len(boxes) == 0:
        return boxes

    height, width = (1, 1) if shape is None else shape

    pixel_widths = ((boxes[..., 2:3] - boxes[..., 0:1]) * width).flatten()
    pixel_heights = ((boxes[..., 3:4] - boxes[..., 1:2]) * height).flatten()
    undersized = np.flatnonzero(
        (pixel_widths < clamp) | (pixel_heights < clamp))

    # clamp / width (or height) converts the pixel size back to box units.
    boxes[undersized, 2:3] = boxes[undersized, 0:1] + clamp / width
    boxes[undersized, 3:4] = boxes[undersized, 1:2] + clamp / height
    return boxes
1241
+
1242
+
1243
def ignore_boxes(
    ignore: int,
    boxes: np.ndarray,
    labels: np.ndarray,
    scores: np.ndarray = None,
    shape: tuple = None
) -> Tuple[np.ndarray, np.ndarray, Union[None, np.ndarray]]:
    """
    Drop boxes (with their labels and scores) that are too small.

    A box is kept only when both its width and its height, measured in
    pixels, are at least ``ignore``.

    Parameters
    ----------
    ignore: int
        The minimum size in pixels. Boxes with a width or height below
        this value are removed.
    boxes: np.ndarray
        The bounding boxes array with shape (n, 4) in xyxy order.
    labels: np.ndarray
        The label of each bounding box. Labels of removed boxes are
        removed as well.
    scores: np.ndarray
        (Optional) the score of each bounding box. Scores of removed
        boxes are removed as well.
    shape: tuple
        If None (default), the boxes are taken to be in pixels.
        Otherwise the boxes are taken to be normalized and the
        (height, width) provided is used to express their sizes in
        pixels for the comparison against ``ignore``.

    Returns
    -------
    boxes: np.ndarray
        The bounding boxes that were kept.
    labels: np.ndarray
        The labels of the bounding boxes that were kept.
    scores: Union[None, np.ndarray]
        None when no scores were provided, otherwise the scores of the
        bounding boxes that were kept.
    """
    height, width = (1, 1) if shape is None else shape

    pixel_widths = ((boxes[..., 2:3] - boxes[..., 0:1]) * width).flatten()
    pixel_heights = ((boxes[..., 3:4] - boxes[..., 1:2]) * height).flatten()
    keep = np.flatnonzero(
        (pixel_widths >= ignore) & (pixel_heights >= ignore))

    kept_boxes = np.take(boxes, keep, axis=0)
    kept_labels = np.take(labels, keep, axis=0)
    kept_scores = None if scores is None else np.take(scores, keep, axis=0)
    return kept_boxes, kept_labels, kept_scores
1302
+
1303
+ # Functions for Segmentation Transformations
1304
+
1305
+
1306
def segments2boxes(segments: list, box_format: str = "xcycwh") -> np.ndarray:
    """
    Compute the bounding box enclosing each segment, i.e.
    (xy1, xy2, ...) to a single box per segment.

    Parameters
    ----------
    segments: list
        List of segments where each segment is an array of points and
        each point is an [x, y] coordinate.
    box_format: str
        The output box format. "xcycwh" (YOLO) is the default, "xywh"
        (COCO) is also recognized, and any other value yields "xyxy"
        (PascalVOC).

    Returns
    -------
    np.ndarray
        The bounding box of every segment in the requested format.
    """
    extents = []
    for segment in segments:
        xs, ys = segment.T  # split the points into x and y coordinates
        extents.append([xs.min(), ys.min(), xs.max(), ys.max()])  # xyxy
    xyxy = np.array(extents)

    if box_format == "xcycwh":
        return xyxy2xcycwh(xyxy)
    if box_format == "xywh":
        return xyxy2xywh(xyxy)
    return xyxy
1336
+
1337
+
1338
def resample_segments(segments: list, n: int = 1000) -> list:
    """
    Resample every segment with linear interpolation.
    Source: https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py#L485

    Segments that already hold exactly ``n`` points are left untouched.
    Every other segment is closed (its first point is appended at the
    end) and interpolated; the entry in ``segments`` is then replaced
    in place by the float32 result.

    Parameters
    ----------
    segments: list
        List of (N, 2) arrays where N is the number of points in each segment.
    n: int
        Number of points to resample each segment to.

    Returns
    -------
    list
        The same list, holding the resampled segments.
    """
    for index, segment in enumerate(segments):
        if len(segment) == n:
            continue

        # Close the polygon by repeating its first point at the end.
        closed = np.concatenate((segment, segment[0:1, :]), axis=0)
        count = len(closed)
        knots = np.arange(count)

        if count < n:
            # Spread n - count new samples and merge the original knots in
            # so that the existing points are preserved.
            samples = np.linspace(0, count - 1, n - count)
            samples = np.insert(
                samples, np.searchsorted(samples, knots), knots)
        else:
            samples = np.linspace(0, count - 1, n)

        per_axis = [np.interp(samples, knots, closed[:, axis])
                    for axis in range(2)]
        segments[index] = np.concatenate(
            per_axis, dtype=np.float32).reshape(2, -1).T  # segment xy
    return segments
1367
+
1368
+
1369
def format_segments(
    segments: np.ndarray,
    shape: tuple,
    ratio_pad: tuple,
    colors: Union[list, np.ndarray],
    mask_ratio: int = 1,
    semantic: bool = False,
    backend: str = "hal"
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Convert polygon segments to bitmap masks.

    The segment coordinates are first scaled and shifted (in place) by
    the factors in ``ratio_pad`` and then rasterized.

    Parameters
    ----------
    segments: np.ndarray
        Mask segments with shape (# polygons, # coordinates, 2)
    shape: tuple
        This represents the (height, width) of the model input shape.
    ratio_pad: tuple
        The scale and padding factors, unpacked as
        ((scale_h, scale_w), (pad_w, pad_h)).
    colors: Union[list, np.ndarray]
        The label assigned to each polygon. Only used for semantic
        segmentation.
    mask_ratio: int, optional
        Downsampling ratio forwarded to the instance mask creation.
        Only used when ``semantic`` is False.
    semantic: bool, optional
        If True a single mask image is drawn through
        ``create_mask_image``. If False (default) one mask per polygon
        is created through ``polygons2masks``.
    backend: str
        Backend library forwarded to ``polygons2masks`` for resizing,
        from the options "hal", "opencv", "pillow".

    Returns
    -------
    masks: np.ndarray
        The rasterized masks.
    sorted_idx: np.ndarray
        Always None in this implementation.
    """
    (scale_h, scale_w), (padw, padh) = ratio_pad[0], ratio_pad[1]

    if len(segments):
        # Views into the caller's array: the updates are applied in place.
        xs = segments[..., 0]
        ys = segments[..., 1]
        xs *= scale_w
        xs += padw
        ys *= scale_h
        ys += padh

    if semantic:
        masks = create_mask_image(
            polygons=segments,
            labels=colors,
            shape=shape
        )
    else:
        masks = polygons2masks(
            imgsz=shape,
            segments=segments,
            downsample_ratio=mask_ratio,
            backend=backend
        )
    return masks, None
1437
+
1438
+
1439
def polygon2mask(
    imgsz: Tuple[int, int],
    polygons: List[np.ndarray],
    color: int = 1,
    downsample_ratio: int = 1,
    backend: str = "hal"
) -> np.ndarray:
    """
    Rasterize a list of polygons into one mask of the given image size.

    Parameters
    ----------
    imgsz: Tuple[int, int]
        The size of the image as (height, width).
    polygons: List[np.ndarray]
        The polygons to draw. They are converted into an int32 array
        and reshaped to (number of polygons, number of points, 2).
    color: int, optional
        The value used to fill the polygons on the mask.
    downsample_ratio: int, optional
        When greater than 1, the mask is resized to the image size
        integer-divided by this factor.
    backend: str
        Specify the backend library for resizing the image from the options
        "hal", "opencv", "pillow".

    Returns
    -------
    np.ndarray
        The mask with the polygons filled in.
    """
    vertices = np.asarray(polygons, dtype=np.int32)
    vertices = vertices.reshape((vertices.shape[0], -1, 2))
    mask = create_mask_image(polygons=vertices, labels=color, shape=imgsz)

    if downsample_ratio > 1:
        target_h = imgsz[0] // downsample_ratio
        target_w = imgsz[1] // downsample_ratio
        # resize takes its target size as (width, height).
        mask = resize(mask, (target_w, target_h), backend=backend)
    return mask
1482
+
1483
+
1484
def polygons2masks(
    imgsz: Tuple[int, int],
    segments: List[np.ndarray],
    downsample_ratio: int = 1,
    backend: str = "hal"
) -> np.ndarray:
    """
    Create one mask per polygon for instance segmentation.

    Parameters
    ----------
    imgsz: Tuple[int, int]
        The size of the image as (height, width).
    segments: List[np.ndarray]
        The polygons. Each one is flattened and rasterized on its own
        through ``polygon2mask``.
    downsample_ratio: int, optional
        Factor by which to downsample each mask.
    backend: str
        Specify the backend library for resizing the image from the options
        "hal", "opencv", "pillow".

    Returns
    -------
    np.ndarray
        The stacked masks, one per polygon. When there are no segments,
        a single all-zero int32 mask of shape (1, height, width) is
        returned instead.
    """
    if len(segments) == 0:
        return np.zeros((1, imgsz[0], imgsz[1]), dtype=np.int32)

    masks = []
    for segment in segments:
        masks.append(
            polygon2mask(imgsz, [segment.reshape(-1)],
                         downsample_ratio=downsample_ratio,
                         backend=backend))
    return np.array(masks)
1522
+
1523
+
1524
def create_mask_image(
    polygons: Union[list, np.ndarray],
    labels: Union[list, np.ndarray, int],
    shape: tuple
) -> np.ndarray:
    """
    Draw a list of polygons onto a single 8-bit mask image.

    Parameters
    ----------
    polygons: Union[list, np.ndarray]
        This contains the polygon points. Ex.
        [[[x1,y1], [x2,y2], ... ,[xn,yn]], [...], ...]
    labels: Union[list, np.ndarray, int]
        The integer label used to draw each polygon. When a single
        scalar is supplied, it is applied to all the polygons.
    shape: tuple
        This is the shape (height, width) of the mask.

    Returns
    -------
    np.ndarray
        The 2D mask image with the (height, width) specified, where each
        polygon is filled and outlined with its label.
    """
    # PIL image sizes are given as (width, height).
    image = Image.new('L', (shape[1], shape[0]), 0)
    drawer = ImageDraw.Draw(image)

    if isinstance(polygons, np.ndarray):
        polygons = polygons.tolist()
    if isinstance(labels, (int, np.ScalarType)):
        # Broadcast the single label to every polygon.
        labels = np.full(len(polygons), labels, dtype=np.int32)

    for label, points in zip(labels, polygons):
        vertices = list(map(tuple, points))  # requires a list of tuples.
        if len(vertices) < 2:
            continue
        value = int(label)
        drawer.polygon(vertices, outline=value, fill=value)
    return np.array(image)
1561
+
1562
+
1563
def create_binary_mask(mask: np.ndarray) -> np.ndarray:
    """
    Collapse a class mask into a foreground mask.

    Every value greater than zero becomes 1, regardless of its class;
    all remaining values are copied from the input.

    Parameters
    ----------
    mask: np.ndarray
        2D array mask of class labels unique to each object.

    Returns
    -------
    np.ndarray
        A new 2D mask where every object pixel is 1.
    """
    is_object = mask > 0
    binary = np.where(is_object, 1, mask)
    return binary
1580
+
1581
+
1582
def create_mask_class(mask: np.ndarray, cls: int) -> np.ndarray:
    """
    Separates a mask with more than one classes into an individual
    mask of 1's and 0's where 1 represents the specified class and
    0 represents other classes including background.

    The selection is a single equality test, so it is also correct for
    ``cls == 0``: only the pixels equal to 0 become 1. A new array is
    returned; the input mask is not modified.

    Parameters
    ----------
    mask: np.ndarray
        Multiclass mask of class labels unique to each object.
    cls: int
        The integer representing the class in the mask
        to keep as a value of 1. The other classes will be treated as
        0's.

    Returns
    -------
    np.ndarray
        Binary 2D mask of 1's and 0's.
    """
    mask = np.asarray(mask)
    # zeros_like keeps the mask dtype in the result for integer masks.
    return np.where(mask == cls, 1, np.zeros_like(mask))
1605
+
1606
+
1607
def create_mask_classes(
    new_mask: np.ndarray,
    cls: int,
    current_mask: np.ndarray = None
) -> np.ndarray:
    """
    Relabel a binary mask with a class and merge it into a multiclass
    mask.

    The 1's of the new mask are replaced by ``cls``. When a current
    mask is supplied, the relabelled mask is added to it element-wise
    (pixels set in both masks therefore hold the sum of their values).

    Parameters
    ----------
    new_mask: np.ndarray
        The binary (0, 1) 2D mask to relabel.
    cls: int
        Class representing the 1's in the new mask. This is the class
        to merge into the current mask.
    current_mask: (height, width) np.ndarray
        Current multiclass mask. Optional.

    Returns
    -------
    np.ndarray
        The relabelled mask, added to the current mask when one is
        provided.
    """
    labelled = np.where(new_mask == 1, cls, new_mask)
    if current_mask is None:
        return labelled
    return np.add(current_mask, labelled)
1638
+
1639
+
1640
def create_mask_background(mask: np.ndarray) -> np.ndarray:
    """
    Build the binary mask of the background class.

    The labels are inverted: pixels equal to 0 (background) become 1
    and every non-zero pixel (any object class) becomes 0.

    Parameters
    ----------
    mask: np.ndarray
        Multiclass mask array representing each image pixels.

    Returns
    -------
    np.ndarray
        A new binary mask of 1's and 0's, where 1's is background and
        objects are 0's
    """
    is_object = mask != 0
    background = np.ones_like(mask)
    return np.where(is_object, 0, background)
1662
+
1663
+
1664
def convert_to_serializable(obj: Any):
    """
    Recursively convert NumPy types to
    Python-native types for JSON serialization.

    Non-finite floats (NaN, +inf, -inf) are replaced by 0 because they
    are not valid JSON values. This applies uniformly to native floats,
    NumPy float scalars, and the elements of floating point arrays.
    Dictionaries, lists, and tuples are converted recursively; tuples
    are returned as lists, which is how JSON represents them.

    Parameters
    ----------
    obj: Any
        Any NumPy type, native type, or a container of them.

    Returns
    -------
    obj
        The object with a native
        python type representation.
    """
    if isinstance(obj, np.ndarray):
        if np.issubdtype(obj.dtype, np.floating):
            # Scrub NaN/inf in one vectorized pass before converting.
            obj = np.where(np.isfinite(obj), obj, 0)
        return obj.tolist()

    if isinstance(obj, np.generic):
        # NumPy scalar -> closest native type (float, int, bool, str, ...).
        obj = obj.item()

    if isinstance(obj, float) and not math.isfinite(obj):
        return 0
    if isinstance(obj, dict):
        return {k: convert_to_serializable(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [convert_to_serializable(i) for i in obj]
    return obj