edgefirst-validator 4.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepview/modelpack/utils/argmax.py +16 -0
- edgefirst/validator/__init__.py +1 -0
- edgefirst/validator/__main__.py +375 -0
- edgefirst/validator/datasets/__init__.py +118 -0
- edgefirst/validator/datasets/cache.py +296 -0
- edgefirst/validator/datasets/core.py +250 -0
- edgefirst/validator/datasets/darknet.py +446 -0
- edgefirst/validator/datasets/database.py +1067 -0
- edgefirst/validator/datasets/instance/__init__.py +4 -0
- edgefirst/validator/datasets/instance/core.py +222 -0
- edgefirst/validator/datasets/instance/detection.py +145 -0
- edgefirst/validator/datasets/instance/multitask.py +80 -0
- edgefirst/validator/datasets/instance/segmentation.py +120 -0
- edgefirst/validator/datasets/utils/fetch.py +682 -0
- edgefirst/validator/datasets/utils/readers.py +425 -0
- edgefirst/validator/datasets/utils/transformations.py +1695 -0
- edgefirst/validator/evaluators/__init__.py +17 -0
- edgefirst/validator/evaluators/callbacks/__init__.py +3 -0
- edgefirst/validator/evaluators/callbacks/core.py +192 -0
- edgefirst/validator/evaluators/callbacks/plots.py +900 -0
- edgefirst/validator/evaluators/callbacks/studio.py +234 -0
- edgefirst/validator/evaluators/core.py +257 -0
- edgefirst/validator/evaluators/detection.py +749 -0
- edgefirst/validator/evaluators/multitask.py +270 -0
- edgefirst/validator/evaluators/parameters/__init__.py +53 -0
- edgefirst/validator/evaluators/parameters/core.py +554 -0
- edgefirst/validator/evaluators/parameters/dataset.py +239 -0
- edgefirst/validator/evaluators/parameters/model.py +338 -0
- edgefirst/validator/evaluators/parameters/validation.py +528 -0
- edgefirst/validator/evaluators/segmentation.py +729 -0
- edgefirst/validator/evaluators/utils/__init__.py +3 -0
- edgefirst/validator/evaluators/utils/classify.py +292 -0
- edgefirst/validator/evaluators/utils/match.py +262 -0
- edgefirst/validator/evaluators/utils/timer.py +132 -0
- edgefirst/validator/metrics/__init__.py +9 -0
- edgefirst/validator/metrics/data/__init__.py +7 -0
- edgefirst/validator/metrics/data/label.py +668 -0
- edgefirst/validator/metrics/data/metrics.py +759 -0
- edgefirst/validator/metrics/data/plots.py +476 -0
- edgefirst/validator/metrics/data/stats.py +507 -0
- edgefirst/validator/metrics/detection.py +595 -0
- edgefirst/validator/metrics/segmentation.py +173 -0
- edgefirst/validator/metrics/utils/math.py +717 -0
- edgefirst/validator/publishers/__init__.py +3 -0
- edgefirst/validator/publishers/console.py +147 -0
- edgefirst/validator/publishers/studio.py +128 -0
- edgefirst/validator/publishers/tensorboard.py +119 -0
- edgefirst/validator/publishers/utils/logger.py +111 -0
- edgefirst/validator/publishers/utils/table.py +403 -0
- edgefirst/validator/runners/__init__.py +8 -0
- edgefirst/validator/runners/core.py +727 -0
- edgefirst/validator/runners/deepviewrt.py +177 -0
- edgefirst/validator/runners/hailo.py +263 -0
- edgefirst/validator/runners/keras.py +150 -0
- edgefirst/validator/runners/kinara.py +265 -0
- edgefirst/validator/runners/offline.py +228 -0
- edgefirst/validator/runners/onnx.py +241 -0
- edgefirst/validator/runners/processing/decode.py +320 -0
- edgefirst/validator/runners/processing/dvapi.py +4192 -0
- edgefirst/validator/runners/processing/nms.py +637 -0
- edgefirst/validator/runners/processing/outputs.py +507 -0
- edgefirst/validator/runners/tensorrt.py +321 -0
- edgefirst/validator/runners/tflite.py +221 -0
- edgefirst/validator/validate.py +843 -0
- edgefirst/validator/visualize/__init__.py +3 -0
- edgefirst/validator/visualize/detection.py +623 -0
- edgefirst/validator/visualize/segmentation.py +281 -0
- edgefirst/validator/visualize/utils/plots.py +635 -0
- edgefirst_validator-4.2.1.dist-info/METADATA +111 -0
- edgefirst_validator-4.2.1.dist-info/RECORD +73 -0
- edgefirst_validator-4.2.1.dist-info/WHEEL +5 -0
- edgefirst_validator-4.2.1.dist-info/entry_points.txt +2 -0
- edgefirst_validator-4.2.1.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,1695 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module contains functions for transforming dataset artifacts.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import math
|
|
8
|
+
import numbers
|
|
9
|
+
from io import BytesIO
|
|
10
|
+
from typing import TYPE_CHECKING, Union, Tuple, Any, List, Callable
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
from PIL import Image, ImageDraw, ExifTags
|
|
14
|
+
|
|
15
|
+
# Label synonyms (keys) mapped to the name used as their common
# representation (values).
COCO_LABEL_SYNC = {
    "motorbike": "motorcycle",
    "aeroplane": "airplane",
    "sofa": "couch",
    "pottedplant": "potted plant",
    "diningtable": "dining table",
    "tvmonitor": "tv"
}
|
|
24
|
+
|
|
25
|
+
# Create a single module-wide HAL image converter when edgefirst_python
# can be imported; otherwise CONVERTER is left as None.
try:
    import edgefirst_python  # type: ignore
    CONVERTER = edgefirst_python.ImageConverter()
except ImportError:
    CONVERTER = None
|
|
30
|
+
|
|
31
|
+
if TYPE_CHECKING:
|
|
32
|
+
from edgefirst_python import TensorImage # type: ignore
|
|
33
|
+
|
|
34
|
+
# Functions for Sensor Transformations
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def bgr2rgb(image: np.ndarray) -> np.ndarray:
    """
    Swap the channel order of a BGR image to obtain an RGB image.

    Parameters
    ----------
    image: (height, width, 3) np.ndarray
        The image array in BGR channel order.

    Returns
    -------
    np.ndarray
        The image with its third axis reversed. The result is a slice
        of the input array, not a copy.
    """
    reversed_channels = slice(None, None, -1)
    return image[:, :, reversed_channels]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def rgb2bgr(image: np.ndarray) -> np.ndarray:
    """
    Swap the channel order of an RGB image to obtain a BGR image.

    Reversing the channel axis is its own inverse, so this performs the
    same operation as the BGR to RGB conversion.

    Parameters
    ----------
    image: (height, width, 3) np.ndarray
        The image array in RGB channel order.

    Returns
    -------
    np.ndarray
        The image with its third axis reversed. The result is a slice
        of the input array, not a copy.
    """
    return image[:, :, ::-1]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def rgb2yuyv(image: np.ndarray, backend: str = "hal") -> np.ndarray:
    """
    Convert an RGB image into the YUYV format.

    Parameters
    ----------
    image: np.ndarray
        The 3-channel RGB image NumPy array.
    backend: str
        The backend library to use for this conversion. "hal" uses the
        EdgeFirst Tensor API; any other value uses OpenCV.

    Returns
    -------
    np.ndarray
        The 2-channel YUYV image array.

    Raises
    ------
    ImportError
        If the library required by the selected backend is missing.
    """

    if backend != "hal":
        # Every backend other than "hal" is served by OpenCV.
        try:
            import cv2
        except ImportError:
            raise ImportError(
                "OpenCV is needed to perform RGB to YUYV conversion.")
        return cv2.cvtColor(image, cv2.COLOR_RGB2YUV_YUY2)

    try:
        import edgefirst_python  # type: ignore
    except ImportError:
        raise ImportError(
            "EdgeFirst HAL is needed to perform RGB to YUYV conversion.")

    rows, cols, _ = image.shape

    # Load the pixels into an RGB tensor image.
    source = edgefirst_python.TensorImage(
        cols, rows, fourcc=edgefirst_python.FourCC.RGB)
    source.copy_from_numpy(image)

    # Convert into a YUYV tensor image of the same dimensions.
    target = edgefirst_python.TensorImage(
        cols, rows, fourcc=edgefirst_python.FourCC.YUYV)
    CONVERTER.convert(source, target)

    # Copy the converted pixels back out into a NumPy array.
    yuyv = np.zeros((target.height, target.width, 2), dtype=np.uint8)
    target.normalize_to_numpy(yuyv)
    return yuyv
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def yuyv2rgb(image: np.ndarray, backend: str = "hal") -> np.ndarray:
    """
    Convert a YUYV image into the RGB format.

    Parameters
    ----------
    image: np.ndarray
        The input 2-channel YUYV image.
    backend: str
        The backend library to use for this conversion. "hal" uses the
        EdgeFirst Tensor API; any other value uses OpenCV.

    Returns
    -------
    np.ndarray
        The output 3-channel RGB image.

    Raises
    ------
    ImportError
        If the library required by the selected backend is missing.
    """

    if backend != "hal":
        # Every backend other than "hal" is served by OpenCV.
        try:
            import cv2
        except ImportError:
            raise ImportError(
                "OpenCV is needed to perform YUYV to RGB conversion.")
        return cv2.cvtColor(image, cv2.COLOR_YUV2RGB_YUY2)

    try:
        import edgefirst_python  # type: ignore
    except ImportError:
        raise ImportError(
            "EdgeFirst HAL is needed to perform YUYV to RGB conversion.")

    rows, cols, _ = image.shape

    # Load the pixels into a YUYV tensor image.
    source = edgefirst_python.TensorImage(
        cols, rows, fourcc=edgefirst_python.FourCC.YUYV)
    source.copy_from_numpy(image)

    # Convert into an RGB tensor image of the same dimensions.
    target = edgefirst_python.TensorImage(
        cols, rows, fourcc=edgefirst_python.FourCC.RGB)
    CONVERTER.convert(source, target)

    # Copy the converted pixels back out into a NumPy array.
    rgb = np.zeros((target.height, target.width, 3), dtype=np.uint8)
    target.normalize_to_numpy(rgb)
    return rgb
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def rgb2rgba(image: np.ndarray, backend: str = "hal") -> np.ndarray:
    """
    Convert a 3-channel RGB image to 4-channel RGBA image.

    Parameters
    ----------
    image: np.ndarray
        The 3-channel RGB image array, either channels-last
        (height, width, 3) or channels-first (3, height, width).
        When both the first and the last axis have length 3, the array
        is treated as channels-last.
    backend: str
        The backend library to use for this conversion. "hal" uses the
        EdgeFirst Tensor API; any other value uses NumPy.

    Returns
    -------
    np.ndarray
        The 4-channel RGBA image array with the alpha value set to 255.
        With the NumPy backend the alpha plane is appended on the
        channel axis of the input layout. Arrays that are not
        3-dimensional or do not have 3 channels are returned unchanged.

    Raises
    ------
    ImportError
        If the "hal" backend is selected and EdgeFirst HAL is missing.
    """

    # Anything that is not a 3-D, 3-channel array is passed through.
    if image.ndim != 3:
        return image

    if image.shape[-1] == 3:
        channels_first = False
        height, width, _ = image.shape
    elif image.shape[0] == 3:
        channels_first = True
        _, height, width = image.shape
    else:
        return image

    if backend == "hal":
        try:
            import edgefirst_python  # type: ignore
        except ImportError:
            raise ImportError(
                "EdgeFirst HAL is needed to perform RGB to RGBA conversion.")

        # NOTE(review): the array is handed to the HAL as-is even when it
        # is channels-first; confirm copy_from_numpy accepts that layout.
        src = edgefirst_python.TensorImage(
            width, height, fourcc=edgefirst_python.FourCC.RGB)
        src.copy_from_numpy(image)

        dst = edgefirst_python.TensorImage(
            width, height, fourcc=edgefirst_python.FourCC.RGBA)
        CONVERTER.convert(src, dst)

        im = np.zeros((dst.height, dst.width, 4), dtype=np.uint8)
        dst.normalize_to_numpy(im)
        return im

    # NumPy fallback: append an opaque alpha plane on the channel axis.
    if channels_first:
        alpha_channel = np.full((1, height, width), 255, dtype=np.uint8)
        return np.concatenate((image, alpha_channel), axis=0)

    alpha_channel = np.full((height, width, 1), 255, dtype=np.uint8)
    return np.concatenate((image, alpha_channel), axis=-1)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def imagenet(image: np.ndarray) -> np.ndarray:
    """
    Normalize the image with imagenet normalization.

    Each channel is scaled from [0, 255] to [0, 1], then the per-channel
    mean is subtracted and the result is divided by the per-channel
    standard deviation.

    Parameters
    ----------
    image: np.ndarray
        The image RGB array with shape
        (3, height, width) or (height, width, 3).

    Returns
    -------
    np.ndarray
        A new array holding the image with imagenet normalization. The
        input array is not modified. Floating point inputs keep their
        dtype; any other dtype is promoted to float32 so the normalized
        values are not truncated.

    Raises
    ------
    ValueError
        If the image is not 3-dimensional or has more than 3 channels.
    """
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    if image.ndim != 3:
        raise ValueError(
            "Expected a 3-dimensional image, got shape "
            f"{image.shape}.")

    # A leading axis of length 3 marks a channels-first layout.
    channel_axis = 0 if image.shape[0] == 3 else 2
    channels = image.shape[channel_axis]
    if channels > mean.size:
        raise ValueError(
            f"Expected at most {mean.size} channels, got {channels}.")

    if np.issubdtype(image.dtype, np.floating):
        dtype = image.dtype
    else:
        dtype = np.float32

    # Reshape the statistics so they broadcast along the channel axis.
    stat_shape = [1, 1, 1]
    stat_shape[channel_axis] = channels
    mean = mean[:channels].reshape(stat_shape).astype(dtype)
    std = std[:channels].reshape(stat_shape).astype(dtype)

    return (image.astype(dtype) / 255 - mean) / std
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def image_normalization(
    image: np.ndarray,
    normalization: str,
    input_type: np.dtype = np.float32
):
    """
    Apply the requested pixel normalization and cast to the input type.

    Parameters
    ----------
    image: np.ndarray
        The image to perform normalization.
    normalization: str
        The type of normalization to perform, matched case-insensitively:
        "signed" maps pixels with ``x / 127.5 - 1``, "unsigned" maps
        pixels with ``x / 255``, "imagenet" applies the imagenet
        normalization. Any other value (such as "raw") leaves the
        pixel values as they are.
    input_type: np.dtype
        The NumPy datatype to cast the result to. Ex. np.uint8

    Returns
    -------
    np.ndarray
        The normalized image cast to `input_type`.
    """
    mode = normalization.lower()

    if mode == 'signed':
        scaled = (image.astype(np.float32) / 127.5) - 1.0
    elif mode == 'unsigned':
        scaled = image.astype(np.float32) / 255.0
    elif mode == 'imagenet':
        scaled = imagenet(image.astype(np.float32))
    else:
        # No scaling requested; only the final cast is applied.
        scaled = image

    return scaled.astype(input_type)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def crop_image(image: np.ndarray, box: Union[list, np.ndarray]) -> np.ndarray:
    """
    Cut out the region of the image covered by the bounding box.
    This is primarily used in pose validation.

    Parameters
    ----------
    image: np.ndarray
        The frame to crop before feeding to the model.
    box: Union[list, np.ndarray]
        This contains non-normalized [xmin, ymin, xmax, ymax].

    Returns
    -------
    np.ndarray
        The image cropped to the area of the bounding box. Rows are
        selected by the y range and columns by the x range; any
        remaining axes are kept whole.
    """
    xmin, ymin, xmax, ymax = box
    rows = slice(ymin, ymax)
    cols = slice(xmin, xmax)
    return image[rows, cols, ...]
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def rotate_image(data: Union[bytes, str]) -> Image.Image:
    """
    Open an image and rotate it according to its EXIF orientation.

    Parameters
    ----------
    data: Union[bytes, str]
        Read image file as a bytes object or a string path
        to the image file.

    Returns
    -------
    Image.Image
        The pillow Image with rotation applied. Orientation values
        3, 6 and 8 are rotated by 180, 270 and 90 degrees; any other
        orientation value returns the opened image as-is. When the
        image carries no EXIF data or no orientation tag, the image
        is returned converted to RGB.
    """
    if isinstance(data, bytes):
        data = BytesIO(data)

    # The image is opened once and reused on every path, so no second
    # file handle is created for images without EXIF data.
    image = Image.open(data)

    # Numeric id of the EXIF "Orientation" tag.
    orientation_tag = 0x0112

    try:
        # NOTE(review): _getexif is a private Pillow method that only some
        # image plugins define; it is kept so the same formats as before
        # are treated as carrying EXIF data.
        exif = image._getexif()
    except (AttributeError, KeyError, IndexError):
        exif = None

    if not exif or orientation_tag not in exif:
        return image.convert('RGB')

    orientation = exif[orientation_tag]
    if orientation == 3:
        image = image.transpose(Image.ROTATE_180)
    elif orientation == 6:
        image = image.transpose(Image.ROTATE_270)
    elif orientation == 8:
        image = image.transpose(Image.ROTATE_90)
    return image
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def resize(
    image: Union[TensorImage, np.ndarray],
    size: tuple = None,
    backend: str = "hal"
) -> np.ndarray:
    """
    Resize the image to the requested dimensions without preserving
    the original aspect ratio. Image needs to be uint8.

    Parameters
    ----------
    image: Union[edgefirst_python.TensorImage, np.ndarray]
        The image (RGB, RGBA, Gray) tensor with uint8 dtype.
    size: tuple
        Specify the (width, height) size of the new image. When None,
        the image is returned unchanged.
    backend: str
        Specify the backend library for resizing the image from the
        options "hal", "opencv", "pillow". Any value other than "hal"
        or "opencv" uses pillow.

    Returns
    -------
    np.ndarray
        Resized image.

    Raises
    ------
    ImportError
        If the library required by the selected backend is missing.
    """
    if size is None:
        return image

    if backend == "opencv":
        try:
            import cv2  # type: ignore
        except ImportError:
            raise ImportError("OpenCV is needed to resize using opencv.")

        return cv2.resize(image, size, interpolation=cv2.INTER_LINEAR)

    if backend != "hal":
        # Pillow handles every remaining backend value.
        pil_image = Image.fromarray(np.uint8(image))
        pil_image = pil_image.resize(size)
        return np.array(pil_image)

    try:
        import edgefirst_python  # type: ignore
    except ImportError:
        raise ImportError(
            "EdgeFirst HAL is needed to resize using hal.")

    if isinstance(image, np.ndarray):
        # Array without any channels is assumed to be grey.
        if len(image.shape) == 2:
            image = np.expand_dims(image, axis=-1)

        depth = image.shape[-1]
        if depth == 1:
            src_format = edgefirst_python.FourCC.GREY
            dst_format = edgefirst_python.FourCC.GREY
            channels = 1
        elif depth == 4:
            src_format = edgefirst_python.FourCC.RGBA
            dst_format = edgefirst_python.FourCC.RGBA
            channels = 4
        else:
            # Currently OpenGL in x86_64 only supports RGBA, so an RGB
            # source is converted into an RGBA destination.
            src_format = edgefirst_python.FourCC.RGB
            dst_format = edgefirst_python.FourCC.RGBA
            channels = 4

        rows, cols, _ = image.shape
        src = edgefirst_python.TensorImage(cols, rows, fourcc=src_format)
        src.copy_from_numpy(image)
    else:
        src = image
        # Currently OpenGL in x86_64 only supports RGBA.
        if src.format == edgefirst_python.FourCC.RGB:
            dst_format = edgefirst_python.FourCC.RGBA
        else:
            dst_format = src.format
        channels = 1 if dst_format == edgefirst_python.FourCC.GREY else 4

    dst = edgefirst_python.TensorImage(size[0], size[1], fourcc=dst_format)
    CONVERTER.convert(src, dst)

    resized = np.zeros((dst.height, dst.width, channels), dtype=np.uint8)
    dst.normalize_to_numpy(resized)

    # Return the result in the layout of the source image.
    if src.format == edgefirst_python.FourCC.GREY:
        return resized.squeeze()
    if src.format == edgefirst_python.FourCC.RGB:
        # Drop the alpha channel added for the RGBA destination.
        return resized[:, :, 0:3]
    return resized
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def pad(
    image: np.ndarray,
    input_size: tuple,
    backend: str = "hal"
) -> Tuple[np.ndarray, list]:
    """
    Performs image padding based on the implementation provided in YOLOx:\
    https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/data/data_augment.py#L142

    The image is resized to fit inside `input_size` while keeping its
    aspect ratio and is placed in the top-left corner, so it is always
    padded on the right and at the bottom portions with the value 114.
    Color images are returned with their channel order reversed
    (RGB to BGR); 2-dimensional images are returned without a channel
    swap.

    Parameters
    ----------
    image: np.ndarray
        This is the input image to pad.
    input_size: tuple
        This is the model input size (generally) or the output image
        resolution after padding in the order (height, width).
    backend: str
        Specify the backend library for resizing the image from the options
        "hal", "opencv", "pillow".

    Returns
    --------
    image: np.ndarray
        This is the padded image.
    shapes: list
        This is used to scale the bounding boxes of the ground
        truth and the model detections based on the padding
        transformation.
        [[pad image height, pad image width],
        [[resized height / input height, resized width / input width],
        [pad w, pad h]]].
    """
    height, width = image.shape[:2]  # current shape [height, width]
    is_color = len(image.shape) == 3
    if is_color:
        padded_image = np.full(
            (input_size[0], input_size[1], 3), 114, dtype=np.uint8)
    else:
        padded_image = np.full(input_size, 114, dtype=np.uint8)

    r = min(input_size[0] / height, input_size[1] / width)
    resized_height, resized_width = int(height * r), int(width * r)
    resized_image = resize(
        image, (resized_width, resized_height), backend=backend
    )
    padded_image[:resized_height, :resized_width] = resized_image

    # The channel swap indexes three axes, so it only applies to color
    # images; a 2-dimensional canvas has no channel axis to reverse.
    if is_color:
        padded_image = rgb2bgr(padded_image)  # RGB2BGR
    padded_image = np.ascontiguousarray(padded_image)

    # The bounding box offset to add due to image padding, in pixels.
    # NOTE(review): this rounds the scaled size while the resize above
    # truncates it, so the offsets can differ by one pixel from the
    # actual padding - confirm which is intended.
    new_unpad = int(round(height * r)), int(round(width * r))
    dw = padded_image.shape[1] - new_unpad[1]
    dh = padded_image.shape[0] - new_unpad[0]

    shapes = [
        # imgsz (model input shape) [height, width]
        [padded_image.shape[0], padded_image.shape[1]],
        # ratio_pad [[scale y, scale x], [pad w, pad h]]
        [[resized_image.shape[0] / input_size[0],
          resized_image.shape[1] / input_size[1]],
         [dw, dh]]
    ]
    return padded_image, shapes
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def letterbox_native(
    image: np.ndarray,
    new_shape: tuple = (640, 640),
    constant: int = 114,
    backend: str = "hal"
) -> Tuple[np.ndarray, list]:
    """
    Applies the letterbox image transformations based in YOLOv5 and YOLOv7.

    The image is resized to fit inside `new_shape` while keeping its
    aspect ratio and is then centered by padding all four sides.

    Parameters
    ----------
    image : np.ndarray
        Input image array (HWC format) or a 2-dimensional
        (height, width) array.
    new_shape : tuple, optional
        Target shape (height, width) for output image, by default (640, 640).
    constant : int, optional
        Padding pixel value (0–255), by default 114 (gray).
    backend: str
        Specify the backend library for letterboxing the image. The
        value is forwarded to the resize step; the border is added with
        OpenCV when set to "opencv" and with NumPy otherwise.

    Returns
    -------
    image: np.ndarray
        The resized and padded image. With the NumPy border the result
        is uint8 and keeps at most the first three channels of an
        HWC input.
    shapes: list
        This is used to scale the bounding boxes of the ground
        truth and the model detections based on the letterbox
        transformation. List containing padded image size, scale ratio,
        and padding offsets.
        [[pad image height, pad image width],
        [[scale_y, scale_x], [pad x, pad y]]].

    Raises
    ------
    ImportError
        If the "opencv" backend is selected and OpenCV is missing.
    """
    height, width = image.shape[:2]
    scale = min(new_shape[1] / width, new_shape[0] / height)
    new_width = int(round(width * scale))
    new_height = int(round(height * scale))

    if scale != 1.0:
        image = resize(image, (new_width, new_height), backend=backend)

    # Compute padding; any odd pixel goes to the bottom / right side.
    dw, dh = new_shape[1] - new_width, new_shape[0] - new_height  # wh padding
    top = round(dh / 2)
    bottom = dh - top
    left = round(dw / 2)
    right = dw - left

    if backend == "opencv":
        try:
            import cv2  # type: ignore
        except ImportError:
            raise ImportError("OpenCV is needed for letterbox.")

        padded_image = cv2.copyMakeBorder(
            image, top, bottom, left, right, cv2.BORDER_CONSTANT,
            value=(constant, constant, constant))  # add border
    else:
        if image.ndim == 2:
            pad_width = ((top, bottom), (left, right))
        else:
            # Only the first three channels are carried into the output.
            image = image[:, :, :3]
            pad_width = ((top, bottom), (left, right), (0, 0))

        # Pad the spatial axes in one call; the channel axis is untouched.
        padded_image = np.pad(
            image, pad_width,
            mode='constant', constant_values=constant
        ).astype(np.uint8, copy=False)

    shapes = [
        # imgsz (model input shape) [height, width]
        [padded_image.shape[0], padded_image.shape[1]],
        # ratio_pad [[scale y, scale x], [pad w, pad h]]
        [[scale, scale], [left, top]]
    ]
    return padded_image, shapes
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def letterbox_hal(
    image: TensorImage,
    dst: TensorImage,
) -> list:
    """
    Letterbox a tensor image into the destination tensor using HAL.

    The source is scaled by the largest ratio that fits it inside the
    destination and is centered there; the remaining area is filled
    with the color [114, 114, 114, 255].

    Parameters
    ----------
    image: TensorImage
        An RGBA tensor image loaded using the HAL.
    dst: TensorImage
        The destination tensor image after letterbox transformation.
        It is written to by the conversion.

    Returns
    -------
    shapes: list
        This is used to scale the bounding boxes of the ground
        truth and the model detections based on the letterbox
        transformation. List containing padded image size, scale ratio,
        and padding offsets.
        [[pad image height, pad image width],
        [[scale_y, scale_x], [pad x, pad y]]].

    Raises
    ------
    ImportError
        If EdgeFirst HAL is missing.
    """

    try:
        import edgefirst_python  # type: ignore
    except ImportError:
        raise ImportError(
            "EdgeFirst HAL is needed to perform letterbox using hal.")

    ratio = min(dst.height / image.height, dst.width / image.width)
    scaled_height = image.height * ratio
    scaled_width = image.width * ratio

    # Offsets are derived from the unrounded scaled size.
    top = round((dst.height - scaled_height) / 2)
    left = round((dst.width - scaled_width) / 2)

    region = edgefirst_python.Rect(
        left, top, round(scaled_width), round(scaled_height))
    CONVERTER.convert(image, dst,
                      dst_crop=region,
                      dst_color=[114, 114, 114, 255])

    # imgsz (model input shape) [height, width]
    input_size = [dst.height, dst.width]
    # ratio_pad [[scale y, scale x], [pad w, pad h]]
    ratio_pad = [[ratio, ratio], [left, top]]
    return [input_size, ratio_pad]
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def preprocess_hal(
    image: TensorImage,
    shape: tuple,
    input_type: np.dtype,
    dst: TensorImage,
    transpose: bool = False,
    input_tensor: Callable = None,
    preprocessing: str = "letterbox",
    normalization: str = "unsigned",
    quantization: tuple = None,
    visualize: bool = False
) -> Tuple[np.ndarray, np.ndarray, list, tuple]:
    """
    Optimized input preprocessing using the HAL.

    The source image is letterboxed or resized into `dst`, then the
    contents of `dst` are normalized into a NumPy array (or directly
    into the model input tensor when `input_tensor` is given).

    Parameters
    ----------
    image: TensorImage
        The image input to preprocess.
    shape: tuple
        The model input shape. This can either be formatted as
        (batch size, channels, height, width) or
        (batch size, height, width, channels). The shape is treated as
        channels-last when its last entry is 2, 3, or 4.
    input_type: np.dtype
        The input datatype of the model.
    dst: TensorImage
        Destination tensor for placing the image transformations.
    transpose: bool
        Condition of whether to transpose the image or not. This
        is True for input shapes with channels first. Otherwise it is False.
    input_tensor: Callable
        Callable function for retrieving the input view tensor
        from the model for directly copying the input tensor
        into the model such as the case for TFLite.
    preprocessing: str
        The type of image preprocessing to apply. By default 'letterbox'
        is used. 'pad' is not implemented for the HAL; any other value
        performs a plain resize into `dst`.
    normalization: str
        The type of image normalization to apply. Default is set to
        'unsigned'. However 'signed' and 'raw' are possible values;
        'imagenet' is not implemented for the HAL. The value is only
        honored for float16 / float32 inputs; other input types use
        the HAL default normalization.
    quantization: tuple
        The quantization parameters of the input containing
        the (scale, zero point) values. Only the zero point is read,
        and only for int8 inputs.
    visualize: bool
        When visualizing the model outputs, this requires a second
        copy of the transformed image. By default,
        visualization is set to False.

    Returns
    -------
    image: np.ndarray
        The image input after being preprocessed, with a leading
        batch axis added. When `input_tensor` is given, the pixels are
        written into the model tensor instead and this array is left
        zero-filled.
    visual_image: np.ndarray
        The image that is used for visualization post
        letterbox, padding, resize transformations. This is a
        (height, width, 3) uint8 array, or None when `visualize`
        is False.
    shapes: list
        This is used to scale the bounding boxes of the ground
        truth and the model detections based on the letterbox/padding
        transformation.

        .. code-block:: python

            [[input_height, input_width],
            [[scale_y, scale_x], [pad_w, pad_h]]]
    image_shape: tuple
        The original image dimensions as (height, width).

    Raises
    ------
    ImportError
        If EdgeFirst HAL is missing.
    NotImplementedError
        If 'pad' preprocessing is requested, or 'imagenet'
        normalization is requested for a float input type.
    """

    try:
        import edgefirst_python  # type: ignore
    except ImportError:
        raise ImportError(
            "EdgeFirst HAL is needed to perform preprocessing using hal.")

    # Fetch only (height, width) from the shape.
    # Format for YUYV, RGB, and RGBA
    if shape[-1] in [2, 3, 4]:
        channels = shape[-1]
        shape = shape[1:3]
    else:
        channels = shape[1]
        shape = shape[2:4]

    height, width = image.height, image.width
    # Default scaling information for a plain resize; it is replaced
    # below when letterbox preprocessing is used.
    shapes = [
        # imgsz (model input shape) [height, width]
        [int(shape[0]), int(shape[1])],
        [[float(shape[0] / height), float(shape[1] / width)],
         [0.0, 0.0]]  # ratio_pad [image_scale, [pad w, pad h]]
    ]

    if preprocessing == "letterbox":
        shapes = letterbox_hal(image, dst)
    elif preprocessing == "pad":
        raise NotImplementedError("Padding with HAL is not yet implemented.")
    else:
        # Any other value resizes the image straight into dst.
        CONVERTER.convert(image, dst)

    # Allocate the output array in the layout the model expects.
    if transpose:
        image = np.zeros([channels, dst.height, dst.width], dtype=input_type)
    else:
        image = np.zeros([dst.height, dst.width, channels], dtype=input_type)

    # The requested normalization is only mapped for floating point
    # inputs; every other input type uses the HAL default.
    if input_type in [np.float16, np.float32]:
        if normalization == "unsigned":
            normalization = edgefirst_python.Normalization.UNSIGNED
        elif normalization == "signed":
            normalization = edgefirst_python.Normalization.SIGNED
        elif normalization == "raw":
            normalization = edgefirst_python.Normalization.RAW
        elif normalization == "imagenet":
            raise NotImplementedError(
                "ImageNet normalization is currently not implemented in HAL.")
        else:
            normalization = edgefirst_python.Normalization.DEFAULT
    else:
        normalization = edgefirst_python.Normalization.DEFAULT

    # A zero point is only passed on for int8 inputs, using the
    # absolute value of the last quantization entry.
    zero_point = None
    if quantization is not None:
        if input_type == np.int8:
            zero_point = abs(quantization[-1])
    # Directly copy the input tensor into the model for TFLite.
    if input_tensor is not None:
        dst.normalize_to_numpy(input_tensor()[0, :, :, :],
                               normalization=normalization,
                               zero_point=zero_point)
    else:
        # NOTE: PLANAR_RGBA is not yet supported in HAL.
        # Only the first three planes are filled for a channels-first
        # 4-channel input; the fourth plane keeps its zero fill.
        if transpose and channels == 4:
            dst.normalize_to_numpy(image[0:3, :, :], normalization=normalization,
                                   zero_point=zero_point)
        else:
            dst.normalize_to_numpy(image, normalization=normalization,
                                   zero_point=zero_point)

    # Optional second copy of the transformed image for drawing,
    # always returned as (height, width, 3) uint8.
    visual_image = None
    if visualize:
        if transpose:
            visual_image = np.zeros([3, dst.height, dst.width], dtype=np.uint8)
            dst.normalize_to_numpy(visual_image)
            visual_image = np.transpose(visual_image, axes=[1, 2, 0])
        else:
            visual_image = np.zeros([dst.height, dst.width, 3], dtype=np.uint8)
            dst.normalize_to_numpy(visual_image)
    # Add the leading batch axis.
    image = image[None]
    return image, visual_image, shapes, (height, width)
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
def preprocess_native(
    image: np.ndarray,
    shape: tuple,
    input_type: np.dtype,
    transpose: bool = False,
    input_tensor: Callable = None,
    preprocessing: str = "letterbox",
    normalization: str = "unsigned",
    quantization: tuple = None,
    backend: str = "hal",
) -> Tuple[np.ndarray, np.ndarray, list, tuple]:
    """
    Prepare an image for inference: resize/letterbox/pad it to the model
    input resolution, convert its pixel format, and apply normalization
    or quantization offsets.

    Parameters
    ----------
    image: np.ndarray
        The image input to preprocess. Its first two dimensions are
        read as (height, width).
    shape: tuple
        The model input shape, formatted either as
        (batch size, channels, height, width) or
        (batch size, height, width, channels). A last dimension of
        2, 3, or 4 is treated as channels-last.
    input_type: np.dtype
        The input datatype of the model.
    transpose: bool
        If True the image is transposed from (height, width, channels)
        to (channels, height, width).
    input_tensor: Callable
        Optional callable returning the model's input tensor view. When
        given, the preprocessed image is copied into it (e.g. TFLite).
    preprocessing: str
        The resizing strategy: 'letterbox' (default) or 'pad'. Any other
        value falls back to a plain resize.
    normalization: str
        The normalization name forwarded to ``image_normalization`` for
        float16/float32 models. Default is 'unsigned'.
    quantization: tuple
        The input quantization parameters; only the last element
        (zero point) is used, and only for int8 models.
    backend: str
        Backend name forwarded to the resizing and conversion helpers.
        When set to "opencv" the image is first converted from BGR
        to RGB.

    Returns
    -------
    image: np.ndarray
        The preprocessed image with a leading batch dimension.
    visual_image: np.ndarray
        The image after the resize/letterbox/pad step, for visualization.
    shapes: list
        Used to scale ground truth and detection boxes.

        .. code-block:: python

            [[input_height, input_width],
             [[scale_y, scale_x], [pad_w, pad_h]]]
    image_shape: tuple
        The original image (height, width).
    """
    # Work out the channel count and (height, width) of the model input.
    # Covers YUYV (2), RGB (3), and RGBA (4) in channels-last layouts.
    channels_last = shape[-1] in [2, 3, 4]
    if channels_last:
        channel, shape = shape[-1], shape[1:3]
    else:
        channel, shape = shape[1], shape[2:4]

    # Function converting RGB into the pixel format the model expects.
    transformer = (
        rgb2yuyv if channel == 2
        else rgb2rgba if channel == 4
        else None
    )

    height, width = image.shape[0:2]

    # Default for plain resize; letterbox/pad replace this below.
    shapes = [
        shape,  # imgsz (model input shape) [height, width]
        [[shape[0] / height, shape[1] / width],
         [0.0, 0.0]]  # ratio_pad [image_scale, [pad w, pad h]]
    ]

    if backend == "opencv":
        # OpenCV reads images into BGR by default.
        image = bgr2rgb(image)

    if preprocessing == "letterbox":
        image, shapes = letterbox_native(
            image, new_shape=shape, backend=backend)
    elif preprocessing == "pad":
        image, shapes = pad(image, shape, backend=backend)
    else:
        image = resize(image, (shape[1], shape[0]), backend=backend)

    # The padded image has its channels swapped for visualization.
    visual_image = bgr2rgb(image) if preprocessing == "pad" else image

    # Convert image format to either YUYV, RGBA or keep as RGB.
    if transformer:
        image = transformer(image, backend=backend)

    # Channels-first models expect (channel, height, width).
    if transpose:
        image = np.transpose(image, axes=[2, 0, 1])

    # Handle full/half precision input types.
    if input_type in [np.float16, np.float32]:
        image = image_normalization(image, normalization, input_type)

    # For int8 quantized models, shift pixels by the zero point.
    if quantization is not None and input_type == np.int8:
        zero_point = abs(quantization[-1])
        image = (image.astype(np.int16) - zero_point).astype(np.int8)

    # Add the batch dimension.
    image = image[None]

    # Directly copy the input tensor into the model for TFLite.
    if input_tensor is not None:
        np.copyto(input_tensor(), image)

    return image, visual_image, shapes, (height, width)
|
|
894
|
+
|
|
895
|
+
|
|
896
|
+
# Functions for Annotation Transformations
|
|
897
|
+
|
|
898
|
+
def clamp(
    value: Union[float, int],
    min: Union[float, int] = 0,
    max: Union[float, int] = 1
) -> Union[float, int]:
    """
    Restrict a value to the closed interval [min, max].

    Parameters
    ----------
    value: Union[float, int]
        Value to clamp.
    min: Union[float, int]
        Lower bound. Defaults to 0.
    max: Union[float, int]
        Upper bound. Defaults to 1.

    Returns
    -------
    Union[float, int]
        ``min`` if the value is below it, ``max`` if the value is
        above it, otherwise the value unchanged.
    """
    if value < min:
        return min
    if value > max:
        return max
    return value
|
|
923
|
+
|
|
924
|
+
|
|
925
|
+
def standardize_coco_labels(labels: Union[list, np.ndarray]) -> list:
    """
    Map synonyms of COCO labels to the standard COCO labels defined by
    the "COCO_LABEL_SYNC" mapping.

    Parameters
    ----------
    labels: Union[list, np.ndarray]
        The string labels to standardize.

    Returns
    -------
    list
        The labels with every entry that equals a key of
        "COCO_LABEL_SYNC" replaced by the mapped value. Other
        labels are returned unchanged.
    """
    synced_labels = []
    for label in labels:
        # Keys are scanned in mapping order against the running label,
        # so a replacement can itself be remapped by a later key.
        for synonym, standard in COCO_LABEL_SYNC.items():
            if label == synonym:
                label = standard
        synced_labels.append(label)
    return synced_labels
|
|
949
|
+
|
|
950
|
+
|
|
951
|
+
def labels2string(
    int_labels: Union[list, np.ndarray],
    string_labels: Union[list, np.ndarray]
) -> list:
    """
    Convert label indices into their string representations.

    Parameters
    ----------
    int_labels: Union[list, np.ndarray]
        The labels to convert. Numeric entries (or NumPy arrays) are
        used as indices; anything else is passed through unchanged.
    string_labels: Union[list, np.ndarray]
        The unique string labels indexed by the numeric labels.

    Returns
    -------
    list
        A list with one entry per input label.
    """
    index_types = (numbers.Number, np.ndarray)
    return [
        string_labels[int(label)] if isinstance(label, index_types) else label
        for label in int_labels
    ]
|
|
976
|
+
|
|
977
|
+
|
|
978
|
+
def normalize(boxes: np.ndarray, shape: tuple = None) -> np.ndarray:
    """
    Normalizes the boxes to the width and height
    of the image or model input resolution.

    Parameters
    ----------
    boxes: np.ndarray
        Contains bounding boxes to normalize [[boxes1], [boxes2]].
        Lists are accepted and converted to an array. The first four
        values along the last axis are read as (x, y, x, y).
    shape: tuple
        The (height, width) shape of the image to normalize the
        annotations. If None, the boxes are returned untouched.

    Returns
    -------
    np.ndarray
        new x-coordinate = old x-coordinate / width
        new y-coordinate = old y-coordinate / height

        A floating point input array is modified in place and returned.
        Any other input (lists, integer arrays) yields a new float array.
    """
    if shape is None:
        return boxes

    boxes = np.asarray(boxes)
    # In-place true division cannot store its result in an integer
    # array, so promote non-float input to float first.
    if not np.issubdtype(boxes.dtype, np.floating):
        boxes = boxes.astype(np.float64)

    height, width = shape[0], shape[1]
    boxes[..., 0:1] /= width
    boxes[..., 1:2] /= height
    boxes[..., 2:3] /= width
    boxes[..., 3:4] /= height
    return boxes
|
|
1006
|
+
|
|
1007
|
+
|
|
1008
|
+
def denormalize(boxes: np.ndarray, shape: tuple = None) -> np.ndarray:
    """
    Denormalizes the boxes by the width and height of the image
    or model input resolution to get the pixel values of the boxes.

    Parameters
    ----------
    boxes: np.ndarray
        Contains bounding boxes to denormalize [[boxes1], [boxes2]].
        Lists are accepted. The input is never modified.
    shape: tuple
        The (height, width) shape of the image to denormalize the
        annotations. If None, the boxes are returned untouched.

    Returns
    -------
    np.ndarray
        Denormalized set of bounding boxes in pixel values, truncated
        to np.int32.
    """
    if shape is None:
        return boxes

    # Scale a copy: the result is a separate int32 array anyway, so
    # the caller's normalized boxes must not be overwritten.
    pixels = np.array(boxes)
    pixels[..., 0:1] *= shape[1]
    pixels[..., 1:2] *= shape[0]
    pixels[..., 2:3] *= shape[1]
    pixels[..., 3:4] *= shape[0]
    return pixels.astype(np.int32)
|
|
1035
|
+
|
|
1036
|
+
|
|
1037
|
+
def normalize_polygon(vertex: Union[list, np.ndarray], shape: tuple) -> list:
    """
    Normalize a single polygon vertex by the image dimensions.

    Parameters
    ----------
    vertex: Union[list, np.ndarray]
        The [x, y] coordinate of the vertex.
    shape: tuple
        The (height, width) shape of the image.

    Returns
    -------
    list
        The [x / width, y / height] coordinate.
    """
    height, width = shape[0], shape[1]
    x = float(vertex[0])
    y = float(vertex[1])
    return [x / width, y / height]
|
|
1054
|
+
|
|
1055
|
+
|
|
1056
|
+
def denormalize_polygon(vertex: Union[list, np.ndarray], shape: tuple) -> list:
    """
    Convert a normalized polygon vertex into pixel coordinates.

    Parameters
    ----------
    vertex: Union[list, np.ndarray]
        The normalized [x, y] coordinate of the vertex.
    shape: tuple
        The (height, width) shape of the image.

    Returns
    -------
    list
        The [x * width, y * height] coordinate truncated to integers.
    """
    height, width = shape[0], shape[1]
    x_pixel = int(float(vertex[0]) * width)
    y_pixel = int(float(vertex[1]) * height)
    return [x_pixel, y_pixel]
|
|
1073
|
+
|
|
1074
|
+
|
|
1075
|
+
def xcycwh2xyxy(boxes: np.ndarray) -> np.ndarray:
    """
    Converts YOLO (xcycwh) format into PascalVOC (xyxy) format.

    Parameters
    ----------
    boxes: np.ndarray
        Contains lists for each boxes in YOLO format [[boxes1], [boxes2]].
        The box values are read from the last axis, so a single box
        of shape (4,) or batched boxes of shape (..., 4) are accepted,
        consistent with the other box converters in this module.

    Returns
    -------
    np.ndarray
        A new array containing each box in PascalVOC format.
        The input is not modified.
    """
    boxes = np.asarray(boxes)
    centers = boxes[..., 0:2]
    half_sizes = boxes[..., 2:4] / 2
    return np.concatenate([centers - half_sizes, centers + half_sizes],
                          axis=-1)
|
|
1093
|
+
|
|
1094
|
+
|
|
1095
|
+
def xyxy2xcycwh(boxes: np.ndarray) -> np.ndarray:
    """
    Converts PascalVOC (xyxy) into YOLO (xcycwh) format.

    Parameters
    ----------
    boxes: np.ndarray
        Boxes in PascalVOC format [[boxes1], [boxes2]]. The array is
        overwritten in place.

    Returns
    -------
    np.ndarray
        The same array, now holding (x center, y center, width, height).
    """
    # (width, height) is computed into a new array before the corner
    # columns are overwritten.
    extents = boxes[..., 2:4] - boxes[..., 0:2]
    boxes[..., 0:2] = boxes[..., 0:2] + extents / 2
    boxes[..., 2:4] = extents
    return boxes
|
|
1116
|
+
|
|
1117
|
+
|
|
1118
|
+
def xywh2xyxy(boxes: np.ndarray) -> np.ndarray:
    """
    Converts COCO (xywh) format to PascalVOC (xyxy) format.

    Parameters
    ----------
    boxes: np.ndarray
        Boxes in COCO format [[boxes1], [boxes2]]. The array is
        overwritten in place.

    Returns
    -------
    np.ndarray
        The same array, now holding (xmin, ymin, xmax, ymax).
    """
    # Bottom-right corner = top-left corner + (width, height).
    boxes[..., 2:4] = boxes[..., 2:4] + boxes[..., 0:2]
    return boxes
|
|
1135
|
+
|
|
1136
|
+
|
|
1137
|
+
def xyxy2xywh(boxes: np.ndarray) -> np.ndarray:
    """
    Converts PascalVOC (xyxy) format to COCO (xywh) format.

    Parameters
    ----------
    boxes: np.ndarray
        Boxes in PascalVOC format [[boxes1], [boxes2]]. The array is
        overwritten in place.

    Returns
    -------
    np.ndarray
        The same array, now holding (xmin, ymin, width, height).
    """
    # (width, height) = bottom-right corner - top-left corner.
    boxes[..., 2:4] = boxes[..., 2:4] - boxes[..., 0:2]
    return boxes
|
|
1154
|
+
|
|
1155
|
+
|
|
1156
|
+
def scale(
    boxes: np.ndarray,
    w: int = 640,
    h: int = 640,
    padw: int = 0,
    padh: int = 0,
) -> np.ndarray:
    """
    Scale bounding boxes by a width and height and shift them by a
    padding offset, as needed after a letterbox transformation.

    Parameters
    ----------
    boxes: np.ndarray (nx4)
        The bounding boxes, already in xyxy format.
    w: int
        The factor applied to the x-coordinates.
    h: int
        The factor applied to the y-coordinates.
    padw: int
        The offset added to the scaled x-coordinates.
    padh: int
        The offset added to the scaled y-coordinates.

    Returns
    -------
    np.ndarray
        A copy of the boxes with x = w * x + padw and y = h * y + padh.
        The input array is left untouched.
    """
    scaled = np.copy(boxes)
    x_columns, y_columns = [0, 2], [1, 3]
    scaled[..., x_columns] = w * boxes[..., x_columns] + padw  # x1, x2
    scaled[..., y_columns] = h * boxes[..., y_columns] + padh  # y1, y2
    return scaled
|
|
1194
|
+
|
|
1195
|
+
|
|
1196
|
+
def clamp_boxes(boxes: np.ndarray, clamp: int,
                shape: tuple = None) -> np.ndarray:
    """
    Resize bounding boxes that are smaller than the clamp value (in
    pixels) so that their dimensions equal the clamp value.

    Parameters
    ----------
    boxes: np.ndarray
        The bounding boxes to clamp in xyxy format. The array is
        modified in place.
    clamp: int
        The minimum dimension, in pixels, allowed for the width and
        height of a bounding box.
    shape: tuple
        If None (default), the boxes are assumed to be in pixels.
        Otherwise the boxes are assumed to be normalized and this is
        the (height, width) of the image used to convert their sizes
        to pixels for the comparison.

    Returns
    -------
    np.ndarray
        The same array, where every box with a width or height below
        the clamp value has both its width and height set to the clamp
        value, anchored at its top-left corner.

        NOTE(review): a box that is small in only one dimension also
        has its other dimension reset to the clamp value. Confirm
        this is intended.
    """
    if len(boxes) == 0:
        return boxes

    height, width = (1, 1) if shape is None else shape

    # Box sizes in pixels.
    widths = ((boxes[..., 2:3] - boxes[..., 0:1]) * width).flatten()
    heights = ((boxes[..., 3:4] - boxes[..., 1:2]) * height).flatten()

    # Indices of boxes that are too small in at least one dimension.
    too_small = np.logical_or(widths < clamp, heights < clamp)
    modify = np.flatnonzero(too_small)

    boxes[modify, 2:3] = boxes[modify, 0:1] + clamp / width
    boxes[modify, 3:4] = boxes[modify, 1:2] + clamp / height
    return boxes
|
|
1241
|
+
|
|
1242
|
+
|
|
1243
|
+
def ignore_boxes(
    ignore: int,
    boxes: np.ndarray,
    labels: np.ndarray,
    scores: np.ndarray = None,
    shape: tuple = None
) -> Tuple[np.ndarray, np.ndarray, Union[None, np.ndarray]]:
    """
    Drop the boxes (and their labels and scores) whose width or height
    is smaller than the ignore value in pixels.

    Parameters
    ----------
    ignore: int
        Boxes with a width or height below this value, in pixels,
        are removed.
    boxes: np.ndarray
        The bounding boxes array with shape (n, 4) in xyxy format.
    labels: np.ndarray
        The labels associated to each bounding box.
    scores: np.ndarray
        (Optional) the scores associated to each bounding box.
    shape: tuple
        If None (default), the boxes are assumed to be in pixels.
        Otherwise the boxes are assumed to be normalized and this is
        the (height, width) of the image used to convert their sizes
        to pixels for the comparison.

    Returns
    -------
    boxes: np.ndarray
        The bounding boxes with the smaller boxes removed.
    labels: np.ndarray
        The labels of the remaining bounding boxes.
    scores: Union[None, np.ndarray]
        None if no scores were provided, otherwise the scores of the
        remaining bounding boxes.
    """
    height, width = (1, 1) if shape is None else shape

    # Box sizes in pixels.
    widths = ((boxes[..., 2:3] - boxes[..., 0:1]) * width).flatten()
    heights = ((boxes[..., 3:4] - boxes[..., 1:2]) * height).flatten()

    # Keep only the boxes large enough in both dimensions.
    large_enough = np.logical_and(widths >= ignore, heights >= ignore)
    keep = np.flatnonzero(large_enough)

    boxes = np.take(boxes, keep, axis=0)
    labels = np.take(labels, keep, axis=0)
    if scores is not None:
        scores = np.take(scores, keep, axis=0)

    return boxes, labels, scores
|
|
1302
|
+
|
|
1303
|
+
# Functions for Segmentation Transformations
|
|
1304
|
+
|
|
1305
|
+
|
|
1306
|
+
def segments2boxes(segments: list, box_format: str = "xcycwh") -> np.ndarray:
    """
    Convert segment labels to box labels by taking the extent of
    each segment, i.e. (xy1, xy2, ...) to one bounding box.

    Parameters
    ----------
    segments: list
        List of segments where each segment is an (N, 2) array of
        [x, y] points.
    box_format: str
        The output box format. "xcycwh" (YOLO) is the default and
        "xywh" (COCO) is also recognised. Any other value yields
        "xyxy" (PascalVOC).

    Returns
    -------
    np.ndarray
        One bounding box per segment in the requested format.
    """
    # Tightest xyxy box around each segment.
    extents = np.array([
        [xs.min(), ys.min(), xs.max(), ys.max()]
        for xs, ys in (segment.T for segment in segments)
    ])

    if box_format == "xcycwh":
        return xyxy2xcycwh(extents)
    if box_format == "xywh":
        return xyxy2xywh(extents)
    return extents
|
|
1336
|
+
|
|
1337
|
+
|
|
1338
|
+
def resample_segments(segments: list, n: int = 1000) -> list:
    """
    Resample segments to n points each using linear interpolation.
    Source: https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py#L485

    Parameters
    ----------
    segments: list
        List of (N, 2) arrays where N is the number of points in each
        segment. The list is updated in place.
    n: int
        Number of points to resample each segment to.

    Returns
    -------
    list
        The same list, where every segment that did not already have
        n points is replaced by a float32 array of n points.
    """
    for index, segment in enumerate(segments):
        if len(segment) == n:
            continue

        # Close the polygon by repeating its first point.
        closed = np.concatenate((segment, segment[0:1, :]), axis=0)
        count = len(closed)
        knots = np.arange(count)

        if count < n:
            # Upsample: spread the extra points evenly, then merge the
            # original vertices back in so they are preserved.
            samples = np.linspace(0, count - 1, n - count)
            samples = np.insert(
                samples, np.searchsorted(samples, knots), knots)
        else:
            samples = np.linspace(0, count - 1, n)

        columns = [np.interp(samples, knots, closed[:, axis])
                   for axis in range(2)]
        segments[index] = np.concatenate(
            columns, dtype=np.float32).reshape(2, -1).T  # segment xy
    return segments
|
|
1367
|
+
|
|
1368
|
+
|
|
1369
|
+
def format_segments(
    segments: np.ndarray,
    shape: tuple,
    ratio_pad: tuple,
    colors: Union[list, np.ndarray],
    mask_ratio: int = 1,
    semantic: bool = False,
    backend: str = "hal"
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Convert polygon segments to bitmap masks.

    Parameters
    ----------
    segments: np.ndarray
        Mask segments with shape (# polygons, # coordinates, 2).
        Non-empty segments are rescaled and shifted in place.
    shape: tuple
        This represents the (height, width) of the model input shape.
    ratio_pad: tuple
        The scale and padding factors after letterbox transformations,
        unpacked as ((scale y, scale x), (pad x, pad y)).
    colors: Union[list, np.ndarray]
        The label to assign to each polygon. Only used for
        semantic segmentation.
    mask_ratio: int, optional
        Masks are downsampled according to mask_ratio. Only used for
        instance segmentation.
    semantic: bool, optional
        If True a single mask image is drawn with every polygon filled
        by its label. By default this is False and one mask per
        polygon is produced (instance segmentation).
    backend: str
        The backend library used for resizing the instance masks.

    Returns
    -------
    masks: np.ndarray
        The bitmap mask(s) built from the polygons.
    sorted_idx: np.ndarray
        Always None in this implementation.
    """
    (scale_h, scale_w), (padw, padh) = ratio_pad

    # Map the polygons into the model input space.
    if len(segments):
        segments[..., 0] *= scale_w
        segments[..., 1] *= scale_h
        segments[..., 0] += padw
        segments[..., 1] += padh

    sorted_idx = None

    if not semantic:
        masks = polygons2masks(
            imgsz=shape,
            segments=segments,
            downsample_ratio=mask_ratio,
            backend=backend
        )
        return masks, sorted_idx

    masks = create_mask_image(
        polygons=segments,
        labels=colors,
        shape=shape
    )
    return masks, sorted_idx
|
|
1437
|
+
|
|
1438
|
+
|
|
1439
|
+
def polygon2mask(
    imgsz: Tuple[int, int],
    polygons: List[np.ndarray],
    color: int = 1,
    downsample_ratio: int = 1,
    backend: str = "hal"
) -> np.ndarray:
    """
    Convert a list of polygons to a single mask of the specified image size.

    Parameters
    ----------
    imgsz: Tuple[int, int]
        The size of the image as (height, width).
    polygons: List[np.ndarray]
        A list of polygons, each a flat sequence of M coordinates
        (x1, y1, x2, y2, ...) such that M % 2 = 0. The coordinates are
        cast to int32.
    color: int, optional
        The value to fill in the polygons on the mask.
    downsample_ratio: int, optional
        Factor by which to downsample the mask. Values of 1 or less
        leave the mask at full size.
    backend: str
        The backend library used for resizing the mask.

    Returns
    -------
    np.ndarray
        A mask of the specified image size with the polygons filled in.
    """
    # Regroup the flat coordinates into (polygon, point, xy).
    points = np.asarray(polygons, dtype=np.int32)
    points = points.reshape((points.shape[0], -1, 2))

    mask = create_mask_image(
        polygons=points,
        labels=color,
        shape=imgsz
    )

    if downsample_ratio <= 1:
        return mask

    new_height = imgsz[0] // downsample_ratio
    new_width = imgsz[1] // downsample_ratio
    return resize(mask, (new_width, new_height), backend=backend)
|
|
1482
|
+
|
|
1483
|
+
|
|
1484
|
+
def polygons2masks(
    imgsz: Tuple[int, int],
    segments: List[np.ndarray],
    downsample_ratio: int = 1,
    backend: str = "hal"
) -> np.ndarray:
    """
    Convert a list of polygons to a set of binary instance
    segmentation masks at the specified image size.

    Parameters
    ----------
    imgsz: Tuple[int, int]
        The size of the image as (height, width).
    segments: List[np.ndarray]
        The polygons, one array of points per instance. Each one is
        flattened and rendered into its own mask.
    downsample_ratio: int, optional
        Factor by which to downsample each mask.
    backend: str
        The backend library used for resizing the masks.

    Returns
    -------
    np.ndarray
        One mask per polygon, stacked along the first axis. If there
        are no segments, a single all-zero int32 mask of shape
        (1, height, width) is returned.
    """
    if len(segments) == 0:
        height, width = imgsz[0], imgsz[1]
        return np.zeros((1, height, width), dtype=np.int32)

    masks = []
    for segment in segments:
        masks.append(polygon2mask(imgsz, [segment.reshape(-1)],
                                  downsample_ratio=downsample_ratio,
                                  backend=backend))
    return np.array(masks)
|
|
1522
|
+
|
|
1523
|
+
|
|
1524
|
+
def create_mask_image(
    polygons: Union[list, np.ndarray],
    labels: Union[list, np.ndarray, int],
    shape: tuple
) -> np.ndarray:
    """
    Draw a list of polygons into a single mask image.

    Parameters
    ----------
    polygons: Union[list, np.ndarray]
        This contains the polygon points. Ex.
        [[[x1,y1], [x2,y2], ... ,[xn,yn]], [...], ...]
    labels: Union[list, np.ndarray, int]
        The integer label of each polygon used as its fill value.
        If a scalar is supplied, the same label is applied
        to all the polygons.
    shape: tuple
        This is the shape (height, width) of the mask.

    Returns
    -------
    np.ndarray
        The 2D mask image with the shape (height, width) specified,
        where each drawn polygon is filled and outlined with its label.
    """
    height, width = shape[0], shape[1]
    # Single channel 8-bit image initialised to 0.
    mask = Image.new('L', (width, height), 0)
    canvas = ImageDraw.Draw(mask)

    if isinstance(polygons, np.ndarray):
        polygons = polygons.tolist()

    # Broadcast a scalar label to every polygon.
    if isinstance(labels, (int, np.ScalarType)):
        labels = np.full(len(polygons), labels, dtype=np.int32)

    for label, polygon in zip(labels, polygons):
        points = list(map(tuple, polygon))  # requires a list of Tuples.
        # Polygons with fewer than two points are skipped.
        if len(points) < 2:
            continue
        canvas.polygon(points, outline=int(label), fill=int(label))

    # This array contains a mask of the image where the objects are
    # outlined by class number
    return np.array(mask)
|
|
1561
|
+
|
|
1562
|
+
|
|
1563
|
+
def create_binary_mask(mask: np.ndarray) -> np.ndarray:
    """
    Collapse a class-label mask into a binary mask where every
    object (regardless of class) is 1.

    Parameters
    ----------
    mask: np.ndarray
        2D array mask of class labels unique to each object.

    Returns
    -------
    np.ndarray
        A new mask where every value greater than 0 is replaced by 1.
        All other values are kept as they are.
    """
    is_object = mask > 0
    return np.where(is_object, 1, mask)
|
|
1580
|
+
|
|
1581
|
+
|
|
1582
|
+
def create_mask_class(mask: np.ndarray, cls: int) -> np.ndarray:
    """
    Separates a mask with more than one class into an individual
    mask of 1's and 0's where 1 represents the specified class and
    0 represents every other class.

    Parameters
    ----------
    mask: np.ndarray
        Multiclass mask of class labels unique to each object.
    cls: int
        The integer representing the class in the mask
        to keep as a value of 1. The other classes will be treated as
        0's. Passing 0 selects the elements labelled 0.

    Returns
    -------
    np.ndarray
        Binary 2D mask of 1's and 0's. A new array is returned; the
        input mask is not modified.
    """
    # Select directly on the comparison. Zeroing the other classes first
    # and then relabelling `cls` to 1 breaks for cls == 0: after the first
    # step every element equals 0 (== cls), so the whole mask becomes 1.
    # Using zeros_like(mask) as the fallback keeps the same dtype promotion
    # as selecting between an integer literal and the mask itself.
    return np.where(mask == cls, 1, np.zeros_like(mask))
|
|
1605
|
+
|
|
1606
|
+
|
|
1607
|
+
def create_mask_classes(
    new_mask: np.ndarray,
    cls: int,
    current_mask: np.ndarray = None
) -> np.ndarray:
    """
    Relabels a binary mask with its class and merges it into an
    existing multiclass mask.

    The 1's in the new mask are replaced with the class provided.
    When a current mask is given, the relabelled mask is added
    element-wise to it; otherwise the relabelled mask is returned
    on its own.

    Parameters
    ----------
    new_mask: np.ndarray
        The binary (0, 1) 2D mask to relabel.
    cls: int
        Class representing the 1's in the new mask. This is the class
        to append to the current mask.
    current_mask: (height, width) np.ndarray
        Current multiclass mask. Optional.

    Returns
    -------
    np.ndarray
        Multiclass mask with an additional class added.
    """
    relabelled = np.where(new_mask == 1, cls, new_mask)
    if current_mask is None:
        return relabelled
    # Element-wise sum: positions set in both masks hold the sum of labels.
    return np.add(current_mask, relabelled)
|
|
1638
|
+
|
|
1639
|
+
|
|
1640
|
+
def create_mask_background(mask: np.ndarray) -> np.ndarray:
    """
    Inverts a multiclass mask into a binary mask of the background
    class: elements equal to 0 (background) become 1 and every
    non-zero element (any object class) becomes 0.

    Parameters
    ----------
    mask: np.ndarray
        Multiclass mask array representing each image pixels.

    Returns
    -------
    np.ndarray
        Binary mask of 1's and 0's, where 1's is background and
        objects are 0's. A new array is returned; the input mask is
        not modified.
    """
    is_background = mask == 0
    # zeros_like(mask) supplies the object value (0) while keeping the
    # result dtype derived from the input mask.
    return np.where(is_background, 1, np.zeros_like(mask))
|
|
1662
|
+
|
|
1663
|
+
|
|
1664
|
+
def convert_to_serializable(obj: Any):
    """
    Recursively convert NumPy types to
    Python-native types for JSON serialization.

    Non-finite floating point values (NaN, +inf, -inf) are replaced
    with 0. This applies to built-in floats, NumPy floating scalars
    and the elements of floating point arrays alike.

    Parameters
    ----------
    obj: Any
        A NumPy array or scalar, a built-in value, or a dict, list or
        tuple nesting any of these. Dictionary keys are left as they
        are; only the values are converted.

    Returns
    -------
    obj
        The object with a native
        python type representation. Arrays become (nested) lists;
        dicts, lists and tuples keep their container type. Objects
        of any other type are returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        if np.issubdtype(obj.dtype, np.floating):
            # Zero out NaN/inf before converting so array elements get
            # the same treatment as scalar floats.
            return np.where(np.isfinite(obj), obj, 0).tolist()
        if obj.dtype == object:
            # Object arrays may hold NumPy scalars or nested containers.
            return convert_to_serializable(obj.tolist())
        return obj.tolist()

    if isinstance(obj, np.generic):
        # Unwrap to the Python scalar, then fall through so NumPy floats
        # also reach the non-finite check below.
        obj = obj.item()

    if isinstance(obj, float) and not math.isfinite(obj):
        return 0
    if isinstance(obj, dict):
        return {k: convert_to_serializable(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [convert_to_serializable(i) for i in obj]
    if isinstance(obj, tuple):
        # Tuples are JSON serializable, but their items may not be.
        return tuple(convert_to_serializable(i) for i in obj)
    return obj
|