eye-cv 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eye/__init__.py +115 -0
- eye/__init___supervision_original.py +120 -0
- eye/annotators/__init__.py +0 -0
- eye/annotators/base.py +22 -0
- eye/annotators/core.py +2699 -0
- eye/annotators/line.py +107 -0
- eye/annotators/modern.py +529 -0
- eye/annotators/trace.py +142 -0
- eye/annotators/utils.py +177 -0
- eye/assets/__init__.py +2 -0
- eye/assets/downloader.py +95 -0
- eye/assets/list.py +83 -0
- eye/classification/__init__.py +0 -0
- eye/classification/core.py +188 -0
- eye/config.py +2 -0
- eye/core/__init__.py +0 -0
- eye/core/trackers/__init__.py +1 -0
- eye/core/trackers/botsort_tracker.py +336 -0
- eye/core/trackers/bytetrack_tracker.py +284 -0
- eye/core/trackers/sort_tracker.py +200 -0
- eye/core/tracking.py +146 -0
- eye/dataset/__init__.py +0 -0
- eye/dataset/core.py +919 -0
- eye/dataset/formats/__init__.py +0 -0
- eye/dataset/formats/coco.py +258 -0
- eye/dataset/formats/pascal_voc.py +279 -0
- eye/dataset/formats/yolo.py +272 -0
- eye/dataset/utils.py +259 -0
- eye/detection/__init__.py +0 -0
- eye/detection/auto_convert.py +155 -0
- eye/detection/core.py +1529 -0
- eye/detection/detections_enhanced.py +392 -0
- eye/detection/line_zone.py +859 -0
- eye/detection/lmm.py +184 -0
- eye/detection/overlap_filter.py +270 -0
- eye/detection/tools/__init__.py +0 -0
- eye/detection/tools/csv_sink.py +181 -0
- eye/detection/tools/inference_slicer.py +288 -0
- eye/detection/tools/json_sink.py +142 -0
- eye/detection/tools/polygon_zone.py +202 -0
- eye/detection/tools/smoother.py +123 -0
- eye/detection/tools/smoothing.py +179 -0
- eye/detection/tools/smoothing_config.py +202 -0
- eye/detection/tools/transformers.py +247 -0
- eye/detection/utils.py +1175 -0
- eye/draw/__init__.py +0 -0
- eye/draw/color.py +154 -0
- eye/draw/utils.py +374 -0
- eye/filters.py +112 -0
- eye/geometry/__init__.py +0 -0
- eye/geometry/core.py +128 -0
- eye/geometry/utils.py +47 -0
- eye/keypoint/__init__.py +0 -0
- eye/keypoint/annotators.py +442 -0
- eye/keypoint/core.py +687 -0
- eye/keypoint/skeletons.py +2647 -0
- eye/metrics/__init__.py +21 -0
- eye/metrics/core.py +72 -0
- eye/metrics/detection.py +843 -0
- eye/metrics/f1_score.py +648 -0
- eye/metrics/mean_average_precision.py +628 -0
- eye/metrics/mean_average_recall.py +697 -0
- eye/metrics/precision.py +653 -0
- eye/metrics/recall.py +652 -0
- eye/metrics/utils/__init__.py +0 -0
- eye/metrics/utils/object_size.py +158 -0
- eye/metrics/utils/utils.py +9 -0
- eye/py.typed +0 -0
- eye/quick.py +104 -0
- eye/tracker/__init__.py +0 -0
- eye/tracker/byte_tracker/__init__.py +0 -0
- eye/tracker/byte_tracker/core.py +386 -0
- eye/tracker/byte_tracker/kalman_filter.py +205 -0
- eye/tracker/byte_tracker/matching.py +69 -0
- eye/tracker/byte_tracker/single_object_track.py +178 -0
- eye/tracker/byte_tracker/utils.py +18 -0
- eye/utils/__init__.py +0 -0
- eye/utils/conversion.py +132 -0
- eye/utils/file.py +159 -0
- eye/utils/image.py +794 -0
- eye/utils/internal.py +200 -0
- eye/utils/iterables.py +84 -0
- eye/utils/notebook.py +114 -0
- eye/utils/video.py +307 -0
- eye/utils_eye/__init__.py +1 -0
- eye/utils_eye/geometry.py +71 -0
- eye/utils_eye/nms.py +55 -0
- eye/validators/__init__.py +140 -0
- eye/web.py +271 -0
- eye_cv-1.0.0.dist-info/METADATA +319 -0
- eye_cv-1.0.0.dist-info/RECORD +94 -0
- eye_cv-1.0.0.dist-info/WHEEL +5 -0
- eye_cv-1.0.0.dist-info/licenses/LICENSE +21 -0
- eye_cv-1.0.0.dist-info/top_level.txt +1 -0
eye/detection/utils.py
ADDED
|
@@ -0,0 +1,1175 @@
|
|
|
1
|
+
from itertools import chain
|
|
2
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
3
|
+
|
|
4
|
+
import cv2
|
|
5
|
+
import numpy as np
|
|
6
|
+
import numpy.typing as npt
|
|
7
|
+
|
|
8
|
+
from eye.config import CLASS_NAME_DATA_FIELD
|
|
9
|
+
from eye.geometry.core import Vector
|
|
10
|
+
|
|
11
|
+
# Contours with fewer points than this are dropped by `mask_to_polygons`.
MIN_POLYGON_POINT_COUNT = 3
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def polygon_to_mask(polygon: np.ndarray, resolution_wh: Tuple[int, int]) -> np.ndarray:
    """Rasterize a polygon into a binary mask.

    Args:
        polygon (np.ndarray): Vertices of the polygon to draw. They are cast to
            `np.int32` before being handed to OpenCV.
        resolution_wh (Tuple[int, int]): `(width, height)` of the mask to create.

    Returns:
        np.ndarray: A `uint8` array of shape `(height, width)` holding `1` where
            the polygon was filled and `0` everywhere else.
    """
    width, height = (int(side) for side in resolution_wh)
    canvas = np.zeros((height, width), dtype=np.uint8)
    vertices = polygon.astype(np.int32)
    # fillPoly draws in place on `canvas`.
    cv2.fillPoly(canvas, [vertices], color=1)
    return canvas
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def box_iou_batch(boxes_true: np.ndarray, boxes_detection: np.ndarray) -> np.ndarray:
    """
    Compute pairwise Intersection over Union (IoU) of two sets of bounding boxes.
    Both sets are expected to be in `(x_min, y_min, x_max, y_max)` format.

    Args:
        boxes_true (np.ndarray): 2D `np.ndarray` representing ground-truth boxes.
            `shape = (N, 4)` where `N` is number of true objects.
        boxes_detection (np.ndarray): 2D `np.ndarray` representing detection boxes.
            `shape = (M, 4)` where `M` is number of detected objects.

    Returns:
        np.ndarray: Pairwise IoU of boxes from `boxes_true` and `boxes_detection`.
            `shape = (N, M)`. Pairs whose union area is zero (for example two
            zero-area boxes) yield `0` and do not emit a NumPy warning.
    """

    def box_area(box):
        return (box[2] - box[0]) * (box[3] - box[1])

    area_true = box_area(boxes_true.T)
    area_detection = box_area(boxes_detection.T)

    # Broadcast (N, 1, 2) against (M, 2) to get every true/detection pairing.
    top_left = np.maximum(boxes_true[:, None, :2], boxes_detection[:, :2])
    bottom_right = np.minimum(boxes_true[:, None, 2:], boxes_detection[:, 2:])

    # Negative extents mean "no overlap"; clip them to zero before multiplying.
    area_inter = np.prod(np.clip(bottom_right - top_left, a_min=0, a_max=None), 2)
    area_union = area_true[:, None] + area_detection - area_inter

    # Degenerate pairs divide by zero; silence the warning here because the
    # NaN/inf results are normalised by `nan_to_num` right below.
    with np.errstate(divide="ignore", invalid="ignore"):
        ious = area_inter / area_union
    return np.nan_to_num(ious)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _mask_iou_batch_split(
|
|
66
|
+
masks_true: np.ndarray, masks_detection: np.ndarray
|
|
67
|
+
) -> np.ndarray:
|
|
68
|
+
"""
|
|
69
|
+
Internal function.
|
|
70
|
+
Compute Intersection over Union (IoU) of two sets of masks -
|
|
71
|
+
`masks_true` and `masks_detection`.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
masks_true (np.ndarray): 3D `np.ndarray` representing ground-truth masks.
|
|
75
|
+
masks_detection (np.ndarray): 3D `np.ndarray` representing detection masks.
|
|
76
|
+
|
|
77
|
+
Returns:
|
|
78
|
+
np.ndarray: Pairwise IoU of masks from `masks_true` and `masks_detection`.
|
|
79
|
+
"""
|
|
80
|
+
intersection_area = np.logical_and(masks_true[:, None], masks_detection).sum(
|
|
81
|
+
axis=(2, 3)
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
masks_true_area = masks_true.sum(axis=(1, 2))
|
|
85
|
+
masks_detection_area = masks_detection.sum(axis=(1, 2))
|
|
86
|
+
union_area = masks_true_area[:, None] + masks_detection_area - intersection_area
|
|
87
|
+
|
|
88
|
+
return np.divide(
|
|
89
|
+
intersection_area,
|
|
90
|
+
union_area,
|
|
91
|
+
out=np.zeros_like(intersection_area, dtype=float),
|
|
92
|
+
where=union_area != 0,
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def mask_iou_batch(
    masks_true: np.ndarray,
    masks_detection: np.ndarray,
    memory_limit: int = 1024 * 5,
) -> np.ndarray:
    """
    Compute pairwise Intersection over Union (IoU) of two sets of masks,
    splitting the work into chunks when the estimated footprint is too large.

    Args:
        masks_true (np.ndarray): 3D `np.ndarray` representing ground-truth masks.
        masks_detection (np.ndarray): 3D `np.ndarray` representing detection masks.
        memory_limit (int): memory limit in MB, default is 1024 * 5 MB (5GB).

    Returns:
        np.ndarray: Pairwise IoU of masks from `masks_true` and `masks_detection`.
    """
    true_count = masks_true.shape[0]
    # Size of the (N, M, H, W) intermediate, expressed in 1024 * 1024 elements.
    estimated = (
        true_count
        * masks_true.shape[1]
        * masks_true.shape[2]
        * masks_detection.shape[0]
        / 1024
        / 1024
    )
    if estimated <= memory_limit:
        return _mask_iou_batch_split(masks_true, masks_detection)

    detection_elements = (
        masks_detection.shape[0]
        * masks_detection.shape[1]
        * masks_detection.shape[2]
    )
    # Number of ground-truth masks handled per chunk; always at least one.
    chunk_size = max(memory_limit * 1024 * 1024 // detection_elements, 1)

    chunks = [
        _mask_iou_batch_split(masks_true[start : start + chunk_size], masks_detection)
        for start in range(0, true_count, chunk_size)
    ]
    return np.vstack(chunks)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def oriented_box_iou_batch(
    boxes_true: np.ndarray, boxes_detection: np.ndarray
) -> np.ndarray:
    """
    Compute Intersection over Union (IoU) of two sets of oriented bounding boxes -
    `boxes_true` and `boxes_detection`. Both sets of boxes are expected to be in
    `((x1, y1), (x2, y2), (x3, y3), (x4, y4))` format.

    The boxes are rasterized onto a shared canvas and compared as masks, so the
    result is a pixel-level approximation.

    Args:
        boxes_true (np.ndarray): a `np.ndarray` representing ground-truth boxes.
            `shape = (N, 4, 2)` where `N` is number of true objects.
        boxes_detection (np.ndarray): a `np.ndarray` representing detection boxes.
            `shape = (M, 4, 2)` where `M` is number of detected objects.

    Returns:
        np.ndarray: Pairwise IoU of boxes from `boxes_true` and `boxes_detection`.
            `shape = (N, M)` where `N` is number of true objects and
            `M` is number of detected objects. An all-zero `(N, M)` array is
            returned when either set is empty.
    """
    boxes_true = boxes_true.reshape(-1, 4, 2)
    boxes_detection = boxes_detection.reshape(-1, 4, 2)

    # `.max()` below is undefined for empty arrays; there is nothing to compare.
    if boxes_true.shape[0] == 0 or boxes_detection.shape[0] == 0:
        return np.zeros((boxes_true.shape[0], boxes_detection.shape[0]), dtype=float)

    # Index 0 of each vertex is x (columns -> width), index 1 is y (rows -> height).
    # Adding 1 because pixel coordinates are 0-indexed.
    max_width = int(max(boxes_true[:, :, 0].max(), boxes_detection[:, :, 0].max()) + 1)
    max_height = int(max(boxes_true[:, :, 1].max(), boxes_detection[:, :, 1].max()) + 1)
    resolution_wh = (max_width, max_height)

    mask_true = np.zeros((boxes_true.shape[0], max_height, max_width))
    for i, box_true in enumerate(boxes_true):
        mask_true[i] = polygon_to_mask(box_true, resolution_wh)

    mask_detection = np.zeros((boxes_detection.shape[0], max_height, max_width))
    for i, box_detection in enumerate(boxes_detection):
        mask_detection[i] = polygon_to_mask(box_detection, resolution_wh)

    return mask_iou_batch(mask_true, mask_detection)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def clip_boxes(xyxy: np.ndarray, resolution_wh: Tuple[int, int]) -> np.ndarray:
    """
    Clamp bounding box coordinates so they lie inside the frame.

    Args:
        xyxy (np.ndarray): Array of shape `(N, 4)`; each row is a box in
            `(x_min, y_min, x_max, y_max)` format.
        resolution_wh (Tuple[int, int]): Frame resolution as `(width, height)`.

    Returns:
        np.ndarray: A new `(N, 4)` array with x values limited to `[0, width]`
            and y values limited to `[0, height]`. The input is not modified.

    Examples:
        ```python
        import numpy as np
        import eye as sv

        xyxy = np.array([
            [10, 20, 300, 200],
            [15, 25, 350, 450],
            [-10, -20, 30, 40]
        ])

        sv.clip_boxes(xyxy=xyxy, resolution_wh=(320, 240))
        # array([
        #     [ 10,  20, 300, 200],
        #     [ 15,  25, 320, 240],
        #     [  0,   0,  30,  40]
        # ])
        ```
    """
    width, height = resolution_wh
    clipped = np.copy(xyxy)
    x_columns, y_columns = [0, 2], [1, 3]
    clipped[:, x_columns] = np.clip(clipped[:, x_columns], 0, width)
    clipped[:, y_columns] = np.clip(clipped[:, y_columns], 0, height)
    return clipped
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def pad_boxes(xyxy: np.ndarray, px: int, py: Optional[int] = None) -> np.ndarray:
    """
    Grow bounding boxes by a constant margin on every side.

    Args:
        xyxy (np.ndarray): Array of shape `(N, 4)`; each row is a box in
            `(x_min, y_min, x_max, y_max)` format.
        px (int): Margin subtracted from `x_min` and added to `x_max`.
        py (Optional[int]): Margin subtracted from `y_min` and added to `y_max`.
            Falls back to `px` when omitted.

    Returns:
        np.ndarray: A new `(N, 4)` array holding the padded boxes. The input is
            not modified.

    Examples:
        ```python
        import numpy as np
        import eye as sv

        xyxy = np.array([
            [10, 20, 30, 40],
            [15, 25, 35, 45]
        ])

        sv.pad_boxes(xyxy=xyxy, px=5, py=10)
        # array([
        #     [ 5, 10, 35, 50],
        #     [10, 15, 40, 55]
        # ])
        ```
    """
    pad_y = px if py is None else py
    margins = [px, pad_y]

    padded = xyxy.copy()
    padded[:, [0, 1]] -= margins  # move the top-left corner outwards
    padded[:, [2, 3]] += margins  # move the bottom-right corner outwards
    return padded
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def xywh_to_xyxy(xywh: np.ndarray) -> np.ndarray:
    """
    Convert boxes from `(x, y, width, height)` to
    `(x_min, y_min, x_max, y_max)` format.

    Args:
        xywh (np.ndarray): Array of shape `(N, 4)`; each row is a box in
            `(x, y, width, height)` format.

    Returns:
        np.ndarray: A new `(N, 4)` array with each row in
            `(x_min, y_min, x_max, y_max)` format.

    Examples:
        ```python
        import numpy as np
        import eye as sv

        xywh = np.array([
            [10, 20, 30, 40],
            [15, 25, 35, 45]
        ])

        sv.xywh_to_xyxy(xywh=xywh)
        # array([
        #     [10, 20, 40, 60],
        #     [15, 25, 50, 70]
        # ])
        ```
    """
    converted = xywh.copy()
    # The first two columns already hold the top-left corner; only the
    # bottom-right corner (origin + size) has to be computed.
    converted[:, 2:4] = xywh[:, 0:2] + xywh[:, 2:4]
    return converted
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def xcycwh_to_xyxy(xcycwh: np.ndarray) -> np.ndarray:
    """
    Converts bounding box coordinates from `(center_x, center_y, width, height)`
    format to `(x_min, y_min, x_max, y_max)` format.

    Args:
        xcycwh (np.ndarray): A numpy array of shape `(N, 4)` where each row
            corresponds to a bounding box in the format `(center_x, center_y, width,
            height)`.

    Returns:
        np.ndarray: A numpy array of shape `(N, 4)` where each row corresponds
            to a bounding box in the format `(x_min, y_min, x_max, y_max)`.
            Floating-point inputs keep their dtype; any other dtype is promoted
            to `float` so that half-pixel results are not truncated.

    Examples:
        ```python
        import numpy as np
        import eye as sv

        xcycwh = np.array([
            [50, 50, 20, 30],
            [30, 40, 10, 15]
        ])

        sv.xcycwh_to_xyxy(xcycwh=xcycwh)
        # array([
        #     [40. , 35. , 60. , 65. ],
        #     [25. , 32.5, 35. , 47.5]
        # ])
        ```
    """
    # Halving a width/height can produce .5 values; writing those back into an
    # integer array would silently drop the fraction.
    if np.issubdtype(xcycwh.dtype, np.floating):
        xyxy = xcycwh.copy()
    else:
        xyxy = xcycwh.astype(float)

    centers = xcycwh[:, 0:2]
    half_sizes = xcycwh[:, 2:4] / 2
    xyxy[:, 0:2] = centers - half_sizes
    xyxy[:, 2:4] = centers + half_sizes
    return xyxy
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def mask_to_xyxy(masks: np.ndarray) -> np.ndarray:
    """
    Compute the tight bounding box of every mask in a stack of 2D masks.

    Parameters:
        masks (np.ndarray): A 3D `np.array` of shape `(N, H, W)` containing
            2D masks; non-zero entries are treated as foreground.

    Returns:
        np.ndarray: An integer `np.array` of shape `(N, 4)` with one
            `(x_min, y_min, x_max, y_max)` row per mask, using the indices of
            the outermost foreground pixels. Empty masks produce a row of zeros.
    """
    boxes = np.zeros((masks.shape[0], 4), dtype=int)

    for index, mask in enumerate(masks):
        ys, xs = np.nonzero(mask)
        if ys.size == 0:
            # No foreground pixel: keep the all-zero row.
            continue
        boxes[index] = (xs.min(), ys.min(), xs.max(), ys.max())

    return boxes
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def mask_to_polygons(mask: np.ndarray) -> List[np.ndarray]:
    """
    Extract the contours of a binary mask as polygons.

    Parameters:
        mask (np.ndarray): A binary mask represented as a 2D NumPy array of
            shape `(H, W)`, where H and W are the height and width of
            the mask, respectively.

    Returns:
        List[np.ndarray]: One array of shape `(N, 2)` per contour, holding the
            `x`, `y` coordinates of its points. Contours with fewer than
            `MIN_POLYGON_POINT_COUNT` points are left out.
    """
    binary_mask = mask.astype(np.uint8)
    contours, _hierarchy = cv2.findContours(
        binary_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )

    polygons = []
    for contour in contours:
        if contour.shape[0] < MIN_POLYGON_POINT_COUNT:
            continue
        # OpenCV returns contours as (N, 1, 2); drop the singleton axis.
        polygons.append(np.squeeze(contour, axis=1))
    return polygons
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def filter_polygons_by_area(
    polygons: List[np.ndarray],
    min_area: Optional[float] = None,
    max_area: Optional[float] = None,
) -> List[np.ndarray]:
    """
    Keep only the polygons whose area lies within the given bounds.

    Parameters:
        polygons (List[np.ndarray]): Polygons to filter, each an array of shape
            `(N, 2)` holding the `x`, `y` coordinates of its points.
        min_area (Optional[float]): Inclusive lower bound on the area. `None`
            disables the lower bound.
        max_area (Optional[float]): Inclusive upper bound on the area. `None`
            disables the upper bound.

    Returns:
        List[np.ndarray]: The polygons that satisfy both bounds, in their
            original order. When both bounds are `None` the input list itself
            is returned.
    """
    if min_area is None and max_area is None:
        return polygons

    kept = []
    for polygon in polygons:
        area = cv2.contourArea(polygon)
        large_enough = min_area is None or area >= min_area
        small_enough = max_area is None or area <= max_area
        if large_enough and small_enough:
            kept.append(polygon)
    return kept
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def polygon_to_xyxy(polygon: np.ndarray) -> np.ndarray:
    """
    Compute the axis-aligned bounding box of a polygon.

    Parameters:
        polygon (np.ndarray): A polygon represented by a NumPy array of shape
            `(N, 2)`, containing the `x`, `y` coordinates of the points.

    Returns:
        np.ndarray: A 1D NumPy array `(x_min, y_min, x_max, y_max)` built from
            the per-column minimum and maximum of `polygon`.
    """
    lower = np.min(polygon, axis=0)
    upper = np.max(polygon, axis=0)
    x_min, y_min = lower
    x_max, y_max = upper
    return np.array([x_min, y_min, x_max, y_max])
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
def approximate_polygon(
    polygon: np.ndarray, percentage: float, epsilon_step: float = 0.05
) -> np.ndarray:
    """
    Approximates a given polygon by reducing a certain percentage of points.

    This function uses the Ramer-Douglas-Peucker algorithm (via
    `cv2.approxPolyDP`) to simplify the input polygon, raising the tolerance
    `epsilon` step by step until the point budget would be exhausted.

    Parameters:
        polygon (np.ndarray): A 2D NumPy array of shape `(N, 2)` containing
            the `x`, `y` coordinates of the input polygon's points.
        percentage (float): The percentage of points to be removed from the
            input polygon, in the range `[0, 1)`.
        epsilon_step (float): Approximation accuracy step; must be positive.
            Epsilon is the maximum distance between the original curve
            and its approximation.

    Returns:
        np.ndarray: A 2D NumPy array of shape `(M, 2)` with the `x`, `y`
            coordinates of the approximated polygon. The target point count is
            `max(int(N * (1 - percentage)), 3)`; the result is the coarsest
            approximation found that still has more points than the target.
            The polygon's own points are returned when `N` is already at or
            below the target, or when even the first approximation step drops
            to the target or fewer points.

    Raises:
        ValueError: If `percentage` is outside `[0, 1)` or `epsilon_step` is
            not positive.
    """

    if percentage < 0 or percentage >= 1:
        raise ValueError("Percentage must be in the range [0, 1).")
    if epsilon_step <= 0:
        # A non-positive step would never increase epsilon and loop forever.
        raise ValueError("epsilon_step must be greater than 0.")

    target_points = max(int(len(polygon) * (1 - percentage)), 3)

    if len(polygon) <= target_points:
        return polygon

    # Start from the input in approxPolyDP's (N, 1, 2) layout so the final
    # squeeze also works when no approximation step is accepted.
    approximated_points = polygon.reshape(-1, 1, 2)
    epsilon = 0
    while True:
        epsilon += epsilon_step
        candidate = cv2.approxPolyDP(polygon, epsilon, closed=True)
        if len(candidate) <= target_points:
            break
        approximated_points = candidate

    return np.squeeze(approximated_points, axis=1)
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def extract_ultralytics_masks(yolov8_results) -> Optional[np.ndarray]:
    """
    Pull the segmentation masks out of a result object and bring them back to
    the original image size.

    Args:
        yolov8_results: Result object exposing `masks` (with a `data` tensor
            that supports `.cpu().numpy()`) and `orig_shape`.
            NOTE(review): presumably an Ultralytics `Results` instance whose
            `orig_shape` is `(height, width)` - confirm against the caller.

    Returns:
        Optional[np.ndarray]: Boolean array with one mask per detection, or
            `None` when `yolov8_results.masks` is falsy.
    """
    if not yolov8_results.masks:
        return None

    orig_shape = yolov8_results.orig_shape
    inference_shape = tuple(yolov8_results.masks.data.shape[1:])

    # Padding added around the scaled image at inference time (x, y).
    pad_x, pad_y = 0, 0
    if inference_shape != orig_shape:
        gain = min(
            inference_shape[0] / orig_shape[0],
            inference_shape[1] / orig_shape[1],
        )
        pad_x = (inference_shape[1] - orig_shape[1] * gain) / 2
        pad_y = (inference_shape[0] - orig_shape[0] * gain) / 2

    top = int(pad_y)
    left = int(pad_x)
    bottom = int(inference_shape[0] - pad_y)
    right = int(inference_shape[1] - pad_x)

    restored = []
    for raw_mask in yolov8_results.masks.data.cpu().numpy():
        # Strip the padding, then resize to the original image if needed.
        cropped = raw_mask[top:bottom, left:right]
        if cropped.shape != orig_shape:
            cropped = cv2.resize(cropped, (orig_shape[1], orig_shape[0]))
        restored.append(cropped)

    return np.asarray(restored, dtype=bool)
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
def process_roboflow_result(
    roboflow_result: dict,
) -> Tuple[
    np.ndarray,
    np.ndarray,
    np.ndarray,
    Optional[np.ndarray],
    Optional[np.ndarray],
    Dict[str, Union[List[np.ndarray], np.ndarray]],
]:
    """
    Unpack a Roboflow-style inference response into detection arrays.

    Args:
        roboflow_result (dict): Response containing a `"predictions"` list and,
            when that list is non-empty, an `"image"` entry with `"width"` and
            `"height"`. Every prediction provides `"x"`, `"y"`, `"width"`,
            `"height"`, `"class_id"`, `"class"` and `"confidence"`, and may
            provide `"points"` (polygon vertices) and `"tracker_id"`.

    Returns:
        Tuple: `(xyxy, confidence, class_id, masks, tracker_id, data)` where
            `xyxy` has shape `(N, 4)`, `masks` is a boolean array or `None` if
            no polygon was present, `tracker_id` is an integer array or `None`
            if no prediction carried one, and `data` maps
            `CLASS_NAME_DATA_FIELD` to the array of class names. Predictions
            whose `"points"` list has fewer than 3 entries are skipped.
    """
    predictions = roboflow_result["predictions"]
    if not predictions:
        return (
            np.empty((0, 4)),
            np.empty(0),
            np.empty(0),
            None,
            None,
            {CLASS_NAME_DATA_FIELD: np.empty(0)},
        )

    resolution_wh = (
        int(roboflow_result["image"]["width"]),
        int(roboflow_result["image"]["height"]),
    )

    boxes = []
    scores = []
    class_ids = []
    class_names = []
    mask_list = []
    tracker_list = []

    for prediction in predictions:
        # Roboflow reports boxes as centre + size; convert to corner format.
        center_x = prediction["x"]
        center_y = prediction["y"]
        box_width = prediction["width"]
        box_height = prediction["height"]
        x_min = center_x - box_width / 2
        y_min = center_y - box_height / 2
        box = [x_min, y_min, x_min + box_width, y_min + box_height]

        has_polygon = "points" in prediction
        if has_polygon:
            if len(prediction["points"]) < 3:
                # Too few vertices to form a polygon: drop the prediction.
                continue
            polygon = np.array(
                [[point["x"], point["y"]] for point in prediction["points"]], dtype=int
            )
            mask = polygon_to_mask(polygon, resolution_wh=resolution_wh)

        boxes.append(box)
        class_ids.append(prediction["class_id"])
        class_names.append(prediction["class"])
        scores.append(prediction["confidence"])
        if has_polygon:
            mask_list.append(mask)
        if "tracker_id" in prediction:
            tracker_list.append(prediction["tracker_id"])

    xyxy = np.array(boxes) if boxes else np.empty((0, 4))
    confidence = np.array(scores) if scores else np.empty(0)
    class_id = np.array(class_ids).astype(int) if class_ids else np.empty(0)
    class_name = np.array(class_names) if class_names else np.empty(0)
    masks = np.array(mask_list, dtype=bool) if mask_list else None
    tracker_id = np.array(tracker_list).astype(int) if tracker_list else None

    return xyxy, confidence, class_id, masks, tracker_id, {
        CLASS_NAME_DATA_FIELD: class_name
    }
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def move_boxes(
    xyxy: npt.NDArray[np.float64], offset: npt.NDArray[np.int32]
) -> npt.NDArray[np.float64]:
    """
    Translate bounding boxes by a fixed offset.

    Parameters:
        xyxy (npt.NDArray[np.float64]): An array of shape `(n, 4)` containing the
            bounding boxes coordinates in format `[x1, y1, x2, y2]`
        offset (np.array): An array of shape `(2,)` containing the offset in
            format `[dx, dy]`.

    Returns:
        npt.NDArray[np.float64]: Repositioned bounding boxes.

    Examples:
        ```python
        import numpy as np
        import eye as sv

        xyxy = np.array([
            [10, 10, 20, 20],
            [30, 30, 40, 40]
        ])
        offset = np.array([5, 5])

        sv.move_boxes(xyxy=xyxy, offset=offset)
        # array([
        #    [15, 15, 25, 25],
        #    [35, 35, 45, 45]
        # ])
        ```
    """
    # [dx, dy] -> [dx, dy, dx, dy] so both corners shift by the same amount.
    corner_shift = np.hstack((offset, offset))
    return xyxy + corner_shift
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
def move_oriented_boxes(
    xyxyxyxy: npt.NDArray[np.float64], offset: npt.NDArray[np.int32]
) -> npt.NDArray[np.float64]:
    """
    Translate oriented bounding boxes by a fixed offset.

    Parameters:
        xyxyxyxy (npt.NDArray[np.float64]): An array of shape `(n, 4, 2)` containing
            the oriented bounding boxes coordinates in format
            `[[x1, y1], [x2, y2], [x3, y3], [x4, y4]]`
        offset (np.array): An array of shape `(2,)` containing the offset in
            format `[dx, dy]`.

    Returns:
        npt.NDArray[np.float64]: Repositioned bounding boxes.

    Examples:
        ```python
        import numpy as np
        import eye as sv

        xyxyxyxy = np.array([
            [
                [20, 10],
                [10, 20],
                [20, 30],
                [30, 20]
            ],
            [
                [30, 30],
                [20, 40],
                [30, 50],
                [40, 40]
            ]
        ])
        offset = np.array([5, 5])

        sv.move_oriented_boxes(xyxyxyxy=xyxyxyxy, offset=offset)
        # array([
        #     [
        #         [25, 15],
        #         [15, 25],
        #         [25, 35],
        #         [35, 25]
        #     ],
        #     [
        #         [35, 35],
        #         [25, 45],
        #         [35, 55],
        #         [45, 45]
        #     ]
        # ])
        ```
    """
    # Broadcasting applies [dx, dy] to every vertex of every box.
    shifted = xyxyxyxy + offset
    return shifted
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
def move_masks(
    masks: npt.NDArray[np.bool_],
    offset: npt.NDArray[np.int32],
    resolution_wh: Tuple[int, int],
) -> npt.NDArray[np.bool_]:
    """
    Offset the masks in an array by the specified (x, y) amount.

    Args:
        masks (npt.NDArray[np.bool_]): A 3D array of binary masks corresponding to the
            predictions. Shape: `(N, H, W)`, where N is the number of predictions, and
            H, W are the dimensions of each mask.
        offset (npt.NDArray[np.int32]): An array of shape `(2,)` containing non-negative
            int values `[dx, dy]`.
        resolution_wh (Tuple[int, int]): The width and height of the desired mask
            resolution.

    Returns:
        (npt.NDArray[np.bool_]) repositioned masks of shape
            `(N, resolution_wh[1], resolution_wh[0])`. Areas not covered by a
            shifted mask are `False`; any part of a shifted mask that falls
            outside the target resolution is cropped.

    Raises:
        ValueError: If either offset component is negative.
    """

    if offset[0] < 0 or offset[1] < 0:
        raise ValueError(f"Offset values must be non-negative integers. Got: {offset}")

    dx, dy = offset[0], offset[1]
    width, height = resolution_wh
    mask_array = np.full((masks.shape[0], height, width), False)

    # Only the part of each mask that still fits on the canvas can be copied;
    # without this limit NumPy raises a shape-mismatch error on overflow.
    visible_height = max(min(masks.shape[1], height - dy), 0)
    visible_width = max(min(masks.shape[2], width - dx), 0)

    mask_array[
        :,
        dy : dy + visible_height,
        dx : dx + visible_width,
    ] = masks[:, :visible_height, :visible_width]

    return mask_array
|
|
725
|
+
|
|
726
|
+
|
|
727
|
+
def scale_boxes(
    xyxy: npt.NDArray[np.float64], factor: float
) -> npt.NDArray[np.float64]:
    """
    Resize bounding boxes around their centers.

    Parameters:
        xyxy (npt.NDArray[np.float64]): An array of shape `(n, 4)` containing the
            bounding boxes coordinates in format `[x1, y1, x2, y2]`
        factor (float): Multiplier applied to each box's width and height. A
            factor greater than 1 enlarges the boxes, while a factor less than 1
            shrinks them.

    Returns:
        npt.NDArray[np.float64]: Scaled bounding boxes; the box centers are
            unchanged.

    Examples:
        ```python
        import numpy as np
        import eye as sv

        xyxy = np.array([
            [10, 10, 20, 20],
            [30, 30, 40, 40]
        ])

        sv.scale_boxes(xyxy=xyxy, factor=1.5)
        # array([
        #    [ 7.5,  7.5, 22.5, 22.5],
        #    [27.5, 27.5, 42.5, 42.5]
        # ])
        ```
    """
    top_left = xyxy[:, :2]
    bottom_right = xyxy[:, 2:]

    centers = (top_left + bottom_right) / 2
    half_sizes = (bottom_right - top_left) * factor / 2

    return np.concatenate((centers - half_sizes, centers + half_sizes), axis=1)
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
def calculate_masks_centroids(masks: np.ndarray) -> np.ndarray:
    """
    Compute the centroid of every binary mask in a stack.

    Args:
        masks (np.ndarray): A 3D array of shape `(num_masks, height, width)`;
            each 2D slice is one binary mask.

    Returns:
        np.ndarray: Integer array of shape `(num_masks, 2)`. Each row holds the
            `(x, y)` centroid of the corresponding mask, truncated to int.
            Masks with no foreground pixels yield `(0, 0)`.
    """
    _, height, width = masks.shape

    # Every index is shifted by 0.5 before averaging, so a pixel contributes
    # the coordinate of its center rather than of its top-left corner.
    row_coords, col_coords = np.indices((height, width)) + 0.5

    pixel_counts = masks.sum(axis=(1, 2))
    # Empty masks would divide by zero; with a count of 1 they resolve to 0.
    pixel_counts[pixel_counts == 0] = 1

    # Contract the (height, width) axes of every mask against the coordinate
    # grids: the result is the per-mask sum of foreground coordinates.
    spatial_axes = ([1, 2], [0, 1])
    x_sums = np.tensordot(masks, col_coords, axes=spatial_axes)
    y_sums = np.tensordot(masks, row_coords, axes=spatial_axes)

    centroids = np.column_stack((x_sums / pixel_counts, y_sums / pixel_counts))
    return centroids.astype(int)
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
def is_data_equal(data_a: Dict[str, np.ndarray], data_b: Dict[str, np.ndarray]) -> bool:
    """
    Check whether two Detections data payloads hold the same content.

    Args:
        data_a: First data payload.
        data_b: Second data payload.

    Returns:
        True when both payloads have the same set of keys and
        `np.array_equal` holds for every pair of values, False otherwise.
    """
    if set(data_a) != set(data_b):
        return False

    for key, value_a in data_a.items():
        if not np.array_equal(value_a, data_b[key]):
            return False
    return True
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
def is_metadata_equal(metadata_a: Dict[str, Any], metadata_b: Dict[str, Any]) -> bool:
    """
    Compares the metadata payloads of two Detections instances.

    Values are compared per key. Two NumPy arrays are compared with
    `np.array_equal`. If exactly one of the two values is a NumPy array the
    values are treated as different: an element-wise `==` between an array and
    a non-array produces an array, whose truth value is ambiguous. Any other
    pair of values is compared with `==`.

    Args:
        metadata_a: First metadata payload.
        metadata_b: Second metadata payload.

    Returns:
        True if the metadata payloads are equal, False otherwise.
    """
    if set(metadata_a) != set(metadata_b):
        return False

    for key, value_a in metadata_a.items():
        value_b = metadata_b[key]
        a_is_array = isinstance(value_a, np.ndarray)
        b_is_array = isinstance(value_b, np.ndarray)

        if a_is_array and b_is_array:
            if not np.array_equal(value_a, value_b):
                return False
        elif a_is_array or b_is_array:
            # Mixed array / non-array values never count as equal.
            return False
        elif not (value_a == value_b):
            return False

    return True
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
def merge_data(
    data_list: List[Dict[str, Union[npt.NDArray[np.generic], List]]],
) -> Dict[str, Union[npt.NDArray[np.generic], List]]:
    """
    Merges the data payloads of a list of Detections instances.

    Warning: Assumes that empty detections were filtered-out before passing data to
    this function.

    Args:
        data_list: The data payloads of the Detections instances. Each data payload
            is a dictionary with the same keys, and the values are either lists or
            npt.NDArray[np.generic].

    Returns:
        A single data payload containing the merged data, preserving the original data
        types (list or npt.NDArray[np.generic]). Keys appear in the order of the
        first payload. An empty `data_list` yields an empty dictionary.

    Raises:
        ValueError: If data values within a single object have different lengths, if
            dictionaries have different keys, if the values stored under one key
            are not all lists or all arrays, or if an array has an unexpected
            number of dimensions.
    """
    if not data_list:
        return {}

    first_payload = data_list[0]
    expected_keys = set(first_payload)
    if any(set(data) != expected_keys for data in data_list):
        raise ValueError("All data dictionaries must have the same keys to merge.")

    for data in data_list:
        if len({len(value) for value in data.values()}) > 1:
            raise ValueError(
                "All data values within a single object must have equal length."
            )

    merged_data: Dict[str, Union[npt.NDArray[np.generic], List]] = {}
    # Iterate the first payload (not a set of keys) so that the key order of
    # the result is deterministic.
    for key in first_payload:
        items = [data[key] for data in data_list]

        if all(isinstance(item, list) for item in items):
            merged_data[key] = list(chain.from_iterable(items))
        elif all(isinstance(item, np.ndarray) for item in items):
            ndim = items[0].ndim
            if ndim == 1:
                merged_data[key] = np.hstack(items)
            elif ndim > 1:
                # Stack along the first (per-detection) axis.
                merged_data[key] = np.vstack(items)
            else:
                raise ValueError(f"Unexpected array dimension for key '{key}'.")
        else:
            raise ValueError(
                f"Inconsistent data types for key '{key}'. Only np.ndarray and list "
                "types are allowed."
            )

    return merged_data
|
|
890
|
+
|
|
891
|
+
|
|
892
|
+
def merge_metadata(metadata_list: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Merge metadata from a list of metadata dictionaries.

    All dictionaries must share the same set of keys, and the values stored
    under each key must be identical across dictionaries for the merge to
    succeed.

    Warning: Assumes that empty detections were filtered-out before passing metadata to
    this function.

    Args:
        metadata_list (List[Dict[str, Any]]): A list of metadata dictionaries to merge.

    Returns:
        Dict[str, Any]: A single merged metadata dictionary. An empty
            `metadata_list` yields an empty dictionary.

    Raises:
        ValueError: If there are conflicting values for the same key or if
            dictionaries have different keys.
    """
    if not metadata_list:
        return {}

    all_keys_sets = [set(metadata.keys()) for metadata in metadata_list]
    if not all(keys_set == all_keys_sets[0] for keys_set in all_keys_sets):
        raise ValueError("All metadata dictionaries must have the same keys to merge.")

    merged_metadata: Dict[str, Any] = {}
    for metadata in metadata_list:
        for key, value in metadata.items():
            if key not in merged_metadata:
                merged_metadata[key] = value
                continue

            other_value = merged_metadata[key]
            value_is_array = isinstance(value, np.ndarray)
            other_is_array = isinstance(other_value, np.ndarray)

            if value_is_array and other_is_array:
                if not np.array_equal(other_value, value):
                    raise ValueError(
                        f"Conflicting metadata for key: '{key}': "
                        f"{type(value)}, {type(other_value)}."
                    )
            elif value_is_array or other_is_array:
                # An array and a non-array are always a conflict; comparing
                # them with `!=` would be element-wise, e.g. [] vs np.array([]).
                raise ValueError(
                    f"Conflicting metadata for key: '{key}': "
                    f"{type(value)}, {type(other_value)}."
                )
            elif other_value != value:
                raise ValueError(f"Conflicting metadata for key: '{key}'.")

    return merged_metadata
|
|
945
|
+
|
|
946
|
+
|
|
947
|
+
def get_data_item(
    data: Dict[str, Union[np.ndarray, List]],
    index: Union[int, slice, List[int], np.ndarray],
) -> Dict[str, Union[np.ndarray, List]]:
    """
    Retrieve a subset of the data dictionary based on the given index.

    NumPy array values are indexed directly with `index`. List values are
    subset manually so that the same index kinds work for them: a slice, a
    list of positions, a boolean mask array, an integer array, or a single
    integer (Python `int` or NumPy integer scalar), which yields a one-element
    list.

    Args:
        data: The data dictionary of the Detections object.
        index: The index or indices specifying the subset to retrieve.

    Returns:
        A subset of the data dictionary corresponding to the specified index.

    Raises:
        TypeError: If a value is neither a NumPy array nor a list, or if the
            index type is not supported for list values.
    """
    subset_data: Dict[str, Union[np.ndarray, List]] = {}
    for key, value in data.items():
        if isinstance(value, np.ndarray):
            subset_data[key] = value[index]
        elif isinstance(value, list):
            if isinstance(index, slice):
                subset_data[key] = value[index]
            elif isinstance(index, list):
                subset_data[key] = [value[i] for i in index]
            elif isinstance(index, np.ndarray):
                if index.dtype == bool:
                    # Boolean mask: keep the positions flagged True.
                    subset_data[key] = [
                        item for item, keep in zip(value, index) if keep
                    ] if len(index) <= len(value) else [
                        value[i] for i, keep in enumerate(index) if keep
                    ]
                else:
                    subset_data[key] = [value[i] for i in index]
            elif isinstance(index, (int, np.integer)):
                # NumPy integer scalars are accepted alongside Python ints.
                subset_data[key] = [value[index]]
            else:
                raise TypeError(f"Unsupported index type: {type(index)}")
        else:
            raise TypeError(f"Unsupported data type for key '{key}': {type(value)}")

    return subset_data
|
|
985
|
+
|
|
986
|
+
|
|
987
|
+
def contains_holes(mask: npt.NDArray[np.bool_]) -> bool:
    """
    Tell whether a binary mask has holes, i.e. background pixels fully
    enclosed by foreground pixels.

    Args:
        mask (npt.NDArray[np.bool_]): 2D binary mask where `True` indicates foreground
            object and `False` indicates background.

    Returns:
        True if holes are detected, False otherwise.

    Examples:
        ```python
        import numpy as np
        import eye as sv

        mask = np.array([
            [0, 0, 0, 0, 0],
            [0, 1, 1, 1, 0],
            [0, 1, 0, 1, 0],
            [0, 1, 1, 1, 0],
            [0, 0, 0, 0, 0]
        ]).astype(bool)

        sv.contains_holes(mask=mask)
        # True

        mask = np.array([
            [0, 0, 0, 0, 0],
            [0, 1, 1, 1, 0],
            [0, 1, 1, 1, 0],
            [0, 1, 1, 1, 0],
            [0, 0, 0, 0, 0]
        ]).astype(bool)

        sv.contains_holes(mask=mask)
        # False
        ```
    """
    contour_input = mask.astype(np.uint8)
    _, hierarchy = cv2.findContours(
        contour_input, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE
    )

    # No hierarchy means no contours were found, hence no holes.
    if hierarchy is None:
        return False

    # Column 3 of each hierarchy row is the parent contour index; any contour
    # that has a parent (value other than -1) is reported as a hole.
    parent_column = 3
    return any(entry[parent_column] != -1 for entry in hierarchy[0])
|
|
1038
|
+
|
|
1039
|
+
|
|
1040
|
+
def contains_multiple_segments(
    mask: npt.NDArray[np.bool_], connectivity: int = 4
) -> bool:
    """
    Tell whether a binary mask is made of several unconnected foreground
    segments.

    Args:
        mask (npt.NDArray[np.bool_]): 2D binary mask where `True` indicates foreground
            object and `False` indicates background.
        connectivity (int): Either 4 (default) or 8. With 4-way connectivity
            foreground pixels belong to the same segment when their edges
            touch; with 8-way connectivity touching corners also connect them.

    Returns:
        True when the mask contains multiple not connected components, False otherwise.

    Raises:
        ValueError: If `connectivity` is neither 4 nor 8.

    Examples:
        ```python
        import numpy as np
        import eye as sv

        mask = np.array([
            [0, 0, 0, 0, 0, 0],
            [0, 1, 1, 0, 1, 1],
            [0, 1, 1, 0, 1, 1],
            [0, 0, 0, 0, 0, 0],
            [0, 1, 1, 1, 0, 0],
            [0, 1, 1, 1, 0, 0]
        ]).astype(bool)

        sv.contains_multiple_segments(mask=mask, connectivity=4)
        # True

        mask = np.array([
            [0, 0, 0, 0, 0, 0],
            [0, 1, 1, 1, 1, 1],
            [0, 1, 1, 1, 1, 1],
            [0, 1, 1, 1, 1, 1],
            [0, 1, 1, 1, 1, 1],
            [0, 0, 0, 0, 0, 0]
        ]).astype(bool)

        sv.contains_multiple_segments(mask=mask, connectivity=4)
        # False
        ```
    """
    if connectivity not in (4, 8):
        raise ValueError(
            "Incorrect connectivity value. Possible connectivity values: 4 or 8."
        )

    binary_mask = mask.astype(np.uint8)
    label_buffer = np.zeros_like(binary_mask, dtype=np.int32)
    component_count = cv2.connectedComponents(
        binary_mask, label_buffer, connectivity=connectivity
    )[0]

    # More than two labels are required before the mask is reported as
    # multi-segment (the label count presumably includes the background).
    return component_count > 2
|
|
1103
|
+
|
|
1104
|
+
|
|
1105
|
+
def cross_product(anchors: np.ndarray, vector: Vector) -> np.ndarray:
    """
    Get array of cross products of each anchor with a vector.

    The anchors are first expressed relative to the start of `vector`; the
    result is the scalar 2D cross product of the vector's direction with each
    relative anchor position. It is computed explicitly because `np.cross` on
    2-element vectors is deprecated since NumPy 2.0.

    Args:
        anchors: Array of anchors of shape (number of anchors, detections, 2)
        vector: Vector to calculate cross product with

    Returns:
        Array of cross products of shape (number of anchors, detections)
    """
    direction_x = vector.end.x - vector.start.x
    direction_y = vector.end.y - vector.start.y

    # Anchor positions relative to the vector's start point.
    relative = np.asarray(anchors) - np.array([vector.start.x, vector.start.y])

    return np.asarray(
        direction_x * relative[..., 1] - direction_y * relative[..., 0]
    )
|
|
1120
|
+
|
|
1121
|
+
|
|
1122
|
+
def spread_out_boxes(
    xyxy: np.ndarray,
    max_iterations: int = 100,
) -> np.ndarray:
    """
    Push overlapping boxes away from each other.

    The boxes are padded by 1 px via `pad_boxes`, then shifted iteratively:
    on every pass each box is moved along the summed center-to-center
    direction away from the boxes it overlaps, with a step derived from its
    total IoU. Iteration stops when no pair overlaps or when
    `max_iterations` is reached, and the padding is reverted with
    `pad_boxes(..., px=-1)`.

    Args:
        xyxy: Numpy array of shape (N, 4) where N is the number of boxes.
        max_iterations: Maximum number of iterations to run the algorithm for.

    Returns:
        Numpy array of shape (N, 4) with the repositioned boxes. An empty
        input is returned unchanged.
    """
    if len(xyxy) == 0:
        return xyxy

    boxes = pad_boxes(xyxy, px=1)
    for _ in range(max_iterations):
        # (N, N) pairwise IoU; the diagonal is cleared so that a box is not
        # treated as overlapping itself.
        pairwise_iou = box_iou_batch(boxes, boxes)
        np.fill_diagonal(pairwise_iou, 0)
        if np.all(pairwise_iou == 0):
            break

        overlapping = pairwise_iou > 0

        # (N, 2) box centers.
        centers = (boxes[:, :2] + boxes[:, 2:]) / 2

        # (N, N, 2) center differences, zeroed for pairs that do not overlap.
        center_deltas = centers[:, np.newaxis, :] - centers[np.newaxis, :, :]
        center_deltas *= overlapping[:, :, np.newaxis]

        # (N, 2) unit direction per box; stays zero where the deltas cancel.
        summed_delta = np.sum(center_deltas, axis=1)
        magnitude = np.linalg.norm(summed_delta, axis=1, keepdims=True)
        directions = np.divide(
            summed_delta,
            magnitude,
            out=np.zeros_like(summed_delta),
            where=magnitude != 0,
        )

        # Step length grows with the total IoU of the box.
        shifts = np.sum(pairwise_iou, axis=1)[:, np.newaxis] * directions
        shifts *= 10

        # Enforce a minimum step of 2 in either direction so that small
        # non-zero shifts are not truncated to 0 by the int cast below.
        weak_positive = (shifts > 0) & (shifts < 2)
        weak_negative = (shifts < 0) & (shifts > -2)
        shifts[weak_positive] = 2
        shifts[weak_negative] = -2

        shifts = shifts.astype(int)

        # Move both corners by the same amount so the box size is preserved.
        for corner_columns in ([0, 1], [2, 3]):
            boxes[:, corner_columns] += shifts

    return pad_boxes(boxes, px=-1)
|