eye_cv-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eye/__init__.py +115 -0
- eye/__init___supervision_original.py +120 -0
- eye/annotators/__init__.py +0 -0
- eye/annotators/base.py +22 -0
- eye/annotators/core.py +2699 -0
- eye/annotators/line.py +107 -0
- eye/annotators/modern.py +529 -0
- eye/annotators/trace.py +142 -0
- eye/annotators/utils.py +177 -0
- eye/assets/__init__.py +2 -0
- eye/assets/downloader.py +95 -0
- eye/assets/list.py +83 -0
- eye/classification/__init__.py +0 -0
- eye/classification/core.py +188 -0
- eye/config.py +2 -0
- eye/core/__init__.py +0 -0
- eye/core/trackers/__init__.py +1 -0
- eye/core/trackers/botsort_tracker.py +336 -0
- eye/core/trackers/bytetrack_tracker.py +284 -0
- eye/core/trackers/sort_tracker.py +200 -0
- eye/core/tracking.py +146 -0
- eye/dataset/__init__.py +0 -0
- eye/dataset/core.py +919 -0
- eye/dataset/formats/__init__.py +0 -0
- eye/dataset/formats/coco.py +258 -0
- eye/dataset/formats/pascal_voc.py +279 -0
- eye/dataset/formats/yolo.py +272 -0
- eye/dataset/utils.py +259 -0
- eye/detection/__init__.py +0 -0
- eye/detection/auto_convert.py +155 -0
- eye/detection/core.py +1529 -0
- eye/detection/detections_enhanced.py +392 -0
- eye/detection/line_zone.py +859 -0
- eye/detection/lmm.py +184 -0
- eye/detection/overlap_filter.py +270 -0
- eye/detection/tools/__init__.py +0 -0
- eye/detection/tools/csv_sink.py +181 -0
- eye/detection/tools/inference_slicer.py +288 -0
- eye/detection/tools/json_sink.py +142 -0
- eye/detection/tools/polygon_zone.py +202 -0
- eye/detection/tools/smoother.py +123 -0
- eye/detection/tools/smoothing.py +179 -0
- eye/detection/tools/smoothing_config.py +202 -0
- eye/detection/tools/transformers.py +247 -0
- eye/detection/utils.py +1175 -0
- eye/draw/__init__.py +0 -0
- eye/draw/color.py +154 -0
- eye/draw/utils.py +374 -0
- eye/filters.py +112 -0
- eye/geometry/__init__.py +0 -0
- eye/geometry/core.py +128 -0
- eye/geometry/utils.py +47 -0
- eye/keypoint/__init__.py +0 -0
- eye/keypoint/annotators.py +442 -0
- eye/keypoint/core.py +687 -0
- eye/keypoint/skeletons.py +2647 -0
- eye/metrics/__init__.py +21 -0
- eye/metrics/core.py +72 -0
- eye/metrics/detection.py +843 -0
- eye/metrics/f1_score.py +648 -0
- eye/metrics/mean_average_precision.py +628 -0
- eye/metrics/mean_average_recall.py +697 -0
- eye/metrics/precision.py +653 -0
- eye/metrics/recall.py +652 -0
- eye/metrics/utils/__init__.py +0 -0
- eye/metrics/utils/object_size.py +158 -0
- eye/metrics/utils/utils.py +9 -0
- eye/py.typed +0 -0
- eye/quick.py +104 -0
- eye/tracker/__init__.py +0 -0
- eye/tracker/byte_tracker/__init__.py +0 -0
- eye/tracker/byte_tracker/core.py +386 -0
- eye/tracker/byte_tracker/kalman_filter.py +205 -0
- eye/tracker/byte_tracker/matching.py +69 -0
- eye/tracker/byte_tracker/single_object_track.py +178 -0
- eye/tracker/byte_tracker/utils.py +18 -0
- eye/utils/__init__.py +0 -0
- eye/utils/conversion.py +132 -0
- eye/utils/file.py +159 -0
- eye/utils/image.py +794 -0
- eye/utils/internal.py +200 -0
- eye/utils/iterables.py +84 -0
- eye/utils/notebook.py +114 -0
- eye/utils/video.py +307 -0
- eye/utils_eye/__init__.py +1 -0
- eye/utils_eye/geometry.py +71 -0
- eye/utils_eye/nms.py +55 -0
- eye/validators/__init__.py +140 -0
- eye/web.py +271 -0
- eye_cv-1.0.0.dist-info/METADATA +319 -0
- eye_cv-1.0.0.dist-info/RECORD +94 -0
- eye_cv-1.0.0.dist-info/WHEEL +5 -0
- eye_cv-1.0.0.dist-info/licenses/LICENSE +21 -0
- eye_cv-1.0.0.dist-info/top_level.txt +1 -0
eye/detection/core.py
ADDED
@@ -0,0 +1,1529 @@
from __future__ import annotations

from contextlib import suppress
from dataclasses import dataclass, field
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union

import numpy as np

from eye.config import (
    CLASS_NAME_DATA_FIELD,
    ORIENTED_BOX_COORDINATES,
)
from eye.detection.lmm import (
    LMM,
    from_florence_2,
    from_paligemma,
    validate_lmm_parameters,
)
from eye.detection.overlap_filter import (
    box_non_max_merge,
    box_non_max_suppression,
    mask_non_max_suppression,
)
from eye.detection.tools.transformers import (
    process_transformers_detection_result,
    process_transformers_v4_segmentation_result,
    process_transformers_v5_segmentation_result,
)
from eye.detection.utils import (
    box_iou_batch,
    calculate_masks_centroids,
    extract_ultralytics_masks,
    get_data_item,
    is_data_equal,
    is_metadata_equal,
    mask_to_xyxy,
    merge_data,
    merge_metadata,
    process_roboflow_result,
    xywh_to_xyxy,
)
from eye.geometry.core import Position
from eye.utils.internal import get_instance_variables
from eye.validators import validate_detections_fields


@dataclass
class Detections:
    """
    The `sv.Detections` class in the eye library standardizes results from
    various object detection and segmentation models into a consistent format. This
    class simplifies data manipulation and filtering, providing a uniform API for
    integration with eye [trackers](/trackers/), [annotators](/latest/detection/annotators/), and [tools](/detection/tools/line_zone/).

    === "Inference"

        Use [`sv.Detections.from_inference`](/detection/core/#eye.detection.core.Detections.from_inference)
        method, which accepts model results from both detection and segmentation models.

        ```python
        import cv2
        import eye as sv
        from inference import get_model

        model = get_model(model_id="yolov8n-640")
        image = cv2.imread(<SOURCE_IMAGE_PATH>)
        results = model.infer(image)[0]
        detections = sv.Detections.from_inference(results)
        ```

    === "Ultralytics"

        Use [`sv.Detections.from_ultralytics`](/detection/core/#eye.detection.core.Detections.from_ultralytics)
        method, which accepts model results from both detection and segmentation models.

        ```python
        import cv2
        import eye as sv
        from ultralytics import YOLO

        model = YOLO("yolov8n.pt")
        image = cv2.imread(<SOURCE_IMAGE_PATH>)
        results = model(image)[0]
        detections = sv.Detections.from_ultralytics(results)
        ```

    === "Transformers"

        Use [`sv.Detections.from_transformers`](/detection/core/#eye.detection.core.Detections.from_transformers)
        method, which accepts model results from both detection and segmentation models.

        ```python
        import torch
        import eye as sv
        from PIL import Image
        from transformers import DetrImageProcessor, DetrForObjectDetection

        processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
        model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

        image = Image.open(<SOURCE_IMAGE_PATH>)
        inputs = processor(images=image, return_tensors="pt")

        with torch.no_grad():
            outputs = model(**inputs)

        width, height = image.size
        target_size = torch.tensor([[height, width]])
        results = processor.post_process_object_detection(
            outputs=outputs, target_sizes=target_size)[0]
        detections = sv.Detections.from_transformers(
            transformers_results=results,
            id2label=model.config.id2label)
        ```

    Attributes:
        xyxy (np.ndarray): An array of shape `(n, 4)` containing
            the bounding boxes coordinates in format `[x1, y1, x2, y2]`
        mask (Optional[np.ndarray]): An array of shape
            `(n, H, W)` containing the segmentation masks.
        confidence (Optional[np.ndarray]): An array of shape
            `(n,)` containing the confidence scores of the detections.
        class_id (Optional[np.ndarray]): An array of shape
            `(n,)` containing the class ids of the detections.
        tracker_id (Optional[np.ndarray]): An array of shape
            `(n,)` containing the tracker ids of the detections.
        data (Dict[str, Union[np.ndarray, List]]): A dictionary containing additional
            data where each key is a string representing the data type, and the value
            is either a NumPy array or a list of corresponding data.
        metadata (Dict[str, Any]): A dictionary containing collection-level metadata
            that applies to the entire set of detections. This may include information
            such as the video name, camera parameters, timestamp, or other global
            metadata.
    """  # noqa: E501 // docs

    xyxy: np.ndarray
    mask: Optional[np.ndarray] = None
    confidence: Optional[np.ndarray] = None
    class_id: Optional[np.ndarray] = None
    tracker_id: Optional[np.ndarray] = None
    data: Dict[str, Union[np.ndarray, List]] = field(default_factory=dict)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        validate_detections_fields(
            xyxy=self.xyxy,
            mask=self.mask,
            confidence=self.confidence,
            class_id=self.class_id,
            tracker_id=self.tracker_id,
            data=self.data,
        )

    def __len__(self):
        """
        Returns the number of detections in the Detections object.
        """
        return len(self.xyxy)

    def __iter__(
        self,
    ) -> Iterator[
        Tuple[
            np.ndarray,
            Optional[np.ndarray],
            Optional[float],
            Optional[int],
            Optional[int],
            Dict[str, Union[np.ndarray, List]],
        ]
    ]:
        """
        Iterates over the Detections object and yields a tuple of
        `(xyxy, mask, confidence, class_id, tracker_id, data)` for each detection.
        """
        for i in range(len(self.xyxy)):
            yield (
                self.xyxy[i],
                self.mask[i] if self.mask is not None else None,
                self.confidence[i] if self.confidence is not None else None,
                self.class_id[i] if self.class_id is not None else None,
                self.tracker_id[i] if self.tracker_id is not None else None,
                get_data_item(self.data, i),
            )
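
    # Usage sketch (illustrative, not part of the original file; assumes
    # `detections` was produced by one of the `from_*` constructors below):
    #
    #     for xyxy, mask, confidence, class_id, tracker_id, data in detections:
    #         print(xyxy, confidence, class_id)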

    def __eq__(self, other: Detections):
        return all(
            [
                np.array_equal(self.xyxy, other.xyxy),
                np.array_equal(self.mask, other.mask),
                np.array_equal(self.class_id, other.class_id),
                np.array_equal(self.confidence, other.confidence),
                np.array_equal(self.tracker_id, other.tracker_id),
                is_data_equal(self.data, other.data),
                is_metadata_equal(self.metadata, other.metadata),
            ]
        )

    @classmethod
    def from_yolov5(cls, yolov5_results) -> Detections:
        """
        Creates a Detections instance from a
        [YOLOv5](https://github.com/ultralytics/yolov5) inference result.

        Args:
            yolov5_results (yolov5.models.common.Detections):
                The output Detections instance from YOLOv5

        Returns:
            Detections: A new Detections object.

        Example:
            ```python
            import cv2
            import torch
            import eye as sv

            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
            result = model(image)
            detections = sv.Detections.from_yolov5(result)
            ```
        """
        yolov5_detections_predictions = yolov5_results.pred[0].cpu().numpy()

        return cls(
            xyxy=yolov5_detections_predictions[:, :4],
            confidence=yolov5_detections_predictions[:, 4],
            class_id=yolov5_detections_predictions[:, 5].astype(int),
        )

    @classmethod
    def from_ultralytics(cls, ultralytics_results) -> Detections:
        """
        Creates a `sv.Detections` instance from a
        [YOLOv8](https://github.com/ultralytics/ultralytics) inference result.

        !!! Note

            `from_ultralytics` is compatible with
            [detection](https://docs.ultralytics.com/tasks/detect/),
            [segmentation](https://docs.ultralytics.com/tasks/segment/), and
            [OBB](https://docs.ultralytics.com/tasks/obb/) models.

        Args:
            ultralytics_results (ultralytics.yolo.engine.results.Results):
                The output Results instance from Ultralytics

        Returns:
            Detections: A new Detections object.

        Example:
            ```python
            import cv2
            import eye as sv
            from ultralytics import YOLO

            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            model = YOLO('yolov8s.pt')
            results = model(image)[0]
            detections = sv.Detections.from_ultralytics(results)
            ```
        """

        if hasattr(ultralytics_results, "obb") and ultralytics_results.obb is not None:
            class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int)
            class_names = np.array([ultralytics_results.names[i] for i in class_id])
            oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy()
            return cls(
                xyxy=ultralytics_results.obb.xyxy.cpu().numpy(),
                confidence=ultralytics_results.obb.conf.cpu().numpy(),
                class_id=class_id,
                tracker_id=ultralytics_results.obb.id.int().cpu().numpy()
                if ultralytics_results.obb.id is not None
                else None,
                data={
                    ORIENTED_BOX_COORDINATES: oriented_box_coordinates,
                    CLASS_NAME_DATA_FIELD: class_names,
                },
            )

        if hasattr(ultralytics_results, "boxes") and ultralytics_results.boxes is None:
            masks = extract_ultralytics_masks(ultralytics_results)
            return cls(
                xyxy=mask_to_xyxy(masks),
                mask=masks,
                class_id=np.arange(len(ultralytics_results)),
            )

        class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)
        class_names = np.array([ultralytics_results.names[i] for i in class_id])
        return cls(
            xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(),
            confidence=ultralytics_results.boxes.conf.cpu().numpy(),
            class_id=class_id,
            mask=extract_ultralytics_masks(ultralytics_results),
            tracker_id=ultralytics_results.boxes.id.int().cpu().numpy()
            if ultralytics_results.boxes.id is not None
            else None,
            data={CLASS_NAME_DATA_FIELD: class_names},
        )
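
    # Usage sketch (illustrative, based on the CLASS_NAME_DATA_FIELD population
    # above; assumes the constant resolves to the "class_name" key that the
    # `from_easyocr` docstring below uses):
    #
    #     detections = Detections.from_ultralytics(results)
    #     names = detections["class_name"]          # per-detection class names
    #     confident = detections[detections.confidence > 0.5]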

    @classmethod
    def from_yolo_nas(cls, yolo_nas_results) -> Detections:
        """
        Creates a Detections instance from a
        [YOLO-NAS](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS.md)
        inference result.

        Args:
            yolo_nas_results (ImageDetectionPrediction):
                The output Results instance from YOLO-NAS.
                ImageDetectionPrediction comes from
                'super_gradients.training.models.prediction_results'

        Returns:
            Detections: A new Detections object.

        Example:
            ```python
            import cv2
            from super_gradients.training import models
            import eye as sv

            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            model = models.get('yolo_nas_l', pretrained_weights="coco")

            result = list(model.predict(image, conf=0.35))[0]
            detections = sv.Detections.from_yolo_nas(result)
            ```
        """
        if np.asarray(yolo_nas_results.prediction.bboxes_xyxy).shape[0] == 0:
            return cls.empty()

        return cls(
            xyxy=yolo_nas_results.prediction.bboxes_xyxy,
            confidence=yolo_nas_results.prediction.confidence,
            class_id=yolo_nas_results.prediction.labels.astype(int),
        )

    @classmethod
    def from_tensorflow(
        cls, tensorflow_results: dict, resolution_wh: tuple
    ) -> Detections:
        """
        Creates a Detections instance from a
        [Tensorflow Hub](https://www.tensorflow.org/hub/tutorials/tf2_object_detection)
        inference result.

        Args:
            tensorflow_results (dict):
                The output results from Tensorflow Hub.
            resolution_wh (tuple):
                The `(width, height)` of the source image, used to scale the
                normalized box coordinates back to pixels.

        Returns:
            Detections: A new Detections object.

        Example:
            ```python
            import tensorflow as tf
            import tensorflow_hub as hub
            import numpy as np
            import cv2
            import eye as sv

            module_handle = "https://tfhub.dev/tensorflow/centernet/hourglass_512x512_kpts/1"
            model = hub.load(module_handle)
            img = np.array(cv2.imread(<SOURCE_IMAGE_PATH>))
            result = model(img)
            height, width = img.shape[:2]
            detections = sv.Detections.from_tensorflow(
                result, resolution_wh=(width, height))
            ```
        """

        boxes = tensorflow_results["detection_boxes"][0].numpy()
        boxes[:, [0, 2]] *= resolution_wh[0]
        boxes[:, [1, 3]] *= resolution_wh[1]
        boxes = boxes[:, [1, 0, 3, 2]]
        return cls(
            xyxy=boxes,
            confidence=tensorflow_results["detection_scores"][0].numpy(),
            class_id=tensorflow_results["detection_classes"][0].numpy().astype(int),
        )

    @classmethod
    def from_deepsparse(cls, deepsparse_results) -> Detections:
        """
        Creates a Detections instance from a
        [DeepSparse](https://github.com/neuralmagic/deepsparse)
        inference result.

        Args:
            deepsparse_results (deepsparse.yolo.schemas.YOLOOutput):
                The output Results instance from DeepSparse.

        Returns:
            Detections: A new Detections object.

        Example:
            ```python
            import eye as sv
            from deepsparse import Pipeline

            yolo_pipeline = Pipeline.create(
                task="yolo",
                model_path = "zoo:cv/detection/yolov5-l/pytorch/ultralytics/coco/pruned80_quant-none"
            )
            result = yolo_pipeline(<SOURCE_IMAGE_PATH>)
            detections = sv.Detections.from_deepsparse(result)
            ```
        """  # noqa: E501 // docs

        if np.asarray(deepsparse_results.boxes[0]).shape[0] == 0:
            return cls.empty()

        return cls(
            xyxy=np.array(deepsparse_results.boxes[0]),
            confidence=np.array(deepsparse_results.scores[0]),
            class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int),
        )

    @classmethod
    def from_mmdetection(cls, mmdet_results) -> Detections:
        """
        Creates a Detections instance from a
        [mmdetection](https://github.com/open-mmlab/mmdetection) and
        [mmyolo](https://github.com/open-mmlab/mmyolo) inference result.

        Args:
            mmdet_results (mmdet.structures.DetDataSample):
                The output Results instance from MMDetection.

        Returns:
            Detections: A new Detections object.

        Example:
            ```python
            import cv2
            import eye as sv
            from mmdet.apis import init_detector, inference_detector

            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            model = init_detector(<CONFIG_PATH>, <WEIGHTS_PATH>, device=<DEVICE>)

            result = inference_detector(model, image)
            detections = sv.Detections.from_mmdetection(result)
            ```
        """

        return cls(
            xyxy=mmdet_results.pred_instances.bboxes.cpu().numpy(),
            confidence=mmdet_results.pred_instances.scores.cpu().numpy(),
            class_id=mmdet_results.pred_instances.labels.cpu().numpy().astype(int),
            mask=mmdet_results.pred_instances.masks.cpu().numpy()
            if "masks" in mmdet_results.pred_instances
            else None,
        )

    @classmethod
    def from_transformers(
        cls, transformers_results: dict, id2label: Optional[Dict[int, str]] = None
    ) -> Detections:
        """
        Creates a Detections instance from object detection or segmentation
        (panoptic, semantic, and instance)
        [Transformers](https://github.com/huggingface/transformers) inference results.

        Args:
            transformers_results (Union[dict, torch.Tensor]): Inference results from
                your Transformers model. This can be either a dictionary containing
                valuable outputs like `scores`, `labels`, `boxes`, `masks`,
                `segments_info`, and `segmentation`, or a `torch.Tensor` holding a
                segmentation map where values represent class IDs.
            id2label (Optional[Dict[int, str]]): A dictionary mapping class IDs to
                labels, typically part of the `transformers` model configuration. If
                provided, the resulting dictionary will include class names.

        Returns:
            Detections: A new Detections object.

        Example:
            ```python
            import torch
            import eye as sv
            from PIL import Image
            from transformers import DetrImageProcessor, DetrForObjectDetection

            processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
            model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

            image = Image.open(<SOURCE_IMAGE_PATH>)
            inputs = processor(images=image, return_tensors="pt")

            with torch.no_grad():
                outputs = model(**inputs)

            width, height = image.size
            target_size = torch.tensor([[height, width]])
            results = processor.post_process_object_detection(
                outputs=outputs, target_sizes=target_size)[0]

            detections = sv.Detections.from_transformers(
                transformers_results=results,
                id2label=model.config.id2label
            )
            ```
        """

        if (
            transformers_results.__class__.__name__ == "Tensor"
            or "segmentation" in transformers_results
        ):
            return cls(
                **process_transformers_v5_segmentation_result(
                    transformers_results, id2label
                )
            )

        if "masks" in transformers_results or "png_string" in transformers_results:
            return cls(
                **process_transformers_v4_segmentation_result(
                    transformers_results, id2label
                )
            )

        if "boxes" in transformers_results:
            return cls(
                **process_transformers_detection_result(transformers_results, id2label)
            )

        raise ValueError(
            "The provided Transformers results do not contain any valid fields."
            " Expected fields are 'boxes', 'masks', 'segments_info' or"
            " 'segmentation'."
        )

    @classmethod
    def from_detectron2(cls, detectron2_results: Any) -> Detections:
        """
        Create a Detections object from the
        [Detectron2](https://github.com/facebookresearch/detectron2) inference result.

        Args:
            detectron2_results (Any): The output of a
                Detectron2 model containing instances with prediction data.

        Returns:
            (Detections): A Detections object containing the bounding boxes,
                class IDs, and confidences of the predictions.

        Example:
            ```python
            import cv2
            import eye as sv
            from detectron2.engine import DefaultPredictor
            from detectron2.config import get_cfg


            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            cfg = get_cfg()
            cfg.merge_from_file(<CONFIG_PATH>)
            cfg.MODEL.WEIGHTS = <WEIGHTS_PATH>
            predictor = DefaultPredictor(cfg)

            result = predictor(image)
            detections = sv.Detections.from_detectron2(result)
            ```
        """

        return cls(
            xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu().numpy(),
            confidence=detectron2_results["instances"].scores.cpu().numpy(),
            mask=detectron2_results["instances"].pred_masks.cpu().numpy()
            if hasattr(detectron2_results["instances"], "pred_masks")
            else None,
            class_id=detectron2_results["instances"]
            .pred_classes.cpu()
            .numpy()
            .astype(int),
        )

    @classmethod
    def from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections:
        """
        Create a `sv.Detections` object from the [Roboflow](https://roboflow.com/)
        API inference result or the [Inference](https://inference.roboflow.com/)
        package results. This method extracts bounding boxes, class IDs,
        confidences, and class names from the Roboflow API result and encapsulates
        them into a Detections object.

        Args:
            roboflow_result (dict, any): The result from the
                Roboflow API or Inference package containing predictions.

        Returns:
            (Detections): A Detections object containing the bounding boxes, class IDs,
                and confidences of the predictions.

        Example:
            ```python
            import cv2
            import eye as sv
            from inference import get_model

            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            model = get_model(model_id="yolov8s-640")

            result = model.infer(image)[0]
            detections = sv.Detections.from_inference(result)
            ```
        """
        with suppress(AttributeError):
            roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True)
        xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result(
            roboflow_result=roboflow_result
        )

        if np.asarray(xyxy).shape[0] == 0:
            empty_detection = cls.empty()
            empty_detection.data = {CLASS_NAME_DATA_FIELD: np.empty(0)}
            return empty_detection

        return cls(
            xyxy=xyxy,
            confidence=confidence,
            class_id=class_id,
            mask=masks,
            tracker_id=trackers,
            data=data,
        )

    @classmethod
    def from_sam(cls, sam_result: List[dict]) -> Detections:
        """
        Creates a Detections instance from
        [Segment Anything Model](https://github.com/facebookresearch/segment-anything)
        inference result.

        Args:
            sam_result (List[dict]): The output Results instance from SAM

        Returns:
            Detections: A new Detections object.

        Example:
            ```python
            import eye as sv
            from segment_anything import (
                sam_model_registry,
                SamAutomaticMaskGenerator
            )

            sam_model_reg = sam_model_registry[MODEL_TYPE]
            sam = sam_model_reg(checkpoint=CHECKPOINT_PATH).to(device=DEVICE)
            mask_generator = SamAutomaticMaskGenerator(sam)
            sam_result = mask_generator.generate(IMAGE)
            detections = sv.Detections.from_sam(sam_result=sam_result)
            ```
        """

        sorted_generated_masks = sorted(
            sam_result, key=lambda x: x["area"], reverse=True
        )

        xywh = np.array([mask["bbox"] for mask in sorted_generated_masks])
        mask = np.array([mask["segmentation"] for mask in sorted_generated_masks])

        if np.asarray(xywh).shape[0] == 0:
            return cls.empty()

        xyxy = xywh_to_xyxy(xywh=xywh)
        return cls(xyxy=xyxy, mask=mask)

    @classmethod
    def from_azure_analyze_image(
        cls, azure_result: dict, class_map: Optional[Dict[int, str]] = None
    ) -> Detections:
        """
        Creates a Detections instance from [Azure Image Analysis 4.0](
        https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/
        concept-object-detection-40).

        Args:
            azure_result (dict): The result from Azure Image Analysis. It should
                contain detected objects and their bounding box coordinates.
            class_map (Optional[Dict[int, str]]): A mapping of class IDs (int) to class
                names (str). If None, a new mapping is created dynamically.

        Returns:
            Detections: A new Detections object.

        Example:
            ```python
            import requests
            import eye as sv

            image = open(input, "rb").read()

            endpoint = "https://.cognitiveservices.azure.com/"
            subscription_key = ""

            headers = {
                "Content-Type": "application/octet-stream",
                "Ocp-Apim-Subscription-Key": subscription_key
            }

            response = requests.post(endpoint,
                headers=headers,
                data=image
            ).json()

            detections = sv.Detections.from_azure_analyze_image(response)
            ```
        """
        if "error" in azure_result:
            raise ValueError(
                f'Azure API returned an error {azure_result["error"]["message"]}'
            )

        xyxy, confidences, class_ids = [], [], []

        is_dynamic_mapping = class_map is None
        if is_dynamic_mapping:
            class_map = {}

        class_map = {value: key for key, value in class_map.items()}

        for detection in azure_result["objectsResult"]["values"]:
            bbox = detection["boundingBox"]

            tags = detection["tags"]

            x0 = bbox["x"]
            y0 = bbox["y"]
            x1 = x0 + bbox["w"]
            y1 = y0 + bbox["h"]

            for tag in tags:
                confidence = tag["confidence"]
                class_name = tag["name"]
                class_id = class_map.get(class_name, None)

                if is_dynamic_mapping and class_id is None:
                    class_id = len(class_map)
                    class_map[class_name] = class_id

                if class_id is not None:
                    xyxy.append([x0, y0, x1, y1])
                    confidences.append(confidence)
                    class_ids.append(class_id)

        if len(xyxy) == 0:
            return Detections.empty()

        return cls(
            xyxy=np.array(xyxy),
            class_id=np.array(class_ids),
            confidence=np.array(confidences),
        )

    @classmethod
    def from_paddledet(cls, paddledet_result) -> Detections:
        """
        Creates a Detections instance from
        [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)
        inference result.

        Args:
            paddledet_result (List[dict]): The output Results instance from PaddleDet

        Returns:
            Detections: A new Detections object.

        Example:
            ```python
            import eye as sv
            import paddle
            from ppdet.engine import Trainer
            from ppdet.core.workspace import load_config

            weights = ()
            config = ()

            cfg = load_config(config)
            trainer = Trainer(cfg, mode='test')
            trainer.load_weights(weights)

            paddledet_result = trainer.predict([images])[0]

            detections = sv.Detections.from_paddledet(paddledet_result)
            ```
        """

        if np.asarray(paddledet_result["bbox"][:, 2:6]).shape[0] == 0:
            return cls.empty()

        return cls(
            xyxy=paddledet_result["bbox"][:, 2:6],
            confidence=paddledet_result["bbox"][:, 1],
            class_id=paddledet_result["bbox"][:, 0].astype(int),
        )

    @classmethod
    def from_lmm(
        cls, lmm: Union[LMM, str], result: Union[str, dict], **kwargs: Any
    ) -> Detections:
        """
        Creates a Detections object from the given result string based on the specified
        Large Multimodal Model (LMM).

        Args:
            lmm (Union[LMM, str]): The type of LMM (Large Multimodal Model) to use.
            result (Union[str, dict]): The result string or dictionary containing
                the detection data.
            **kwargs (Any): Additional keyword arguments required by the specified LMM.

        Returns:
            Detections: A new Detections object.

        Raises:
            ValueError: If the LMM is invalid, required arguments are missing, or
                disallowed arguments are provided.
            ValueError: If the specified LMM is not supported.

        Examples:
            ```python
            import eye as sv

            paligemma_result = "<loc0256><loc0256><loc0768><loc0768> cat"
            detections = sv.Detections.from_lmm(
                sv.LMM.PALIGEMMA,
                paligemma_result,
                resolution_wh=(1000, 1000),
                classes=['cat', 'dog']
            )
            detections.xyxy
            # array([[250., 250., 750., 750.]])

            detections.class_id
            # array([0])
            ```
        """
        lmm = validate_lmm_parameters(lmm, result, kwargs)

        if lmm == LMM.PALIGEMMA:
            assert isinstance(result, str)
            xyxy, class_id, class_name = from_paligemma(result, **kwargs)
            data = {CLASS_NAME_DATA_FIELD: class_name}
            return cls(xyxy=xyxy, class_id=class_id, data=data)

        if lmm == LMM.FLORENCE_2:
            assert isinstance(result, dict)
            xyxy, labels, mask, xyxyxyxy = from_florence_2(result, **kwargs)
            if len(xyxy) == 0:
                return cls.empty()

            data = {}
            if labels is not None:
                data[CLASS_NAME_DATA_FIELD] = labels
            if xyxyxyxy is not None:
                data[ORIENTED_BOX_COORDINATES] = xyxyxyxy

            return cls(xyxy=xyxy, mask=mask, data=data)

        raise ValueError(f"Unsupported LMM: {lmm}")

    @classmethod
    def from_easyocr(cls, easyocr_results: list) -> Detections:
        """
        Create a Detections object from the
        [EasyOCR](https://github.com/JaidedAI/EasyOCR) result.

        Results are placed in the `data` field with the key `"class_name"`.

        Args:
            easyocr_results (List): The output Results instance from EasyOCR

        Returns:
            Detections: A new Detections object.

        Example:
            ```python
            import eye as sv
            import easyocr

            reader = easyocr.Reader(['en'])
            results = reader.readtext(<SOURCE_IMAGE_PATH>)
            detections = sv.Detections.from_easyocr(results)
            detected_text = detections["class_name"]
            ```
        """
        if len(easyocr_results) == 0:
            return cls.empty()

        bbox = np.array([result[0] for result in easyocr_results])
        xyxy = np.hstack((np.min(bbox, axis=1), np.max(bbox, axis=1)))
        confidence = np.array(
            [
                result[2] if len(result) > 2 and result[2] else 0
                for result in easyocr_results
            ]
        )
        ocr_text = np.array([result[1] for result in easyocr_results])

        return cls(
            xyxy=xyxy.astype(np.float32),
            confidence=confidence.astype(np.float32),
            data={
                CLASS_NAME_DATA_FIELD: ocr_text,
            },
        )

    @classmethod
    def from_ncnn(cls, ncnn_results) -> Detections:
        """
        Creates a Detections instance from the
        [ncnn](https://github.com/Tencent/ncnn) inference result.
        Supports object detection models.

        Arguments:
            ncnn_results (dict): The output Results instance from ncnn.

        Returns:
            Detections: A new Detections object.

        Example:
            ```python
            import cv2
            from ncnn.model_zoo import get_model
            import eye as sv

            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            model = get_model(
                "yolov8s",
                target_size=640,
                prob_threshold=0.5,
                nms_threshold=0.45,
                num_threads=4,
                use_gpu=True,
            )
            result = model(image)
            detections = sv.Detections.from_ncnn(result)
            ```
        """

        xywh, confidences, class_ids = [], [], []

        if len(ncnn_results) == 0:
            return cls.empty()

        for ncnn_result in ncnn_results:
            rect = ncnn_result.rect
            xywh.append(
                [
                    rect.x.astype(np.float32),
                    rect.y.astype(np.float32),
                    rect.w.astype(np.float32),
                    rect.h.astype(np.float32),
                ]
            )

            confidences.append(ncnn_result.prob)
            class_ids.append(ncnn_result.label)

        return cls(
            xyxy=xywh_to_xyxy(np.array(xywh, dtype=np.float32)),
            confidence=np.array(confidences, dtype=np.float32),
            class_id=np.array(class_ids, dtype=int),
        )

    @classmethod
    def empty(cls) -> Detections:
        """
        Create an empty Detections object with no bounding boxes,
        confidences, or class IDs.

        Returns:
            (Detections): An empty Detections object.

        Example:
            ```python
            from eye import Detections

            empty_detections = Detections.empty()
            ```
        """
        return cls(
            xyxy=np.empty((0, 4), dtype=np.float32),
            confidence=np.array([], dtype=np.float32),
            class_id=np.array([], dtype=int),
        )

    def is_empty(self) -> bool:
        """
        Returns `True` if the `Detections` object is considered empty.
        """
        empty_detections = Detections.empty()
        empty_detections.data = self.data
        empty_detections.metadata = self.metadata
        return self == empty_detections
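
    # Usage sketch (illustrative; guards downstream logic against empty results):
    #
    #     detections = Detections.from_ultralytics(results)
    #     if detections.is_empty():
    #         return  # nothing to annotate or track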

    @classmethod
    def merge(cls, detections_list: List[Detections]) -> Detections:
        """
        Merge a list of Detections objects into a single Detections object.

        This method takes a list of Detections objects and combines their
        respective fields (`xyxy`, `mask`, `confidence`, `class_id`, and `tracker_id`)
        into a single Detections object.

        For example, if merging Detections with 3 and 4 detected objects, this method
        will return a Detections with 7 objects (7 entries in `xyxy`, `mask`, etc).

        !!! Note

            When merging, empty `Detections` objects are ignored.

        Args:
            detections_list (List[Detections]): A list of Detections objects to merge.

        Returns:
            (Detections): A single Detections object containing
                the merged data from the input list.

        Example:
            ```python
            import numpy as np
            import eye as sv

            detections_1 = sv.Detections(
                xyxy=np.array([[15, 15, 100, 100], [200, 200, 300, 300]]),
                class_id=np.array([1, 2]),
                data={'feature_vector': np.array([0.1, 0.2])}
            )

            detections_2 = sv.Detections(
                xyxy=np.array([[30, 30, 120, 120]]),
                class_id=np.array([1]),
                data={'feature_vector': [np.array([0.3])]}
            )

            merged_detections = Detections.merge([detections_1, detections_2])

            merged_detections.xyxy
            array([[ 15,  15, 100, 100],
                   [200, 200, 300, 300],
                   [ 30,  30, 120, 120]])

            merged_detections.class_id
            array([1, 2, 1])

            merged_detections.data['feature_vector']
            array([0.1, 0.2, 0.3])
            ```
        """
        detections_list = [
            detections for detections in detections_list if not detections.is_empty()
        ]

        if len(detections_list) == 0:
            return Detections.empty()

        for detections in detections_list:
            validate_detections_fields(
                xyxy=detections.xyxy,
                mask=detections.mask,
                confidence=detections.confidence,
                class_id=detections.class_id,
                tracker_id=detections.tracker_id,
                data=detections.data,
            )

        xyxy = np.vstack([d.xyxy for d in detections_list])

        def stack_or_none(name: str):
            if all(d.__getattribute__(name) is None for d in detections_list):
                return None
            if any(d.__getattribute__(name) is None for d in detections_list):
                raise ValueError(f"All or none of the '{name}' fields must be None")
            return (
                np.vstack([d.__getattribute__(name) for d in detections_list])
                if name == "mask"
                else np.hstack([d.__getattribute__(name) for d in detections_list])
            )

        mask = stack_or_none("mask")
        confidence = stack_or_none("confidence")
        class_id = stack_or_none("class_id")
        tracker_id = stack_or_none("tracker_id")

        data = merge_data([d.data for d in detections_list])

        metadata_list = [detections.metadata for detections in detections_list]
        metadata = merge_metadata(metadata_list)

        return cls(
            xyxy=xyxy,
            mask=mask,
            confidence=confidence,
            class_id=class_id,
            tracker_id=tracker_id,
            data=data,
            metadata=metadata,
        )

    def get_anchors_coordinates(self, anchor: Position) -> np.ndarray:
        """
        Calculates and returns the coordinates of a specific anchor point
        within the bounding boxes defined by the `xyxy` attribute. The anchor
        point can be any of the predefined positions in the `Position` enum,
        such as `CENTER`, `CENTER_LEFT`, `BOTTOM_RIGHT`, etc.

        Args:
            anchor (Position): An enum specifying the position of the anchor point
                within the bounding box. Supported positions are defined in the
                `Position` enum.

        Returns:
            np.ndarray: An array of shape `(n, 2)`, where `n` is the number of bounding
                boxes. Each row contains the `[x, y]` coordinates of the specified
                anchor point for the corresponding bounding box.

        Raises:
            ValueError: If the provided `anchor` is not supported.
        """
        if anchor == Position.CENTER:
            return np.array(
                [
                    (self.xyxy[:, 0] + self.xyxy[:, 2]) / 2,
                    (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,
                ]
            ).transpose()
        elif anchor == Position.CENTER_OF_MASS:
            if self.mask is None:
                raise ValueError(
                    "Cannot use `Position.CENTER_OF_MASS` without a detection mask."
                )
            return calculate_masks_centroids(masks=self.mask)
        elif anchor == Position.CENTER_LEFT:
            return np.array(
                [
                    self.xyxy[:, 0],
                    (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,
                ]
            ).transpose()
        elif anchor == Position.CENTER_RIGHT:
            return np.array(
                [
                    self.xyxy[:, 2],
                    (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,
                ]
            ).transpose()
        elif anchor == Position.BOTTOM_CENTER:
            return np.array(
                [(self.xyxy[:, 0] + self.xyxy[:, 2]) / 2, self.xyxy[:, 3]]
            ).transpose()
        elif anchor == Position.BOTTOM_LEFT:
            return np.array([self.xyxy[:, 0], self.xyxy[:, 3]]).transpose()
        elif anchor == Position.BOTTOM_RIGHT:
            return np.array([self.xyxy[:, 2], self.xyxy[:, 3]]).transpose()
        elif anchor == Position.TOP_CENTER:
            return np.array(
                [(self.xyxy[:, 0] + self.xyxy[:, 2]) / 2, self.xyxy[:, 1]]
            ).transpose()
        elif anchor == Position.TOP_LEFT:
            return np.array([self.xyxy[:, 0], self.xyxy[:, 1]]).transpose()
        elif anchor == Position.TOP_RIGHT:
            return np.array([self.xyxy[:, 2], self.xyxy[:, 1]]).transpose()

        raise ValueError(f"{anchor} is not supported.")
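
    # Usage sketch (illustrative; `Position` is imported from eye.geometry.core
    # above). Bottom-center anchors are a common choice for ground-plane logic:
    #
    #     points = detections.get_anchors_coordinates(Position.BOTTOM_CENTER)
    #     # points.shape == (len(detections), 2)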

    def __getitem__(
        self, index: Union[int, slice, List[int], np.ndarray, str]
    ) -> Union[Detections, List, np.ndarray, None]:
        """
        Get a subset of the Detections object or access an item from its data field.

        When provided with an integer, slice, list of integers, or a numpy array, this
        method returns a new Detections object that represents a subset of the original
        detections. When provided with a string, it accesses the corresponding item in
        the data dictionary.

        Args:
            index (Union[int, slice, List[int], np.ndarray, str]): The index, indices,
                or key to access a subset of the Detections or an item from the data.

        Returns:
            Union[Detections, Any]: A subset of the Detections object or an item from
                the data field.

        Example:
            ```python
            import eye as sv

            detections = sv.Detections()

            first_detection = detections[0]
            first_10_detections = detections[0:10]
            some_detections = detections[[0, 2, 4]]
            class_0_detections = detections[detections.class_id == 0]
            high_confidence_detections = detections[detections.confidence > 0.5]

            feature_vector = detections['feature_vector']
            ```
        """
        if isinstance(index, str):
            return self.data.get(index)
        if isinstance(index, int):
            index = [index]
        return Detections(
            xyxy=self.xyxy[index],
            mask=self.mask[index] if self.mask is not None else None,
            confidence=self.confidence[index] if self.confidence is not None else None,
            class_id=self.class_id[index] if self.class_id is not None else None,
            tracker_id=self.tracker_id[index] if self.tracker_id is not None else None,
            data=get_data_item(self.data, index),
            metadata=self.metadata,
        )

    def __setitem__(self, key: str, value: Union[np.ndarray, List]):
        """
        Set a value in the data dictionary of the Detections object.

        Args:
            key (str): The key in the data dictionary to set.
            value (Union[np.ndarray, List]): The value to set for the key.

        Example:
            ```python
            import cv2
            import eye as sv
            from ultralytics import YOLO

            image = cv2.imread(<SOURCE_IMAGE_PATH>)
            model = YOLO('yolov8s.pt')

            result = model(image)[0]
            detections = sv.Detections.from_ultralytics(result)

            detections['names'] = [
                model.model.names[class_id]
                for class_id
                in detections.class_id
            ]
            ```
        """
        if not isinstance(value, (np.ndarray, list)):
            raise TypeError("Value must be a np.ndarray or a list")

        if isinstance(value, list):
            value = np.array(value)

        self.data[key] = value

    @property
    def area(self) -> np.ndarray:
        """
        Calculate the area of each detection in the set of object detections.
        If the `mask` field is defined, the property returns the area of each mask.
        If only boxes are given, the property returns the area of each box.

        Returns:
            np.ndarray: An array of floats containing the area of each detection
                in the format of `(area_1, area_2, ..., area_n)`,
                where n is the number of detections.
        """
        if self.mask is not None:
            return np.array([np.sum(mask) for mask in self.mask])
        else:
            return self.box_area

    @property
    def box_area(self) -> np.ndarray:
        """
        Calculate the area of each bounding box in the set of object detections.

        Returns:
            np.ndarray: An array of floats containing the area of each bounding
                box in the format of `(area_1, area_2, ..., area_n)`,
                where n is the number of detections.
        """
        return (self.xyxy[:, 3] - self.xyxy[:, 1]) * (self.xyxy[:, 2] - self.xyxy[:, 0])
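
    # Usage sketch (illustrative): area-based filtering composes with __getitem__.
    #
    #     large = detections[detections.area > 1000]       # mask area when masks exist
    #     large_boxes = detections[detections.box_area > 1000]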

    def with_nms(
        self, threshold: float = 0.5, class_agnostic: bool = False
    ) -> Detections:
        """
        Performs non-max suppression on detection set. If the detections result
        from a segmentation model, the IoU mask is applied. Otherwise, box IoU is used.

        Args:
            threshold (float): The intersection-over-union threshold
                to use for non-maximum suppression. The lower the value, the more
                restrictive the NMS becomes. Defaults to 0.5.
            class_agnostic (bool): Whether to perform class-agnostic
                non-maximum suppression. If True, the class_id of each detection
                will be ignored. Defaults to False.

        Returns:
            Detections: A new Detections object containing the subset of detections
                after non-maximum suppression.

        Raises:
            AssertionError: If `confidence` is None and class_agnostic is False.
                If `class_id` is None and class_agnostic is False.
        """
        if len(self) == 0:
            return self

        assert (
            self.confidence is not None
        ), "Detections confidence must be given for NMS to be executed."

        if class_agnostic:
            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
        else:
            assert self.class_id is not None, (
                "Detections class_id must be given for NMS to be executed. If you"
                " intended to perform class agnostic NMS set class_agnostic=True."
            )
            predictions = np.hstack(
                (
                    self.xyxy,
                    self.confidence.reshape(-1, 1),
                    self.class_id.reshape(-1, 1),
                )
            )

        if self.mask is not None:
            indices = mask_non_max_suppression(
                predictions=predictions, masks=self.mask, iou_threshold=threshold
            )
        else:
            indices = box_non_max_suppression(
                predictions=predictions, iou_threshold=threshold
            )

        return self[indices]
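
    # Usage sketch (illustrative): NMS is typically applied right after model
    # conversion, e.g. to deduplicate overlapping predictions:
    #
    #     detections = Detections.from_ultralytics(results)
    #     detections = detections.with_nms(threshold=0.5, class_agnostic=False)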

    def with_nmm(
        self, threshold: float = 0.5, class_agnostic: bool = False
    ) -> Detections:
        """
        Perform non-maximum merging on the current set of object detections.

        Args:
            threshold (float): The intersection-over-union threshold
                to use for non-maximum merging. Defaults to 0.5.
            class_agnostic (bool): Whether to perform class-agnostic
                non-maximum merging. If True, the class_id of each detection
                will be ignored. Defaults to False.

        Returns:
            Detections: A new Detections object containing the subset of detections
                after non-maximum merging.

        Raises:
            AssertionError: If `confidence` is None or `class_id` is None and
                class_agnostic is False.
        """
        if len(self) == 0:
            return self

        assert (
            self.confidence is not None
        ), "Detections confidence must be given for NMM to be executed."

        if class_agnostic:
            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
        else:
            assert self.class_id is not None, (
                "Detections class_id must be given for NMM to be executed. If you"
                " intended to perform class agnostic NMM set class_agnostic=True."
            )
            predictions = np.hstack(
                (
                    self.xyxy,
                    self.confidence.reshape(-1, 1),
                    self.class_id.reshape(-1, 1),
                )
            )

        merge_groups = box_non_max_merge(
            predictions=predictions, iou_threshold=threshold
        )

        result = []
        for merge_group in merge_groups:
            unmerged_detections = [self[i] for i in merge_group]
            merged_detections = merge_inner_detections_objects(
                unmerged_detections, threshold
            )
            result.append(merged_detections)

        return Detections.merge(result)
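
    # Usage sketch (illustrative): NMM merges overlapping boxes instead of
    # discarding them, which suits sliced/tiled inference where one object is
    # split across tiles:
    #
    #     detections = detections.with_nmm(threshold=0.5, class_agnostic=True)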


def merge_inner_detection_object_pair(
    detections_1: Detections, detections_2: Detections
) -> Detections:
    """
    Merges two Detections objects into a single Detections object.
    Assumes each Detections contains exactly one object.

    A `winning` detection is determined based on the confidence score of the two
    input detections. This winning detection is then used to specify which
    `class_id`, `tracker_id`, and `data` to include in the merged Detections object.

    The resulting `confidence` of the merged object is calculated as the
    area-weighted contribution of each detection to the merged object.
    The bounding boxes and masks of the two input detections are merged into a
    single bounding box and mask, respectively.

    Args:
        detections_1 (Detections):
            The first Detections object
        detections_2 (Detections):
            The second Detections object

    Returns:
        Detections: A new Detections object, with merged attributes.

    Raises:
        ValueError: If the input Detections objects do not have exactly 1 detected
            object.

    Example:
        ```python
        import cv2
        import eye as sv
        from inference import get_model

        image = cv2.imread(<SOURCE_IMAGE_PATH>)
        model = get_model(model_id="yolov8s-640")

        result = model.infer(image)[0]
        detections = sv.Detections.from_inference(result)

        merged_detections = merge_inner_detection_object_pair(
            detections[0], detections[1])
        ```
    """
    if len(detections_1) != 1 or len(detections_2) != 1:
        raise ValueError("Both Detections should have exactly 1 detected object.")

    validate_fields_both_defined_or_none(detections_1, detections_2)

    xyxy_1 = detections_1.xyxy[0]
    xyxy_2 = detections_2.xyxy[0]
    if detections_1.confidence is None and detections_2.confidence is None:
        merged_confidence = None
    else:
        detection_1_area = (xyxy_1[2] - xyxy_1[0]) * (xyxy_1[3] - xyxy_1[1])
        detections_2_area = (xyxy_2[2] - xyxy_2[0]) * (xyxy_2[3] - xyxy_2[1])
        merged_confidence = (
            detection_1_area * detections_1.confidence[0]
            + detections_2_area * detections_2.confidence[0]
        ) / (detection_1_area + detections_2_area)
        merged_confidence = np.array([merged_confidence])

    merged_x1, merged_y1 = np.minimum(xyxy_1[:2], xyxy_2[:2])
    merged_x2, merged_y2 = np.maximum(xyxy_1[2:], xyxy_2[2:])
    merged_xyxy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]])

    if detections_1.mask is None and detections_2.mask is None:
        merged_mask = None
    else:
        merged_mask = np.logical_or(detections_1.mask, detections_2.mask)

    if detections_1.confidence is None and detections_2.confidence is None:
        winning_detection = detections_1
    elif detections_1.confidence[0] >= detections_2.confidence[0]:
        winning_detection = detections_1
    else:
        winning_detection = detections_2

    metadata = merge_metadata([detections_1.metadata, detections_2.metadata])

    return Detections(
        xyxy=merged_xyxy,
        mask=merged_mask,
        confidence=merged_confidence,
        class_id=winning_detection.class_id,
        tracker_id=winning_detection.tracker_id,
        data=winning_detection.data,
        metadata=metadata,
    )


def merge_inner_detections_objects(
    detections: List[Detections], threshold=0.5
) -> Detections:
    """
    Given N detections each of length 1 (exactly one object inside), combine them
    into a single detection object of length 1. The contained inner object will be
    the merged result of all the input detections.

    For example, this lets you merge N boxes into one big box, N masks into one
    mask, etc.
    """
    detections_1 = detections[0]
    for detections_2 in detections[1:]:
        box_iou = box_iou_batch(detections_1.xyxy, detections_2.xyxy)[0]
        if box_iou < threshold:
            break
        detections_1 = merge_inner_detection_object_pair(detections_1, detections_2)
    return detections_1
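
# Usage sketch (illustrative): combining single-object detections directly.
# Note that merging stops at the first candidate whose box IoU with the
# accumulated detection falls below `threshold`.
#
#     merged = merge_inner_detections_objects(
#         [detections[0], detections[1]], threshold=0.5)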


def validate_fields_both_defined_or_none(
    detections_1: Detections, detections_2: Detections
) -> None:
    """
    Verify that for each optional field in the Detections, both instances either have
    the field set to None or both have it set to non-None values.

    `data` field is ignored.

    Raises:
        ValueError: If one field is None and the other is not, for any of the fields.
    """
    attributes = get_instance_variables(detections_1)
    for attribute in attributes:
        value_1 = getattr(detections_1, attribute)
        value_2 = getattr(detections_2, attribute)

        if (value_1 is None) != (value_2 is None):
            raise ValueError(
                f"Field '{attribute}' should be consistently None or not None in both "
                "Detections."
            )