eye-cv 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. eye/__init__.py +115 -0
  2. eye/__init___supervision_original.py +120 -0
  3. eye/annotators/__init__.py +0 -0
  4. eye/annotators/base.py +22 -0
  5. eye/annotators/core.py +2699 -0
  6. eye/annotators/line.py +107 -0
  7. eye/annotators/modern.py +529 -0
  8. eye/annotators/trace.py +142 -0
  9. eye/annotators/utils.py +177 -0
  10. eye/assets/__init__.py +2 -0
  11. eye/assets/downloader.py +95 -0
  12. eye/assets/list.py +83 -0
  13. eye/classification/__init__.py +0 -0
  14. eye/classification/core.py +188 -0
  15. eye/config.py +2 -0
  16. eye/core/__init__.py +0 -0
  17. eye/core/trackers/__init__.py +1 -0
  18. eye/core/trackers/botsort_tracker.py +336 -0
  19. eye/core/trackers/bytetrack_tracker.py +284 -0
  20. eye/core/trackers/sort_tracker.py +200 -0
  21. eye/core/tracking.py +146 -0
  22. eye/dataset/__init__.py +0 -0
  23. eye/dataset/core.py +919 -0
  24. eye/dataset/formats/__init__.py +0 -0
  25. eye/dataset/formats/coco.py +258 -0
  26. eye/dataset/formats/pascal_voc.py +279 -0
  27. eye/dataset/formats/yolo.py +272 -0
  28. eye/dataset/utils.py +259 -0
  29. eye/detection/__init__.py +0 -0
  30. eye/detection/auto_convert.py +155 -0
  31. eye/detection/core.py +1529 -0
  32. eye/detection/detections_enhanced.py +392 -0
  33. eye/detection/line_zone.py +859 -0
  34. eye/detection/lmm.py +184 -0
  35. eye/detection/overlap_filter.py +270 -0
  36. eye/detection/tools/__init__.py +0 -0
  37. eye/detection/tools/csv_sink.py +181 -0
  38. eye/detection/tools/inference_slicer.py +288 -0
  39. eye/detection/tools/json_sink.py +142 -0
  40. eye/detection/tools/polygon_zone.py +202 -0
  41. eye/detection/tools/smoother.py +123 -0
  42. eye/detection/tools/smoothing.py +179 -0
  43. eye/detection/tools/smoothing_config.py +202 -0
  44. eye/detection/tools/transformers.py +247 -0
  45. eye/detection/utils.py +1175 -0
  46. eye/draw/__init__.py +0 -0
  47. eye/draw/color.py +154 -0
  48. eye/draw/utils.py +374 -0
  49. eye/filters.py +112 -0
  50. eye/geometry/__init__.py +0 -0
  51. eye/geometry/core.py +128 -0
  52. eye/geometry/utils.py +47 -0
  53. eye/keypoint/__init__.py +0 -0
  54. eye/keypoint/annotators.py +442 -0
  55. eye/keypoint/core.py +687 -0
  56. eye/keypoint/skeletons.py +2647 -0
  57. eye/metrics/__init__.py +21 -0
  58. eye/metrics/core.py +72 -0
  59. eye/metrics/detection.py +843 -0
  60. eye/metrics/f1_score.py +648 -0
  61. eye/metrics/mean_average_precision.py +628 -0
  62. eye/metrics/mean_average_recall.py +697 -0
  63. eye/metrics/precision.py +653 -0
  64. eye/metrics/recall.py +652 -0
  65. eye/metrics/utils/__init__.py +0 -0
  66. eye/metrics/utils/object_size.py +158 -0
  67. eye/metrics/utils/utils.py +9 -0
  68. eye/py.typed +0 -0
  69. eye/quick.py +104 -0
  70. eye/tracker/__init__.py +0 -0
  71. eye/tracker/byte_tracker/__init__.py +0 -0
  72. eye/tracker/byte_tracker/core.py +386 -0
  73. eye/tracker/byte_tracker/kalman_filter.py +205 -0
  74. eye/tracker/byte_tracker/matching.py +69 -0
  75. eye/tracker/byte_tracker/single_object_track.py +178 -0
  76. eye/tracker/byte_tracker/utils.py +18 -0
  77. eye/utils/__init__.py +0 -0
  78. eye/utils/conversion.py +132 -0
  79. eye/utils/file.py +159 -0
  80. eye/utils/image.py +794 -0
  81. eye/utils/internal.py +200 -0
  82. eye/utils/iterables.py +84 -0
  83. eye/utils/notebook.py +114 -0
  84. eye/utils/video.py +307 -0
  85. eye/utils_eye/__init__.py +1 -0
  86. eye/utils_eye/geometry.py +71 -0
  87. eye/utils_eye/nms.py +55 -0
  88. eye/validators/__init__.py +140 -0
  89. eye/web.py +271 -0
  90. eye_cv-1.0.0.dist-info/METADATA +319 -0
  91. eye_cv-1.0.0.dist-info/RECORD +94 -0
  92. eye_cv-1.0.0.dist-info/WHEEL +5 -0
  93. eye_cv-1.0.0.dist-info/licenses/LICENSE +21 -0
  94. eye_cv-1.0.0.dist-info/top_level.txt +1 -0
eye/detection/core.py ADDED
@@ -0,0 +1,1529 @@
+ from __future__ import annotations
+
+ from contextlib import suppress
+ from dataclasses import dataclass, field
+ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
+
+ import numpy as np
+
+ from eye.config import (
+     CLASS_NAME_DATA_FIELD,
+     ORIENTED_BOX_COORDINATES,
+ )
+ from eye.detection.lmm import (
+     LMM,
+     from_florence_2,
+     from_paligemma,
+     validate_lmm_parameters,
+ )
+ from eye.detection.overlap_filter import (
+     box_non_max_merge,
+     box_non_max_suppression,
+     mask_non_max_suppression,
+ )
+ from eye.detection.tools.transformers import (
+     process_transformers_detection_result,
+     process_transformers_v4_segmentation_result,
+     process_transformers_v5_segmentation_result,
+ )
+ from eye.detection.utils import (
+     box_iou_batch,
+     calculate_masks_centroids,
+     extract_ultralytics_masks,
+     get_data_item,
+     is_data_equal,
+     is_metadata_equal,
+     mask_to_xyxy,
+     merge_data,
+     merge_metadata,
+     process_roboflow_result,
+     xywh_to_xyxy,
+ )
+ from eye.geometry.core import Position
+ from eye.utils.internal import get_instance_variables
+ from eye.validators import validate_detections_fields
+
+
+ @dataclass
+ class Detections:
+     """
+     The `sv.Detections` class in the eye library standardizes results from
+     various object detection and segmentation models into a consistent format. This
+     class simplifies data manipulation and filtering, providing a uniform API for
+     integration with eye [trackers](/trackers/), [annotators](/latest/detection/annotators/), and [tools](/detection/tools/line_zone/).
+
+     === "Inference"
+
+         Use the [`sv.Detections.from_inference`](/detection/core/#eye.detection.core.Detections.from_inference)
+         method, which accepts model results from both detection and segmentation models.
+
+         ```python
+         import cv2
+         import eye as sv
+         from inference import get_model
+
+         model = get_model(model_id="yolov8n-640")
+         image = cv2.imread(<SOURCE_IMAGE_PATH>)
+         results = model.infer(image)[0]
+         detections = sv.Detections.from_inference(results)
+         ```
+
+     === "Ultralytics"
+
+         Use the [`sv.Detections.from_ultralytics`](/detection/core/#eye.detection.core.Detections.from_ultralytics)
+         method, which accepts model results from both detection and segmentation models.
+
+         ```python
+         import cv2
+         import eye as sv
+         from ultralytics import YOLO
+
+         model = YOLO("yolov8n.pt")
+         image = cv2.imread(<SOURCE_IMAGE_PATH>)
+         results = model(image)[0]
+         detections = sv.Detections.from_ultralytics(results)
+         ```
+
+     === "Transformers"
+
+         Use the [`sv.Detections.from_transformers`](/detection/core/#eye.detection.core.Detections.from_transformers)
+         method, which accepts model results from both detection and segmentation models.
+
+         ```python
+         import torch
+         import eye as sv
+         from PIL import Image
+         from transformers import DetrImageProcessor, DetrForObjectDetection
+
+         processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
+         model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
+
+         image = Image.open(<SOURCE_IMAGE_PATH>)
+         inputs = processor(images=image, return_tensors="pt")
+
+         with torch.no_grad():
+             outputs = model(**inputs)
+
+         width, height = image.size
+         target_size = torch.tensor([[height, width]])
+         results = processor.post_process_object_detection(
+             outputs=outputs, target_sizes=target_size)[0]
+         detections = sv.Detections.from_transformers(
+             transformers_results=results,
+             id2label=model.config.id2label)
+         ```
+
+     Attributes:
+         xyxy (np.ndarray): An array of shape `(n, 4)` containing
+             the bounding box coordinates in format `[x1, y1, x2, y2]`
+         mask (Optional[np.ndarray]): An array of shape
+             `(n, H, W)` containing the segmentation masks.
+         confidence (Optional[np.ndarray]): An array of shape
+             `(n,)` containing the confidence scores of the detections.
+         class_id (Optional[np.ndarray]): An array of shape
+             `(n,)` containing the class ids of the detections.
+         tracker_id (Optional[np.ndarray]): An array of shape
+             `(n,)` containing the tracker ids of the detections.
+         data (Dict[str, Union[np.ndarray, List]]): A dictionary containing additional
+             data where each key is a string representing the data type, and the value
+             is either a NumPy array or a list of corresponding data.
+         metadata (Dict[str, Any]): A dictionary containing collection-level metadata
+             that applies to the entire set of detections. This may include information
+             such as the video name, camera parameters, timestamp, or other global
+             metadata.
+     """  # noqa: E501 // docs
+
+     xyxy: np.ndarray
+     mask: Optional[np.ndarray] = None
+     confidence: Optional[np.ndarray] = None
+     class_id: Optional[np.ndarray] = None
+     tracker_id: Optional[np.ndarray] = None
+     data: Dict[str, Union[np.ndarray, List]] = field(default_factory=dict)
+     metadata: Dict[str, Any] = field(default_factory=dict)
+
+     def __post_init__(self):
+         validate_detections_fields(
+             xyxy=self.xyxy,
+             mask=self.mask,
+             confidence=self.confidence,
+             class_id=self.class_id,
+             tracker_id=self.tracker_id,
+             data=self.data,
+         )
+
+     def __len__(self):
+         """
+         Returns the number of detections in the Detections object.
+         """
+         return len(self.xyxy)
+
+     def __iter__(
+         self,
+     ) -> Iterator[
+         Tuple[
+             np.ndarray,
+             Optional[np.ndarray],
+             Optional[float],
+             Optional[int],
+             Optional[int],
+             Dict[str, Union[np.ndarray, List]],
+         ]
+     ]:
+         """
+         Iterates over the Detections object and yields a tuple of
+         `(xyxy, mask, confidence, class_id, tracker_id, data)` for each detection.
+         """
+         for i in range(len(self.xyxy)):
+             yield (
+                 self.xyxy[i],
+                 self.mask[i] if self.mask is not None else None,
+                 self.confidence[i] if self.confidence is not None else None,
+                 self.class_id[i] if self.class_id is not None else None,
+                 self.tracker_id[i] if self.tracker_id is not None else None,
+                 get_data_item(self.data, i),
+             )
+
+     def __eq__(self, other: Detections):
+         return all(
+             [
+                 np.array_equal(self.xyxy, other.xyxy),
+                 np.array_equal(self.mask, other.mask),
+                 np.array_equal(self.class_id, other.class_id),
+                 np.array_equal(self.confidence, other.confidence),
+                 np.array_equal(self.tracker_id, other.tracker_id),
+                 is_data_equal(self.data, other.data),
+                 is_metadata_equal(self.metadata, other.metadata),
+             ]
+         )
+
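To make the dunder methods above concrete, here is a minimal usage sketch; the boxes, scores, and class ids are made-up values, and only `numpy` plus the package itself are assumed:

```python
import numpy as np
import eye as sv

# Hand-made detections: two boxes with hypothetical confidences and class ids.
detections = sv.Detections(
    xyxy=np.array([[10.0, 10.0, 50.0, 50.0], [30.0, 40.0, 120.0, 200.0]]),
    confidence=np.array([0.9, 0.4]),
    class_id=np.array([0, 1]),
)

print(len(detections))  # 2, via __len__

# __iter__ yields (xyxy, mask, confidence, class_id, tracker_id, data);
# mask and tracker_id are None here because they were never set.
for xyxy, mask, confidence, class_id, tracker_id, data in detections:
    print(xyxy, confidence, class_id)
```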
+     @classmethod
+     def from_yolov5(cls, yolov5_results) -> Detections:
+         """
+         Creates a Detections instance from a
+         [YOLOv5](https://github.com/ultralytics/yolov5) inference result.
+
+         Args:
+             yolov5_results (yolov5.models.common.Detections):
+                 The output Detections instance from YOLOv5
+
+         Returns:
+             Detections: A new Detections object.
+
+         Example:
+             ```python
+             import cv2
+             import torch
+             import eye as sv
+
+             image = cv2.imread(<SOURCE_IMAGE_PATH>)
+             model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
+             result = model(image)
+             detections = sv.Detections.from_yolov5(result)
+             ```
+         """
+         yolov5_detections_predictions = yolov5_results.pred[0].cpu().numpy()
+
+         return cls(
+             xyxy=yolov5_detections_predictions[:, :4],
+             confidence=yolov5_detections_predictions[:, 4],
+             class_id=yolov5_detections_predictions[:, 5].astype(int),
+         )
+
+     @classmethod
+     def from_ultralytics(cls, ultralytics_results) -> Detections:
+         """
+         Creates a `sv.Detections` instance from a
+         [YOLOv8](https://github.com/ultralytics/ultralytics) inference result.
+
+         !!! Note
+
+             `from_ultralytics` is compatible with
+             [detection](https://docs.ultralytics.com/tasks/detect/),
+             [segmentation](https://docs.ultralytics.com/tasks/segment/), and
+             [OBB](https://docs.ultralytics.com/tasks/obb/) models.
+
+         Args:
+             ultralytics_results (ultralytics.yolo.engine.results.Results):
+                 The output Results instance from Ultralytics
+
+         Returns:
+             Detections: A new Detections object.
+
+         Example:
+             ```python
+             import cv2
+             import eye as sv
+             from ultralytics import YOLO
+
+             image = cv2.imread(<SOURCE_IMAGE_PATH>)
+             model = YOLO('yolov8s.pt')
+             results = model(image)[0]
+             detections = sv.Detections.from_ultralytics(results)
+             ```
+         """
+
+         if hasattr(ultralytics_results, "obb") and ultralytics_results.obb is not None:
+             class_id = ultralytics_results.obb.cls.cpu().numpy().astype(int)
+             class_names = np.array([ultralytics_results.names[i] for i in class_id])
+             oriented_box_coordinates = ultralytics_results.obb.xyxyxyxy.cpu().numpy()
+             return cls(
+                 xyxy=ultralytics_results.obb.xyxy.cpu().numpy(),
+                 confidence=ultralytics_results.obb.conf.cpu().numpy(),
+                 class_id=class_id,
+                 tracker_id=ultralytics_results.obb.id.int().cpu().numpy()
+                 if ultralytics_results.obb.id is not None
+                 else None,
+                 data={
+                     ORIENTED_BOX_COORDINATES: oriented_box_coordinates,
+                     CLASS_NAME_DATA_FIELD: class_names,
+                 },
+             )
+
+         if hasattr(ultralytics_results, "boxes") and ultralytics_results.boxes is None:
+             masks = extract_ultralytics_masks(ultralytics_results)
+             return cls(
+                 xyxy=mask_to_xyxy(masks),
+                 mask=masks,
+                 class_id=np.arange(len(ultralytics_results)),
+             )
+
+         class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int)
+         class_names = np.array([ultralytics_results.names[i] for i in class_id])
+         return cls(
+             xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(),
+             confidence=ultralytics_results.boxes.conf.cpu().numpy(),
+             class_id=class_id,
+             mask=extract_ultralytics_masks(ultralytics_results),
+             tracker_id=ultralytics_results.boxes.id.int().cpu().numpy()
+             if ultralytics_results.boxes.id is not None
+             else None,
+             data={CLASS_NAME_DATA_FIELD: class_names},
+         )
+
+     @classmethod
+     def from_yolo_nas(cls, yolo_nas_results) -> Detections:
+         """
+         Creates a Detections instance from a
+         [YOLO-NAS](https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS.md)
+         inference result.
+
+         Args:
+             yolo_nas_results (ImageDetectionPrediction):
+                 The output Results instance from YOLO-NAS.
+                 ImageDetectionPrediction comes from
+                 'super_gradients.training.models.prediction_results'
+
+         Returns:
+             Detections: A new Detections object.
+
+         Example:
+             ```python
+             import cv2
+             from super_gradients.training import models
+             import eye as sv
+
+             image = cv2.imread(<SOURCE_IMAGE_PATH>)
+             model = models.get('yolo_nas_l', pretrained_weights="coco")
+
+             result = list(model.predict(image, conf=0.35))[0]
+             detections = sv.Detections.from_yolo_nas(result)
+             ```
+         """
+         if np.asarray(yolo_nas_results.prediction.bboxes_xyxy).shape[0] == 0:
+             return cls.empty()
+
+         return cls(
+             xyxy=yolo_nas_results.prediction.bboxes_xyxy,
+             confidence=yolo_nas_results.prediction.confidence,
+             class_id=yolo_nas_results.prediction.labels.astype(int),
+         )
+
+     @classmethod
+     def from_tensorflow(
+         cls, tensorflow_results: dict, resolution_wh: tuple
+     ) -> Detections:
+         """
+         Creates a Detections instance from a
+         [Tensorflow Hub](https://www.tensorflow.org/hub/tutorials/tf2_object_detection)
+         inference result.
+
+         Args:
+             tensorflow_results (dict):
+                 The output results from Tensorflow Hub.
+             resolution_wh (tuple):
+                 The target `(width, height)` used to scale the normalized boxes.
+
+         Returns:
+             Detections: A new Detections object.
+
+         Example:
+             ```python
+             import tensorflow as tf
+             import tensorflow_hub as hub
+             import numpy as np
+             import cv2
+             import eye as sv
+
+             module_handle = "https://tfhub.dev/tensorflow/centernet/hourglass_512x512_kpts/1"
+             model = hub.load(module_handle)
+             img = np.array(cv2.imread(SOURCE_IMAGE_PATH))
+             result = model(img)
+             detections = sv.Detections.from_tensorflow(
+                 result, resolution_wh=(img.shape[1], img.shape[0]))
+             ```
+         """
+
+         boxes = tensorflow_results["detection_boxes"][0].numpy()
+         # TF Hub boxes are normalized [ymin, xmin, ymax, xmax]:
+         # scale y by height, x by width, then reorder to [x1, y1, x2, y2].
+         boxes[:, [0, 2]] *= resolution_wh[1]
+         boxes[:, [1, 3]] *= resolution_wh[0]
+         boxes = boxes[:, [1, 0, 3, 2]]
+         return cls(
+             xyxy=boxes,
+             confidence=tensorflow_results["detection_scores"][0].numpy(),
+             class_id=tensorflow_results["detection_classes"][0].numpy().astype(int),
+         )
+
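The conversion in `from_tensorflow` is easy to get backwards, so here is the arithmetic spelled out on a single hypothetical box (TensorFlow detectors emit normalized `[ymin, xmin, ymax, xmax]`):

```python
import numpy as np

boxes = np.array([[0.1, 0.2, 0.5, 0.6]])  # normalized [ymin, xmin, ymax, xmax]
width, height = 640, 480                  # resolution_wh

boxes[:, [0, 2]] *= height     # y coordinates scale by image height
boxes[:, [1, 3]] *= width      # x coordinates scale by image width
xyxy = boxes[:, [1, 0, 3, 2]]  # reorder to [x1, y1, x2, y2]
print(xyxy)  # [[128.  48. 384. 240.]]
```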
+     @classmethod
+     def from_deepsparse(cls, deepsparse_results) -> Detections:
+         """
+         Creates a Detections instance from a
+         [DeepSparse](https://github.com/neuralmagic/deepsparse)
+         inference result.
+
+         Args:
+             deepsparse_results (deepsparse.yolo.schemas.YOLOOutput):
+                 The output Results instance from DeepSparse.
+
+         Returns:
+             Detections: A new Detections object.
+
+         Example:
+             ```python
+             import eye as sv
+             from deepsparse import Pipeline
+
+             yolo_pipeline = Pipeline.create(
+                 task="yolo",
+                 model_path="zoo:cv/detection/yolov5-l/pytorch/ultralytics/coco/pruned80_quant-none"
+             )
+             result = yolo_pipeline(<SOURCE IMAGE PATH>)
+             detections = sv.Detections.from_deepsparse(result)
+             ```
+         """  # noqa: E501 // docs
+
+         if np.asarray(deepsparse_results.boxes[0]).shape[0] == 0:
+             return cls.empty()
+
+         return cls(
+             xyxy=np.array(deepsparse_results.boxes[0]),
+             confidence=np.array(deepsparse_results.scores[0]),
+             class_id=np.array(deepsparse_results.labels[0]).astype(float).astype(int),
+         )
+
+     @classmethod
+     def from_mmdetection(cls, mmdet_results) -> Detections:
+         """
+         Creates a Detections instance from a
+         [mmdetection](https://github.com/open-mmlab/mmdetection) and
+         [mmyolo](https://github.com/open-mmlab/mmyolo) inference result.
+
+         Args:
+             mmdet_results (mmdet.structures.DetDataSample):
+                 The output Results instance from MMDetection.
+
+         Returns:
+             Detections: A new Detections object.
+
+         Example:
+             ```python
+             import cv2
+             import eye as sv
+             from mmdet.apis import init_detector, inference_detector
+
+             image = cv2.imread(<SOURCE_IMAGE_PATH>)
+             model = init_detector(<CONFIG_PATH>, <WEIGHTS_PATH>, device=<DEVICE>)
+
+             result = inference_detector(model, image)
+             detections = sv.Detections.from_mmdetection(result)
+             ```
+         """
+
+         return cls(
+             xyxy=mmdet_results.pred_instances.bboxes.cpu().numpy(),
+             confidence=mmdet_results.pred_instances.scores.cpu().numpy(),
+             class_id=mmdet_results.pred_instances.labels.cpu().numpy().astype(int),
+             mask=mmdet_results.pred_instances.masks.cpu().numpy()
+             if "masks" in mmdet_results.pred_instances
+             else None,
+         )
+
+     @classmethod
+     def from_transformers(
+         cls, transformers_results: dict, id2label: Optional[Dict[int, str]] = None
+     ) -> Detections:
+         """
+         Creates a Detections instance from object detection or panoptic, semantic
+         and instance segmentation
+         [Transformer](https://github.com/huggingface/transformers) inference result.
+
+         Args:
+             transformers_results (Union[dict, torch.Tensor]): Inference results from
+                 your Transformers model. This can be either a dictionary containing
+                 valuable outputs like `scores`, `labels`, `boxes`, `masks`,
+                 `segments_info`, and `segmentation`, or a `torch.Tensor` holding a
+                 segmentation map where values represent class IDs.
+             id2label (Optional[Dict[int, str]]): A dictionary mapping class IDs to
+                 labels, typically part of the `transformers` model configuration. If
+                 provided, the resulting dictionary will include class names.
+
+         Returns:
+             Detections: A new Detections object.
+
+         Example:
+             ```python
+             import torch
+             import eye as sv
+             from PIL import Image
+             from transformers import DetrImageProcessor, DetrForObjectDetection
+
+             processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
+             model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
+
+             image = Image.open(<SOURCE_IMAGE_PATH>)
+             inputs = processor(images=image, return_tensors="pt")
+
+             with torch.no_grad():
+                 outputs = model(**inputs)
+
+             width, height = image.size
+             target_size = torch.tensor([[height, width]])
+             results = processor.post_process_object_detection(
+                 outputs=outputs, target_sizes=target_size)[0]
+
+             detections = sv.Detections.from_transformers(
+                 transformers_results=results,
+                 id2label=model.config.id2label
+             )
+             ```
+         """
+
+         if (
+             transformers_results.__class__.__name__ == "Tensor"
+             or "segmentation" in transformers_results
+         ):
+             return cls(
+                 **process_transformers_v5_segmentation_result(
+                     transformers_results, id2label
+                 )
+             )
+
+         if "masks" in transformers_results or "png_string" in transformers_results:
+             return cls(
+                 **process_transformers_v4_segmentation_result(
+                     transformers_results, id2label
+                 )
+             )
+
+         if "boxes" in transformers_results:
+             return cls(
+                 **process_transformers_detection_result(transformers_results, id2label)
+             )
+
+         raise ValueError(
+             "The provided Transformers results do not contain any valid fields."
+             " Expected fields are 'boxes', 'masks', 'segments_info' or"
+             " 'segmentation'."
+         )
+
+     @classmethod
+     def from_detectron2(cls, detectron2_results: Any) -> Detections:
+         """
+         Create a Detections object from the
+         [Detectron2](https://github.com/facebookresearch/detectron2) inference result.
+
+         Args:
+             detectron2_results (Any): The output of a
+                 Detectron2 model containing instances with prediction data.
+
+         Returns:
+             (Detections): A Detections object containing the bounding boxes,
+                 class IDs, and confidences of the predictions.
+
+         Example:
+             ```python
+             import cv2
+             import eye as sv
+             from detectron2.engine import DefaultPredictor
+             from detectron2.config import get_cfg
+
+             image = cv2.imread(<SOURCE_IMAGE_PATH>)
+             cfg = get_cfg()
+             cfg.merge_from_file(<CONFIG_PATH>)
+             cfg.MODEL.WEIGHTS = <WEIGHTS_PATH>
+             predictor = DefaultPredictor(cfg)
+
+             result = predictor(image)
+             detections = sv.Detections.from_detectron2(result)
+             ```
+         """
+
+         return cls(
+             xyxy=detectron2_results["instances"].pred_boxes.tensor.cpu().numpy(),
+             confidence=detectron2_results["instances"].scores.cpu().numpy(),
+             mask=detectron2_results["instances"].pred_masks.cpu().numpy()
+             if hasattr(detectron2_results["instances"], "pred_masks")
+             else None,
+             class_id=detectron2_results["instances"]
+             .pred_classes.cpu()
+             .numpy()
+             .astype(int),
+         )
+
+     @classmethod
+     def from_inference(cls, roboflow_result: Union[dict, Any]) -> Detections:
+         """
+         Create a `sv.Detections` object from the [Roboflow](https://roboflow.com/)
+         API inference result or the [Inference](https://inference.roboflow.com/)
+         package results. This method extracts bounding boxes, class IDs,
+         confidences, and class names from the Roboflow API result and encapsulates
+         them into a Detections object.
+
+         Args:
+             roboflow_result (dict, any): The result from the
+                 Roboflow API or Inference package containing predictions.
+
+         Returns:
+             (Detections): A Detections object containing the bounding boxes, class IDs,
+                 and confidences of the predictions.
+
+         Example:
+             ```python
+             import cv2
+             import eye as sv
+             from inference import get_model
+
+             image = cv2.imread(<SOURCE_IMAGE_PATH>)
+             model = get_model(model_id="yolov8s-640")
+
+             result = model.infer(image)[0]
+             detections = sv.Detections.from_inference(result)
+             ```
+         """
+         with suppress(AttributeError):
+             roboflow_result = roboflow_result.dict(exclude_none=True, by_alias=True)
+         xyxy, confidence, class_id, masks, trackers, data = process_roboflow_result(
+             roboflow_result=roboflow_result
+         )
+
+         if np.asarray(xyxy).shape[0] == 0:
+             empty_detection = cls.empty()
+             empty_detection.data = {CLASS_NAME_DATA_FIELD: np.empty(0)}
+             return empty_detection
+
+         return cls(
+             xyxy=xyxy,
+             confidence=confidence,
+             class_id=class_id,
+             mask=masks,
+             tracker_id=trackers,
+             data=data,
+         )
+
+     @classmethod
+     def from_sam(cls, sam_result: List[dict]) -> Detections:
+         """
+         Creates a Detections instance from
+         [Segment Anything Model](https://github.com/facebookresearch/segment-anything)
+         inference result.
+
+         Args:
+             sam_result (List[dict]): The output Results instance from SAM
+
+         Returns:
+             Detections: A new Detections object.
+
+         Example:
+             ```python
+             import eye as sv
+             from segment_anything import (
+                 sam_model_registry,
+                 SamAutomaticMaskGenerator
+             )
+
+             sam_model_reg = sam_model_registry[MODEL_TYPE]
+             sam = sam_model_reg(checkpoint=CHECKPOINT_PATH).to(device=DEVICE)
+             mask_generator = SamAutomaticMaskGenerator(sam)
+             sam_result = mask_generator.generate(IMAGE)
+             detections = sv.Detections.from_sam(sam_result=sam_result)
+             ```
+         """
+
+         sorted_generated_masks = sorted(
+             sam_result, key=lambda x: x["area"], reverse=True
+         )
+
+         xywh = np.array([mask["bbox"] for mask in sorted_generated_masks])
+         mask = np.array([mask["segmentation"] for mask in sorted_generated_masks])
+
+         if np.asarray(xywh).shape[0] == 0:
+             return cls.empty()
+
+         xyxy = xywh_to_xyxy(xywh=xywh)
+         return cls(xyxy=xyxy, mask=mask)
+
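SAM reports `bbox` as `[x, y, w, h]`; the helper converts it to corner format. A one-line check, assuming the usual top-left-origin convention used throughout this module:

```python
import numpy as np
from eye.detection.utils import xywh_to_xyxy

xywh = np.array([[10.0, 20.0, 30.0, 40.0]])  # [x, y, w, h]
print(xywh_to_xyxy(xywh=xywh))  # [[10. 20. 40. 60.]] -> [x1, y1, x2, y2]
```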
+     @classmethod
+     def from_azure_analyze_image(
+         cls, azure_result: dict, class_map: Optional[Dict[int, str]] = None
+     ) -> Detections:
+         """
+         Creates a Detections instance from [Azure Image Analysis 4.0](
+         https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/
+         concept-object-detection-40).
+
+         Args:
+             azure_result (dict): The result from Azure Image Analysis. It should
+                 contain detected objects and their bounding box coordinates.
+             class_map (Optional[Dict[int, str]]): A mapping of class IDs (int) to class
+                 names (str). If None, a new mapping is created dynamically.
+
+         Returns:
+             Detections: A new Detections object.
+
+         Example:
+             ```python
+             import requests
+             import eye as sv
+
+             image = open(input, "rb").read()
+
+             endpoint = "https://.cognitiveservices.azure.com/"
+             subscription_key = ""
+
+             headers = {
+                 "Content-Type": "application/octet-stream",
+                 "Ocp-Apim-Subscription-Key": subscription_key
+             }
+
+             response = requests.post(endpoint,
+                 headers=headers,
+                 data=image
+             ).json()
+
+             detections = sv.Detections.from_azure_analyze_image(response)
+             ```
+         """
+         if "error" in azure_result:
+             raise ValueError(
+                 f'Azure API returned an error {azure_result["error"]["message"]}'
+             )
+
+         xyxy, confidences, class_ids = [], [], []
+
+         is_dynamic_mapping = class_map is None
+         if is_dynamic_mapping:
+             class_map = {}
+
+         class_map = {value: key for key, value in class_map.items()}
+
+         for detection in azure_result["objectsResult"]["values"]:
+             bbox = detection["boundingBox"]
+
+             tags = detection["tags"]
+
+             x0 = bbox["x"]
+             y0 = bbox["y"]
+             x1 = x0 + bbox["w"]
+             y1 = y0 + bbox["h"]
+
+             for tag in tags:
+                 confidence = tag["confidence"]
+                 class_name = tag["name"]
+                 class_id = class_map.get(class_name, None)
+
+                 if is_dynamic_mapping and class_id is None:
+                     class_id = len(class_map)
+                     class_map[class_name] = class_id
+
+                 if class_id is not None:
+                     xyxy.append([x0, y0, x1, y1])
+                     confidences.append(confidence)
+                     class_ids.append(class_id)
+
+         if len(xyxy) == 0:
+             return Detections.empty()
+
+         return cls(
+             xyxy=np.array(xyxy),
+             class_id=np.array(class_ids),
+             confidence=np.array(confidences),
+         )
+
+     @classmethod
+     def from_paddledet(cls, paddledet_result) -> Detections:
+         """
+         Creates a Detections instance from
+         [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)
+         inference result.
+
+         Args:
+             paddledet_result (List[dict]): The output Results instance from PaddleDet
+
+         Returns:
+             Detections: A new Detections object.
+
+         Example:
+             ```python
+             import eye as sv
+             import paddle
+             from ppdet.engine import Trainer
+             from ppdet.core.workspace import load_config
+
+             weights = ()
+             config = ()
+
+             cfg = load_config(config)
+             trainer = Trainer(cfg, mode='test')
+             trainer.load_weights(weights)
+
+             paddledet_result = trainer.predict([images])[0]
+
+             detections = sv.Detections.from_paddledet(paddledet_result)
+             ```
+         """
+
+         if np.asarray(paddledet_result["bbox"][:, 2:6]).shape[0] == 0:
+             return cls.empty()
+
+         return cls(
+             xyxy=paddledet_result["bbox"][:, 2:6],
+             confidence=paddledet_result["bbox"][:, 1],
+             class_id=paddledet_result["bbox"][:, 0].astype(int),
+         )
+
+     @classmethod
+     def from_lmm(
+         cls, lmm: Union[LMM, str], result: Union[str, dict], **kwargs: Any
+     ) -> Detections:
+         """
+         Creates a Detections object from the given result string based on the specified
+         Large Multimodal Model (LMM).
+
+         Args:
+             lmm (Union[LMM, str]): The type of LMM (Large Multimodal Model) to use.
+             result (Union[str, dict]): The result string or dict containing the
+                 detection data.
+             **kwargs (Any): Additional keyword arguments required by the specified LMM.
+
+         Returns:
+             Detections: A new Detections object.
+
+         Raises:
+             ValueError: If the LMM is invalid, required arguments are missing, or
+                 disallowed arguments are provided.
+             ValueError: If the specified LMM is not supported.
+
+         Examples:
+             ```python
+             import eye as sv
+
+             paligemma_result = "<loc0256><loc0256><loc0768><loc0768> cat"
+             detections = sv.Detections.from_lmm(
+                 sv.LMM.PALIGEMMA,
+                 paligemma_result,
+                 resolution_wh=(1000, 1000),
+                 classes=['cat', 'dog']
+             )
+             detections.xyxy
+             # array([[250., 250., 750., 750.]])
+
+             detections.class_id
+             # array([0])
+             ```
+         """
+         lmm = validate_lmm_parameters(lmm, result, kwargs)
+
+         if lmm == LMM.PALIGEMMA:
+             assert isinstance(result, str)
+             xyxy, class_id, class_name = from_paligemma(result, **kwargs)
+             data = {CLASS_NAME_DATA_FIELD: class_name}
+             return cls(xyxy=xyxy, class_id=class_id, data=data)
+
+         if lmm == LMM.FLORENCE_2:
+             assert isinstance(result, dict)
+             xyxy, labels, mask, xyxyxyxy = from_florence_2(result, **kwargs)
+             if len(xyxy) == 0:
+                 return cls.empty()
+
+             data = {}
+             if labels is not None:
+                 data[CLASS_NAME_DATA_FIELD] = labels
+             if xyxyxyxy is not None:
+                 data[ORIENTED_BOX_COORDINATES] = xyxyxyxy
+
+             return cls(xyxy=xyxy, mask=mask, data=data)
+
+         raise ValueError(f"Unsupported LMM: {lmm}")
+
+     @classmethod
+     def from_easyocr(cls, easyocr_results: list) -> Detections:
+         """
+         Create a Detections object from the
+         [EasyOCR](https://github.com/JaidedAI/EasyOCR) result.
+
+         Results are placed in the `data` field with the key `"class_name"`.
+
+         Args:
+             easyocr_results (List): The output Results instance from EasyOCR
+
+         Returns:
+             Detections: A new Detections object.
+
+         Example:
+             ```python
+             import eye as sv
+             import easyocr
+
+             reader = easyocr.Reader(['en'])
+             results = reader.readtext(<SOURCE_IMAGE_PATH>)
+             detections = sv.Detections.from_easyocr(results)
+             detected_text = detections["class_name"]
+             ```
+         """
+         if len(easyocr_results) == 0:
+             return cls.empty()
+
+         bbox = np.array([result[0] for result in easyocr_results])
+         xyxy = np.hstack((np.min(bbox, axis=1), np.max(bbox, axis=1)))
+         confidence = np.array(
+             [
+                 result[2] if len(result) > 2 and result[2] else 0
+                 for result in easyocr_results
+             ]
+         )
+         ocr_text = np.array([result[1] for result in easyocr_results])
+
+         return cls(
+             xyxy=xyxy.astype(np.float32),
+             confidence=confidence.astype(np.float32),
+             data={
+                 CLASS_NAME_DATA_FIELD: ocr_text,
+             },
+         )
+
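Each EasyOCR result is a `(quad, text, confidence)` triple, where the quad lists four `(x, y)` corners of a possibly rotated box. The axis-aligned box is just the per-detection min and max corner, exactly as the `np.hstack` line above computes; a small sketch with invented corners:

```python
import numpy as np

bbox = np.array([[[12, 20], [88, 18], [90, 42], [10, 44]]])  # shape (n, 4, 2)

# Min corner gives [x1, y1], max corner gives [x2, y2].
xyxy = np.hstack((np.min(bbox, axis=1), np.max(bbox, axis=1)))
print(xyxy)  # [[10 18 90 44]]
```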
+     @classmethod
+     def from_ncnn(cls, ncnn_results) -> Detections:
+         """
+         Creates a Detections instance from the
+         [ncnn](https://github.com/Tencent/ncnn) inference result.
+         Supports object detection models.
+
+         Arguments:
+             ncnn_results (dict): The output Results instance from ncnn.
+
+         Returns:
+             Detections: A new Detections object.
+
+         Example:
+             ```python
+             import cv2
+             from ncnn.model_zoo import get_model
+             import eye as sv
+
+             image = cv2.imread(<SOURCE_IMAGE_PATH>)
+             model = get_model(
+                 "yolov8s",
+                 target_size=640,
+                 prob_threshold=0.5,
+                 nms_threshold=0.45,
+                 num_threads=4,
+                 use_gpu=True,
+             )
+             result = model(image)
+             detections = sv.Detections.from_ncnn(result)
+             ```
+         """
+
+         xywh, confidences, class_ids = [], [], []
+
+         if len(ncnn_results) == 0:
+             return cls.empty()
+
+         for ncnn_result in ncnn_results:
+             rect = ncnn_result.rect
+             xywh.append(
+                 [
+                     rect.x.astype(np.float32),
+                     rect.y.astype(np.float32),
+                     rect.w.astype(np.float32),
+                     rect.h.astype(np.float32),
+                 ]
+             )
+
+             confidences.append(ncnn_result.prob)
+             class_ids.append(ncnn_result.label)
+
+         return cls(
+             xyxy=xywh_to_xyxy(np.array(xywh, dtype=np.float32)),
+             confidence=np.array(confidences, dtype=np.float32),
+             class_id=np.array(class_ids, dtype=int),
+         )
+
+     @classmethod
+     def empty(cls) -> Detections:
+         """
+         Create an empty Detections object with no bounding boxes,
+         confidences, or class IDs.
+
+         Returns:
+             (Detections): An empty Detections object.
+
+         Example:
+             ```python
+             from eye import Detections
+
+             empty_detections = Detections.empty()
+             ```
+         """
+         return cls(
+             xyxy=np.empty((0, 4), dtype=np.float32),
+             confidence=np.array([], dtype=np.float32),
+             class_id=np.array([], dtype=int),
+         )
+
+     def is_empty(self) -> bool:
+         """
+         Returns `True` if the `Detections` object is considered empty.
+         """
+         empty_detections = Detections.empty()
+         empty_detections.data = self.data
+         empty_detections.metadata = self.metadata
+         return self == empty_detections
+
+     @classmethod
+     def merge(cls, detections_list: List[Detections]) -> Detections:
+         """
+         Merge a list of Detections objects into a single Detections object.
+
+         This method takes a list of Detections objects and combines their
+         respective fields (`xyxy`, `mask`, `confidence`, `class_id`, and `tracker_id`)
+         into a single Detections object.
+
+         For example, if merging Detections with 3 and 4 detected objects, this method
+         will return a Detections with 7 objects (7 entries in `xyxy`, `mask`, etc).
+
+         !!! Note
+
+             When merging, empty `Detections` objects are ignored.
+
+         Args:
+             detections_list (List[Detections]): A list of Detections objects to merge.
+
+         Returns:
+             (Detections): A single Detections object containing
+                 the merged data from the input list.
+
+         Example:
+             ```python
+             import numpy as np
+             import eye as sv
+
+             detections_1 = sv.Detections(
+                 xyxy=np.array([[15, 15, 100, 100], [200, 200, 300, 300]]),
+                 class_id=np.array([1, 2]),
+                 data={'feature_vector': np.array([0.1, 0.2])}
+             )
+
+             detections_2 = sv.Detections(
+                 xyxy=np.array([[30, 30, 120, 120]]),
+                 class_id=np.array([1]),
+                 data={'feature_vector': [np.array([0.3])]}
+             )
+
+             merged_detections = sv.Detections.merge([detections_1, detections_2])
+
+             merged_detections.xyxy
+             array([[ 15,  15, 100, 100],
+                    [200, 200, 300, 300],
+                    [ 30,  30, 120, 120]])
+
+             merged_detections.class_id
+             array([1, 2, 1])
+
+             merged_detections.data['feature_vector']
+             array([0.1, 0.2, 0.3])
+             ```
+         """
+         detections_list = [
+             detections for detections in detections_list if not detections.is_empty()
+         ]
+
+         if len(detections_list) == 0:
+             return Detections.empty()
+
+         for detections in detections_list:
+             validate_detections_fields(
+                 xyxy=detections.xyxy,
+                 mask=detections.mask,
+                 confidence=detections.confidence,
+                 class_id=detections.class_id,
+                 tracker_id=detections.tracker_id,
+                 data=detections.data,
+             )
+
+         xyxy = np.vstack([d.xyxy for d in detections_list])
+
+         def stack_or_none(name: str):
+             if all(d.__getattribute__(name) is None for d in detections_list):
+                 return None
+             if any(d.__getattribute__(name) is None for d in detections_list):
+                 raise ValueError(f"All or none of the '{name}' fields must be None")
+             return (
+                 np.vstack([d.__getattribute__(name) for d in detections_list])
+                 if name == "mask"
+                 else np.hstack([d.__getattribute__(name) for d in detections_list])
+             )
+
+         mask = stack_or_none("mask")
+         confidence = stack_or_none("confidence")
+         class_id = stack_or_none("class_id")
+         tracker_id = stack_or_none("tracker_id")
+
+         data = merge_data([d.data for d in detections_list])
+
+         metadata_list = [detections.metadata for detections in detections_list]
+         metadata = merge_metadata(metadata_list)
+
+         return cls(
+             xyxy=xyxy,
+             mask=mask,
+             confidence=confidence,
+             class_id=class_id,
+             tracker_id=tracker_id,
+             data=data,
+             metadata=metadata,
+         )
+
+     def get_anchors_coordinates(self, anchor: Position) -> np.ndarray:
+         """
+         Calculates and returns the coordinates of a specific anchor point
+         within the bounding boxes defined by the `xyxy` attribute. The anchor
+         point can be any of the predefined positions in the `Position` enum,
+         such as `CENTER`, `CENTER_LEFT`, `BOTTOM_RIGHT`, etc.
+
+         Args:
+             anchor (Position): An enum specifying the position of the anchor point
+                 within the bounding box. Supported positions are defined in the
+                 `Position` enum.
+
+         Returns:
+             np.ndarray: An array of shape `(n, 2)`, where `n` is the number of bounding
+                 boxes. Each row contains the `[x, y]` coordinates of the specified
+                 anchor point for the corresponding bounding box.
+
+         Raises:
+             ValueError: If the provided `anchor` is not supported.
+         """
+         if anchor == Position.CENTER:
+             return np.array(
+                 [
+                     (self.xyxy[:, 0] + self.xyxy[:, 2]) / 2,
+                     (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,
+                 ]
+             ).transpose()
+         elif anchor == Position.CENTER_OF_MASS:
+             if self.mask is None:
+                 raise ValueError(
+                     "Cannot use `Position.CENTER_OF_MASS` without a detection mask."
+                 )
+             return calculate_masks_centroids(masks=self.mask)
+         elif anchor == Position.CENTER_LEFT:
+             return np.array(
+                 [
+                     self.xyxy[:, 0],
+                     (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,
+                 ]
+             ).transpose()
+         elif anchor == Position.CENTER_RIGHT:
+             return np.array(
+                 [
+                     self.xyxy[:, 2],
+                     (self.xyxy[:, 1] + self.xyxy[:, 3]) / 2,
+                 ]
+             ).transpose()
+         elif anchor == Position.BOTTOM_CENTER:
+             return np.array(
+                 [(self.xyxy[:, 0] + self.xyxy[:, 2]) / 2, self.xyxy[:, 3]]
+             ).transpose()
+         elif anchor == Position.BOTTOM_LEFT:
+             return np.array([self.xyxy[:, 0], self.xyxy[:, 3]]).transpose()
+         elif anchor == Position.BOTTOM_RIGHT:
+             return np.array([self.xyxy[:, 2], self.xyxy[:, 3]]).transpose()
+         elif anchor == Position.TOP_CENTER:
+             return np.array(
+                 [(self.xyxy[:, 0] + self.xyxy[:, 2]) / 2, self.xyxy[:, 1]]
+             ).transpose()
+         elif anchor == Position.TOP_LEFT:
+             return np.array([self.xyxy[:, 0], self.xyxy[:, 1]]).transpose()
+         elif anchor == Position.TOP_RIGHT:
+             return np.array([self.xyxy[:, 2], self.xyxy[:, 1]]).transpose()
+
+         raise ValueError(f"{anchor} is not supported.")
+
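A quick sanity check of the anchor math, assuming `Position` is re-exported at package level as `sv.Position` (mirroring the upstream supervision API; this export is not shown in this file):

```python
import numpy as np
import eye as sv

detections = sv.Detections(xyxy=np.array([[0.0, 0.0, 10.0, 20.0]]))

# BOTTOM_CENTER: x is the midpoint of x1 and x2, y is y2.
print(detections.get_anchors_coordinates(sv.Position.BOTTOM_CENTER))  # [[ 5. 20.]]
print(detections.get_anchors_coordinates(sv.Position.CENTER))         # [[ 5. 10.]]
```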
+     def __getitem__(
+         self, index: Union[int, slice, List[int], np.ndarray, str]
+     ) -> Union[Detections, List, np.ndarray, None]:
+         """
+         Get a subset of the Detections object or access an item from its data field.
+
+         When provided with an integer, slice, list of integers, or a numpy array, this
+         method returns a new Detections object that represents a subset of the original
+         detections. When provided with a string, it accesses the corresponding item in
+         the data dictionary.
+
+         Args:
+             index (Union[int, slice, List[int], np.ndarray, str]): The index, indices,
+                 or key to access a subset of the Detections or an item from the data.
+
+         Returns:
+             Union[Detections, Any]: A subset of the Detections object or an item from
+                 the data field.
+
+         Example:
+             ```python
+             import eye as sv
+
+             detections = sv.Detections()
+
+             first_detection = detections[0]
+             first_10_detections = detections[0:10]
+             some_detections = detections[[0, 2, 4]]
+             class_0_detections = detections[detections.class_id == 0]
+             high_confidence_detections = detections[detections.confidence > 0.5]
+
+             feature_vector = detections['feature_vector']
+             ```
+         """
+         if isinstance(index, str):
+             return self.data.get(index)
+         if isinstance(index, int):
+             index = [index]
+         return Detections(
+             xyxy=self.xyxy[index],
+             mask=self.mask[index] if self.mask is not None else None,
+             confidence=self.confidence[index] if self.confidence is not None else None,
+             class_id=self.class_id[index] if self.class_id is not None else None,
+             tracker_id=self.tracker_id[index] if self.tracker_id is not None else None,
+             data=get_data_item(self.data, index),
+             metadata=self.metadata,
+         )
+
+     def __setitem__(self, key: str, value: Union[np.ndarray, List]):
+         """
+         Set a value in the data dictionary of the Detections object.
+
+         Args:
+             key (str): The key in the data dictionary to set.
+             value (Union[np.ndarray, List]): The value to set for the key.
+
+         Example:
+             ```python
+             import cv2
+             import eye as sv
+             from ultralytics import YOLO
+
+             image = cv2.imread(<SOURCE_IMAGE_PATH>)
+             model = YOLO('yolov8s.pt')
+
+             result = model(image)[0]
+             detections = sv.Detections.from_ultralytics(result)
+
+             detections['names'] = [
+                 model.model.names[class_id]
+                 for class_id
+                 in detections.class_id
+             ]
+             ```
+         """
+         if not isinstance(value, (np.ndarray, list)):
+             raise TypeError("Value must be a np.ndarray or a list")
+
+         if isinstance(value, list):
+             value = np.array(value)
+
+         self.data[key] = value
+
+     @property
+     def area(self) -> np.ndarray:
+         """
+         Calculate the area of each detection in the set of object detections.
+         If the masks field is defined, the property returns the area of each mask.
+         If only boxes are given, the property returns the area of each box.
+
+         Returns:
+             np.ndarray: An array of floats containing the area of each detection
+                 in the format of `(area_1, area_2, ..., area_n)`,
+                 where n is the number of detections.
+         """
+         if self.mask is not None:
+             return np.array([np.sum(mask) for mask in self.mask])
+         else:
+             return self.box_area
+
+     @property
+     def box_area(self) -> np.ndarray:
+         """
+         Calculate the area of each bounding box in the set of object detections.
+
+         Returns:
+             np.ndarray: An array of floats containing the area of each bounding
+                 box in the format of `(area_1, area_2, ..., area_n)`,
+                 where n is the number of detections.
+         """
+         return (self.xyxy[:, 3] - self.xyxy[:, 1]) * (self.xyxy[:, 2] - self.xyxy[:, 0])
+
+     def with_nms(
+         self, threshold: float = 0.5, class_agnostic: bool = False
+     ) -> Detections:
+         """
+         Performs non-max suppression on the detection set. If the detections result
+         from a segmentation model, mask IoU is used. Otherwise, box IoU is used.
+
+         Args:
+             threshold (float): The intersection-over-union threshold
+                 to use for non-maximum suppression. The lower the value, the more
+                 restrictive the NMS becomes. Defaults to 0.5.
+             class_agnostic (bool): Whether to perform class-agnostic
+                 non-maximum suppression. If True, the class_id of each detection
+                 will be ignored. Defaults to False.
+
+         Returns:
+             Detections: A new Detections object containing the subset of detections
+                 after non-maximum suppression.
+
+         Raises:
+             AssertionError: If `confidence` is None and class_agnostic is False.
+                 If `class_id` is None and class_agnostic is False.
+         """
+         if len(self) == 0:
+             return self
+
+         assert (
+             self.confidence is not None
+         ), "Detections confidence must be given for NMS to be executed."
+
+         if class_agnostic:
+             predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
+         else:
+             assert self.class_id is not None, (
+                 "Detections class_id must be given for NMS to be executed. If you"
+                 " intended to perform class agnostic NMS set class_agnostic=True."
+             )
+             predictions = np.hstack(
+                 (
+                     self.xyxy,
+                     self.confidence.reshape(-1, 1),
+                     self.class_id.reshape(-1, 1),
+                 )
+             )
+
+         if self.mask is not None:
+             indices = mask_non_max_suppression(
+                 predictions=predictions, masks=self.mask, iou_threshold=threshold
+             )
+         else:
+             indices = box_non_max_suppression(
+                 predictions=predictions, iou_threshold=threshold
+             )
+
+         return self[indices]
+
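A short sketch of `with_nms` on two heavily overlapping same-class boxes (values invented); their IoU is roughly 0.82, well above the default 0.5 threshold, so only the higher-confidence box survives:

```python
import numpy as np
import eye as sv

detections = sv.Detections(
    xyxy=np.array([[0.0, 0.0, 100.0, 100.0], [5.0, 5.0, 105.0, 105.0]]),
    confidence=np.array([0.9, 0.6]),
    class_id=np.array([0, 0]),
)

kept = detections.with_nms(threshold=0.5)
print(len(kept))        # 1
print(kept.confidence)  # [0.9]
```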
+     def with_nmm(
+         self, threshold: float = 0.5, class_agnostic: bool = False
+     ) -> Detections:
+         """
+         Perform non-maximum merging on the current set of object detections.
+
+         Args:
+             threshold (float): The intersection-over-union threshold
+                 to use for non-maximum merging. Defaults to 0.5.
+             class_agnostic (bool): Whether to perform class-agnostic
+                 non-maximum merging. If True, the class_id of each detection
+                 will be ignored. Defaults to False.
+
+         Returns:
+             Detections: A new Detections object containing the subset of detections
+                 after non-maximum merging.
+
+         Raises:
+             AssertionError: If `confidence` is None or `class_id` is None and
+                 class_agnostic is False.
+
+         ![non-max-merging](https://media.roboflow.com/eye-docs/non-max-merging.png){ align=center width="800" }
+         """  # noqa: E501 // docs
+         if len(self) == 0:
+             return self
+
+         assert (
+             self.confidence is not None
+         ), "Detections confidence must be given for NMM to be executed."
+
+         if class_agnostic:
+             predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
+         else:
+             assert self.class_id is not None, (
+                 "Detections class_id must be given for NMM to be executed. If you"
+                 " intended to perform class agnostic NMM set class_agnostic=True."
+             )
+             predictions = np.hstack(
+                 (
+                     self.xyxy,
+                     self.confidence.reshape(-1, 1),
+                     self.class_id.reshape(-1, 1),
+                 )
+             )
+
+         merge_groups = box_non_max_merge(
+             predictions=predictions, iou_threshold=threshold
+         )
+
+         result = []
+         for merge_group in merge_groups:
+             unmerged_detections = [self[i] for i in merge_group]
+             merged_detections = merge_inner_detections_objects(
+                 unmerged_detections, threshold
+             )
+             result.append(merged_detections)
+
+         return Detections.merge(result)
+
+
+ def merge_inner_detection_object_pair(
+     detections_1: Detections, detections_2: Detections
+ ) -> Detections:
+     """
+     Merges two Detections objects into a single Detections object.
+     Assumes each Detections contains exactly one object.
+
+     A `winning` detection is determined based on the confidence score of the two
+     input detections. This winning detection is then used to specify which
+     `class_id`, `tracker_id`, and `data` to include in the merged Detections object.
+
+     The resulting `confidence` of the merged object is calculated by the weighted
+     contribution of each detection to the merged object.
+     The bounding boxes and masks of the two input detections are merged into a
+     single bounding box and mask, respectively.
+
+     Args:
+         detections_1 (Detections):
+             The first Detections object
+         detections_2 (Detections):
+             The second Detections object
+
+     Returns:
+         Detections: A new Detections object, with merged attributes.
+
+     Raises:
+         ValueError: If the input Detections objects do not have exactly 1 detected
+             object.
+
+     Example:
+         ```python
+         import cv2
+         import eye as sv
+         from eye.detection.core import merge_inner_detection_object_pair
+         from inference import get_model
+
+         image = cv2.imread(<SOURCE_IMAGE_PATH>)
+         model = get_model(model_id="yolov8s-640")
+
+         result = model.infer(image)[0]
+         detections = sv.Detections.from_inference(result)
+
+         merged_detections = merge_inner_detection_object_pair(
+             detections[0], detections[1])
+         ```
+     """
+     if len(detections_1) != 1 or len(detections_2) != 1:
+         raise ValueError("Both Detections should have exactly 1 detected object.")
+
+     validate_fields_both_defined_or_none(detections_1, detections_2)
+
+     xyxy_1 = detections_1.xyxy[0]
+     xyxy_2 = detections_2.xyxy[0]
+     if detections_1.confidence is None and detections_2.confidence is None:
+         merged_confidence = None
+     else:
+         detection_1_area = (xyxy_1[2] - xyxy_1[0]) * (xyxy_1[3] - xyxy_1[1])
+         detections_2_area = (xyxy_2[2] - xyxy_2[0]) * (xyxy_2[3] - xyxy_2[1])
+         merged_confidence = (
+             detection_1_area * detections_1.confidence[0]
+             + detections_2_area * detections_2.confidence[0]
+         ) / (detection_1_area + detections_2_area)
+         merged_confidence = np.array([merged_confidence])
+
+     merged_x1, merged_y1 = np.minimum(xyxy_1[:2], xyxy_2[:2])
+     merged_x2, merged_y2 = np.maximum(xyxy_1[2:], xyxy_2[2:])
+     merged_xyxy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]])
+
+     if detections_1.mask is None and detections_2.mask is None:
+         merged_mask = None
+     else:
+         merged_mask = np.logical_or(detections_1.mask, detections_2.mask)
+
+     if detections_1.confidence is None and detections_2.confidence is None:
+         winning_detection = detections_1
+     elif detections_1.confidence[0] >= detections_2.confidence[0]:
+         winning_detection = detections_1
+     else:
+         winning_detection = detections_2
+
+     metadata = merge_metadata([detections_1.metadata, detections_2.metadata])
+
+     return Detections(
+         xyxy=merged_xyxy,
+         mask=merged_mask,
+         confidence=merged_confidence,
+         class_id=winning_detection.class_id,
+         tracker_id=winning_detection.tracker_id,
+         data=winning_detection.data,
+         metadata=metadata,
+     )
+
+
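To see the area-weighted confidence rule with concrete numbers (all values hypothetical): a 100x100 box at confidence 0.8 merged with a 50x50 box at confidence 0.4 gives (10000 * 0.8 + 2500 * 0.4) / 12500 = 0.72, and the merged box is the union of the two:

```python
import numpy as np
import eye as sv
from eye.detection.core import merge_inner_detection_object_pair

a = sv.Detections(
    xyxy=np.array([[0.0, 0.0, 100.0, 100.0]]),  # area 10000
    confidence=np.array([0.8]),
    class_id=np.array([0]),
)
b = sv.Detections(
    xyxy=np.array([[60.0, 60.0, 110.0, 110.0]]),  # area 2500
    confidence=np.array([0.4]),
    class_id=np.array([0]),
)

merged = merge_inner_detection_object_pair(a, b)
print(merged.xyxy)        # [[  0.   0. 110. 110.]] -- union of the two boxes
print(merged.confidence)  # [0.72] -- area-weighted average
```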
+ def merge_inner_detections_objects(
+     detections: List[Detections], threshold=0.5
+ ) -> Detections:
+     """
+     Given N detections each of length 1 (exactly one object inside), combine them into
+     a single detection object of length 1. The contained inner object will be the
+     merged result of all the input detections.
+
+     For example, this lets you merge N boxes into one big box, N masks into one mask,
+     etc.
+     """
+     detections_1 = detections[0]
+     for detections_2 in detections[1:]:
+         box_iou = box_iou_batch(detections_1.xyxy, detections_2.xyxy)[0]
+         if box_iou < threshold:
+             break
+         detections_1 = merge_inner_detection_object_pair(detections_1, detections_2)
+     return detections_1
+
+
+ def validate_fields_both_defined_or_none(
+     detections_1: Detections, detections_2: Detections
+ ) -> None:
+     """
+     Verify that for each optional field in the Detections, both instances either have
+     the field set to None or both have it set to non-None values.
+
+     The `data` field is ignored.
+
+     Raises:
+         ValueError: If one field is None and the other is not, for any of the fields.
+     """
+     attributes = get_instance_variables(detections_1)
+     for attribute in attributes:
+         value_1 = getattr(detections_1, attribute)
+         value_2 = getattr(detections_2, attribute)
+
+         if (value_1 is None) != (value_2 is None):
+             raise ValueError(
+                 f"Field '{attribute}' should be consistently None or not None in both "
+                 "Detections."
+             )