eye-cv 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eye/__init__.py +115 -0
- eye/__init___supervision_original.py +120 -0
- eye/annotators/__init__.py +0 -0
- eye/annotators/base.py +22 -0
- eye/annotators/core.py +2699 -0
- eye/annotators/line.py +107 -0
- eye/annotators/modern.py +529 -0
- eye/annotators/trace.py +142 -0
- eye/annotators/utils.py +177 -0
- eye/assets/__init__.py +2 -0
- eye/assets/downloader.py +95 -0
- eye/assets/list.py +83 -0
- eye/classification/__init__.py +0 -0
- eye/classification/core.py +188 -0
- eye/config.py +2 -0
- eye/core/__init__.py +0 -0
- eye/core/trackers/__init__.py +1 -0
- eye/core/trackers/botsort_tracker.py +336 -0
- eye/core/trackers/bytetrack_tracker.py +284 -0
- eye/core/trackers/sort_tracker.py +200 -0
- eye/core/tracking.py +146 -0
- eye/dataset/__init__.py +0 -0
- eye/dataset/core.py +919 -0
- eye/dataset/formats/__init__.py +0 -0
- eye/dataset/formats/coco.py +258 -0
- eye/dataset/formats/pascal_voc.py +279 -0
- eye/dataset/formats/yolo.py +272 -0
- eye/dataset/utils.py +259 -0
- eye/detection/__init__.py +0 -0
- eye/detection/auto_convert.py +155 -0
- eye/detection/core.py +1529 -0
- eye/detection/detections_enhanced.py +392 -0
- eye/detection/line_zone.py +859 -0
- eye/detection/lmm.py +184 -0
- eye/detection/overlap_filter.py +270 -0
- eye/detection/tools/__init__.py +0 -0
- eye/detection/tools/csv_sink.py +181 -0
- eye/detection/tools/inference_slicer.py +288 -0
- eye/detection/tools/json_sink.py +142 -0
- eye/detection/tools/polygon_zone.py +202 -0
- eye/detection/tools/smoother.py +123 -0
- eye/detection/tools/smoothing.py +179 -0
- eye/detection/tools/smoothing_config.py +202 -0
- eye/detection/tools/transformers.py +247 -0
- eye/detection/utils.py +1175 -0
- eye/draw/__init__.py +0 -0
- eye/draw/color.py +154 -0
- eye/draw/utils.py +374 -0
- eye/filters.py +112 -0
- eye/geometry/__init__.py +0 -0
- eye/geometry/core.py +128 -0
- eye/geometry/utils.py +47 -0
- eye/keypoint/__init__.py +0 -0
- eye/keypoint/annotators.py +442 -0
- eye/keypoint/core.py +687 -0
- eye/keypoint/skeletons.py +2647 -0
- eye/metrics/__init__.py +21 -0
- eye/metrics/core.py +72 -0
- eye/metrics/detection.py +843 -0
- eye/metrics/f1_score.py +648 -0
- eye/metrics/mean_average_precision.py +628 -0
- eye/metrics/mean_average_recall.py +697 -0
- eye/metrics/precision.py +653 -0
- eye/metrics/recall.py +652 -0
- eye/metrics/utils/__init__.py +0 -0
- eye/metrics/utils/object_size.py +158 -0
- eye/metrics/utils/utils.py +9 -0
- eye/py.typed +0 -0
- eye/quick.py +104 -0
- eye/tracker/__init__.py +0 -0
- eye/tracker/byte_tracker/__init__.py +0 -0
- eye/tracker/byte_tracker/core.py +386 -0
- eye/tracker/byte_tracker/kalman_filter.py +205 -0
- eye/tracker/byte_tracker/matching.py +69 -0
- eye/tracker/byte_tracker/single_object_track.py +178 -0
- eye/tracker/byte_tracker/utils.py +18 -0
- eye/utils/__init__.py +0 -0
- eye/utils/conversion.py +132 -0
- eye/utils/file.py +159 -0
- eye/utils/image.py +794 -0
- eye/utils/internal.py +200 -0
- eye/utils/iterables.py +84 -0
- eye/utils/notebook.py +114 -0
- eye/utils/video.py +307 -0
- eye/utils_eye/__init__.py +1 -0
- eye/utils_eye/geometry.py +71 -0
- eye/utils_eye/nms.py +55 -0
- eye/validators/__init__.py +140 -0
- eye/web.py +271 -0
- eye_cv-1.0.0.dist-info/METADATA +319 -0
- eye_cv-1.0.0.dist-info/RECORD +94 -0
- eye_cv-1.0.0.dist-info/WHEEL +5 -0
- eye_cv-1.0.0.dist-info/licenses/LICENSE +21 -0
- eye_cv-1.0.0.dist-info/top_level.txt +1 -0
eye/dataset/formats/yolo.py
ADDED

@@ -0,0 +1,272 @@
import os
from pathlib import Path
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple

import cv2
import numpy as np

from eye.config import ORIENTED_BOX_COORDINATES
from eye.dataset.utils import approximate_mask_with_polygons
from eye.detection.core import Detections
from eye.detection.utils import polygon_to_mask, polygon_to_xyxy
from eye.utils.file import (
    list_files_with_extensions,
    read_txt_file,
    read_yaml_file,
    save_text_file,
    save_yaml_file,
)

if TYPE_CHECKING:
    from eye.dataset.core import DetectionDataset


def _parse_box(values: List[str]) -> np.ndarray:
    x_center, y_center, width, height = values
    return np.array(
        [
            float(x_center) - float(width) / 2,
            float(y_center) - float(height) / 2,
            float(x_center) + float(width) / 2,
            float(y_center) + float(height) / 2,
        ],
        dtype=np.float32,
    )


def _box_to_polygon(box: np.ndarray) -> np.ndarray:
    return np.array(
        [[box[0], box[1]], [box[2], box[1]], [box[2], box[3]], [box[0], box[3]]]
    )


def _parse_polygon(values: List[str]) -> np.ndarray:
    return np.array(values, dtype=np.float32).reshape(-1, 2)


def _polygons_to_masks(
    polygons: List[np.ndarray], resolution_wh: Tuple[int, int]
) -> np.ndarray:
    return np.array(
        [
            polygon_to_mask(polygon=polygon, resolution_wh=resolution_wh)
            for polygon in polygons
        ],
        dtype=bool,
    )


def _with_mask(lines: List[str]) -> bool:
    return any([len(line.split()) > 5 for line in lines])


def _extract_class_names(file_path: str) -> List[str]:
    data = read_yaml_file(file_path=file_path)
    names = data["names"]
    if isinstance(names, dict):
        names = [names[key] for key in sorted(names.keys())]
    return names


def _image_name_to_annotation_name(image_name: str) -> str:
    base_name, _ = os.path.splitext(image_name)
    return base_name + ".txt"


def yolo_annotations_to_detections(
    lines: List[str],
    resolution_wh: Tuple[int, int],
    with_masks: bool,
    is_obb: bool = False,
) -> Detections:
    if len(lines) == 0:
        return Detections.empty()

    class_id, relative_xyxy, relative_polygon, relative_xyxyxyxy = [], [], [], []
    w, h = resolution_wh
    for line in lines:
        values = line.split()
        class_id.append(int(values[0]))
        if len(values) == 5:
            box = _parse_box(values=values[1:])
            relative_xyxy.append(box)
            if with_masks:
                relative_polygon.append(_box_to_polygon(box=box))
        elif len(values) > 5:
            polygon = _parse_polygon(values=values[1:])
            relative_xyxy.append(polygon_to_xyxy(polygon=polygon))
            if is_obb:
                relative_xyxyxyxy.append(np.array(values[1:]))
            if with_masks:
                relative_polygon.append(polygon)

    class_id = np.array(class_id, dtype=int)
    relative_xyxy = np.array(relative_xyxy, dtype=np.float32)
    xyxy = relative_xyxy * np.array([w, h, w, h], dtype=np.float32)
    data = {}

    if is_obb:
        relative_xyxyxyxy = np.array(relative_xyxyxyxy, dtype=np.float32)
        xyxyxyxy = relative_xyxyxyxy.reshape(-1, 4, 2)
        xyxyxyxy *= np.array([w, h], dtype=np.float32)
        data[ORIENTED_BOX_COORDINATES] = xyxyxyxy

    if not with_masks:
        return Detections(class_id=class_id, xyxy=xyxy, data=data)

    polygons = [
        (polygon * np.array(resolution_wh)).astype(int) for polygon in relative_polygon
    ]
    mask = _polygons_to_masks(polygons=polygons, resolution_wh=resolution_wh)
    return Detections(class_id=class_id, xyxy=xyxy, data=data, mask=mask)
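For orientation, here is a minimal sketch of the two annotation-line formats the parser above accepts. The values are hypothetical; coordinates are normalized to [0, 1], as the scaling by `(w, h, w, h)` assumes.

```python
# Hypothetical annotation lines: 5 tokens -> bounding box,
# more than 5 -> polygon (class id followed by x y pairs).
lines = [
    "0 0.5 0.5 0.25 0.5",                 # box: class x_center y_center width height
    "1 0.1 0.1 0.4 0.1 0.4 0.3 0.1 0.3",  # polygon: class x1 y1 x2 y2 ...
]
detections = yolo_annotations_to_detections(
    lines=lines, resolution_wh=(640, 480), with_masks=False
)
# The first line scales to xyxy = [240, 120, 400, 360] on a 640x480 image.
```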
def load_yolo_annotations(
    images_directory_path: str,
    annotations_directory_path: str,
    data_yaml_path: str,
    force_masks: bool = False,
    is_obb: bool = False,
) -> Tuple[List[str], List[str], Dict[str, Detections]]:
    """
    Loads YOLO annotations and returns class names, image paths,
    and their corresponding detections.

    Args:
        images_directory_path (str): The path to the directory containing the images.
        annotations_directory_path (str): The path to the directory
            containing the YOLO annotation files.
        data_yaml_path (str): The path to the data
            YAML file containing class information.
        force_masks (bool): If True, forces masks to be loaded
            for all annotations, regardless of whether they are present.
        is_obb (bool): If True, loads the annotations in OBB format.
            OBB annotations are defined as `[class_id, x, y, x, y, x, y, x, y]`,
            where pairs of [x, y] are box corners.

    Returns:
        Tuple[List[str], List[str], Dict[str, Detections]]:
            A tuple containing a list of class names, a list of image
            paths, and a dictionary with image paths as keys and
            corresponding Detections instances as values.
    """
    image_paths = [
        str(path)
        for path in list_files_with_extensions(
            directory=images_directory_path, extensions=["jpg", "jpeg", "png"]
        )
    ]

    classes = _extract_class_names(file_path=data_yaml_path)
    annotations = {}

    for image_path in image_paths:
        image_stem = Path(image_path).stem
        annotation_path = os.path.join(annotations_directory_path, f"{image_stem}.txt")
        if not os.path.exists(annotation_path):
            annotations[image_path] = Detections.empty()
            continue

        image = cv2.imread(image_path)
        lines = read_txt_file(file_path=annotation_path, skip_empty=True)
        h, w, _ = image.shape
        resolution_wh = (w, h)

        with_masks = _with_mask(lines=lines)
        with_masks = force_masks if force_masks else with_masks
        annotation = yolo_annotations_to_detections(
            lines=lines,
            resolution_wh=resolution_wh,
            with_masks=with_masks,
            is_obb=is_obb,
        )
        annotations[image_path] = annotation
    return classes, image_paths, annotations
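A short usage sketch of the loader, with a hypothetical directory layout; per the loop above, images without a matching `.txt` file come back as `Detections.empty()`.

```python
# Hypothetical layout: dataset/images/*.jpg, dataset/labels/*.txt, dataset/data.yaml
classes, image_paths, annotations = load_yolo_annotations(
    images_directory_path="dataset/images",
    annotations_directory_path="dataset/labels",
    data_yaml_path="dataset/data.yaml",
)
for image_path in image_paths:
    detections = annotations[image_path]
    print(image_path, detections.class_id)
```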
def object_to_yolo(
    xyxy: np.ndarray,
    class_id: int,
    image_shape: Tuple[int, int, int],
    polygon: Optional[np.ndarray] = None,
) -> str:
    h, w, _ = image_shape
    if polygon is None:
        xyxy_relative = xyxy / np.array([w, h, w, h], dtype=np.float32)
        x_min, y_min, x_max, y_max = xyxy_relative
        x_center = (x_min + x_max) / 2
        y_center = (y_min + y_max) / 2
        width = x_max - x_min
        height = y_max - y_min
        return f"{int(class_id)} {x_center:.5f} {y_center:.5f} {width:.5f} {height:.5f}"
    else:
        polygon_relative = polygon / np.array([w, h], dtype=np.float32)
        polygon_relative = polygon_relative.reshape(-1)
        polygon_parsed = " ".join([f"{value:.5f}" for value in polygon_relative])
        return f"{int(class_id)} {polygon_parsed}"
def detections_to_yolo_annotations(
    detections: Detections,
    image_shape: Tuple[int, int, int],
    min_image_area_percentage: float = 0.0,
    max_image_area_percentage: float = 1.0,
    approximation_percentage: float = 0.75,
) -> List[str]:
    annotation = []
    for xyxy, mask, _, class_id, _, _ in detections:
        if class_id is None:
            raise ValueError("Class ID is required for YOLO annotations.")

        if mask is not None:
            polygons = approximate_mask_with_polygons(
                mask=mask,
                min_image_area_percentage=min_image_area_percentage,
                max_image_area_percentage=max_image_area_percentage,
                approximation_percentage=approximation_percentage,
            )
            for polygon in polygons:
                xyxy = polygon_to_xyxy(polygon=polygon)
                next_object = object_to_yolo(
                    xyxy=xyxy,
                    class_id=class_id,
                    image_shape=image_shape,
                    polygon=polygon,
                )
                annotation.append(next_object)
        else:
            next_object = object_to_yolo(
                xyxy=xyxy, class_id=class_id, image_shape=image_shape
            )
            annotation.append(next_object)
    return annotation

def save_yolo_annotations(
    dataset: "DetectionDataset",
    annotations_directory_path: str,
    min_image_area_percentage: float = 0.0,
    max_image_area_percentage: float = 1.0,
    approximation_percentage: float = 0.75,
) -> None:
    Path(annotations_directory_path).mkdir(parents=True, exist_ok=True)
    for image_path, image, annotation in dataset:
        image_name = Path(image_path).name
        yolo_annotations_name = _image_name_to_annotation_name(image_name=image_name)
        yolo_annotations_path = os.path.join(
            annotations_directory_path, yolo_annotations_name
        )
        lines = detections_to_yolo_annotations(
            detections=annotation,
            image_shape=image.shape,  # type: ignore
            min_image_area_percentage=min_image_area_percentage,
            max_image_area_percentage=max_image_area_percentage,
            approximation_percentage=approximation_percentage,
        )
        save_text_file(lines=lines, file_path=yolo_annotations_path)


def save_data_yaml(data_yaml_path: str, classes: List[str]) -> None:
    data = {"nc": len(classes), "names": classes}
    Path(data_yaml_path).parent.mkdir(parents=True, exist_ok=True)
    save_yaml_file(data=data, file_path=data_yaml_path)
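Putting the two save helpers together, a hedged sketch: it assumes `ds` is a `DetectionDataset` whose iteration yields `(image_path, image, detections)` as the loop above expects, and that it exposes a `classes` list.

```python
# Sketch only: `ds` and its `classes` attribute are assumed, not shown here.
save_yolo_annotations(dataset=ds, annotations_directory_path="out/labels")
save_data_yaml(data_yaml_path="out/data.yaml", classes=ds.classes)
```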
eye/dataset/utils.py
ADDED

@@ -0,0 +1,259 @@
import copy
import os
import random
import shutil
from pathlib import Path
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, TypeVar, Union

import cv2
import numpy as np
import numpy.typing as npt

from eye.detection.core import Detections
from eye.detection.utils import (
    approximate_polygon,
    filter_polygons_by_area,
    mask_to_polygons,
)

if TYPE_CHECKING:
    from eye.dataset.core import DetectionDataset

T = TypeVar("T")


def approximate_mask_with_polygons(
    mask: np.ndarray,
    min_image_area_percentage: float = 0.0,
    max_image_area_percentage: float = 1.0,
    approximation_percentage: float = 0.75,
) -> List[np.ndarray]:
    height, width = mask.shape
    image_area = height * width
    minimum_detection_area = min_image_area_percentage * image_area
    maximum_detection_area = max_image_area_percentage * image_area

    polygons = mask_to_polygons(mask=mask)
    if len(polygons) == 1:
        polygons = filter_polygons_by_area(
            polygons=polygons, min_area=None, max_area=maximum_detection_area
        )
    else:
        polygons = filter_polygons_by_area(
            polygons=polygons,
            min_area=minimum_detection_area,
            max_area=maximum_detection_area,
        )
    return [
        approximate_polygon(polygon=polygon, percentage=approximation_percentage)
        for polygon in polygons
    ]
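A small sketch of the helper above: it vectorizes a binary mask into polygons, filters them by area (note the special case that skips the minimum-area check when only one polygon is found), and simplifies each one. The mask here is hypothetical.

```python
import numpy as np

mask = np.zeros((100, 100), dtype=bool)
mask[20:80, 30:70] = True  # one rectangular blob

polygons = approximate_mask_with_polygons(
    mask=mask,
    min_image_area_percentage=0.01,  # drop blobs under 1% of the image
    max_image_area_percentage=0.9,   # drop blobs over 90% of the image
)
# -> a list with one simplified (N, 2) polygon around the blob
```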
def merge_class_lists(class_lists: List[List[str]]) -> List[str]:
    unique_classes = set()

    for class_list in class_lists:
        for class_name in class_list:
            unique_classes.add(class_name)

    return sorted(list(unique_classes))


def build_class_index_mapping(
    source_classes: List[str], target_classes: List[str]
) -> Dict[int, int]:
    """Returns the index map of source classes -> target classes."""
    index_mapping = {}

    for i, class_name in enumerate(source_classes):
        if class_name not in target_classes:
            raise ValueError(
                f"Class {class_name} not found in target classes. "
                "source_classes must be a subset of target_classes."
            )
        corresponding_index = target_classes.index(class_name)
        index_mapping[i] = corresponding_index

    return index_mapping


def map_detections_class_id(
    source_to_target_mapping: Dict[int, int], detections: Detections
) -> Detections:
    if detections.class_id is None:
        raise ValueError("Detections must have class_id attribute.")
    if set(np.unique(detections.class_id)) - set(source_to_target_mapping.keys()):
        raise ValueError(
            "Detections class_id must be a subset of source_to_target_mapping keys."
        )

    detections_copy = copy.deepcopy(detections)

    if len(detections) > 0:
        detections_copy.class_id = np.vectorize(source_to_target_mapping.get)(
            detections_copy.class_id
        )

    return detections_copy
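These helpers are typically used together when merging datasets: build one merged class list, then remap each dataset's class ids onto it. A minimal sketch with hypothetical class lists (the `detections` variable is assumed to exist):

```python
source_classes = ["dog", "person"]
target_classes = merge_class_lists([["dog", "person"], ["cat", "dog"]])
# -> ["cat", "dog", "person"]

mapping = build_class_index_mapping(source_classes, target_classes)
# -> {0: 1, 1: 2}  (dog: 0 -> 1, person: 1 -> 2)

remapped = map_detections_class_id(mapping, detections)
```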
def save_dataset_images(
    dataset: "DetectionDataset", images_directory_path: str
) -> None:
    Path(images_directory_path).mkdir(parents=True, exist_ok=True)
    for image_path in dataset.image_paths:
        final_path = os.path.join(images_directory_path, Path(image_path).name)
        if image_path in dataset._images_in_memory:
            image = dataset._images_in_memory[image_path]
            cv2.imwrite(final_path, image)
        else:
            shutil.copyfile(image_path, final_path)


def train_test_split(
    data: List[T],
    train_ratio: float = 0.8,
    random_state: Optional[int] = None,
    shuffle: bool = True,
) -> Tuple[List[T], List[T]]:
    """
    Splits the data into two parts using the provided train_ratio.

    Args:
        data (List[T]): The data to split.
        train_ratio (float): The ratio of the training set to the entire dataset.
        random_state (Optional[int]): The seed for the random number generator.
        shuffle (bool): Whether to shuffle the data before splitting.

    Returns:
        Tuple[List[T], List[T]]: The split data.
    """
    if random_state is not None:
        random.seed(random_state)

    if shuffle:
        random.shuffle(data)

    split_index = int(len(data) * train_ratio)
    return data[:split_index], data[split_index:]
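A usage sketch; note that the function shuffles `data` in place when `shuffle=True`, and `random_state` seeds the global `random` module.

```python
data = list(range(10))
train, test = train_test_split(data, train_ratio=0.8, random_state=42)
# len(train) == 8, len(test) == 2; `data` itself has been shuffled in place
```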
def rle_to_mask(
    rle: Union[npt.NDArray[np.int_], List[int]], resolution_wh: Tuple[int, int]
) -> npt.NDArray[np.bool_]:
    """
    Converts run-length encoding (RLE) to a binary mask.

    Args:
        rle (Union[npt.NDArray[np.int_], List[int]]): The 1D RLE array, the format
            used in the COCO dataset (column-wise encoding, values of an array with
            even indices represent the number of pixels assigned as background,
            values of an array with odd indices represent the number of pixels
            assigned as foreground object).
        resolution_wh (Tuple[int, int]): The width (w) and height (h)
            of the desired binary mask.

    Returns:
        The generated 2D Boolean mask of shape `(h, w)`, where the foreground object is
        marked with `True`'s and the rest is filled with `False`'s.

    Raises:
        AssertionError: If the sum of pixels encoded in RLE differs from the
            number of pixels in the expected mask (computed based on resolution_wh).

    Examples:
        ```python
        import eye as sv

        sv.rle_to_mask([5, 2, 2, 2, 5], (4, 4))
        # array([
        #     [False, False, False, False],
        #     [False, True, True, False],
        #     [False, True, True, False],
        #     [False, False, False, False],
        # ])
        ```
    """
    if isinstance(rle, list):
        rle = np.array(rle, dtype=int)

    width, height = resolution_wh

    assert width * height == np.sum(rle), (
        "the sum of the number of pixels in the RLE must be the same "
        "as the number of pixels in the expected mask"
    )

    zero_one_values = np.zeros(shape=(rle.size, 1), dtype=np.uint8)
    zero_one_values[1::2] = 1

    decoded_rle = np.repeat(zero_one_values, rle, axis=0)
    decoded_rle = np.append(
        decoded_rle, np.zeros(width * height - len(decoded_rle), dtype=np.uint8)
    )
    return decoded_rle.reshape((height, width), order="F")


def mask_to_rle(mask: npt.NDArray[np.bool_]) -> List[int]:
    """
    Converts a binary mask into a run-length encoding (RLE).

    Args:
        mask (npt.NDArray[np.bool_]): 2D binary mask where `True` indicates foreground
            object and `False` indicates background.

    Returns:
        The run-length encoded mask. Values of a list with even indices
        represent the number of pixels assigned as background (`False`), values
        of a list with odd indices represent the number of pixels assigned
        as foreground object (`True`).

    Raises:
        AssertionError: If input mask is not 2D or is empty.

    Examples:
        ```python
        import numpy as np
        import eye as sv

        mask = np.array([
            [True, True, True, True],
            [True, True, True, True],
            [True, True, True, True],
            [True, True, True, True],
        ])
        sv.mask_to_rle(mask)
        # [0, 16]

        mask = np.array([
            [False, False, False, False],
            [False, True, True, False],
            [False, True, True, False],
            [False, False, False, False],
        ])
        sv.mask_to_rle(mask)
        # [5, 2, 2, 2, 5]
        ```
    """  # noqa E501 // docs
    assert mask.ndim == 2, "Input mask must be 2D"
    assert mask.size != 0, "Input mask cannot be empty"

    on_value_change_indices = np.where(
        mask.ravel(order="F") != np.roll(mask.ravel(order="F"), 1)
    )[0]

    on_value_change_indices = np.append(on_value_change_indices, mask.size)
    # need to add 0 at the beginning when the same value is in the first and
    # last element of the flattened mask
    if on_value_change_indices[0] != 0:
        on_value_change_indices = np.insert(on_value_change_indices, 0, 0)

    rle = np.diff(on_value_change_indices)

    if mask[0][0] == 1:
        rle = np.insert(rle, 0, 0)

    return list(rle)
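The two RLE helpers above are inverses (column-major encoding, background run first), so a round trip recovers the mask. A minimal check:

```python
import numpy as np

mask = np.array([
    [False, False, False, False],
    [False, True, True, False],
    [False, True, True, False],
    [False, False, False, False],
])
rle = mask_to_rle(mask)             # [5, 2, 2, 2, 5]
restored = rle_to_mask(rle, (4, 4))
assert np.array_equal(mask, restored)
```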
eye/detection/auto_convert.py
ADDED

@@ -0,0 +1,155 @@
"""Universal auto-conversion for ALL model formats - Works with supervision!"""

import numpy as np
from typing import Any, Optional, Union
from eye.detection.core import Detections


def auto_convert(results: Any) -> Detections:
    """Automatically convert ANY model output to eye.Detections.

    Supports: YOLO (Ultralytics), PyTorch, TensorFlow, OpenCV, ONNX,
    TensorRT, MMDetection, Detectron2, PaddlePaddle, and more!

    Args:
        results: Model output in any supported format

    Returns:
        eye.Detections object ready to use

    Example:
        >>> results = model(image)
        >>> detections = eye.auto_convert(results)
        >>> annotated = annotator.annotate(image, detections)
    """
    # YOLO (Ultralytics) - Most common in this project
    if hasattr(results, 'boxes'):
        boxes = results.boxes
        if hasattr(boxes, 'xyxy'):
            xyxy = boxes.xyxy.cpu().numpy() if hasattr(boxes.xyxy, 'cpu') else boxes.xyxy
            confidence = boxes.conf.cpu().numpy() if hasattr(boxes.conf, 'cpu') else boxes.conf
            class_id = boxes.cls.cpu().numpy().astype(int) if hasattr(boxes.cls, 'cpu') else boxes.cls.astype(int)

            # Handle optional tracking IDs
            tracker_id = None
            if hasattr(boxes, 'id') and boxes.id is not None:
                tracker_id = boxes.id.cpu().numpy().astype(int) if hasattr(boxes.id, 'cpu') else boxes.id.astype(int)

            # Handle segmentation masks
            mask = None
            if hasattr(results, 'masks') and results.masks is not None:
                try:
                    mask = results.masks.data.cpu().numpy() if hasattr(results.masks.data, 'cpu') else results.masks.data
                except Exception:
                    pass

            return Detections(
                xyxy=xyxy,
                confidence=confidence,
                class_id=class_id,
                tracker_id=tracker_id,
                mask=mask
            )

    # YOLO list format (batch)
    if isinstance(results, list) and len(results) > 0:
        if hasattr(results[0], 'boxes'):
            return auto_convert(results[0])  # Take first result

    # PyTorch (torchvision) - Dict with boxes/scores/labels
    if isinstance(results, dict):
        if all(k in results for k in ['boxes', 'scores', 'labels']):
            boxes = results['boxes']
            xyxy = boxes.cpu().numpy() if hasattr(boxes, 'cpu') else boxes
            confidence = results['scores'].cpu().numpy() if hasattr(results['scores'], 'cpu') else results['scores']
            class_id = results['labels'].cpu().numpy().astype(int) if hasattr(results['labels'], 'cpu') else results['labels'].astype(int)

            return Detections(
                xyxy=xyxy,
                confidence=confidence,
                class_id=class_id
            )

    # TensorFlow Object Detection API
    if isinstance(results, (tuple, list)) and len(results) >= 3:
        boxes, scores, classes = results[:3]
        # TF format is [ymin, xmin, ymax, xmax] normalized - convert to xyxy
        if hasattr(boxes, 'numpy'):
            boxes = boxes.numpy()
        boxes = boxes.squeeze()

        # Convert to absolute coordinates (assuming image size is available)
        # For now, keep as is and let user handle scaling
        xyxy = boxes[:, [1, 0, 3, 2]]  # Reorder to [xmin, ymin, xmax, ymax]

        confidence = scores.numpy() if hasattr(scores, 'numpy') else scores
        class_id = classes.numpy().astype(int) if hasattr(classes, 'numpy') else classes.astype(int)

        return Detections(
            xyxy=xyxy.squeeze(),
            confidence=confidence.squeeze(),
            class_id=class_id.squeeze()
        )

    # OpenCV DNN format
    if isinstance(results, np.ndarray):
        if results.ndim == 4 and results.shape[0] == 1:
            # OpenCV format: [1, 1, N, 7] where 7 = [image_id, class_id, conf, x1, y1, x2, y2]
            detections_2d = results.reshape(-1, results.shape[-1])

            confidence_mask = detections_2d[:, 2] > 0  # Filter valid detections
            valid = detections_2d[confidence_mask]

            return Detections(
                xyxy=valid[:, 3:7],  # [x1, y1, x2, y2]
                confidence=valid[:, 2],
                class_id=valid[:, 1].astype(int)
            )

        # Raw array format (ONNX/TensorRT): [N, 6+] where columns are [x, y, w, h, conf, class, ...]
        if results.ndim == 2 and results.shape[1] >= 6:
            # Convert xywh to xyxy
            x = results[:, 0]
            y = results[:, 1]
            w = results[:, 2]
            h = results[:, 3]

            xyxy = np.stack([
                x - w/2,  # x1
                y - h/2,  # y1
                x + w/2,  # x2
                y + h/2   # y2
            ], axis=1)

            return Detections(
                xyxy=xyxy,
                confidence=results[:, 4],
                class_id=results[:, 5].astype(int)
            )

    # If nothing matched, raise helpful error
    raise TypeError(
        f"Unsupported model output format: {type(results)}. "
        f"Supported: YOLO, PyTorch, TensorFlow, OpenCV, ONNX, TensorRT. "
        f"Got: {results.__class__.__name__}"
    )


def from_yolo(results) -> Detections:
    """Convert YOLO (Ultralytics) results to Detections."""
    return auto_convert(results)


def from_pytorch(results: dict) -> Detections:
    """Convert PyTorch/torchvision results to Detections."""
    return auto_convert(results)


def from_tensorflow(results: tuple) -> Detections:
    """Convert TensorFlow Object Detection API results to Detections."""
    return auto_convert(results)


def from_opencv(results: np.ndarray) -> Detections:
    """Convert OpenCV DNN results to Detections."""
    return auto_convert(results)
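A hedged end-to-end sketch of the raw-array branch above, with made-up numbers: a `(N, 6)` array of `[x_center, y_center, w, h, conf, class]` rows converts to corner-format `Detections`.

```python
import numpy as np

# Two hypothetical detections in [x_center, y_center, w, h, conf, class] layout.
raw = np.array([
    [320.0, 240.0, 100.0, 80.0, 0.91, 0],
    [100.0, 150.0,  40.0, 60.0, 0.55, 2],
])
detections = auto_convert(raw)
# detections.xyxy[0] == [270., 200., 370., 280.]
# detections.confidence == [0.91, 0.55]; detections.class_id == [0, 2]
```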