eye-cv 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- eye/__init__.py +115 -0
- eye/__init___supervision_original.py +120 -0
- eye/annotators/__init__.py +0 -0
- eye/annotators/base.py +22 -0
- eye/annotators/core.py +2699 -0
- eye/annotators/line.py +107 -0
- eye/annotators/modern.py +529 -0
- eye/annotators/trace.py +142 -0
- eye/annotators/utils.py +177 -0
- eye/assets/__init__.py +2 -0
- eye/assets/downloader.py +95 -0
- eye/assets/list.py +83 -0
- eye/classification/__init__.py +0 -0
- eye/classification/core.py +188 -0
- eye/config.py +2 -0
- eye/core/__init__.py +0 -0
- eye/core/trackers/__init__.py +1 -0
- eye/core/trackers/botsort_tracker.py +336 -0
- eye/core/trackers/bytetrack_tracker.py +284 -0
- eye/core/trackers/sort_tracker.py +200 -0
- eye/core/tracking.py +146 -0
- eye/dataset/__init__.py +0 -0
- eye/dataset/core.py +919 -0
- eye/dataset/formats/__init__.py +0 -0
- eye/dataset/formats/coco.py +258 -0
- eye/dataset/formats/pascal_voc.py +279 -0
- eye/dataset/formats/yolo.py +272 -0
- eye/dataset/utils.py +259 -0
- eye/detection/__init__.py +0 -0
- eye/detection/auto_convert.py +155 -0
- eye/detection/core.py +1529 -0
- eye/detection/detections_enhanced.py +392 -0
- eye/detection/line_zone.py +859 -0
- eye/detection/lmm.py +184 -0
- eye/detection/overlap_filter.py +270 -0
- eye/detection/tools/__init__.py +0 -0
- eye/detection/tools/csv_sink.py +181 -0
- eye/detection/tools/inference_slicer.py +288 -0
- eye/detection/tools/json_sink.py +142 -0
- eye/detection/tools/polygon_zone.py +202 -0
- eye/detection/tools/smoother.py +123 -0
- eye/detection/tools/smoothing.py +179 -0
- eye/detection/tools/smoothing_config.py +202 -0
- eye/detection/tools/transformers.py +247 -0
- eye/detection/utils.py +1175 -0
- eye/draw/__init__.py +0 -0
- eye/draw/color.py +154 -0
- eye/draw/utils.py +374 -0
- eye/filters.py +112 -0
- eye/geometry/__init__.py +0 -0
- eye/geometry/core.py +128 -0
- eye/geometry/utils.py +47 -0
- eye/keypoint/__init__.py +0 -0
- eye/keypoint/annotators.py +442 -0
- eye/keypoint/core.py +687 -0
- eye/keypoint/skeletons.py +2647 -0
- eye/metrics/__init__.py +21 -0
- eye/metrics/core.py +72 -0
- eye/metrics/detection.py +843 -0
- eye/metrics/f1_score.py +648 -0
- eye/metrics/mean_average_precision.py +628 -0
- eye/metrics/mean_average_recall.py +697 -0
- eye/metrics/precision.py +653 -0
- eye/metrics/recall.py +652 -0
- eye/metrics/utils/__init__.py +0 -0
- eye/metrics/utils/object_size.py +158 -0
- eye/metrics/utils/utils.py +9 -0
- eye/py.typed +0 -0
- eye/quick.py +104 -0
- eye/tracker/__init__.py +0 -0
- eye/tracker/byte_tracker/__init__.py +0 -0
- eye/tracker/byte_tracker/core.py +386 -0
- eye/tracker/byte_tracker/kalman_filter.py +205 -0
- eye/tracker/byte_tracker/matching.py +69 -0
- eye/tracker/byte_tracker/single_object_track.py +178 -0
- eye/tracker/byte_tracker/utils.py +18 -0
- eye/utils/__init__.py +0 -0
- eye/utils/conversion.py +132 -0
- eye/utils/file.py +159 -0
- eye/utils/image.py +794 -0
- eye/utils/internal.py +200 -0
- eye/utils/iterables.py +84 -0
- eye/utils/notebook.py +114 -0
- eye/utils/video.py +307 -0
- eye/utils_eye/__init__.py +1 -0
- eye/utils_eye/geometry.py +71 -0
- eye/utils_eye/nms.py +55 -0
- eye/validators/__init__.py +140 -0
- eye/web.py +271 -0
- eye_cv-1.0.0.dist-info/METADATA +319 -0
- eye_cv-1.0.0.dist-info/RECORD +94 -0
- eye_cv-1.0.0.dist-info/WHEEL +5 -0
- eye_cv-1.0.0.dist-info/licenses/LICENSE +21 -0
- eye_cv-1.0.0.dist-info/top_level.txt +1 -0

eye/core/trackers/__init__.py
@@ -0,0 +1 @@
"""Tracker implementations."""

eye/core/trackers/botsort_tracker.py
@@ -0,0 +1,336 @@
"""BoT-SORT: Robust Associations Multi-Pedestrian Tracking

Based on: https://arxiv.org/abs/2206.14651
Combines SORT with appearance features and camera motion compensation.
"""

import numpy as np
from typing import List, Optional

try:
    import lap
    LAP_AVAILABLE = True
except ImportError:
    LAP_AVAILABLE = False

class KalmanBoxTracker:
    """Kalman filter for tracking bounding boxes with camera motion compensation."""
    count = 0

    def __init__(self, bbox):
        """Initialize tracker with bounding box [x1, y1, x2, y2]."""
        # State: [x_center, y_center, area, aspect_ratio, vx, vy, va]
        self.kf_x = np.zeros(7)
        self.kf_x[:4] = self._convert_bbox_to_z(bbox)

        # Covariance matrix, with high uncertainty for the unobserved velocities
        self.kf_P = np.eye(7) * 10.
        self.kf_P[4:, 4:] *= 1000.

        # State transition matrix (constant-velocity model)
        self.kf_F = np.eye(7)
        for i in range(3):
            self.kf_F[i, i + 4] = 1.0

        # Measurement matrix: only [x, y, s, r] is observed
        self.kf_H = np.eye(4, 7)

        # Process noise
        self.kf_Q = np.eye(7)
        self.kf_Q[4:, 4:] *= 0.01

        # Measurement noise
        self.kf_R = np.eye(4) * 10.

        self.time_since_update = 0
        self.id = KalmanBoxTracker.count
        KalmanBoxTracker.count += 1
        self.history = []
        self.hits = 0
        self.hit_streak = 0
        self.age = 0
        self.last_observation = bbox

        # Appearance features (filled in when a ReID model supplies them)
        self.smooth_feat = None
        self.curr_feat = None
        self.features = []
        self.alpha = 0.9  # EMA coefficient

    def _convert_bbox_to_z(self, bbox):
        """Convert [x1, y1, x2, y2] to [x, y, s, r] where s=area, r=aspect ratio."""
        w = bbox[2] - bbox[0]
        h = bbox[3] - bbox[1]
        x = bbox[0] + w / 2.
        y = bbox[1] + h / 2.
        s = w * h
        r = w / float(h) if h != 0 else 1.0
        return np.array([x, y, s, r])

    def _convert_x_to_bbox(self, x):
        """Convert [x, y, s, r] to [x1, y1, x2, y2]."""
        w = np.sqrt(x[2] * x[3])
        h = x[2] / w if w != 0 else 1.0
        return np.array([
            x[0] - w / 2.,
            x[1] - h / 2.,
            x[0] + w / 2.,
            x[1] + h / 2.
        ])

    def update(self, bbox, feature=None):
        """Update state with observed bbox and optional appearance feature."""
        self.time_since_update = 0
        self.history = []
        self.hits += 1
        self.hit_streak += 1
        self.last_observation = bbox

        z = self._convert_bbox_to_z(bbox)

        # Kalman update over the full 7-dimensional state
        y = z - np.dot(self.kf_H, self.kf_x)
        S = np.dot(np.dot(self.kf_H, self.kf_P), self.kf_H.T) + self.kf_R
        K = np.dot(np.dot(self.kf_P, self.kf_H.T), np.linalg.inv(S))
        self.kf_x = self.kf_x + np.dot(K, y)
        self.kf_P = self.kf_P - np.dot(np.dot(K, self.kf_H), self.kf_P)

        # Update appearance feature with EMA
        if feature is not None:
            self.curr_feat = feature
            if self.smooth_feat is None:
                self.smooth_feat = feature
            else:
                self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feature
            self.features.append(feature)

    def predict(self):
        """Predict next state."""
        # Keep the predicted area non-negative before propagating the state
        if self.kf_x[2] + self.kf_x[6] <= 0:
            self.kf_x[6] = 0.0
        self.kf_x = np.dot(self.kf_F, self.kf_x)
        self.kf_P = np.dot(np.dot(self.kf_F, self.kf_P), self.kf_F.T) + self.kf_Q

        self.age += 1
        if self.time_since_update > 0:
            self.hit_streak = 0
        self.time_since_update += 1
        self.history.append(self._convert_x_to_bbox(self.kf_x))
        return self.history[-1]

    def get_state(self):
        """Return current bounding box estimate."""
        return self._convert_x_to_bbox(self.kf_x)

    def apply_camera_motion(self, warp_matrix):
        """Apply camera motion compensation using a 3x3 warp matrix."""
        x1, y1, x2, y2 = self.get_state()
        x1_, y1_, _ = warp_matrix @ np.array([x1, y1, 1.])
        x2_, y2_, _ = warp_matrix @ np.array([x2, y2, 1.])

        # Re-seed the observed part of the state from the warped box
        self.kf_x[:4] = self._convert_bbox_to_z([x1_, y1_, x2_, y2_])

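A minimal sketch of the predict/update cycle this class implements (hypothetical box coordinates; assumes only numpy and the class above):

    trk = KalmanBoxTracker([100., 100., 150., 200.])
    pred = trk.predict()                   # constant-velocity prediction, [x1, y1, x2, y2]
    trk.update([102., 101., 152., 203.])   # correct the state with a new observation
    print(trk.get_state())                 # refined bounding-box estimate
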
def iou_batch(bb_test, bb_gt):
    """Compute pairwise IoU between two sets of boxes."""
    bb_gt = np.expand_dims(bb_gt, 0)
    bb_test = np.expand_dims(bb_test, 1)

    xx1 = np.maximum(bb_test[..., 0], bb_gt[..., 0])
    yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1])
    xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2])
    yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3])
    w = np.maximum(0., xx2 - xx1)
    h = np.maximum(0., yy2 - yy1)
    wh = w * h

    area_test = (bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1])
    area_gt = (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1])

    # Small epsilon guards against division by zero for degenerate boxes
    iou = wh / (area_test + area_gt - wh + 1e-12)
    return iou

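A quick worked example (hypothetical boxes): a 10x10 box overlapping a shifted copy in a 5x5 region gives IoU = 25 / (100 + 100 - 25) ≈ 0.143:

    a = np.array([[0., 0., 10., 10.]])
    b = np.array([[5., 5., 15., 15.]])
    print(iou_batch(a, b))  # [[0.1428...]]
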
def embedding_distance(tracks, detections, metric='cosine'):
    """Compute appearance-feature distance between tracks and detections.

    `detections` is expected to already be an (n x feature_dim) array of
    appearance features. Returns a (num_tracks x num_detections) cost matrix;
    rows for tracks without a stored feature keep a prohibitively large cost.
    """
    det_features = np.asarray(detections)
    cost_matrix = np.ones((len(tracks), len(det_features))) * 1e9
    if len(tracks) == 0 or len(det_features) == 0:
        return cost_matrix

    with_feat = [i for i, trk in enumerate(tracks) if trk.smooth_feat is not None]
    if not with_feat:
        return cost_matrix
    track_features = np.array([tracks[i].smooth_feat for i in with_feat])

    if metric == 'cosine':
        # Cosine distance on L2-normalized features
        track_features = track_features / np.linalg.norm(track_features, axis=1, keepdims=True)
        det_normed = det_features / np.linalg.norm(det_features, axis=1, keepdims=True)
        cost_matrix[with_feat, :] = 1 - np.dot(track_features, det_normed.T)
    else:
        # Euclidean distance
        cost_matrix[with_feat, :] = np.linalg.norm(
            track_features[:, None] - det_features[None, :], axis=2)

    return cost_matrix

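A small sanity check (illustrative feature vectors): an identical feature gives cosine distance 0, an orthogonal one gives 1:

    trk = KalmanBoxTracker([0., 0., 10., 10.])
    trk.smooth_feat = np.array([1., 0.])
    dets = np.array([[1., 0.], [0., 1.]])
    print(embedding_distance([trk], dets))  # [[0. 1.]]
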
def linear_assignment(cost_matrix):
    """Solve the linear assignment problem (optimal with `lap`, greedy otherwise)."""
    if not LAP_AVAILABLE:
        # Fallback: greedy assignment by repeatedly taking the cheapest pair
        matches = []
        cost = cost_matrix.copy()
        for _ in range(min(cost.shape)):
            i, j = np.unravel_index(cost.argmin(), cost.shape)
            if cost[i, j] < 1e9:
                matches.append([i, j])
                cost[i, :] = 1e9
                cost[:, j] = 1e9

        matches = np.array(matches) if matches else np.empty((0, 2), dtype=int)
        unmatched_a = list(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
        unmatched_b = list(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
        return matches, unmatched_a, unmatched_b

    # Use lap.lapjv for the optimal assignment
    _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
    matches = [[i, x[i]] for i in range(len(x)) if x[i] >= 0]
    unmatched_a = [i for i in range(len(x)) if x[i] < 0]
    unmatched_b = [j for j in range(len(y)) if y[j] < 0]
    return np.array(matches), unmatched_a, unmatched_b

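Illustrative use on a 2x2 cost matrix (hypothetical costs): both the optimal solver and the greedy fallback pair row 0 with column 1 and row 1 with column 0, for a total cost of 0.3:

    cost = np.array([[0.9, 0.1],
                     [0.2, 0.8]])
    matches, unmatched_rows, unmatched_cols = linear_assignment(cost)
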
class BoTSORTTracker:
    """BoT-SORT tracker with appearance features and camera motion compensation."""

    def __init__(
        self,
        max_age: int = 30,
        min_hits: int = 3,
        iou_threshold: float = 0.3,
        proximity_thresh: float = 0.5,
        appearance_thresh: float = 0.25
    ):
        """
        Args:
            max_age: Maximum frames to keep lost tracks.
            min_hits: Minimum hits to confirm a track.
            iou_threshold: IoU threshold for the first association.
            proximity_thresh: Proximity threshold for the second association.
            appearance_thresh: Appearance-feature cost threshold.
        """
        self.max_age = max_age
        self.min_hits = min_hits
        self.iou_threshold = iou_threshold
        self.proximity_thresh = proximity_thresh
        self.appearance_thresh = appearance_thresh
        self.trackers = []
        self.frame_count = 0
        self.camera_update = np.eye(3)  # camera motion warp matrix

    def update(self, detections, features=None):
        """
        Args:
            detections: nx5 array [x1, y1, x2, y2, score].
            features: Optional appearance features (n x feature_dim).

        Returns:
            mx5 array [x1, y1, x2, y2, track_id] of confirmed tracks.
        """
        self.frame_count += 1

        # Apply camera motion compensation
        for trk in self.trackers:
            trk.apply_camera_motion(self.camera_update)

        # Get predictions, dropping trackers whose state has diverged
        trks = np.zeros((len(self.trackers), 5))
        to_del = []
        for t, trk in enumerate(trks):
            pos = self.trackers[t].predict()
            trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
            if np.any(np.isnan(pos)):
                to_del.append(t)

        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        for t in reversed(to_del):
            self.trackers.pop(t)

        # First association: IoU matching
        matched, unmatched_dets, unmatched_trks = self._associate_detections_to_trackers(
            detections, trks, self.iou_threshold
        )

        # Update matched trackers
        for m in matched:
            feat = features[m[0]] if features is not None else None
            self.trackers[m[1]].update(detections[m[0], :4], feat)

        # Second association: fuse IoU and appearance costs for the leftovers
        if len(unmatched_dets) > 0 and len(unmatched_trks) > 0 and features is not None:
            leftover_tracks = [self.trackers[i] for i in unmatched_trks]
            leftover_features = features[unmatched_dets]

            # iou_batch(dets, trks) is (num_dets x num_trks);
            # embedding_distance(tracks, feats) is (num_trks x num_dets), so transpose it
            iou_cost = 1 - iou_batch(detections[unmatched_dets, :4], trks[unmatched_trks, :4])
            app_cost = embedding_distance(leftover_tracks, leftover_features).T

            # Fuse costs
            cost_matrix = 0.5 * iou_cost + 0.5 * app_cost

            matched2, unmatched_dets2, unmatched_trks2 = linear_assignment(cost_matrix)

            still_unmatched_dets = list(unmatched_dets2)
            still_unmatched_trks = list(unmatched_trks2)
            for m in matched2:
                if cost_matrix[m[0], m[1]] < self.appearance_thresh:
                    feat = features[unmatched_dets[m[0]]]
                    self.trackers[unmatched_trks[m[1]]].update(
                        detections[unmatched_dets[m[0]], :4], feat)
                else:
                    # Reject weak fused matches; keep both sides unmatched
                    still_unmatched_dets.append(m[0])
                    still_unmatched_trks.append(m[1])

            unmatched_dets = [unmatched_dets[i] for i in still_unmatched_dets]
            unmatched_trks = [unmatched_trks[i] for i in still_unmatched_trks]

        # Create new trackers for the remaining detections
        for i in unmatched_dets:
            trk = KalmanBoxTracker(detections[i, :4])
            if features is not None:
                trk.curr_feat = features[i]
                trk.smooth_feat = features[i]
            self.trackers.append(trk)

        # Emit confirmed tracks and remove dead trackers
        i = len(self.trackers)
        ret = []
        for trk in reversed(self.trackers):
            d = trk.get_state()
            if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
                ret.append(np.concatenate((d, [trk.id])).reshape(1, -1))
            i -= 1
            if trk.time_since_update > self.max_age:
                self.trackers.pop(i)

        if len(ret) > 0:
            return np.concatenate(ret)
        return np.empty((0, 5))

    def _associate_detections_to_trackers(self, detections, trackers, iou_threshold):
        """Associate detections to tracked objects."""
        if len(trackers) == 0:
            return np.empty((0, 2), dtype=int), list(range(len(detections))), []

        iou_matrix = iou_batch(detections[:, :4], trackers[:, :4])

        if min(iou_matrix.shape) > 0:
            matched_indices, unmatched_a, unmatched_b = linear_assignment(-iou_matrix)
        else:
            matched_indices = np.empty((0, 2), dtype=int)
            unmatched_a = list(range(len(detections)))
            unmatched_b = list(range(len(trackers)))

        # Filter out matches with IoU below the threshold
        matches = []
        for m in matched_indices:
            if iou_matrix[m[0], m[1]] < iou_threshold:
                unmatched_a.append(m[0])
                unmatched_b.append(m[1])
            else:
                matches.append(m.reshape(1, 2))

        if len(matches) == 0:
            matches = np.empty((0, 2), dtype=int)
        else:
            matches = np.concatenate(matches, axis=0)

        return matches, unmatched_a, unmatched_b

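A hedged end-to-end sketch (hypothetical detections; the ReID features are random stand-ins, and camera_update is left as the identity, whereas a real deployment would estimate it per frame):

    tracker = BoTSORTTracker(max_age=30, min_hits=1, iou_threshold=0.3)
    dets = np.array([[10., 10., 50., 80., 0.9],
                     [200., 40., 260., 120., 0.8]])
    feats = np.random.rand(2, 128)   # stand-in for ReID embeddings
    for _ in range(3):               # feed the same frame a few times
        tracks = tracker.update(dets, feats)
    print(tracks)                    # rows of [x1, y1, x2, y2, track_id]
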

eye/core/trackers/bytetrack_tracker.py
@@ -0,0 +1,284 @@
"""ByteTrack: Multi-Object Tracking by Associating Every Detection Box

Based on: https://arxiv.org/abs/2110.06864
"""

import numpy as np
from typing import List, Tuple

try:
    import lap
    LAP_AVAILABLE = True
except ImportError:
    LAP_AVAILABLE = False

class KalmanBoxTracker:
    """Kalman filter for tracking bounding boxes."""
    count = 0

    def __init__(self, bbox):
        """Initialize tracker with bounding box [x1, y1, x2, y2]."""
        # State: [x_center, y_center, area, aspect_ratio, vx, vy, va, vr]
        self.kf_x = np.zeros(8)
        self.kf_x[:4] = self._convert_bbox_to_z(bbox)

        # Covariance matrix
        self.kf_P = np.eye(8) * 10.
        self.kf_P[4:, 4:] *= 1000.  # High uncertainty for velocities

        # State transition matrix: position += velocity
        self.kf_F = np.eye(8)
        for i in range(4):
            self.kf_F[i, i + 4] = 1.0

        # Measurement matrix: only [x, y, s, r] is observed
        self.kf_H = np.eye(4, 8)

        # Process noise
        self.kf_Q = np.eye(8)
        self.kf_Q[4:, 4:] *= 0.01

        # Measurement noise
        self.kf_R = np.eye(4)

        self.time_since_update = 0
        self.id = KalmanBoxTracker.count
        KalmanBoxTracker.count += 1
        self.history = []
        self.hits = 0
        self.hit_streak = 0
        self.age = 0

    def _convert_bbox_to_z(self, bbox):
        """Convert [x1, y1, x2, y2] to [x, y, s, r] where s=area, r=aspect_ratio."""
        w = bbox[2] - bbox[0]
        h = bbox[3] - bbox[1]
        x = bbox[0] + w / 2.
        y = bbox[1] + h / 2.
        s = w * h
        r = w / float(h) if h != 0 else 1.0
        return np.array([x, y, s, r])

    def _convert_x_to_bbox(self, x):
        """Convert [x, y, s, r] to [x1, y1, x2, y2]."""
        w = np.sqrt(x[2] * x[3])
        h = x[2] / w if w != 0 else 1.0
        return np.array([
            x[0] - w / 2.,
            x[1] - h / 2.,
            x[0] + w / 2.,
            x[1] + h / 2.
        ])

    def update(self, bbox):
        """Update state with observed bbox."""
        self.time_since_update = 0
        self.history = []
        self.hits += 1
        self.hit_streak += 1

        z = self._convert_bbox_to_z(bbox)

        # Kalman update over the full 8-dimensional state
        y = z - np.dot(self.kf_H, self.kf_x)
        S = np.dot(np.dot(self.kf_H, self.kf_P), self.kf_H.T) + self.kf_R
        K = np.dot(np.dot(self.kf_P, self.kf_H.T), np.linalg.inv(S))
        self.kf_x = self.kf_x + np.dot(K, y)
        self.kf_P = self.kf_P - np.dot(np.dot(K, self.kf_H), self.kf_P)

    def predict(self):
        """Predict next state."""
        # Keep the predicted area non-negative before propagating the state
        if self.kf_x[2] + self.kf_x[6] <= 0:
            self.kf_x[6] = 0.0
        self.kf_x = np.dot(self.kf_F, self.kf_x)
        self.kf_P = np.dot(np.dot(self.kf_F, self.kf_P), self.kf_F.T) + self.kf_Q

        self.age += 1
        if self.time_since_update > 0:
            self.hit_streak = 0
        self.time_since_update += 1
        self.history.append(self._convert_x_to_bbox(self.kf_x))
        return self.history[-1]

    def get_state(self):
        """Return current bounding box estimate."""
        return self._convert_x_to_bbox(self.kf_x)

def iou_batch(bb_test, bb_gt):
    """Compute pairwise IoU between two sets of boxes."""
    bb_gt = np.expand_dims(bb_gt, 0)
    bb_test = np.expand_dims(bb_test, 1)

    xx1 = np.maximum(bb_test[..., 0], bb_gt[..., 0])
    yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1])
    xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2])
    yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3])
    w = np.maximum(0., xx2 - xx1)
    h = np.maximum(0., yy2 - yy1)
    wh = w * h

    area_test = (bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1])
    area_gt = (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1])

    # Small epsilon guards against division by zero for degenerate boxes
    iou = wh / (area_test + area_gt - wh + 1e-12)
    return iou

def linear_assignment(cost_matrix):
    """Solve the linear assignment problem (optimal with `lap`, greedy otherwise)."""
    if not LAP_AVAILABLE:
        # Fallback: greedy assignment by repeatedly taking the cheapest pair
        matches = []
        cost = cost_matrix.copy()
        for _ in range(min(cost.shape)):
            i, j = np.unravel_index(cost.argmin(), cost.shape)
            if cost[i, j] < 1e9:
                matches.append([i, j])
                cost[i, :] = 1e9
                cost[:, j] = 1e9

        matches = np.array(matches) if matches else np.empty((0, 2), dtype=int)
        unmatched_a = list(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
        unmatched_b = list(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
        return matches, unmatched_a, unmatched_b

    # Use lap for optimal assignment
    _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
    matches = [[i, x[i]] for i in range(len(x)) if x[i] >= 0]
    unmatched_a = [i for i in range(len(x)) if x[i] < 0]
    unmatched_b = [j for j in range(len(y)) if y[j] < 0]
    return np.array(matches), unmatched_a, unmatched_b

class ByteTrackTracker:
    """ByteTrack tracker with high/low confidence detection association."""

    def __init__(
        self,
        max_age: int = 30,
        min_hits: int = 3,
        iou_threshold: float = 0.3,
        high_threshold: float = 0.5,
        low_threshold: float = 0.1
    ):
        """
        Args:
            max_age: Maximum frames to keep lost tracks.
            min_hits: Minimum hits to confirm a track.
            iou_threshold: IoU threshold for matching.
            high_threshold: High confidence threshold.
            low_threshold: Low confidence threshold.
        """
        self.max_age = max_age
        self.min_hits = min_hits
        self.iou_threshold = iou_threshold
        self.high_threshold = high_threshold
        self.low_threshold = low_threshold
        self.trackers = []
        self.frame_count = 0

    def update(self, detections):
        """
        Args:
            detections: nx5 array [x1, y1, x2, y2, score].

        Returns:
            mx5 array [x1, y1, x2, y2, track_id] of confirmed tracks.
        """
        self.frame_count += 1

        # Get predictions from existing trackers, dropping diverged ones
        trks = np.zeros((len(self.trackers), 5))
        to_del = []
        for t, trk in enumerate(trks):
            pos = self.trackers[t].predict()
            trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
            if np.any(np.isnan(pos)):
                to_del.append(t)

        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        for t in reversed(to_del):
            self.trackers.pop(t)

        # Split detections by confidence
        if len(detections) > 0:
            high_dets = detections[detections[:, 4] >= self.high_threshold]
            low_dets = detections[(detections[:, 4] >= self.low_threshold) &
                                  (detections[:, 4] < self.high_threshold)]
        else:
            high_dets = np.empty((0, 5))
            low_dets = np.empty((0, 5))

        # First association: high confidence detections
        matched, unmatched_dets, unmatched_trks = self._associate_detections_to_trackers(
            high_dets, trks, self.iou_threshold
        )

        # Update matched trackers
        for m in matched:
            self.trackers[m[1]].update(high_dets[m[0], :4])

        # Second association: low confidence detections against unmatched tracks
        if len(low_dets) > 0 and len(unmatched_trks) > 0:
            unmatched_trks_boxes = trks[unmatched_trks]
            matched_low, unmatched_dets_low, unmatched_trks_low = self._associate_detections_to_trackers(
                low_dets, unmatched_trks_boxes, self.iou_threshold
            )

            for m in matched_low:
                self.trackers[unmatched_trks[m[1]]].update(low_dets[m[0], :4])

            unmatched_trks = [unmatched_trks[i] for i in unmatched_trks_low]

        # Create new trackers only for unmatched high confidence detections
        for i in unmatched_dets:
            trk = KalmanBoxTracker(high_dets[i, :4])
            self.trackers.append(trk)

        # Emit confirmed tracks and remove dead trackers
        i = len(self.trackers)
        ret = []
        for trk in reversed(self.trackers):
            d = trk.get_state()
            if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
                ret.append(np.concatenate((d, [trk.id])).reshape(1, -1))
            i -= 1
            if trk.time_since_update > self.max_age:
                self.trackers.pop(i)

        if len(ret) > 0:
            return np.concatenate(ret)
        return np.empty((0, 5))

    def _associate_detections_to_trackers(self, detections, trackers, iou_threshold):
        """Associate detections to tracked objects."""
        if len(trackers) == 0:
            return np.empty((0, 2), dtype=int), list(range(len(detections))), []

        iou_matrix = iou_batch(detections[:, :4], trackers[:, :4])

        if min(iou_matrix.shape) > 0:
            a = (iou_matrix > iou_threshold).astype(np.int32)
            if a.sum(1).max() == 1 and a.sum(0).max() == 1:
                # Unambiguous one-to-one overlaps: read the matches off directly
                matched_indices = np.stack(np.where(a), axis=1)
                unmatched_a = [d for d in range(len(detections))
                               if d not in matched_indices[:, 0]]
                unmatched_b = [t for t in range(len(trackers))
                               if t not in matched_indices[:, 1]]
            else:
                matched_indices, unmatched_a, unmatched_b = linear_assignment(-iou_matrix)
        else:
            matched_indices = np.empty((0, 2), dtype=int)
            unmatched_a = list(range(len(detections)))
            unmatched_b = list(range(len(trackers)))

        # Filter out matches with IoU below the threshold
        matches = []
        for m in matched_indices:
            if iou_matrix[m[0], m[1]] < iou_threshold:
                unmatched_a.append(m[0])
                unmatched_b.append(m[1])
            else:
                matches.append(m.reshape(1, 2))

        if len(matches) == 0:
            matches = np.empty((0, 2), dtype=int)
        else:
            matches = np.concatenate(matches, axis=0)

        return matches, unmatched_a, unmatched_b
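
A hedged usage sketch (hypothetical detections): the 0.9-score box is tracked from the first, high-confidence stage, while the 0.3-score box in the next frame can only re-attach to the existing track in the second stage and never spawns a new one:

    tracker = ByteTrackTracker(min_hits=1, high_threshold=0.5, low_threshold=0.1)
    frame1 = np.array([[10., 10., 50., 80., 0.9]])
    tracker.update(frame1)                          # spawns a new track
    frame2 = np.array([[12., 11., 52., 82., 0.3]])  # same object, weak score
    print(tracker.update(frame2))                   # track survives via stage two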