dnt 0.3.1.3__py3-none-any.whl → 0.3.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dnt/__init__.py +1 -1
- dnt/analysis/__init__.py +3 -3
- dnt/analysis/count.py +54 -37
- dnt/analysis/{interaction.py → interaction2.py} +23 -8
- dnt/analysis/stop3.py +7 -3
- dnt/detect/signal/detector.py +13 -4
- dnt/detect/timestamp.py +105 -0
- dnt/detect/yolov8/detector.py +72 -29
- dnt/detect/yolov8/segmentor.py +60 -2
- dnt/filter/filter.py +19 -8
- dnt/label/labeler2.py +170 -67
- dnt/shared/synhcro.py +1 -1
- dnt/track/dsort/deep_sort/deep_sort.py +4 -3
- dnt/track/dsort/deep_sort/sort/detection.py +2 -1
- dnt/track/dsort/deep_sort/sort/track.py +2 -1
- dnt/track/dsort/deep_sort/sort/tracker.py +1 -1
- dnt/track/dsort/dsort.py +34 -17
- dnt/track/re_class.py +29 -6
- dnt/track/sort/sort.py +4 -5
- dnt/track/tracker.py +9 -5
- {dnt-0.3.1.3.dist-info → dnt-0.3.1.7.dist-info}/METADATA +16 -8
- {dnt-0.3.1.3.dist-info → dnt-0.3.1.7.dist-info}/RECORD +25 -24
- {dnt-0.3.1.3.dist-info → dnt-0.3.1.7.dist-info}/WHEEL +1 -1
- {dnt-0.3.1.3.dist-info → dnt-0.3.1.7.dist-info/licenses}/LICENSE +0 -0
- {dnt-0.3.1.3.dist-info → dnt-0.3.1.7.dist-info}/top_level.txt +0 -0
dnt/filter/filter.py
CHANGED

@@ -8,7 +8,8 @@ class Filter:
         pass
 
     @staticmethod
-    def filter_iou(detections: pd.DataFrame, zones: geometry.multipolygon = None,
+    def filter_iou(detections: pd.DataFrame, zones: geometry.multipolygon = None,
+                   class_list: list[int] = None, score_threshold: float = 0)->pd.DataFrame:
 
         detections = detections.loc[detections[6]>=score_threshold].copy()
 
@@ -232,7 +233,7 @@ class Filter:
 
     @staticmethod
     def filter_tracks_by_zones(tracks:pd.DataFrame,
-                               zones:
+                               zones: list[Polygon] = None,
                                method: str = 'list',
                                ref_point: str = 'bc',
                                offset: tuple = (0, 0),
@@ -244,10 +245,19 @@ class Filter:
         Inputs:
             tracks - tracks
             zones - zones (polygon)
-            method - 'list' (default)
+            method - 'list' (default) - List track ids within zones
+                     'filter' - filter tracks within zones
+                     'label' - label tracks with zone index
             ref_point - the reference point of a track bbox,
-                        br - buttom_right,
-
+                        br - buttom_right,
+                        bl - bottom_left
+                        bc - bottom_center
+                        cc - center_point,
+                        cl - left_center,
+                        cr - right_center,
+                        tc - top_center,
+                        tl - top_left,
+                        tr - top_right,
             offset - the offset to ref_point, default is (0, 0)
             aggregate - combine outputs to one dataframe, add zone column
             zone_name - if aggregate, the field name of zone variable, default is 'zone'
@@ -291,7 +301,7 @@ class Filter:
             pbar.set_description_str("Filtering zones {} of {}".format(video_index, video_tot))
         else:
             pbar.set_description_str("Filtering zones ")
-
+
         for zone in zones:
             matched = geo_tracks[geo_tracks.geometry.within(zone)]
             if len(matched)>0:
@@ -305,7 +315,7 @@ class Filter:
             for i in range(len(matched_ids)):
                 tracks.loc[tracks['track'].isin(matched_ids[i]), zone_name] = i
             if method == 'filter':
-                results = tracks[tracks[
+                results = tracks[tracks[zone_name]!=-1].copy()
             else:
                 results = tracks
         else:
@@ -375,7 +385,8 @@ class Filter:
                               tolerance: int = 0,
                               bbox_size: int = 0,
                               force_line_indexes: list[int] = None,
-                              video_index:int = None,
+                              video_index:int = None,
+                              video_tot:int = None) -> pd.DataFrame:
         '''
         Filter tracks by lines
         Inputs:
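The expanded docstring above spells out the three method modes of filter_tracks_by_zones. A minimal usage sketch based on those signatures (the import path, file name, and zone coordinates are assumptions, not from the diff):

import pandas as pd
from shapely.geometry import Polygon
from dnt.filter.filter import Filter   # hypothetical import path

# MOT-style tracks: frame, track, x, y, w, h, score, cls, r3, r4
tracks = pd.read_csv("traffic_track.txt", header=None)
tracks.columns = ['frame', 'track', 'x', 'y', 'w', 'h', 'score', 'cls', 'r3', 'r4']

# 'label' tags each track with the index of the zone containing its
# bottom-center point; 'filter' would instead drop tracks outside all zones.
zone = Polygon([(100, 400), (600, 400), (600, 600), (100, 600)])
labeled = Filter.filter_tracks_by_zones(tracks, zones=[zone], method='label',
                                        ref_point='bc', zone_name='zone')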
dnt/label/labeler2.py
CHANGED

@@ -1,5 +1,6 @@
 import os, sys
 sys.path.append(os.path.dirname(__file__))
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
 
 import cv2
 import numpy as np
@@ -7,37 +8,74 @@ import matplotlib.pyplot as plt
 from tqdm import tqdm
 import pandas as pd
 import itertools
-from
+from shared.util import load_classes
 import random
+import subprocess
 
 class Labeler:
-    def __init__(self,
-
+    def __init__(self,
+                 method:str='opencv',
+                 encoder:str='libx264',
+                 preset:str='medium',
+                 crf:int=23,
+                 pix_fmt:str='bgr24',
+                 compress_message:bool=False,
+                 nodraw_empty:bool=True):
+        '''
+        Parameters:\n
+        - method: 'opencv' (default) - use opencv to draw labels),
+                  'ffmpeg' - use ffmpeg to draw labels
+        - encoder: 'libx264' (default) - use libx264 encoder for ffmpeg,
+                   'libx265' - use libx265 encoder for ffmpeg
+                   'h264_nvenc' - use h264_nvenc encoder for ffmpeg
+                   'hevc_nvenc' - use hevc_nvenc encoder for ffmpeg
+        - preset: 'medium' (default) - use medium preset for ffmpeg
+                  'slow' - use slow preset for ffmpeg
+                  'fast' - use fast preset for ffmpeg
+        - crf: 23 (default) - use 23 crf for ffmpeg, lower is better quality
+        - compress_message: False (default) - show compress message in progress bar
+        - nodraw_empty: True (default) - not draw empty frames
+        '''
         self.method = method
+        self.encoder = encoder
+        self.preset = preset
+        self.crf = crf
+        self.pix_fmt = pix_fmt
         self.compress_message=compress_message
         self.nodraw_empty = nodraw_empty
 
-    def draw(self,
-
-
-
+    def draw(self,
+             input_video:str,
+             output_video:str,
+             draws:pd.DataFrame=None,
+             draw_file:str=None,
+             start_frame:int=None,
+             end_frame:int=None,
+             video_index:int=None,
+             video_tot:int=None,
+             verbose:bool=True):
         '''
-        General labeling function
-        Inputs
+        General labeling function\n
+        Inputs:\n
         draws: a DataFrame contains labeling information, if None, read label_file
         label_file: a txt file with a header ['frame','type','coords','color','size','thick','desc']
         input_video: raw video
         output_video: labeled video
-        start_frame:
-        end_frame:
+        start_frame: starting frame
+        end_frame: ending frame
         video_index: display video index in batch processing
         video_tot: display total video number in batch processing
         '''
         if draws is not None:
             data = draws
         else:
-            data = pd.read_csv(draw_file,
-
+            data = pd.read_csv(draw_file,
+                               dtype={'frame':int,
+                                      'type':str,
+                                      'size':float,
+                                      'desc':str,
+                                      'thick':int},
+                               converters={'coords': lambda x:list(eval(x)), 'color': lambda x:eval(x)})
 
         cmap = plt.get_cmap('tab20b')
         colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
@@ -55,8 +93,31 @@ class Labeler:
         fps = int(cap.get(cv2.CAP_PROP_FPS))
         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-
-
+
+        if self.method == 'ffmpeg':
+            # FFmpeg command to write H.265 encoded video
+            ffmpeg_cmd = [
+                "ffmpeg",
+                "-y",                   # Overwrite output file if it exists
+                "-f", "rawvideo",
+                "-vcodec", "rawvideo",
+                "-pix_fmt", self.pix_fmt,
+                "-s", f"{width}x{height}",
+                "-r", str(fps),
+                "-i", "-",              # Read input from stdin
+                "-c:v", self.encoder,   # H.265 codec
+                "-preset", self.preset, # Adjust preset as needed (ultrafast, fast, medium, slow, etc.)
+                "-crf", str(self.crf),  # Constant Rate Factor (higher = more compression, lower = better quality)
+                output_video]
+
+            # Start FFmpeg process
+            process = subprocess.Popen(ffmpeg_cmd,
+                                       stdin=subprocess.PIPE,
+                                       stdout=subprocess.DEVNULL,
+                                       stderr=subprocess.DEVNULL)
+        else:
+            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+            writer = cv2.VideoWriter(output_video, fourcc, fps, (width, height))
 
         if verbose:
             pbar = tqdm(total=tot_frames, unit=" frames")
@@ -128,11 +189,11 @@ class Labeler:
                     color = element['color']
                     thick = element['thick']
                     cv2.polylines(frame, [np.array(coords)], isClosed=False, color=color, thickness=thick)
-
-
-
-
-
+
+            if self.method == 'ffmpeg':
+                process.stdin.write(frame.tobytes())
+            else:
+                writer.write(frame)
 
             if verbose:
                 pbar.update()
@@ -141,17 +202,31 @@ class Labeler:
             pbar.close()
         #cv2.destroyAllWindows()
         cap.release()
-
-
-
-
-
-
-
-
-
+        if self.method == 'ffmpeg':
+            process.stdin.close()
+            process.wait()
+        else:
+            writer.release()
+
+    def draw_track_clips(self,
+                         input_video:str,
+                         output_path:str,
+                         tracks:pd.DataFrame=None,
+                         track_file:str = None,
+                         method:str='all',
+                         random_number:int=10,
+                         track_ids:list=None,
+                         start_frame_offset:int=0,
+                         end_frame_offset:int=0,
+                         tail:int=0,
+                         prefix:bool=False,
+                         size:int=1,
+                         thick:int=1,
+                         video_index:int=None,
+                         video_tot:int=None,
+                         verbose:bool=True):
         '''
-        Parameters
+        Parameters:\n
         input_video: the raw video file
         outputh_path: the folder for outputing track clips
         tracks: the dataframe of tracks
@@ -184,7 +259,7 @@ class Labeler:
         else:
             track_ids = tracks['track'].unique().tolist()
 
-        pbar = tqdm(total=len(track_ids), desc='Labeling tracks ', unit='
+        #pbar = tqdm(total=len(track_ids), desc='Labeling tracks ', unit='clips')
         for id in track_ids:
             selected_tracks = tracks[tracks['track']==id].copy()
             start_frame = max(selected_tracks['frame'].min() - start_frame_offset, 0)
@@ -195,20 +270,42 @@ class Labeler:
             else:
                 out_video = os.path.join(output_path, str(id)+'.mp4')
 
-            self.draw_tracks(input_video=input_video,
-
-
-
-
-
-
-
-
-
+            self.draw_tracks(input_video=input_video,
+                             output_video=out_video,
+                             tracks=selected_tracks,
+                             start_frame=start_frame,
+                             end_frame=end_frame,
+                             verbose=verbose,
+                             tail=tail,
+                             thick=thick,
+                             size=size,
+                             video_index=video_index,
+                             video_tot=video_tot)
+
+            #pbar.update()
+        #pbar.close()
+
+    def draw_tracks(self,
+                    input_video:str,
+                    output_video:str,
+                    tracks:pd.DataFrame=None,
+                    track_file:str=None,
+                    label_file:str=None,
+                    color=None,
+                    tail:int=0,
+                    thick:int=2,
+                    size:int=1,
+                    class_name=False,
+                    start_frame:int=None,
+                    end_frame:int=None,
+                    video_index:int=None,
+                    video_tot:int=None,
+                    verbose:bool=True):
 
         if tracks is None:
-            tracks = pd.read_csv(track_file,
+            tracks = pd.read_csv(track_file,
+                                 header=None,
+                                 dtype={0:int, 1:int, 2:int, 3:int, 4:int, 5:int, 6:float, 7:int, 8:int, 9:int})
         tracks.columns = ['frame', 'track', 'x', 'y', 'w', 'h', 'score', 'cls', 'r3', 'r4']
 
         cmap = plt.get_cmap('tab20b')
@@ -223,22 +320,20 @@ class Labeler:
         if end_frame is None:
             end_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))-1
 
-
+        selected_tracks = tracks.loc[(tracks['frame']>=start_frame) & (tracks['frame']<=end_frame)].copy()
 
-
-
-
-
+        pbar_desc = ""
+        if self.compress_message:
+            pbar_desc = "Generating labels"
+        else:
+            if video_index and video_tot:
+                pbar_desc = "Generating labels {} of {}".format(video_index, video_tot)
         else:
-
-            pbar.set_description_str("Generating labels {} of {}".format(video_index, video_tot))
-        else:
-            pbar.set_description_str("Generating labels {} ".format(input_video))
-
-        selected_tracks = tracks.loc[(tracks['frame']>=start_frame) & (tracks['frame']<=end_frame)].copy()
+                pbar_desc = "Generating labels {} ".format(input_video)
 
+        pbar = tqdm(total=len(selected_tracks), unit=" frames", desc=pbar_desc)
         results = []
-        for
+        for _, track in selected_tracks.iterrows():
 
             if color is None:
                 final_color = colors[int(track['track']) % len(colors)]
@@ -265,7 +360,7 @@ class Labeler:
 
             if verbose:
                 pbar.update()
-
+
         if verbose:
             pbar.close()
 
@@ -308,8 +403,9 @@ class Labeler:
             end_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))-1
 
         tot_frames = end_frame - start_frame + 1
+        selected_dets = dets.loc[(dets[0]>=start_frame) & (dets[0]<=end_frame)].copy()
 
-        pbar = tqdm(total=len(
+        pbar = tqdm(total=len(selected_dets), unit=" dets")
         if self.compress_message:
             pbar.set_description_str("Generating labels")
         else:
@@ -318,8 +414,6 @@ class Labeler:
             else:
                 pbar.set_description_str("Generating labels {} ".format(input_video))
 
-        selected_dets = dets.loc[(dets[0]>=start_frame) & (dets[0]<=end_frame)].copy()
-
         results = []
         for index, det in selected_dets.iterrows():
 
@@ -492,11 +586,20 @@ class LabelGenerator():
 
 
 if __name__=='__main__':
-
-
-
-
-
-
-
-    labeler
+
+    video_file = "/mnt/d/videos/sample/traffic.mp4"
+    iou_file = "/mnt/d/videos/sample/dets/traffic_iou.txt"
+    track_file = "/mnt/d/videos/sample/tracks/traffic_track.txt"
+    label_video = "/mnt/d/videos/sample/labels/traffic_track-ffmpeg.mp4"
+    label_file = "/mnt/d/videos/sample/labels/traffic_track.txt"
+
+    labeler = Labeler(method='ffmpeg',
+                      encoder='hevc_nvenc',
+                      preset='medium',
+                      crf=23,
+                      compress_message=True)
+    labeler.draw_tracks(input_video=video_file,
+                        output_video=label_video,
+                        track_file=track_file)
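The new ffmpeg path replaces cv2.VideoWriter with raw BGR frames piped into an ffmpeg subprocess over stdin. A standalone sketch of the same pattern (the output path and frame source are made up; assumes an ffmpeg binary on PATH):

import subprocess
import numpy as np

width, height, fps = 1280, 720, 30

# Pipe raw BGR frames into ffmpeg over stdin; ffmpeg handles the encoding.
cmd = ["ffmpeg", "-y",
       "-f", "rawvideo", "-vcodec", "rawvideo",
       "-pix_fmt", "bgr24", "-s", f"{width}x{height}", "-r", str(fps),
       "-i", "-",
       "-c:v", "libx264", "-preset", "medium", "-crf", "23",
       "out.mp4"]                                # hypothetical output path
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                        stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

for _ in range(fps * 2):                         # two seconds of black frames
    frame = np.zeros((height, width, 3), dtype=np.uint8)
    proc.stdin.write(frame.tobytes())            # must match -pix_fmt and -s exactly

proc.stdin.close()
proc.wait()                                      # flush and finalize the file

One caveat: -crf is a libx264/libx265 rate-control option; the NVENC encoders used in the __main__ example generally ignore it (NVENC has its own rate-control flags such as -cq).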
dnt/shared/synhcro.py
CHANGED

@@ -66,7 +66,7 @@ class Synchronizer():
         milliseconds_per_frame = 1/fps * 1000
 
         dif_frame = frame - ref_frame
-        return round(ref_time + dif_frame * milliseconds_per_frame)
+        return round(ref_time + float(dif_frame) * milliseconds_per_frame)
 
     @staticmethod
     def add_unix_time_to_frames(frames:pd.DataFrame, ref_frame:int, ref_time:int, ref_timezone:str='US/Eastern',
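The fix casts the frame delta to a Python float before the multiply, avoiding integer-typed arithmetic (e.g. numpy ints coming out of a DataFrame). A quick check of the arithmetic with made-up values:

fps = 30
milliseconds_per_frame = 1 / fps * 1000        # ~33.333 ms per frame

ref_frame, ref_time = 100, 1_700_000_000_000   # hypothetical Unix time in ms
frame = 250

dif_frame = frame - ref_frame                  # 150 frames after the reference
print(round(ref_time + float(dif_frame) * milliseconds_per_frame))
# 1700000005000 -> 150 frames at 30 fps is exactly 5000 ms later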
dnt/track/dsort/deep_sort/deep_sort.py
CHANGED

@@ -34,13 +34,13 @@ class DeepSort(object):
         metric = NearestNeighborDistanceMetric("cosine", self.max_cosine_distance, self.nn_budget)
         self.tracker = Tracker(metric, max_iou_distance=self.max_iou_distance, max_age=self.max_age, n_init=self.n_init)
 
-    def update(self, bbox_xywh, confidences, ori_img):
+    def update(self, bbox_xywh, confidences, classes, ori_img):
         self.height, self.width = ori_img.shape[:2]
 
         # generate detections
         features = self._get_features(bbox_xywh, ori_img) # extract features for bboxes
         bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) # convert bbox from xc_yc_w_h to left top width height/width
-        detections = [Detection(bbox_tlwh[i], conf, features[i])
+        detections = [Detection(bbox_tlwh[i], conf, classes[i], features[i])
                       for i, conf in enumerate(confidences) if conf>self.min_confidence] # ignore low confidence bboxes
 
         # run on non-maximum supression
@@ -61,7 +61,8 @@ class DeepSort(object):
             box = track.to_tlwh()
             x1,y1,x2,y2 = self._tlwh_to_xyxy(box)
             track_id = track.track_id
-
+            track_cls = track.cls
+            outputs.append(np.array([x1,y1,x2,y2,track_id,track_cls], dtype=int))
         if len(outputs) > 0:
             outputs = np.stack(outputs,axis=0)
         return outputs
dnt/track/dsort/deep_sort/sort/detection.py
CHANGED

@@ -26,10 +26,11 @@ class Detection(object):
 
     """
 
-    def __init__(self, tlwh, confidence, feature):
+    def __init__(self, tlwh, confidence, cls, feature):
         self.tlwh = np.asarray(tlwh, dtype=float)
         self.confidence = float(confidence)
         self.feature = np.asarray(feature, dtype=float)
+        self.cls = cls
 
     def to_tlbr(self):
         """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
dnt/track/dsort/deep_sort/sort/track.py
CHANGED

@@ -64,7 +64,7 @@ class Track:
     """
 
     def __init__(self, mean, covariance, track_id, n_init, max_age,
-                 feature=None):
+                 feature=None, cls=None):
         self.mean = mean
         self.covariance = covariance
         self.track_id = track_id
@@ -73,6 +73,7 @@ class Track:
         self.time_since_update = 0
 
         self.state = TrackState.Tentative
+        self.cls = cls
         self.features = []
         if feature is not None:
            self.features.append(feature)
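Together, the deep_sort.py, detection.py, and track.py hunks thread a per-detection class id through to every tracker output row. A self-contained mirror of the pattern, using simplified stand-ins rather than the package's real classes:

import numpy as np

# Minimal mirror of the change: carry a class id from detection to track output.
class Detection:
    def __init__(self, tlwh, confidence, cls, feature):
        self.tlwh = np.asarray(tlwh, dtype=float)
        self.confidence = float(confidence)
        self.cls = cls                       # new: per-detection class id
        self.feature = np.asarray(feature, dtype=float)

class Track:
    def __init__(self, track_id, cls=None):
        self.track_id = track_id
        self.cls = cls                       # new: copied from the Detection

det = Detection([10, 20, 50, 80], 0.9, cls=2, feature=np.zeros(128))
trk = Track(track_id=7, cls=det.cls)
print([trk.track_id, trk.cls])               # [7, 2] -> appended to each output row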
dnt/track/dsort/dsort.py
CHANGED

@@ -4,15 +4,31 @@ import torch
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
-
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
-
 from deep_sort import DeepSort
 from config import Config
 
-
-
-
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+def track(video_file:str,
+          det_file:str,
+          out_file:str=None,
+          device:str='auto',
+          half:bool=False,
+          cfg:dict=Config.get_cfg_dsort('default'),
+          video_index:int=None,
+          total_videos:int=None):
+    '''
+    Track objects in a video using Deep SORT.
+    Args:
+        video_file (str): Path to the input video file.
+        det_file (str): Path to the detection results file.
+        out_file (str): Path to save the tracking results.
+        device (str): Device to run the model on ('cpu' or 'cuda').
+        half (bool): Whether to use half precision.
+        cfg (dict): Configuration dictionary for Deep SORT.
+        video_index (int): Index of the current video in a batch.
+        total_videos (int): Total number of videos in a batch.
+    '''
     #device = torch.device('cuda') if (torch.cuda.is_available() and gpu) else torch.device('cpu')
     cap = cv2.VideoCapture(video_file)
     if not cap.isOpened():
@@ -50,11 +66,18 @@ def track(video_file:str, det_file:str, out_file:str = None, device:str = 'auto'
             bbox_xywh = np.array(bbox_xywh)
             conf_score = np.array(frame_dets[:,6])
             classes = np.array(frame_dets[:,7])
-            outputs = deepsort.update(bbox_xywh, conf_score, im)
+            outputs = deepsort.update(bbox_xywh, conf_score, classes, im)
 
             if len(outputs) > 0:
                 for output in outputs:
-                    results.append([pos_frame,
+                    results.append([pos_frame,
+                                    output[4],
+                                    output[0],
+                                    output[1],
+                                    output[2]-output[0],
+                                    output[3]-output[1],
+                                    output[5],
+                                    -1, -1, -1])
 
             pbar.update()
 
@@ -65,15 +88,9 @@ def track(video_file:str, det_file:str, out_file:str = None, device:str = 'auto'
     df.to_csv(out_file, index=False, header=None)
 
 if __name__ == "__main__":
-
-
-
-    iou_file = "/mnt/d/videos/ped2stage/dets/gh1293_iou_ped.txt"
-    out_file = "/mnt/d/videos/ped2stage/tracks/gh1293_ped_track_2.txt"
-    '''
-    video_file = "/mnt/d/videos/samples/ped_veh.mp4"
-    iou_file = "/mnt/d/videos/samples/dets/ped_veh_iou.txt"
-    out_file = "/mnt/d/videos/samples/tracks/ped_veh_track.txt"
+    video_file = "/mnt/d/videos/sample/traffic.mp4"
+    iou_file = "/mnt/d/videos/sample/dets/traffic_det.txt"
+    out_file = "/mnt/d/videos/sample/tracks/traffic_track.txt"
 
     track(video_file, iou_file, out_file)
 
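With classes threaded through, each row appended to results is MOT-like, with the track id in the second column and the class id in the seventh, ahead of the -1 placeholders. Decoding one such row (values made up):

# One row of the tracker output, as written by results.append(...) above:
# [frame, track_id, x, y, w, h, cls, -1, -1, -1]
row = [1042, 7, 355, 210, 48, 96, 2, -1, -1, -1]
frame, track_id = row[0], row[1]
x, y, w, h = row[2:6]           # top-left corner plus width/height
cls = row[6]                    # class id carried through Deep SORT
print(frame, track_id, (x, y, w, h), cls)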
dnt/track/re_class.py
CHANGED

@@ -5,8 +5,25 @@ from ..detect import Detector
 from ..engine.iob import iobs
 
 class ReClass:
-    def __init__(self,
-
+    def __init__(self,
+                 num_frames:int=25,
+                 threshold:float=0.75,
+                 model:str='rtdetr',
+                 weights:str='x',
+                 device:str='auto',
+                 default_class:int=0,
+                 match_class:list=[1, 36]) -> None:
+        """
+        Re-classify tracks based on detection results
+        Parameters:
+            num_frames: Number of frames to consider for re-classification, default 25
+            threshold: Threshold for matching, default 0.75
+            model: Detection model to use, default 'rtdetr'
+            weights: Weights for the detection model, default 'x'
+            device: Device to use for detection, default 'auto'
+            default_class: Default class to assign if no match found, default 0 (pedestrian)
+            match_class: List of classes to match, default [1, 36] (bicycle, skateboard/scooter)
+        """
         self.detector = Detector(model=model, device=device)
         self.num_frames = num_frames
         self.threshold = threshold
@@ -36,8 +53,12 @@ class ReClass:
 
         return hit, avg_score
 
-    def re_classify(self,
-
+    def re_classify(self,
+                    tracks:pd.DataFrame,
+                    input_video:str,
+                    track_ids:list=None,
+                    out_file:str=None,
+                    verbose:bool=True)->pd.DataFrame:
         """
         Re-classify tracks
         Parameters:
@@ -55,7 +76,8 @@ class ReClass:
             track_ids = tracks['track'].unique().tolist()
 
        results = []
-
+        if verbose:
+            pbar = tqdm(total=len(track_ids), unit='track', desc='Re-classifying tracks')
        for track_id in track_ids:
 
            target_track = tracks[tracks['track'] == track_id].copy()
@@ -85,7 +107,8 @@ class ReClass:
                 results.append([track_id, cls, round(avg_score, 2)])
                 if verbose:
                     pbar.update()
-
+        if verbose:
+            pbar.close()
 
         df = pd.DataFrame(results, columns=['track', 'cls', 'avg_score'])
         if out_file:
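A usage sketch based on the signatures and docstrings above (the import path, file names, and the exact matching behavior are assumptions):

import pandas as pd
from dnt.track.re_class import ReClass   # hypothetical import path

tracks = pd.read_csv("traffic_track.txt", header=None)   # hypothetical file
tracks.columns = ['frame', 'track', 'x', 'y', 'w', 'h', 'score', 'cls', 'r3', 'r4']

# Sample up to 25 frames per track and re-detect; tracks whose detections
# match classes [1, 36] above the 0.75 threshold are relabeled, everything
# else falls back to default_class 0 (pedestrian).
rc = ReClass(num_frames=25, threshold=0.75, match_class=[1, 36])
df = rc.re_classify(tracks, input_video="traffic.mp4", out_file="reclass.txt")
print(df.head())   # columns: track, cls, avg_score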
dnt/track/sort/sort.py
CHANGED

@@ -202,7 +202,7 @@ class Sort(object):
     def update(self, dets=np.empty((0, 5))):
         """
         Params:
-        dets - a numpy array of detections in the format [[x1,y1,x2,y2,score]
+        dets - a numpy array of detections in the format [[x1,y1,x2,y2,score]
         Requires: this method must be called once for each frame even with empty detections (use np.empty((0, 5)) for frames without detections).
         Returns the a similar array, where the last column is the object ID.
 
@@ -246,9 +246,9 @@ class Sort(object):
 
 def track(det_file, out_file, max_age=1, min_inits=3, iou_threshold=0.3, video_index = None, total_videos = None):
 
-    tracker = Sort(max_age=max_age,
-
-
+    tracker = Sort(max_age=max_age,
+                   min_hits=min_inits,
+                   iou_threshold=iou_threshold)
     dets = np.loadtxt(det_file, delimiter=',')
     start_frame = int(dets[:,0].min())
     end_frame = int(dets[:,0].max())
@@ -265,7 +265,6 @@ def track(det_file, out_file, max_age=1, min_inits=3, iou_threshold=0.3, video_i
         if len(dets_frame)>0:
             dets_revised[:,[0,1]] = dets_frame[:,[2,3]]
             dets_revised[:,2] = dets_frame[:,2] + dets_frame[:,4]
-
             dets_revised[:,3] = dets_frame[:,3] + dets_frame[:,5]
             dets_revised[:,4] = dets_frame[:,6] / 100
 
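The last hunk converts det-file boxes in [x, y, w, h] form into the [x1, y1, x2, y2, score] layout that Sort.update expects. The same conversion in isolation, checked with made-up numbers:

import numpy as np

# One detection row in the det-file layout: frame, id, x, y, w, h, score(0-100), ...
dets_frame = np.array([[3, -1, 100.0, 50.0, 40.0, 80.0, 90.0]])

dets_revised = np.zeros((len(dets_frame), 5))
dets_revised[:, [0, 1]] = dets_frame[:, [2, 3]]           # x1, y1
dets_revised[:, 2] = dets_frame[:, 2] + dets_frame[:, 4]  # x2 = x + w
dets_revised[:, 3] = dets_frame[:, 3] + dets_frame[:, 5]  # y2 = y + h
dets_revised[:, 4] = dets_frame[:, 6] / 100               # score scaled to 0-1
print(dets_revised)   # [[100.  50. 140. 130.   0.9]]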