dnt 0.3.1.3-py3-none-any.whl → 0.3.1.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dnt might be problematic.

dnt/filter/filter.py CHANGED
@@ -8,7 +8,8 @@ class Filter:
         pass
 
     @staticmethod
-    def filter_iou(detections: pd.DataFrame, zones: geometry.multipolygon = None, class_list: list[int] = None, score_threshold: float = 0):
+    def filter_iou(detections: pd.DataFrame, zones: geometry.multipolygon = None,
+                   class_list: list[int] = None, score_threshold: float = 0) -> pd.DataFrame:
 
         detections = detections.loc[detections[6]>=score_threshold].copy()
 
@@ -232,7 +233,7 @@ class Filter:
 
     @staticmethod
    def filter_tracks_by_zones(tracks:pd.DataFrame,
-                               zones: geometry.MultiPolygon = None,
+                               zones: list[Polygon] = None,
                                method: str = 'list',
                                ref_point: str = 'bc',
                                offset: tuple = (0, 0),
@@ -244,10 +245,19 @@ class Filter:
         Inputs:
             tracks - tracks
             zones - zones (polygon)
-            method - 'list' (default), 'filter', 'label'
+            method - 'list' (default) - list track ids within zones
+                     'filter' - filter tracks within zones
+                     'label' - label tracks with zone index
             ref_point - the reference point of a track bbox,
-                        br - buttom_right, center_point,
-                        left_up, right_up, left_buttom, right_buttom
+                        br - bottom_right,
+                        bl - bottom_left,
+                        bc - bottom_center,
+                        cc - center_point,
+                        cl - left_center,
+                        cr - right_center,
+                        tc - top_center,
+                        tl - top_left,
+                        tr - top_right
             offset - the offset to ref_point, default is (0, 0)
             aggregate - combine outputs to one dataframe, add zone column
             zone_name - if aggregate, the field name of zone variable, default is 'zone'
@@ -291,7 +301,7 @@ class Filter:
                     pbar.set_description_str("Filtering zones {} of {}".format(video_index, video_tot))
                 else:
                     pbar.set_description_str("Filtering zones ")
-
+
             for zone in zones:
                 matched = geo_tracks[geo_tracks.geometry.within(zone)]
                 if len(matched)>0:
@@ -305,7 +315,7 @@ class Filter:
                 for i in range(len(matched_ids)):
                     tracks.loc[tracks['track'].isin(matched_ids[i]), zone_name] = i
                 if method == 'filter':
-                    results = tracks[tracks['zone']!=-1].copy()
+                    results = tracks[tracks[zone_name]!=-1].copy()
                 else:
                     results = tracks
             else:
@@ -375,7 +385,8 @@ class Filter:
                       tolerance: int = 0,
                       bbox_size: int = 0,
                       force_line_indexes: list[int] = None,
-                      video_index:int = None, video_tot:int = None) -> pd.DataFrame:
+                      video_index:int = None,
+                      video_tot:int = None) -> pd.DataFrame:
         '''
         Filter tracks by lines
         Inputs:
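
The expanded filter_tracks_by_zones docstring above enumerates nine two-letter reference-point codes, and the hunk at old line 296 tests each point with shapely's within(). Below is a minimal sketch of how such codes could map a track bbox (x, y, w, h) to one test point; the helper and the column names are illustrative assumptions, not the package's API.

import pandas as pd
from shapely import geometry

# Fractional (fx, fy) position of each reference point inside a bbox;
# codes follow the docstring above, 'bc' (bottom-center) is the default.
REF_POINTS = {
    'tl': (0.0, 0.0), 'tc': (0.5, 0.0), 'tr': (1.0, 0.0),
    'cl': (0.0, 0.5), 'cc': (0.5, 0.5), 'cr': (1.0, 0.5),
    'bl': (0.0, 1.0), 'bc': (0.5, 1.0), 'br': (1.0, 1.0),
}

def bbox_ref_point(x, y, w, h, ref_point='bc', offset=(0, 0)):
    fx, fy = REF_POINTS[ref_point]
    return geometry.Point(x + fx * w + offset[0], y + fy * h + offset[1])

# Usage: list track ids whose reference point falls inside a zone,
# mirroring the geo_tracks.geometry.within(zone) test in the hunk above.
zone = geometry.Polygon([(0, 0), (100, 0), (100, 100), (0, 100)])
tracks = pd.DataFrame([[1, 7, 10, 20, 30, 40]],
                      columns=['frame', 'track', 'x', 'y', 'w', 'h'])
points = tracks.apply(lambda t: bbox_ref_point(t.x, t.y, t.w, t.h), axis=1)
ids_in_zone = tracks.loc[[p.within(zone) for p in points], 'track'].unique()
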
dnt/label/labeler2.py CHANGED
@@ -1,5 +1,6 @@
 import os, sys
 sys.path.append(os.path.dirname(__file__))
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
 
 import cv2
 import numpy as np
@@ -7,37 +8,74 @@ import matplotlib.pyplot as plt
 from tqdm import tqdm
 import pandas as pd
 import itertools
-from ..shared.util import load_classes
+from shared.util import load_classes
 import random
+import subprocess
 
 class Labeler:
-    def __init__(self, method:str='opencv', compress_message:bool=False, nodraw_empty:bool=True):
-
+    def __init__(self,
+                 method:str='opencv',
+                 encoder:str='libx264',
+                 preset:str='medium',
+                 crf:int=23,
+                 pix_fmt:str='bgr24',
+                 compress_message:bool=False,
+                 nodraw_empty:bool=True):
+        '''
+        Parameters:\n
+        - method: 'opencv' (default) - use opencv to draw labels,
+                  'ffmpeg' - use ffmpeg to draw labels
+        - encoder: 'libx264' (default) - use libx264 encoder for ffmpeg,
+                   'libx265' - use libx265 encoder for ffmpeg,
+                   'h264_nvenc' - use h264_nvenc encoder for ffmpeg,
+                   'hevc_nvenc' - use hevc_nvenc encoder for ffmpeg
+        - preset: 'medium' (default) - use medium preset for ffmpeg,
+                  'slow' - use slow preset for ffmpeg,
+                  'fast' - use fast preset for ffmpeg
+        - crf: 23 (default) - CRF for ffmpeg, lower is better quality
+        - compress_message: False (default) - show compress message in progress bar
+        - nodraw_empty: True (default) - do not draw empty frames
+        '''
         self.method = method
+        self.encoder = encoder
+        self.preset = preset
+        self.crf = crf
+        self.pix_fmt = pix_fmt
         self.compress_message=compress_message
         self.nodraw_empty = nodraw_empty
 
-    def draw(self, input_video:str, output_video:str,
-             draws:pd.DataFrame = None, draw_file:str = None,
-             start_frame:int=None, end_frame:int=None,
-             video_index:int=None, video_tot:int=None, verbose:bool=True):
+    def draw(self,
+             input_video:str,
+             output_video:str,
+             draws:pd.DataFrame=None,
+             draw_file:str=None,
+             start_frame:int=None,
+             end_frame:int=None,
+             video_index:int=None,
+             video_tot:int=None,
+             verbose:bool=True):
         '''
-        General labeling function
-        Inputs:
+        General labeling function\n
+        Inputs:\n
         draws: a DataFrame contains labeling information, if None, read label_file
         label_file: a txt file with a header ['frame','type','coords','color','size','thick','desc']
         input_video: raw video
         output_video: labeled video
-        start_frame:
-        end_frame:
+        start_frame: starting frame
+        end_frame: ending frame
         video_index: display video index in batch processing
         video_tot: display total video number in batch processing
         '''
         if draws is not None:
             data = draws
         else:
-            data = pd.read_csv(draw_file, dtype={'frame':int, 'type':str, 'size':float, 'desc':str, 'thick':int},
-                               converters={'coords': lambda x:list(eval(x)), 'color': lambda x:eval(x)})
+            data = pd.read_csv(draw_file,
+                               dtype={'frame':int,
                                      'type':str,
                                      'size':float,
                                      'desc':str,
                                      'thick':int},
+                               converters={'coords': lambda x:list(eval(x)), 'color': lambda x:eval(x)})
 
         cmap = plt.get_cmap('tab20b')
         colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
@@ -55,8 +93,31 @@ class Labeler:
         fps = int(cap.get(cv2.CAP_PROP_FPS))
         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        writer = cv2.VideoWriter(output_video, fourcc, fps, (width, height))
+
+        if self.method == 'ffmpeg':
+            # FFmpeg command to write H.265 encoded video
+            ffmpeg_cmd = [
+                "ffmpeg",
+                "-y",                      # Overwrite output file if it exists
+                "-f", "rawvideo",
+                "-vcodec", "rawvideo",
+                "-pix_fmt", self.pix_fmt,
+                "-s", f"{width}x{height}",
+                "-r", str(fps),
+                "-i", "-",                 # Read input from stdin
+                "-c:v", self.encoder,      # H.265 codec
+                "-preset", self.preset,    # Adjust preset as needed (ultrafast, fast, medium, slow, etc.)
+                "-crf", str(self.crf),     # Constant Rate Factor (higher = more compression, lower = better quality)
+                output_video]
+
+            # Start FFmpeg process
+            process = subprocess.Popen(ffmpeg_cmd,
+                                       stdin=subprocess.PIPE,
+                                       stdout=subprocess.DEVNULL,
+                                       stderr=subprocess.DEVNULL)
+        else:
+            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+            writer = cv2.VideoWriter(output_video, fourcc, fps, (width, height))
 
         if verbose:
             pbar = tqdm(total=tot_frames, unit=" frames")
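
The 'ffmpeg' branch above swaps cv2.VideoWriter for a raw-video pipe into an ffmpeg subprocess. Here is a stripped-down sketch of the same technique, assuming an ffmpeg binary on PATH and bgr24 frames as OpenCV produces (synthetic black frames stand in for decoded video):

import subprocess
import numpy as np

width, height, fps = 640, 360, 30
cmd = ["ffmpeg", "-y",
       "-f", "rawvideo", "-pix_fmt", "bgr24",   # raw BGR frames on stdin
       "-s", f"{width}x{height}", "-r", str(fps),
       "-i", "-",                               # read input from the pipe
       "-c:v", "libx264", "-preset", "medium", "-crf", "23",
       "out.mp4"]
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                        stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
for _ in range(fps * 2):                        # two seconds of frames
    frame = np.zeros((height, width, 3), dtype=np.uint8)
    proc.stdin.write(frame.tobytes())
proc.stdin.close()                              # let ffmpeg flush and finalize the file
proc.wait()

One caveat: the NVENC encoders the new docstring lists ('h264_nvenc', 'hevc_nvenc') generally control quality with -cq rather than -crf, so the -crf flag built above may simply be ignored for them.
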
@@ -128,11 +189,11 @@ class Labeler:
                     color = element['color']
                     thick = element['thick']
                     cv2.polylines(frame, [np.array(coords)], isClosed=False, color=color, thickness=thick)
-
-            writer.write(frame)
-            #key = cv2.waitKey(1) & 0xFF
-            #if key == ord("q"):
-            #    break
+
+            if self.method == 'ffmpeg':
+                process.stdin.write(frame.tobytes())
+            else:
+                writer.write(frame)
 
             if verbose:
                 pbar.update()
@@ -141,17 +202,31 @@ class Labeler:
             pbar.close()
         #cv2.destroyAllWindows()
         cap.release()
-        writer.release()
-
-    def draw_track_clips(self, input_video:str, output_path:str,
-                         tracks:pd.DataFrame = None, track_file:str = None,
-                         method:str='all', random_number:int=10, track_ids:list=None,
-                         start_frame_offset:int=0, end_frame_offset:int=0,
-                         tail:int=0, prefix:bool=False,
-                         size:int=1, thick:int=1,
-                         verbose:bool=True):
+        if self.method == 'ffmpeg':
+            process.stdin.close()
+            process.wait()
+        else:
+            writer.release()
+
+    def draw_track_clips(self,
+                         input_video:str,
+                         output_path:str,
+                         tracks:pd.DataFrame=None,
+                         track_file:str = None,
+                         method:str='all',
+                         random_number:int=10,
+                         track_ids:list=None,
+                         start_frame_offset:int=0,
+                         end_frame_offset:int=0,
+                         tail:int=0,
+                         prefix:bool=False,
+                         size:int=1,
+                         thick:int=1,
+                         video_index:int=None,
+                         video_tot:int=None,
+                         verbose:bool=True):
         '''
-        Parameters:
+        Parameters:\n
        input_video: the raw video file
        outputh_path: the folder for outputing track clips
        tracks: the dataframe of tracks
@@ -184,7 +259,7 @@ class Labeler:
         else:
             track_ids = tracks['track'].unique().tolist()
 
-        pbar = tqdm(total=len(track_ids), desc='Labeling tracks ', unit='videos')
+        #pbar = tqdm(total=len(track_ids), desc='Labeling tracks ', unit='clips')
         for id in track_ids:
             selected_tracks = tracks[tracks['track']==id].copy()
             start_frame = max(selected_tracks['frame'].min() - start_frame_offset, 0)
@@ -195,20 +270,42 @@ class Labeler:
             else:
                 out_video = os.path.join(output_path, str(id)+'.mp4')
 
-            self.draw_tracks(input_video=input_video, output_video=out_video, tracks=selected_tracks,
-                             start_frame=start_frame, end_frame=end_frame, verbose=False, tail=tail, thick=thick, size=size)
-            pbar.update()
-        pbar.close()
-
-    def draw_tracks(self, input_video:str, output_video:str,
-                    tracks:pd.DataFrame = None, track_file:str = None, label_file:str=None,
-                    color = None, tail:int=0, thick:int=2, size:int=1,
-                    class_name = False,
-                    start_frame:int=None, end_frame:int=None,
-                    video_index:int=None, video_tot:int=None, verbose:bool=True):
+            self.draw_tracks(input_video=input_video,
+                             output_video=out_video,
+                             tracks=selected_tracks,
+                             start_frame=start_frame,
+                             end_frame=end_frame,
+                             verbose=verbose,
+                             tail=tail,
+                             thick=thick,
+                             size=size,
+                             video_index=video_index,
+                             video_tot=video_tot)
+
+            #pbar.update()
+        #pbar.close()
+
+    def draw_tracks(self,
+                    input_video:str,
+                    output_video:str,
+                    tracks:pd.DataFrame=None,
+                    track_file:str=None,
+                    label_file:str=None,
+                    color=None,
+                    tail:int=0,
+                    thick:int=2,
+                    size:int=1,
+                    class_name=False,
+                    start_frame:int=None,
+                    end_frame:int=None,
+                    video_index:int=None,
+                    video_tot:int=None,
+                    verbose:bool=True):
 
         if tracks is None:
-            tracks = pd.read_csv(track_file, header=None, dtype={0:int, 1:int, 2:int, 3:int, 4:int, 5:int, 6:float, 7:int, 8:int, 9:int})
+            tracks = pd.read_csv(track_file,
+                                 header=None,
+                                 dtype={0:int, 1:int, 2:int, 3:int, 4:int, 5:int, 6:float, 7:int, 8:int, 9:int})
         tracks.columns = ['frame', 'track', 'x', 'y', 'w', 'h', 'score', 'cls', 'r3', 'r4']
 
         cmap = plt.get_cmap('tab20b')
@@ -223,22 +320,20 @@ class Labeler:
         if end_frame is None:
             end_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))-1
 
-        tot_frames = end_frame - start_frame + 1
+        selected_tracks = tracks.loc[(tracks['frame']>=start_frame) & (tracks['frame']<=end_frame)].copy()
 
-        if verbose:
-            pbar = tqdm(total=tot_frames, unit=" frames")
-            if self.compress_message:
-                pbar.set_description_str("Generating labels")
+        pbar_desc = ""
+        if self.compress_message:
+            pbar_desc = "Generating labels"
+        else:
+            if video_index and video_tot:
+                pbar_desc = "Generating labels {} of {}".format(video_index, video_tot)
             else:
-                if video_index and video_tot:
-                    pbar.set_description_str("Generating labels {} of {}".format(video_index, video_tot))
-                else:
-                    pbar.set_description_str("Generating labels {} ".format(input_video))
-
-        selected_tracks = tracks.loc[(tracks['frame']>=start_frame) & (tracks['frame']<=end_frame)].copy()
+                pbar_desc = "Generating labels {} ".format(input_video)
 
+        pbar = tqdm(total=len(selected_tracks), unit=" frames", desc=pbar_desc)
         results = []
-        for index, track in selected_tracks.iterrows():
+        for _, track in selected_tracks.iterrows():
 
             if color is None:
                 final_color = colors[int(track['track']) % len(colors)]
@@ -265,7 +360,7 @@ class Labeler:
 
             if verbose:
                 pbar.update()
-
+
         if verbose:
             pbar.close()
 
@@ -308,8 +403,9 @@ class Labeler:
             end_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))-1
 
         tot_frames = end_frame - start_frame + 1
+        selected_dets = dets.loc[(dets[0]>=start_frame) & (dets[0]<=end_frame)].copy()
 
-        pbar = tqdm(total=len(dets), unit=" dets")
+        pbar = tqdm(total=len(selected_dets), unit=" dets")
         if self.compress_message:
             pbar.set_description_str("Generating labels")
         else:
@@ -318,8 +414,6 @@ class Labeler:
             else:
                 pbar.set_description_str("Generating labels {} ".format(input_video))
 
-        selected_dets = dets.loc[(dets[0]>=start_frame) & (dets[0]<=end_frame)].copy()
-
         results = []
         for index, det in selected_dets.iterrows():
 
@@ -492,11 +586,20 @@ class LabelGenerator():
 
 
 if __name__=='__main__':
-    video_file = "/mnt/d/videos/hfst/Standard_SCU7WH_2022-09-16_0630.02.001.mp4"
-    iou_file = "/mnt/d/videos/hfst/Standard_SCU7WH_2022-09-16_0630.02.001_iou.txt"
-    track_file = "/mnt/d/videos/hfst/tracks/Standard_SCU7WH_2022-09-16_0630.02.001_track.txt"
-    label_video = "/mnt/d/videos/hfst/labels/Standard_SCU7WH_2022-09-16_0630.02.001_track.mp4"
-    label_file = "/mnt/d/videos/hfst/tracks/Standard_SCU7WH_2022-09-16_0630.02.001_label.txt"
-
-    labeler = Labeler(video_file, zoom_factor=0.5, nodraw_empty=True, label_fields=[6])
-    labeler.draw(label_file, video_file, label_video)
+
+    video_file = "/mnt/d/videos/sample/traffic.mp4"
+    iou_file = "/mnt/d/videos/sample/dets/traffic_iou.txt"
+    track_file = "/mnt/d/videos/sample/tracks/traffic_track.txt"
+    label_video = "/mnt/d/videos/sample/labels/traffic_track-ffmpeg.mp4"
+    label_file = "/mnt/d/videos/sample/labels/traffic_track.txt"
+
+    labeler = Labeler(method='ffmpeg',
+                      encoder='hevc_nvenc',
+                      preset='medium',
+                      crf=23,
+                      compress_message=True)
+    labeler.draw_tracks(input_video=video_file,
+                        output_video=label_video,
+                        track_file=track_file)
dnt/shared/synhcro.py CHANGED
@@ -66,7 +66,7 @@ class Synchronizer():
         milliseconds_per_frame = 1/fps * 1000
 
         dif_frame = frame - ref_frame
-        return round(ref_time + dif_frame * milliseconds_per_frame)
+        return round(ref_time + float(dif_frame) * milliseconds_per_frame)
 
     @staticmethod
     def add_unix_time_to_frames(frames:pd.DataFrame, ref_frame:int, ref_time:int, ref_timezone:str='US/Eastern',
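
The hunk above computes a timestamp as ref_time + (frame - ref_frame) * 1000/fps, with the new float() cast apparently there to force floating-point arithmetic when dif_frame arrives as a numpy integer. A self-contained sketch of the same arithmetic, with names assumed from the hunk's context:

def frame_to_unix_ms(frame: int, ref_frame: int, ref_time: int, fps: float) -> int:
    # ref_frame/ref_time anchor a known frame to a known unix time (ms);
    # each subsequent frame advances the clock by 1000/fps milliseconds.
    milliseconds_per_frame = 1 / fps * 1000
    dif_frame = frame - ref_frame
    return round(ref_time + float(dif_frame) * milliseconds_per_frame)

# e.g. at 30 fps, 90 frames past the anchor is exactly 3 seconds later:
assert frame_to_unix_ms(190, 100, 1_700_000_000_000, 30) == 1_700_000_003_000
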
@@ -34,13 +34,13 @@ class DeepSort(object):
         metric = NearestNeighborDistanceMetric("cosine", self.max_cosine_distance, self.nn_budget)
         self.tracker = Tracker(metric, max_iou_distance=self.max_iou_distance, max_age=self.max_age, n_init=self.n_init)
 
-    def update(self, bbox_xywh, confidences, ori_img):
+    def update(self, bbox_xywh, confidences, classes, ori_img):
         self.height, self.width = ori_img.shape[:2]
 
         # generate detections
         features = self._get_features(bbox_xywh, ori_img)  # extract features for bboxes
         bbox_tlwh = self._xywh_to_tlwh(bbox_xywh)  # convert bbox from xc_yc_w_h to left top width height/width
-        detections = [Detection(bbox_tlwh[i], conf, features[i])
+        detections = [Detection(bbox_tlwh[i], conf, classes[i], features[i])
                       for i, conf in enumerate(confidences) if conf>self.min_confidence]  # ignore low confidence bboxes
 
         # run on non-maximum supression
@@ -61,7 +61,8 @@ class DeepSort(object):
             box = track.to_tlwh()
             x1,y1,x2,y2 = self._tlwh_to_xyxy(box)
             track_id = track.track_id
-            outputs.append(np.array([x1,y1,x2,y2,track_id], dtype=int))
+            track_cls = track.cls
+            outputs.append(np.array([x1,y1,x2,y2,track_id,track_cls], dtype=int))
         if len(outputs) > 0:
             outputs = np.stack(outputs,axis=0)
         return outputs
@@ -26,10 +26,11 @@ class Detection(object):
 
     """
 
-    def __init__(self, tlwh, confidence, feature):
+    def __init__(self, tlwh, confidence, cls, feature):
         self.tlwh = np.asarray(tlwh, dtype=float)
         self.confidence = float(confidence)
         self.feature = np.asarray(feature, dtype=float)
+        self.cls = cls
 
     def to_tlbr(self):
         """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
@@ -64,7 +64,7 @@ class Track:
     """
 
     def __init__(self, mean, covariance, track_id, n_init, max_age,
-                 feature=None):
+                 feature=None, cls=None):
         self.mean = mean
         self.covariance = covariance
         self.track_id = track_id
@@ -73,6 +73,7 @@ class Track:
         self.time_since_update = 0
 
         self.state = TrackState.Tentative
+        self.cls = cls
         self.features = []
         if feature is not None:
             self.features.append(feature)
@@ -134,5 +134,5 @@ class Tracker:
         mean, covariance = self.kf.initiate(detection.to_xyah())
         self.tracks.append(Track(
             mean, covariance, self._next_id, self.n_init, self.max_age,
-            detection.feature))
+            detection.feature, detection.cls))
         self._next_id += 1
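
Taken together, the hunks above thread a class id through the whole Deep SORT pipeline: update() accepts a classes array, Detection and Track each gain a cls attribute, and every output row widens from [x1, y1, x2, y2, track_id] to [x1, y1, x2, y2, track_id, track_cls]. A sketch of consuming the widened output; the unpacking is inferred from the hunks, not a documented API:

import numpy as np

# One row per confirmed track, as assembled in the DeepSort.update hunk:
outputs = np.array([[10, 20, 50, 80, 1, 2],
                    [60, 30, 90, 70, 2, 0]], dtype=int)

for x1, y1, x2, y2, track_id, track_cls in outputs:
    w, h = x2 - x1, y2 - y1   # xyxy back to top-left width/height
    print(f"track {track_id}: class {track_cls}, bbox ({x1}, {y1}, {w}, {h})")
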
dnt/track/dsort/dsort.py CHANGED
@@ -4,15 +4,31 @@ import torch
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
-
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
-
 from deep_sort import DeepSort
 from config import Config
 
-def track(video_file:str, det_file:str, out_file:str = None, device:str = 'auto', half:bool = False,
-          cfg:dict = Config.get_cfg_dsort('default'), video_index:int = None, total_videos:int = None):
-
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+def track(video_file:str,
+          det_file:str,
+          out_file:str=None,
+          device:str='auto',
+          half:bool=False,
+          cfg:dict=Config.get_cfg_dsort('default'),
+          video_index:int=None,
+          total_videos:int=None):
+    '''
+    Track objects in a video using Deep SORT.
+    Args:
+        video_file (str): Path to the input video file.
+        det_file (str): Path to the detection results file.
+        out_file (str): Path to save the tracking results.
+        device (str): Device to run the model on ('cpu' or 'cuda').
+        half (bool): Whether to use half precision.
+        cfg (dict): Configuration dictionary for Deep SORT.
+        video_index (int): Index of the current video in a batch.
+        total_videos (int): Total number of videos in a batch.
+    '''
     #device = torch.device('cuda') if (torch.cuda.is_available() and gpu) else torch.device('cpu')
     cap = cv2.VideoCapture(video_file)
     if not cap.isOpened():
@@ -50,11 +66,18 @@ def track(video_file:str, det_file:str, out_file:str = None, device:str = 'auto'
             bbox_xywh = np.array(bbox_xywh)
             conf_score = np.array(frame_dets[:,6])
             classes = np.array(frame_dets[:,7])
-            outputs = deepsort.update(bbox_xywh, conf_score, im)
+            outputs = deepsort.update(bbox_xywh, conf_score, classes, im)
 
             if len(outputs) > 0:
                 for output in outputs:
-                    results.append([pos_frame, output[4], output[0], output[1], output[2]-output[0], output[3]-output[1], -1, -1, -1, -1])
+                    results.append([pos_frame,
+                                    output[4],
+                                    output[0],
+                                    output[1],
+                                    output[2]-output[0],
+                                    output[3]-output[1],
+                                    output[5],
+                                    -1, -1, -1])
 
         pbar.update()
 
@@ -65,15 +88,9 @@ def track(video_file:str, det_file:str, out_file:str = None, device:str = 'auto'
     df.to_csv(out_file, index=False, header=None)
 
 if __name__ == "__main__":
-
-    '''
-    video_file = "/mnt/d/videos/ped2stage/videos/gh1293.mp4"
-    iou_file = "/mnt/d/videos/ped2stage/dets/gh1293_iou_ped.txt"
-    out_file = "/mnt/d/videos/ped2stage/tracks/gh1293_ped_track_2.txt"
-    '''
-    video_file = "/mnt/d/videos/samples/ped_veh.mp4"
-    iou_file = "/mnt/d/videos/samples/dets/ped_veh_iou.txt"
-    out_file = "/mnt/d/videos/samples/tracks/ped_veh_track.txt"
+    video_file = "/mnt/d/videos/sample/traffic.mp4"
+    iou_file = "/mnt/d/videos/sample/dets/traffic_det.txt"
+    out_file = "/mnt/d/videos/sample/tracks/traffic_track.txt"
 
     track(video_file, iou_file, out_file)
 
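With output[5] spliced into position 6, the results rows above write a MOT-style CSV whose seventh column now carries the tracked class instead of a -1 placeholder. A small sketch of reading such a file back with named columns; the names are illustrative assumptions based on the row layout in the hunk:

import pandas as pd

cols = ['frame', 'track', 'x', 'y', 'w', 'h', 'cls', 'r1', 'r2', 'r3']
tracks = pd.read_csv("/mnt/d/videos/sample/tracks/traffic_track.txt",
                     header=None, names=cols)
print(tracks.groupby('cls')['track'].nunique())   # tracked objects per class
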
dnt/track/re_class.py CHANGED
@@ -5,8 +5,25 @@ from ..detect import Detector
 from ..engine.iob import iobs
 
 class ReClass:
-    def __init__(self, num_frames:int=25, threshold:float=0.75, model:str='rtdetr', device:str='auto',
-                 default_class:int=0, match_class:list=[1, 36]) -> None:
+    def __init__(self,
+                 num_frames:int=25,
+                 threshold:float=0.75,
+                 model:str='rtdetr',
+                 weights:str='x',
+                 device:str='auto',
+                 default_class:int=0,
+                 match_class:list=[1, 36]) -> None:
+        """
+        Re-classify tracks based on detection results
+        Parameters:
+            num_frames: Number of frames to consider for re-classification, default 25
+            threshold: Threshold for matching, default 0.75
+            model: Detection model to use, default 'rtdetr'
+            weights: Weights for the detection model, default 'x'
+            device: Device to use for detection, default 'auto'
+            default_class: Default class to assign if no match found, default 0 (pedestrian)
+            match_class: List of classes to match, default [1, 36] (bicycle, skateboard/scooter)
+        """
         self.detector = Detector(model=model, device=device)
         self.num_frames = num_frames
         self.threshold = threshold
@@ -36,8 +53,12 @@ class ReClass:
 
         return hit, avg_score
 
-    def re_classify(self, tracks:pd.DataFrame, input_video:str, track_ids:list=None,
-                    out_file:str=None, verbose:bool=True)->pd.DataFrame:
+    def re_classify(self,
+                    tracks:pd.DataFrame,
+                    input_video:str,
+                    track_ids:list=None,
+                    out_file:str=None,
+                    verbose:bool=True)->pd.DataFrame:
         """
         Re-classify tracks
         Parameters:
@@ -55,7 +76,8 @@ class ReClass:
             track_ids = tracks['track'].unique().tolist()
 
         results = []
-        pbar = tqdm(total=len(track_ids), unit='track', desc='Re-classifying tracks')
+        if verbose:
+            pbar = tqdm(total=len(track_ids), unit='track', desc='Re-classifying tracks')
         for track_id in track_ids:
 
             target_track = tracks[tracks['track'] == track_id].copy()
@@ -85,7 +107,8 @@ class ReClass:
                 results.append([track_id, cls, round(avg_score, 2)])
             if verbose:
                 pbar.update()
-        pbar.close()
+        if verbose:
+            pbar.close()
 
         df = pd.DataFrame(results, columns=['track', 'cls', 'avg_score'])
         if out_file:
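
The diff shows the ReClass interface but not the matching internals behind hit and avg_score. A hypothetical sketch of the decision the docstring describes: sample a track's frames, keep detections of a match_class whose IoB overlap clears the threshold, and fall back to default_class when nothing matches. Every name and the exact voting rule here are assumptions, not the package's implementation.

def decide_class(iob_scores, det_classes,
                 threshold=0.75, match_class=(1, 36), default_class=0):
    # pairs of (IoB overlap, detected class) for one track's sampled frames
    hits = [c for iob, c in zip(iob_scores, det_classes)
            if c in match_class and iob >= threshold]
    if not hits:
        return default_class, 0.0
    best = max(set(hits), key=hits.count)   # most frequent matching class
    avg = sum(iob for iob, c in zip(iob_scores, det_classes)
              if c == best and iob >= threshold) / hits.count(best)
    return best, round(avg, 2)

decide_class([0.9, 0.8, 0.2], [1, 1, 36])   # -> (1, 0.85)
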
dnt/track/sort/sort.py CHANGED
@@ -202,7 +202,7 @@ class Sort(object):
     def update(self, dets=np.empty((0, 5))):
         """
         Params:
-        dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
+        dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
         Requires: this method must be called once for each frame even with empty detections (use np.empty((0, 5)) for frames without detections).
         Returns the a similar array, where the last column is the object ID.
 
@@ -246,9 +246,9 @@ class Sort(object):
 
 def track(det_file, out_file, max_age=1, min_inits=3, iou_threshold=0.3, video_index = None, total_videos = None):
 
-    tracker = Sort(max_age=max_age,
-                   min_hits=min_inits,
-                   iou_threshold=iou_threshold)
+    tracker = Sort(max_age=max_age,
+                   min_hits=min_inits,
+                   iou_threshold=iou_threshold)
     dets = np.loadtxt(det_file, delimiter=',')
     start_frame = int(dets[:,0].min())
     end_frame = int(dets[:,0].max())
@@ -265,7 +265,6 @@ def track(det_file, out_file, max_age=1, min_inits=3, iou_threshold=0.3, video_i
         if len(dets_frame)>0:
             dets_revised[:,[0,1]] = dets_frame[:,[2,3]]
             dets_revised[:,2] = dets_frame[:,2] + dets_frame[:,4]
-
             dets_revised[:,3] = dets_frame[:,3] + dets_frame[:,5]
             dets_revised[:,4] = dets_frame[:,6] / 100
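
The update() docstring above is explicit about the contract: call it once per frame, passing np.empty((0, 5)) for frames with no detections. A minimal usage sketch under that contract, assuming this module is importable as sort:

import numpy as np
from sort import Sort

tracker = Sort(max_age=1, min_hits=3, iou_threshold=0.3)
dets_by_frame = {
    0: np.array([[10., 10., 50., 60., 0.9]]),   # rows: [x1, y1, x2, y2, score]
    2: np.array([[12., 11., 52., 62., 0.8]]),
}
for frame in range(3):
    dets = dets_by_frame.get(frame, np.empty((0, 5)))  # frame 1 has no detections
    tracked = tracker.update(dets)   # similar layout, last column is the track id
    print(frame, tracked)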