megadetector 10.0.2__py3-none-any.whl → 10.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

@@ -0,0 +1,607 @@
1
+ """
2
+
3
+ visualize_video_output.py
4
+
5
+ Render a folder of videos with bounding boxes to a new folder, based on a
6
+ detector output file.
7
+
8
+ """
9
+
10
+ #%% Imports
11
+
12
+ import argparse
13
+ import os
14
+ import random
15
+ import cv2
16
+
17
+ from tqdm import tqdm
18
+ from PIL import Image
19
+ import numpy as np
20
+
21
+ from megadetector.data_management.annotations.annotation_constants import detector_bbox_category_id_to_name
22
+ from megadetector.detection.video_utils import run_callback_on_frames, default_fourcc, is_video_file
23
+ from megadetector.utils.path_utils import path_is_abs
24
+ from megadetector.utils.wi_utils import load_md_or_speciesnet_file
25
+ from megadetector.visualization.visualization_utils import render_detection_bounding_boxes
26
+
27
+
28
+ #%% Constants
29
+
30
# Fallback detection category map, consulted only when the results file does not
# carry its own category mapping.  Category IDs are converted to strings.
DEFAULT_DETECTOR_LABEL_MAP = dict(
    (str(category_id), category_name)
    for category_id, category_name in detector_bbox_category_id_to_name.items()
)

# Default confidence thresholds for rendering classifications and detections
DEFAULT_CLASSIFICATION_THRESHOLD = 0.4
DEFAULT_DETECTION_THRESHOLD = 0.15
37
+
38
+
39
+ #%% Classes
40
+
41
class VideoVisualizationOptions:
    """
    Settings that control how visualize_video_output() samples and renders videos.
    """

    def __init__(self):

        #: Detections with confidence below this value are not rendered
        self.confidence_threshold = DEFAULT_DETECTION_THRESHOLD

        #: Number of videos to randomly sample for processing (-1 processes every video)
        self.sample = -1

        #: Seed used when sampling videos; None means no seed is applied
        self.random_seed = None

        #: Classifications with confidence below this value are not rendered
        self.classification_confidence_threshold = DEFAULT_CLASSIFICATION_THRESHOLD

        #: Output frame rate.  'auto' derives the rate from the spacing of detection
        #: frames, a positive number is an explicit fps, and a negative number is a
        #: speedup factor (e.g. -2.0 means twice as fast).
        self.rendering_fs = 'auto'

        #: Fourcc codec string used to encode the output videos
        self.fourcc = default_fourcc

        #: If True, drop frames before the first and after the last above-threshold
        #: detection
        self.trim_to_detections = False

# ...class VideoVisualizationOptions
71
+
72
+
73
+ #%% Support functions
74
+
75
def _get_video_output_framerate(video_entry, original_framerate, rendering_fs='auto'):
    """
    Calculate the appropriate output frame rate for a video based on detection frame numbers.

    In 'auto' and speedup modes, the base rate is [original_framerate] divided by the gap
    between the two lowest *distinct* detection frame numbers.  When fewer than two distinct
    frame numbers are available, there is no basis for estimating that gap, so
    [original_framerate] is used as the base rate.

    Args:
        video_entry (dict): video entry from results file containing detections
        original_framerate (float): original frame rate of the video
        rendering_fs (str or float): 'auto' for automatic calculation, negative float for
            speedup factor, positive float for explicit fps

    Returns:
        float: calculated output frame rate
    """

    speedup_factor = None

    if rendering_fs != 'auto':

        requested_rate = float(rendering_fs)

        # Anything that isn't negative is treated as an explicit fps
        if not (requested_rate < 0):
            return requested_rate

        # Negative value means speedup factor
        speedup_factor = abs(requested_rate)

    # Collect the distinct frame numbers that carry detections.  De-duplicating *before*
    # counting matters: several detections on a single frame give only one distinct
    # frame number, and there is no interval to measure in that case.
    detections = video_entry.get('detections') or []
    frame_numbers = sorted({
        detection['frame_number']
        for detection in detections
        if 'frame_number' in detection
    })

    if len(frame_numbers) < 2:
        # This is a bit arbitrary, but a reasonable thing to do when we have no basis
        # to determine the output frame rate
        base_output_fps = original_framerate
    else:
        # Calculate base output frame rate based on the first interval
        first_interval = frame_numbers[1] - frame_numbers[0]
        base_output_fps = original_framerate / first_interval

    if speedup_factor is None:
        return base_output_fps

    return base_output_fps * speedup_factor
145
+
146
+
147
def _get_frames_to_process(video_entry, confidence_threshold, trim_to_detections=False):
    """
    Get the list of frame numbers to render for this video.

    The result is the union of the entry's 'frames_processed' list (when present) and
    the frame numbers attached to its detections.  Detections without a 'frame_number'
    are skipped (with a single warning per video) in both the collection and the
    trimming steps.

    Args:
        video_entry (dict): video entry from results file
        confidence_threshold (float): minimum confidence for detections to be considered
        trim_to_detections (bool): if True, only include frames between first and last
            above-threshold detections (inclusive)

    Returns:
        list: sorted list of unique frame numbers to process
    """

    detections = video_entry.get('detections')
    if detections is None:
        return []

    frames_processed = video_entry.get('frames_processed')
    has_frames_processed = frames_processed is not None
    frame_numbers = set(frames_processed) if has_frames_processed else set()

    above_threshold_frames = set()
    n_missing_frame_numbers = 0

    for detection in detections:

        if 'frame_number' not in detection:
            n_missing_frame_numbers += 1
            continue

        frame_number = detection['frame_number']

        # If this file includes the list of frames processed (required as of format
        # version 1.5), every frame with detections should be included in that list
        if has_frames_processed and (frame_number not in frame_numbers):
            print('Warning: frames_processed field present in {}, but frame {} is missing'.\
                  format(video_entry['file'],frame_number))

        frame_numbers.add(frame_number)

        # Confidence is only consulted when trimming
        if trim_to_detections and (detection['conf'] >= confidence_threshold):
            above_threshold_frames.add(frame_number)

    # ...for each detection

    if n_missing_frame_numbers > 0:
        print('Warning: detections in {} lack frame numbers'.format(video_entry['file']))

    frame_numbers = sorted(frame_numbers)

    if not (trim_to_detections and frame_numbers):
        return frame_numbers

    # No above-threshold detections, return empty list
    if not above_threshold_frames:
        return []

    # Return all frames between first and last above-threshold detections (inclusive)
    first_detection_frame = min(above_threshold_frames)
    last_detection_frame = max(above_threshold_frames)

    return [frame_num for frame_num in frame_numbers
            if first_detection_frame <= frame_num <= last_detection_frame]
215
+
216
+
217
def _get_detections_for_frame(video_entry, frame_number, confidence_threshold):
    """
    Get all detections for a specific frame that meet the confidence threshold.

    Detections that carry no 'frame_number' cannot belong to any frame and are skipped.

    Args:
        video_entry (dict): video entry from results file
        frame_number (int): frame number to get detections for
        confidence_threshold (float): minimum detection confidence

    Returns:
        list: list of detection dictionaries for this frame
    """

    detections = video_entry.get('detections') or []

    return [
        detection for detection in detections
        if ('frame_number' in detection) and
           (detection['frame_number'] == frame_number) and
           (detection['conf'] >= confidence_threshold)
    ]
241
+
242
+
243
def _process_video(video_entry,
                   detector_label_map,
                   classification_label_map,
                   options,
                   video_dir,
                   out_dir):
    """
    Process a single video, rendering detections on frames and creating output video.

    Per-video problems (failed entry, missing file, missing frame rate, no frames,
    read/write errors) are reported through the returned dict rather than raised.

    Args:
        video_entry (dict): video entry from results file
        detector_label_map (dict): mapping of detection category IDs to names
        classification_label_map (dict): mapping of classification category IDs to names
        options (VideoVisualizationOptions): processing options
        video_dir (str): input video directory, or None if the results file uses
            absolute paths
        out_dir (str): output directory

    Returns:
        dict: processing result information, with keys 'file', 'success', 'error'
        (None or a message), and 'frames_processed' (number of frames written)

    Raises:
        AssertionError: if the entry's path is relative while [video_dir] is None, or
            absolute while [video_dir] is supplied
    """

    result = {
        'file': video_entry['file'],
        'success': False,
        'error': None,
        'frames_processed': 0
    }

    # Handle failure cases
    if video_entry.get('failure') is not None:
        result['error'] = 'Ignoring failed video: {}'.format(video_entry['failure'])
        return result

    # Construct input path, and the relative path used to mirror the directory
    # structure under the output folder
    if video_dir is None:
        input_video_path = video_entry['file']
        assert path_is_abs(input_video_path), \
            'Absolute paths are required when no video base dir is supplied'
        # os.path.join() discards out_dir when the second component is absolute, which
        # would make the output path equal to the input path and overwrite the source
        # video.  Strip the drive and leading separators so the output lands in out_dir.
        rel_path = os.path.splitdrive(input_video_path)[1].lstrip('/\\')
    else:
        assert not path_is_abs(video_entry['file']), \
            'Relative paths are required when a video base dir is supplied'
        input_video_path = os.path.join(video_dir, video_entry['file'])
        rel_path = video_entry['file']

    if not os.path.exists(input_video_path):
        result['error'] = 'Video not found: {}'.format(input_video_path)
        return result

    # Create output path preserving directory structure
    output_video_path = os.path.join(out_dir, rel_path)
    os.makedirs(os.path.dirname(output_video_path), exist_ok=True)

    # Get frames to process
    frames_to_process = _get_frames_to_process(video_entry,
                                               options.confidence_threshold,
                                               options.trim_to_detections)
    if len(frames_to_process) == 0:
        result['error'] = 'No frames with detections to process'
        return result

    # Determine output frame rate
    if 'frame_rate' not in video_entry:
        result['error'] = 'No frame rate available for video {}'.format(video_entry['file'])
        return result

    original_framerate = video_entry['frame_rate']
    output_framerate = _get_video_output_framerate(video_entry,
                                                   original_framerate,
                                                   options.rendering_fs)

    # Storage for rendered frames
    rendered_frames = []

    def frame_callback(frame_array, frame_id):
        """
        Callback function for processing each frame.

        Args:
            frame_array (np.array): frame image data
            frame_id (str): frame identifier (unused)

        Returns:
            np.array: processed frame
        """

        # The frame number is inferred from how many frames have been rendered so far.
        # NOTE(review): this assumes run_callback_on_frames() invokes the callback once
        # per entry in frames_to_process, in the same order - confirm against video_utils.
        current_frame_idx = len(rendered_frames)
        if current_frame_idx >= len(frames_to_process):
            print('Warning: received an extra frame (index {} of {}) for video {}'.format(
                current_frame_idx,len(frames_to_process),video_entry['file']
            ))
            return frame_array

        current_frame_number = frames_to_process[current_frame_idx]

        # Convert to uint8 for PIL
        # NOTE(review): presumably non-uint8 frames are floats in [0, 1] - confirm
        if frame_array.dtype != np.uint8:
            frame_array = (frame_array * 255).astype(np.uint8)

        # Convert from BGR (OpenCV) to RGB (PIL) if needed
        if len(frame_array.shape) == 3 and frame_array.shape[2] == 3:
            frame_array = cv2.cvtColor(frame_array, cv2.COLOR_BGR2RGB)

        pil_image = Image.fromarray(frame_array)

        # Get detections for this frame
        frame_detections = _get_detections_for_frame(
            video_entry,
            current_frame_number,
            options.confidence_threshold
        )

        # Render detections on the frame
        if frame_detections:
            render_detection_bounding_boxes(
                frame_detections,
                pil_image,
                detector_label_map,
                classification_label_map,
                classification_confidence_threshold=options.classification_confidence_threshold
            )

        # Convert back to numpy array for video writing
        frame_array = np.array(pil_image)
        if (len(frame_array.shape) == 3) and (frame_array.shape[2] == 3):
            frame_array = cv2.cvtColor(frame_array, cv2.COLOR_RGB2BGR)

        rendered_frames.append(frame_array)
        return frame_array

    # ...def frame_callback(...)

    # Process video frames
    try:
        run_callback_on_frames(
            input_video_path,
            frame_callback,
            frames_to_process=frames_to_process,
            verbose=False
        )
    except Exception as e:
        import traceback
        trace = traceback.format_exc()
        result['error'] = 'Error processing video frames: {} ({})'.format(str(e),trace)
        return result

    if len(rendered_frames) == 0:
        result['error'] = 'No frames were processed for video {}'.format(video_entry['file'])
        return result

    # Write output video
    try:

        # Get frame dimensions
        height, width = rendered_frames[0].shape[:2]

        # Create VideoWriter
        fourcc = cv2.VideoWriter_fourcc(*options.fourcc)
        video_writer = cv2.VideoWriter(output_video_path, fourcc, output_framerate, (width, height))

        # Always release the writer, including when it fails to open or a write raises
        try:

            if not video_writer.isOpened():
                result['error'] = 'Failed to open video writer for {}'.format(output_video_path)
                return result

            # Write frames
            for frame in rendered_frames:
                video_writer.write(frame)

        finally:

            video_writer.release()

        result['success'] = True
        result['frames_processed'] = len(rendered_frames)

    except Exception as e:

        result['error'] = 'Error writing output video: {}'.format(str(e))

    # ...try/except

    return result

# ...def _process_video(...)
423
+
424
+
425
+ #%% Main function
426
+
427
def visualize_video_output(detector_output_path,
                           out_dir,
                           video_dir,
                           options=None):
    """
    Renders videos with bounding boxes based on detector output.

    Args:
        detector_output_path (str): path to .json file containing detection results
        out_dir (str): output directory for rendered videos (created if necessary)
        video_dir (str): input video directory; may be None, in which case the results
            file is expected to contain absolute paths
        options (VideoVisualizationOptions, optional): processing options

    Returns:
        list: list of processing results for each video

    Raises:
        ValueError: if [out_dir] and [video_dir] refer to the same directory
    """

    if options is None:
        options = VideoVisualizationOptions()

    # Validate that input and output directories are different
    if (video_dir is not None) and (os.path.abspath(out_dir) == os.path.abspath(video_dir)):
        raise ValueError('Output directory cannot be the same as video directory')

    # Load results file
    print('Loading results from {}'.format(detector_output_path))
    results_data = load_md_or_speciesnet_file(detector_output_path)

    # Get label mappings
    detector_label_map = results_data.get('detection_categories', DEFAULT_DETECTOR_LABEL_MAP)
    classification_label_map = results_data.get('classification_categories', {})

    # Filter to video entries only
    video_entries = [entry for entry in results_data['images']
                     if is_video_file(entry['file'])]

    print('Found {} videos in results file'.format(len(video_entries)))

    # Apply sampling if requested
    if (options.sample > 0) and (len(video_entries) > options.sample):
        n_videos_available = len(video_entries)
        # When a seed is supplied, sample from a private generator: this selects the same
        # videos as seeding the module-level generator would, without resetting the
        # process-wide random state as a side effect.
        if options.random_seed is not None:
            sampler = random.Random(options.random_seed)
        else:
            sampler = random
        video_entries = sampler.sample(video_entries, options.sample)
        print('Sampled {} of {} videos for processing'.format(
            len(video_entries),n_videos_available))

    # Create output directory
    os.makedirs(out_dir, exist_ok=True)

    # Process each video
    results = []

    for video_entry in tqdm(video_entries, desc='Processing videos'):
        result = _process_video(
            video_entry,
            detector_label_map,
            classification_label_map,
            options,
            video_dir,
            out_dir
        )
        results.append(result)

        if not result['success']:
            print('Warning: Failed to process {}: {}'.format(result['file'],result['error']))

    # ...for each video

    # Print summary
    successful = sum(1 for r in results if r['success'])
    failed = len(results) - successful
    total_frames = sum(r['frames_processed'] for r in results if r['success'])

    print('\nProcessing complete:')
    print(f' Successfully processed: {successful} videos')
    print(f' Failed: {failed} videos')
    print(f' Total frames rendered: {total_frames}')

    return results

# ...def visualize_video_output(...)
511
+
512
+
513
+ #%% Command-line driver
514
+
515
def _rendering_fs_arg(value):
    """
    argparse "type" callable for --rendering_fs.

    Accepts 'auto' or a string that parses as a finite, non-zero number, so that an
    unusable value is rejected with a usage error before any video is processed.

    Args:
        value (str): raw command-line value

    Returns:
        str: [value], unchanged

    Raises:
        argparse.ArgumentTypeError: if [value] is neither 'auto' nor a finite,
            non-zero number
    """

    import math

    if value == 'auto':
        return value

    try:
        numeric_value = float(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            'expected "auto" or a number, got "{}"'.format(value)) from None

    # Zero is meaningless both as an fps and as a speedup factor
    if (numeric_value == 0) or (not math.isfinite(numeric_value)):
        raise argparse.ArgumentTypeError(
            'expected a finite, non-zero number, got "{}"'.format(value))

    # Return the original string so downstream code sees exactly what it always has
    return value


def main():
    """
    Command-line driver for visualize_video_output
    """

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Render videos with bounding boxes predicted by a detector above '
                    'a confidence threshold, and save the rendered videos.')

    parser.add_argument(
        'detector_output_path',
        type=str,
        help='Path to json output file of the detector')

    parser.add_argument(
        'out_dir',
        type=str,
        help='Path to directory where the rendered videos will be saved. '
             'The directory will be created if it does not exist.')

    parser.add_argument(
        'video_dir',
        type=str,
        help='Path to directory containing the input videos')

    parser.add_argument(
        '--confidence_threshold',
        type=float,
        default=DEFAULT_DETECTION_THRESHOLD,
        help='Confidence threshold above which detections will be rendered')

    parser.add_argument(
        '--sample',
        type=int,
        default=-1,
        help='Number of videos to randomly sample for processing. '
             'Set to -1 to process all videos')

    parser.add_argument(
        '--random_seed',
        type=int,
        default=None,
        help='Random seed for reproducible sampling')

    parser.add_argument(
        '--classification_confidence_threshold',
        type=float,
        default=DEFAULT_CLASSIFICATION_THRESHOLD,
        help='Value between 0 and 1, indicating the confidence threshold '
             'above which classifications will be rendered')

    parser.add_argument(
        '--rendering_fs',
        type=_rendering_fs_arg,
        default='auto',
        help='Frame rate for output videos. Use "auto" to calculate based on '
             'detection frame intervals, positive float for explicit fps, '
             'or negative float for speedup factor (e.g. -2.0 = 2x faster)')

    parser.add_argument(
        '--fourcc',
        type=str,
        default=default_fourcc,
        help='Fourcc codec specification for video encoding')

    parser.add_argument(
        '--trim_to_detections',
        action='store_true',
        help='Skip frames before first and after last above-threshold detection')

    args = parser.parse_args()

    # Create options object
    options = VideoVisualizationOptions()
    options.confidence_threshold = args.confidence_threshold
    options.sample = args.sample
    options.random_seed = args.random_seed
    options.classification_confidence_threshold = args.classification_confidence_threshold
    options.rendering_fs = args.rendering_fs
    options.fourcc = args.fourcc
    options.trim_to_detections = args.trim_to_detections

    # Run visualization
    visualize_video_output(
        args.detector_output_path,
        args.out_dir,
        args.video_dir,
        options
    )


if __name__ == '__main__':
    main()