megadetector 5.0.13__py3-none-any.whl → 5.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- megadetector/data_management/read_exif.py +11 -5
- megadetector/detection/process_video.py +228 -68
- megadetector/detection/pytorch_detector.py +16 -11
- megadetector/detection/run_detector.py +17 -5
- megadetector/detection/run_detector_batch.py +179 -65
- megadetector/detection/tf_detector.py +11 -3
- megadetector/detection/video_utils.py +174 -43
- megadetector/postprocessing/convert_output_format.py +12 -5
- megadetector/utils/md_tests.py +80 -24
- megadetector/utils/path_utils.py +38 -6
- megadetector/utils/process_utils.py +8 -2
- megadetector/visualization/visualization_utils.py +7 -2
- megadetector/visualization/visualize_detector_output.py +0 -1
- {megadetector-5.0.13.dist-info → megadetector-5.0.14.dist-info}/METADATA +1 -1
- {megadetector-5.0.13.dist-info → megadetector-5.0.14.dist-info}/RECORD +18 -18
- {megadetector-5.0.13.dist-info → megadetector-5.0.14.dist-info}/LICENSE +0 -0
- {megadetector-5.0.13.dist-info → megadetector-5.0.14.dist-info}/WHEEL +0 -0
- {megadetector-5.0.13.dist-info → megadetector-5.0.14.dist-info}/top_level.txt +0 -0
megadetector/data_management/read_exif.py

@@ -16,7 +16,7 @@ path. No attempt is made to be consistent in format across the two approaches.
 import os
 import subprocess
 import json
-from datetime import datetime
+from datetime import date, datetime
 
 from multiprocessing.pool import ThreadPool as ThreadPool
 from multiprocessing.pool import Pool as Pool

@@ -64,7 +64,7 @@ class ReadExifOptions:
 #:
 #: A useful set of tags one might want to limit queries for:
 #:
-#: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight',
+#: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight',
 #: 'DateTimeOriginal','Orientation']
 self.tags_to_include = None
 

@@ -103,7 +103,7 @@ class ExifResultsToCCTOptions:
 
 #: Function for extracting location information, should take a string
 #: and return a string. Defaults to ct_utils.image_file_to_camera_folder. If
-#: this is None,
+#: this is None, location is written as "unknown".
 self.filename_to_location_function = image_file_to_camera_folder
 
 

@@ -689,7 +689,7 @@ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
 
 # By default we assume that each leaf-node folder is a location
 if options.filename_to_location_function is None:
-im['location'] =
+im['location'] = 'unknown'
 else:
 im['location'] = options.filename_to_location_function(exif_result['file_name'])
 

@@ -738,9 +738,15 @@ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
 d['annotations'] = []
 d['categories'] = []
 
+def json_serialize_datetime(obj):
+    if isinstance(obj, (datetime, date)):
+        return obj.isoformat()
+    raise TypeError('Object {} (type {}) not serializable'.format(
+        str(obj),type(obj)))
+
 if cct_output_file is not None:
 with open(cct_output_file,'w') as f:
-json.dump(d,indent=1)
+json.dump(d,f,indent=1,default=json_serialize_datetime)
 
 return d
 
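Note on the json.dump change above: EXIF results can contain Python datetime values, which the standard JSON encoder rejects, hence the new default= hook. A minimal standalone sketch of the same pattern (the sample dictionary is illustrative, not the package's output format):

import json
from datetime import date, datetime

def json_serialize_datetime(obj):
    # Serialize date/datetime values as ISO-8601 strings; reject anything else
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    raise TypeError('Object {} (type {}) not serializable'.format(str(obj), type(obj)))

d = {'file_name': 'img_0001.jpg',
     'exif_tags': {'DateTimeOriginal': datetime(2024, 1, 1, 12, 0, 0)}}
print(json.dumps(d, indent=1, default=json_serialize_datetime))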
megadetector/detection/process_video.py

@@ -12,8 +12,6 @@ writing them to disk. The upside, though, is that this approach allows you to r
 detection elimination after running MegaDetector, and it allows allows more efficient re-use
 of frames if you end up running MD more than once, or running multiple versions of MD.
 
-TODO: optionally skip writing frames to disk, and process frames in memory.
-
 """
 
 #%% Imports

@@ -36,6 +34,7 @@ from megadetector.utils.path_utils import insert_before_extension, clean_path
 from megadetector.detection.video_utils import video_to_frames
 from megadetector.detection.video_utils import frames_to_video
 from megadetector.detection.video_utils import frame_results_to_video_results
+from megadetector.detection.video_utils import _add_frame_numbers_to_results
 from megadetector.detection.video_utils import video_folder_to_frames
 from megadetector.detection.video_utils import default_fourcc
 

@@ -50,6 +49,11 @@ class ProcessVideoOptions:
 def __init__(self):
 
 #: Can be a model filename (.pt or .pb) or a model name (e.g. "MDV5A")
+#:
+#: Use the string "no_detection" to indicate that you only want to extract frames,
+#: not run a model. If you do this, you almost definitely want to set
+#: keep_extracted_frames to "True", otherwise everything in this module is a no-op.
+#: I.e., there's no reason to extract frames, do nothing with them, then delete them.
 self.model_file = 'MDV5A'
 
 #: Video (of folder of videos) to process

@@ -66,7 +70,7 @@ class ProcessVideoOptions:
 #: if this is None
 self.frame_folder = None
 
-
+#: Folder to use for rendered frames (if rendering output video); will use a folder
 #: in system temp space if this is None
 self.frame_rendering_folder = None
 

@@ -111,6 +115,10 @@ class ProcessVideoOptions:
 #: fourcc code to use for writing videos; only relevant if render_output_video is True
 self.fourcc = None
 
+#: force a specific frame rate for output videos; only relevant if render_output_video
+#: is True
+self.rendering_fs = None
+
 #: Confidence threshold to use for writing videos with boxes, only relevant if
 #: if render_output_video is True. Defaults to choosing a reasonable threshold
 #: based on the model version.

@@ -121,9 +129,13 @@ class ProcessVideoOptions:
 
 #: Sample every Nth frame; set to None (default) or 1 to sample every frame. Typically
 #: we sample down to around 3 fps, so for typical 30 fps videos, frame_sample=10 is a
-#: typical value.
+#: typical value. Mutually exclusive with [frames_to_extract].
 self.frame_sample = None
 
+#: Extract a specific set of frames (list of ints, or a single int). Mutually exclusive with
+#: [frame_sample].
+self.frames_to_extract = None
+
 #: Number of workers to use for parallelization; set to <= 1 to disable parallelization
 self.n_cores = 1
 
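Note: together with the "no_detection" model string documented above, these options let this module act as a pure frame extractor. A minimal sketch of that configuration (paths are placeholders; the class, fields, and function come from the hunks above and below):

from megadetector.detection.process_video import ProcessVideoOptions, process_video

options = ProcessVideoOptions()
options.model_file = 'no_detection'               # extract frames only, don't run a model
options.input_video_file = '/path/to/video.mp4'   # placeholder
options.frame_folder = '/path/to/frames'          # placeholder
options.keep_extracted_frames = True              # otherwise 'no_detection' is a no-op

# Either sample every Nth frame...
options.frame_sample = 10
# ...or pull out specific frames instead (mutually exclusive with frame_sample):
# options.frames_to_extract = [0, 100]

process_video(options)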
@@ -138,7 +150,14 @@ class ProcessVideoOptions:
 self.quality = 90
 
 #: Resize frames so they're at most this wide
-self.max_width =
+self.max_width = None
+
+#: Run the model at this image size (don't mess with this unless you know what you're
+#: getting into)
+self.image_size = None
+
+#: Enable image augmentation
+self.augment = False
 
 # ...class ProcessVideoOptions
 

@@ -278,7 +297,8 @@ def _clean_up_extracted_frames(options,frame_output_folder,frame_filenames):
 
 def process_video(options):
 """
-Process a single video through MD, optionally writing a new video with boxes
+Process a single video through MD, optionally writing a new video with boxes.
+Can also be used just to split a video into frames, without running a model.
 
 Args:
 options (ProcessVideoOptions): all the parameters used to control this process,

@@ -294,6 +314,10 @@ def process_video(options):
 if options.render_output_video and (options.output_video_file is None):
 options.output_video_file = options.input_video_file + '.detections.mp4'
 
+if options.model_file == 'no_detection' and not options.keep_extracted_frames:
+    print('Warning: you asked for no detection, but did not specify keep_extracted_frames, this is a no-op')
+    return
+
 # Track whether frame and rendering folders were created by this script
 caller_provided_frame_output_folder = (options.frame_folder is not None)
 caller_provided_rendering_output_folder = (options.frame_rendering_folder is not None)

@@ -309,15 +333,31 @@ def process_video(options):
 
 os.makedirs(frame_output_folder, exist_ok=True)
 
+
+## Extract frames
+
 frame_filenames, Fs = video_to_frames(
-
-
-
+    options.input_video_file,
+    frame_output_folder,
+    every_n_frames=options.frame_sample,
+    overwrite=(not options.reuse_frames_if_available),
+    quality=options.quality,
+    max_width=options.max_width,
+    verbose=options.verbose,
+    frames_to_extract=options.frames_to_extract)
 
 image_file_names = frame_filenames
 if options.debug_max_frames > 0:
 image_file_names = image_file_names[0:options.debug_max_frames]
-
+
+if options.model_file == 'no_detection':
+    assert options.keep_extracted_frames, \
+        'Internal error: keep_extracted_frames not set, but no model specified'
+    return
+
+
+## Run MegaDetector
+
 if options.reuse_results_if_available and \
 os.path.isfile(options.output_json_file):
 print('Loading results from {}'.format(options.output_json_file))

@@ -325,12 +365,17 @@ def process_video(options):
 results = json.load(f)
 else:
 results = run_detector_batch.load_and_run_detector_batch(
-options.model_file,
+    options.model_file,
+    image_file_names,
 confidence_threshold=options.json_confidence_threshold,
 n_cores=options.n_cores,
 class_mapping_filename=options.class_mapping_filename,
-quiet=True
-
+    quiet=True,
+    augment=options.augment,
+    image_size=options.image_size)
+
+_add_frame_numbers_to_results(results)
+
 run_detector_batch.write_results_to_file(
 results, options.output_json_file,
 relative_path_base=frame_output_folder,

@@ -357,14 +402,20 @@ def process_video(options):
 confidence_threshold=options.rendering_confidence_threshold)
 
 # Combine into a video
-if options.
+if options.rendering_fs is not None:
+    rendering_fs = options.rendering_fs
+elif options.frame_sample is None:
 rendering_fs = Fs
 else:
+    # If the original video was 30fps and we sampled every 10th frame,
+    # render at 3fps
 rendering_fs = Fs / options.frame_sample
 
 print('Rendering {} frames to {} at {} fps (original video {} fps)'.format(
 len(detected_frame_files), options.output_video_file,rendering_fs,Fs))
-frames_to_video(detected_frame_files,
+frames_to_video(detected_frame_files,
+    rendering_fs,
+    options.output_video_file,
 codec_spec=options.fourcc)
 
 # Possibly clean up rendered frames
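Note on the frame-rate selection added above: an explicit rendering_fs wins, otherwise the original rate is kept, and if frames were subsampled the rate is scaled down by the sampling factor (30 fps sampled every 10th frame renders at 3 fps). A standalone restatement of that logic (the helper name is illustrative, not part of the package):

def compute_rendering_fs(original_fs, frame_sample=None, forced_fs=None):
    # An explicitly requested output frame rate takes precedence
    if forced_fs is not None:
        return forced_fs
    # No subsampling: keep the original rate
    if frame_sample is None:
        return original_fs
    # Sampled every Nth frame: scale the rate down by N
    return original_fs / frame_sample

assert compute_rendering_fs(30.0, frame_sample=10) == 3.0
assert compute_rendering_fs(30.0) == 30.0
assert compute_rendering_fs(30.0, frame_sample=10, forced_fs=15.0) == 15.0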
@@ -381,7 +432,13 @@ def process_video(options):
 
 def process_video_folder(options):
 """
-Process a folder of videos through MD
+Process a folder of videos through MD. Can also be used just to split a folder of
+videos into frames, without running a model.
+
+When this function is used to run MD, two .json files will get written, one with
+an entry for each *frame* (identical to what's created by process_video()), and
+one with an entry for each *video* (which is more suitable for, e.g., reading into
+Timelapse).
 
 Args:
 options (ProcessVideoOptions): all the parameters used to control this process,

@@ -393,13 +450,17 @@ def process_video_folder(options):
 assert os.path.isdir(options.input_video_file), \
 '{} is not a folder'.format(options.input_video_file)
 
-
-'
-
-
-
-
-
+if options.model_file == 'no_detection' and not options.keep_extracted_frames:
+    print('Warning: you asked for no detection, but did not specify keep_extracted_frames, this is a no-op')
+    return
+
+if options.model_file != 'no_detection':
+    assert options.output_json_file is not None, \
+        'When processing a folder, you must specify an output .json file'
+    assert options.output_json_file.endswith('.json')
+video_json = options.output_json_file
+frames_json = options.output_json_file.replace('.json','.frames.json')
+os.makedirs(os.path.dirname(video_json),exist_ok=True)
 
 # Track whether frame and rendering folders were created by this script
 caller_provided_frame_output_folder = (options.frame_folder is not None)

@@ -420,6 +481,7 @@ def process_video_folder(options):
 os.makedirs(frame_output_folder, exist_ok=True)
 
 print('Extracting frames')
+
 frame_filenames, Fs, video_filenames = \
 video_folder_to_frames(input_folder=options.input_video_file,
 output_folder_base=frame_output_folder,

@@ -429,8 +491,10 @@ def process_video_folder(options):
 every_n_frames=options.frame_sample,
 verbose=options.verbose,
 quality=options.quality,
-max_width=options.max_width
+max_width=options.max_width,
+frames_to_extract=options.frames_to_extract)
 
+print('Extracted frames for {} videos'.format(len(set(video_filenames))))
 image_file_names = list(itertools.chain.from_iterable(frame_filenames))
 
 if len(image_file_names) == 0:

@@ -443,6 +507,11 @@ def process_video_folder(options):
 
 if options.debug_max_frames is not None and options.debug_max_frames > 0:
 image_file_names = image_file_names[0:options.debug_max_frames]
+
+if options.model_file == 'no_detection':
+    assert options.keep_extracted_frames, \
+        'Internal error: keep_extracted_frames not set, but no model specified'
+    return
 
 
 ## Run MegaDetector on the extracted frames

@@ -454,12 +523,17 @@ def process_video_folder(options):
 else:
 print('Running MegaDetector')
 results = run_detector_batch.load_and_run_detector_batch(
-options.model_file,
+    options.model_file,
+    image_file_names,
 confidence_threshold=options.json_confidence_threshold,
 n_cores=options.n_cores,
 class_mapping_filename=options.class_mapping_filename,
-quiet=True
+    quiet=True,
+    augment=options.augment,
+    image_size=options.image_size)
 
+_add_frame_numbers_to_results(results)
+
 run_detector_batch.write_results_to_file(
 results, frames_json,
 relative_path_base=frame_output_folder,

@@ -518,9 +592,13 @@ def process_video_folder(options):
 
 video_fs = Fs[i_video]
 
-if options.
+if options.rendering_fs is not None:
+    rendering_fs = options.rendering_fs
+elif options.frame_sample is None:
 rendering_fs = video_fs
 else:
+    # If the original video was 30fps and we sampled every 10th frame,
+    # render at 3fps
 rendering_fs = video_fs / options.frame_sample
 
 input_video_file_relative = os.path.relpath(input_video_file_abs,options.input_video_file)

@@ -547,7 +625,10 @@ def process_video_folder(options):
 # Create the output video
 print('Rendering detections for video {} to {} at {} fps (original video {} fps)'.format(
 input_video_file_relative,video_output_file,rendering_fs,video_fs))
-frames_to_video(video_frame_files,
+frames_to_video(video_frame_files,
+    rendering_fs,
+    video_output_file,
+    codec_spec=options.fourcc)
 
 # ...for each video
 

@@ -607,6 +688,14 @@ def options_to_command(options):
 cmd += ' --n_cores ' + str(options.n_cores)
 if options.frame_sample is not None:
 cmd += ' --frame_sample ' + str(options.frame_sample)
+if options.frames_to_extract is not None:
+    cmd += ' --frames_to_extract '
+    if isinstance(options.frames_to_extract,int):
+        frames_to_extract = [options.frames_to_extract]
+    else:
+        frames_to_extract = options.frames_to_extract
+    for frame_number in frames_to_extract:
+        cmd += ' {}'.format(frame_number)
 if options.debug_max_frames is not None:
 cmd += ' --debug_max_frames ' + str(options.debug_max_frames)
 if options.class_mapping_filename is not None:
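Note: the serialization above accepts either a single int or a list of ints and emits one command-line token per frame number. A compact standalone restatement (the helper name is illustrative, not part of the package):

def frames_to_extract_args(frames_to_extract):
    # Accept a scalar or a list, and emit one token per frame number
    if isinstance(frames_to_extract, int):
        frames_to_extract = [frames_to_extract]
    return '--frames_to_extract ' + ' '.join(str(f) for f in frames_to_extract)

print(frames_to_extract_args([0, 100]))  # --frames_to_extract 0 100
print(frames_to_extract_args(42))        # --frames_to_extract 42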
@@ -631,16 +720,20 @@ def options_to_command(options):
 
 if False:
 
+    pass
+
 #%% Process a folder of videos
 
 model_file = 'MDV5A'
-input_dir = r'g:\temp\test-videos'
+# input_dir = r'g:\temp\test-videos'
+input_dir = r'G:\temp\md-test-package\md-test-images\video-samples'
 output_base = r'g:\temp\video_test'
 frame_folder = os.path.join(output_base,'frames')
 rendering_folder = os.path.join(output_base,'rendered-frames')
 output_json_file = os.path.join(output_base,'video-test.json')
 output_video_folder = os.path.join(output_base,'output_videos')
 
+
 print('Processing folder {}'.format(input_dir))
 
 options = ProcessVideoOptions()

@@ -654,32 +747,28 @@ if False:
 options.quality = 90
 options.frame_sample = 10
 options.max_width = 1280
-options.n_cores =
+options.n_cores = 4
 options.verbose = True
-options.render_output_video = True
-
-options.
-options.
-
-options.
-options.
-options.force_extracted_frame_folder_deletion = True
-options.force_rendered_frame_folder_deletion = True
-
-# options.confidence_threshold = 0.15
+options.render_output_video = True
+options.frame_folder = frame_folder
+options.frame_rendering_folder = rendering_folder
+options.keep_extracted_frames = True
+options.keep_rendered_frames = True
+options.force_extracted_frame_folder_deletion = False
+options.force_rendered_frame_folder_deletion = False
 options.fourcc = 'mp4v'
+# options.rendering_confidence_threshold = 0.15
 
 cmd = options_to_command(options); print(cmd)
 
-import clipboard; clipboard.copy(cmd)
-
-if False:
-process_video_folder(options)
+# import clipboard; clipboard.copy(cmd)
+# process_video_folder(options)
 
 
 #%% Process a single video
 
 fn = r'g:\temp\test-videos\person_and_dog\DSCF0056.AVI'
+assert os.path.isfile(fn)
 model_file = 'MDV5A'
 input_video_file = fn
 

@@ -687,39 +776,89 @@ if False:
 frame_folder = os.path.join(output_base,'frames')
 rendering_folder = os.path.join(output_base,'rendered-frames')
 output_json_file = os.path.join(output_base,'video-test.json')
-output_video_file = os.path.join(output_base,'
+output_video_file = os.path.join(output_base,'output_video.mp4')
 
 options = ProcessVideoOptions()
 options.model_file = model_file
 options.input_video_file = input_video_file
 options.render_output_video = True
 options.output_video_file = output_video_file
-
-options.verbose = True
-
+options.output_json_file = output_json_file
+options.verbose = True
 options.quality = 75
-options.frame_sample =
-options.max_width =
-
-options.
-options.frame_rendering_folder = None # rendering_folder
-
+options.frame_sample = 10
+options.max_width = 1600
+options.frame_folder = frame_folder
+options.frame_rendering_folder = rendering_folder
 options.keep_extracted_frames = False
 options.keep_rendered_frames = False
 options.force_extracted_frame_folder_deletion = True
-options.force_rendered_frame_folder_deletion = True
-
-# options.confidence_threshold = 0.15
+options.force_rendered_frame_folder_deletion = True
 options.fourcc = 'mp4v'
+# options.rendering_confidence_threshold = 0.15
 
 cmd = options_to_command(options); print(cmd)
 
-import clipboard; clipboard.copy(cmd)
-
-if False:
-process_video(options)
+# import clipboard; clipboard.copy(cmd)
+process_video(options)
 
 
+#%% Extract specific frames from a single video, no detection
+
+fn = r'g:\temp\test-videos\person_and_dog\DSCF0064.AVI'
+assert os.path.isfile(fn)
+model_file = 'no_detection'
+input_video_file = fn
+
+output_base = r'g:\temp\video_test'
+frame_folder = os.path.join(output_base,'frames')
+output_video_file = os.path.join(output_base,'output_videos.mp4')
+
+options = ProcessVideoOptions()
+options.model_file = model_file
+options.input_video_file = input_video_file
+options.verbose = True
+options.quality = 90
+options.frame_sample = None
+options.frames_to_extract = [0,100]
+options.max_width = None
+options.frame_folder = frame_folder
+options.keep_extracted_frames = True
+
+cmd = options_to_command(options); print(cmd)
+
+# import clipboard; clipboard.copy(cmd)
+process_video(options)
+
+
+#%% Extract specific frames from a folder, no detection
+
+fn = r'g:\temp\test-videos\person_and_dog'
+assert os.path.isdir(fn)
+model_file = 'no_detection'
+input_video_file = fn
+
+output_base = r'g:\temp\video_test'
+frame_folder = os.path.join(output_base,'frames')
+output_video_file = os.path.join(output_base,'output_videos.mp4')
+
+options = ProcessVideoOptions()
+options.model_file = model_file
+options.input_video_file = input_video_file
+options.verbose = True
+options.quality = 90
+options.frame_sample = None
+options.frames_to_extract = [0,100]
+options.max_width = None
+options.frame_folder = frame_folder
+options.keep_extracted_frames = True
+
+cmd = options_to_command(options); print(cmd)
+
+# import clipboard; clipboard.copy(cmd)
+process_video(options)
+
+
 #%% Command-line driver
 
 def main():

@@ -731,7 +870,8 @@ def main():
 'producing a new video with detections annotated'))
 
 parser.add_argument('model_file', type=str,
-help='MegaDetector model file (.pt or .pb) or model name (e.g. "MDV5A")'
+help='MegaDetector model file (.pt or .pb) or model name (e.g. "MDV5A"), '\
+'or the string "no_detection" to run just frame extraction')
 
 parser.add_argument('input_video_file', type=str,
 help='video file (or folder) to process')

@@ -788,22 +928,33 @@ def main():
 'whether other files were present in the folder.')
 
 parser.add_argument('--rendering_confidence_threshold', type=float,
-default=None,
+default=None,
+help="don't render boxes with confidence below this threshold (defaults to choosing based on the MD version)")
+
+parser.add_argument('--rendering_fs', type=float,
+    default=None,
+    help='force a specific frame rate for output videos (only relevant when using '\
+    '--render_output_video) (defaults to the original frame rate)')
 
 parser.add_argument('--json_confidence_threshold', type=float,
-default=
+default=default_options.json_confidence_threshold,
+help="don't include boxes in the .json file with confidence "\
 'below this threshold (default {})'.format(
 default_options.json_confidence_threshold))
 
 parser.add_argument('--n_cores', type=int,
-default=
+default=default_options.n_cores,
+help='Number of cores to use for frame separation and detection. '\
 'If using a GPU, this option will be respected for frame separation but '\
 'ignored for detection. Only relevant to frame separation when processing '\
-'a folder.')
+'a folder. Default {}.'.format(default_options.n_cores))
 
 parser.add_argument('--frame_sample', type=int,
 default=None, help='process every Nth frame (defaults to every frame)')
 
+parser.add_argument('--frames_to_extract', nargs='+', type=int,
+    default=None, help='extract specific frames (one or more ints)')
+
 parser.add_argument('--quality', type=int,
 default=default_options.quality,
 help='JPEG quality for extracted frames (defaults to {})'.format(
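Note: because --frames_to_extract is declared with nargs='+', multiple frame numbers are passed as separate tokens on the command line (e.g. --frames_to_extract 0 100). A minimal illustration of the resulting parse, using only the argument spec shown above:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--frames_to_extract', nargs='+', type=int, default=None,
                    help='extract specific frames (one or more ints)')

args = parser.parse_args(['--frames_to_extract', '0', '100'])
print(args.frames_to_extract)  # [0, 100]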
@@ -828,7 +979,16 @@
 parser.add_argument('--verbose', action='store_true',
 help='Enable additional debug output')
 
-
+parser.add_argument('--image_size',
+    type=int,
+    default=None,
+    help=('Force image resizing to a specific integer size on the long '\
+    'axis (not recommended to change this)'))
+
+parser.add_argument('--augment',
+    action='store_true',
+    help='Enable image augmentation')
+
 if len(sys.argv[1:]) == 0:
 parser.print_help()
 parser.exit()
megadetector/detection/pytorch_detector.py

@@ -180,9 +180,12 @@ class PTDetector:
 
 return model
 
-def generate_detections_one_image(self, img_original,
-
-
+def generate_detections_one_image(self, img_original,
+    image_id='unknown',
+    detection_threshold=0.00001,
+    image_size=None,
+    skip_image_resizing=False,
+    augment=False):
 """
 Applies the detector to an image.
 

@@ -192,11 +195,11 @@ class PTDetector:
 of the output object
 detection_threshold (float, optional): only detections above this confidence threshold
 will be included in the return value
-image_size (tuple, optional): image size to use for inference, only mess with this
-
-
-
-
+image_size (tuple, optional): image size to use for inference, only mess with this if
+(a) you're using a model other than MegaDetector or (b) you know what you're getting into
+skip_image_resizing (bool, optional): whether to skip internal image resizing (and rely on
+external resizing)
+augment (bool, optional): enable (implementation-specific) image augmentation
 
 Returns:
 dict: a dictionary with the following fields:

@@ -242,8 +245,10 @@ class PTDetector:
 if skip_image_resizing:
 img = img_original
 else:
-letterbox_result = letterbox(img_original,
-
+letterbox_result = letterbox(img_original,
+    new_shape=target_size,
+    stride=PTDetector.STRIDE,
+    auto=True)
 img = letterbox_result[0]
 
 # HWC to CHW; PIL Image is RGB already

@@ -258,7 +263,7 @@ class PTDetector:
 if len(img.shape) == 3:
 img = torch.unsqueeze(img, 0)
 
-pred
+pred = self.model(img,augment=augment)[0]
 
 # NMS
 if self.device == 'mps':