megadetector 10.0.2__py3-none-any.whl → 10.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/data_management/animl_to_md.py +158 -0
- megadetector/data_management/zamba_to_md.py +188 -0
- megadetector/detection/process_video.py +165 -946
- megadetector/detection/pytorch_detector.py +575 -276
- megadetector/detection/run_detector_batch.py +629 -202
- megadetector/detection/run_md_and_speciesnet.py +1319 -0
- megadetector/detection/video_utils.py +243 -107
- megadetector/postprocessing/classification_postprocessing.py +12 -1
- megadetector/postprocessing/combine_batch_outputs.py +2 -0
- megadetector/postprocessing/compare_batch_results.py +21 -2
- megadetector/postprocessing/merge_detections.py +16 -12
- megadetector/postprocessing/separate_detections_into_folders.py +1 -1
- megadetector/postprocessing/subset_json_detector_output.py +1 -3
- megadetector/postprocessing/validate_batch_results.py +25 -2
- megadetector/tests/__init__.py +0 -0
- megadetector/tests/test_nms_synthetic.py +335 -0
- megadetector/utils/ct_utils.py +69 -5
- megadetector/utils/extract_frames_from_video.py +303 -0
- megadetector/utils/md_tests.py +583 -524
- megadetector/utils/path_utils.py +4 -15
- megadetector/utils/wi_utils.py +20 -4
- megadetector/visualization/visualization_utils.py +1 -1
- megadetector/visualization/visualize_db.py +8 -22
- megadetector/visualization/visualize_detector_output.py +7 -5
- megadetector/visualization/visualize_video_output.py +607 -0
- {megadetector-10.0.2.dist-info → megadetector-10.0.4.dist-info}/METADATA +134 -135
- {megadetector-10.0.2.dist-info → megadetector-10.0.4.dist-info}/RECORD +30 -23
- {megadetector-10.0.2.dist-info → megadetector-10.0.4.dist-info}/licenses/LICENSE +0 -0
- {megadetector-10.0.2.dist-info → megadetector-10.0.4.dist-info}/top_level.txt +0 -0
- {megadetector-10.0.2.dist-info → megadetector-10.0.4.dist-info}/WHEEL +0 -0
|
@@ -27,10 +27,11 @@ from megadetector.visualization import visualization_utils as vis_utils
|
|
|
27
27
|
|
|
28
28
|
default_fourcc = 'h264'
|
|
29
29
|
|
|
30
|
+
video_progress_bar_description = 'Processing video'
|
|
30
31
|
|
|
31
32
|
#%% Path utilities
|
|
32
33
|
|
|
33
|
-
VIDEO_EXTENSIONS = ('.mp4','.avi','.mpeg','.mpg','.mov','.mkv')
|
|
34
|
+
VIDEO_EXTENSIONS = ('.mp4','.avi','.mpeg','.mpg','.mov','.mkv','.flv')
|
|
34
35
|
|
|
35
36
|
def is_video_file(s,video_extensions=VIDEO_EXTENSIONS):
|
|
36
37
|
"""
|
|
@@ -200,11 +201,17 @@ def _add_frame_numbers_to_results(results):
|
|
|
200
201
|
"""
|
|
201
202
|
Given the 'images' list from a set of MD results that was generated on video frames,
|
|
202
203
|
add a 'frame_number' field to each image, and return the list, sorted by frame number.
|
|
204
|
+
Also modifies "results" in place.
|
|
203
205
|
|
|
204
206
|
Args:
|
|
205
207
|
results (list): list of image dicts
|
|
206
208
|
"""
|
|
207
209
|
|
|
210
|
+
# This indicates that this was a failure for a single video
|
|
211
|
+
if isinstance(results,dict):
|
|
212
|
+
assert 'failure' in results
|
|
213
|
+
return results
|
|
214
|
+
|
|
208
215
|
# Add video-specific fields to the results
|
|
209
216
|
for im in results:
|
|
210
217
|
fn = im['file']
|
|
@@ -228,8 +235,11 @@ def run_callback_on_frames(input_video_file,
|
|
|
228
235
|
Args:
|
|
229
236
|
input_video_file (str): video file to process
|
|
230
237
|
frame_callback (function): callback to run on frames, should take an np.array and a string and
|
|
231
|
-
return a single value. callback should expect
|
|
232
|
-
|
|
238
|
+
return a single value. callback should expect two arguments: (1) a numpy array with image
|
|
239
|
+
data, in the typical PIL image orientation/channel order, and (2) a string identifier
|
|
240
|
+
for the frame, typically something like "frame0006.jpg" (even though it's not a JPEG
|
|
241
|
+
image, this is just an identifier for the frame).
|
|
242
|
+
every_n_frames (int or float, optional): sample every Nth frame starting from the first frame;
|
|
233
243
|
if this is None or 1, every frame is processed. If this is a negative value, it's
|
|
234
244
|
interpreted as a sampling rate in seconds, which is rounded to the nearest frame sampling
|
|
235
245
|
rate. Mutually exclusive with frames_to_process.
|
|
@@ -243,8 +253,10 @@ def run_callback_on_frames(input_video_file,
|
|
|
243
253
|
|
|
244
254
|
Returns:
|
|
245
255
|
dict: dict with keys 'frame_filenames' (list), 'frame_rate' (float), 'results' (list).
|
|
246
|
-
'frame_filenames' are synthetic filenames (e.g. frame000000.jpg)
|
|
247
|
-
|
|
256
|
+
'frame_filenames' are synthetic filenames (e.g. frame000000.jpg). Elements in
|
|
257
|
+
'results' are whatever is returned by the callback, typically dicts in the same format used in
|
|
258
|
+
the 'images' array in the MD results format. [frame_filenames] and [results] both have
|
|
259
|
+
one element per processed frame.
|
|
248
260
|
"""
|
|
249
261
|
|
|
250
262
|
assert os.path.isfile(input_video_file), 'File {} not found'.format(input_video_file)
|
|
@@ -255,64 +267,88 @@ def run_callback_on_frames(input_video_file,
|
|
|
255
267
|
if (frames_to_process is not None) and (every_n_frames is not None):
|
|
256
268
|
raise ValueError('frames_to_process and every_n_frames are mutually exclusive')
|
|
257
269
|
|
|
258
|
-
vidcap =
|
|
259
|
-
n_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
|
|
260
|
-
frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
|
|
270
|
+
vidcap = None
|
|
261
271
|
|
|
262
|
-
|
|
263
|
-
print('Video {} contains {} frames at {} Hz'.format(input_video_file,n_frames,frame_rate))
|
|
272
|
+
try:
|
|
264
273
|
|
|
265
|
-
|
|
266
|
-
|
|
274
|
+
vidcap = cv2.VideoCapture(input_video_file)
|
|
275
|
+
n_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
|
|
276
|
+
frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
|
|
267
277
|
|
|
268
|
-
if (every_n_frames is not None) and (every_n_frames < 0):
|
|
269
|
-
every_n_seconds = abs(every_n_frames)
|
|
270
|
-
every_n_frames = int(every_n_seconds * frame_rate)
|
|
271
278
|
if verbose:
|
|
272
|
-
print('
|
|
273
|
-
every_n_seconds,every_n_frames))
|
|
279
|
+
print('Video {} contains {} frames at {} Hz'.format(input_video_file,n_frames,frame_rate))
|
|
274
280
|
|
|
275
|
-
|
|
276
|
-
|
|
281
|
+
frame_filenames = []
|
|
282
|
+
results = []
|
|
277
283
|
|
|
278
|
-
|
|
284
|
+
if (every_n_frames is not None):
|
|
279
285
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
286
|
+
if (every_n_frames < 0):
|
|
287
|
+
every_n_seconds = abs(every_n_frames)
|
|
288
|
+
every_n_frames = int(every_n_seconds * frame_rate)
|
|
289
|
+
if verbose:
|
|
290
|
+
print('Interpreting a time sampling rate of {} hz as a frame interval of {}'.format(
|
|
291
|
+
every_n_seconds,every_n_frames))
|
|
292
|
+
# 0 and 1 both mean "process every frame"
|
|
293
|
+
elif every_n_frames == 0:
|
|
294
|
+
every_n_frames = 1
|
|
295
|
+
elif every_n_frames > 0:
|
|
296
|
+
every_n_frames = int(every_n_frames)
|
|
285
297
|
|
|
286
|
-
if every_n_frames
|
|
287
|
-
|
|
288
|
-
|
|
298
|
+
# ...if every_n_frames was supplied
|
|
299
|
+
|
|
300
|
+
# frame_number = 0
|
|
301
|
+
for frame_number in range(0,n_frames):
|
|
302
|
+
|
|
303
|
+
success,image = vidcap.read()
|
|
289
304
|
|
|
290
|
-
|
|
291
|
-
|
|
305
|
+
if not success:
|
|
306
|
+
assert image is None
|
|
307
|
+
if verbose:
|
|
308
|
+
print('Read terminating at frame {} of {}'.format(frame_number,n_frames))
|
|
292
309
|
break
|
|
293
|
-
if frame_number not in frames_to_process:
|
|
294
|
-
continue
|
|
295
310
|
|
|
296
|
-
|
|
297
|
-
|
|
311
|
+
if every_n_frames is not None:
|
|
312
|
+
if (frame_number % every_n_frames) != 0:
|
|
313
|
+
continue
|
|
298
314
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
315
|
+
if frames_to_process is not None:
|
|
316
|
+
if frame_number > max(frames_to_process):
|
|
317
|
+
break
|
|
318
|
+
if frame_number not in frames_to_process:
|
|
319
|
+
continue
|
|
302
320
|
|
|
303
|
-
|
|
321
|
+
frame_filename_relative = _frame_number_to_filename(frame_number)
|
|
322
|
+
frame_filenames.append(frame_filename_relative)
|
|
304
323
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
print('Warning: found no frames in file {}'.format(input_video_file))
|
|
308
|
-
else:
|
|
309
|
-
raise Exception('Error: found no frames in file {}'.format(input_video_file))
|
|
324
|
+
# Convert from OpenCV conventions to PIL conventions
|
|
325
|
+
image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
|
310
326
|
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
327
|
+
# Run the callback
|
|
328
|
+
frame_results = frame_callback(image_np,frame_filename_relative)
|
|
329
|
+
|
|
330
|
+
results.append(frame_results)
|
|
331
|
+
|
|
332
|
+
# ...for each frame
|
|
333
|
+
|
|
334
|
+
if len(frame_filenames) == 0:
|
|
335
|
+
if allow_empty_videos:
|
|
336
|
+
print('Warning: found no frames in file {}'.format(input_video_file))
|
|
337
|
+
else:
|
|
338
|
+
raise Exception('Error: found no frames in file {}'.format(input_video_file))
|
|
339
|
+
|
|
340
|
+
if verbose:
|
|
341
|
+
print('\nProcessed {} of {} frames for {}'.format(
|
|
342
|
+
len(frame_filenames),n_frames,input_video_file))
|
|
343
|
+
|
|
344
|
+
finally:
|
|
345
|
+
|
|
346
|
+
if vidcap is not None:
|
|
347
|
+
try:
|
|
348
|
+
vidcap.release()
|
|
349
|
+
except Exception:
|
|
350
|
+
pass
|
|
314
351
|
|
|
315
|
-
vidcap.release()
|
|
316
352
|
to_return = {}
|
|
317
353
|
to_return['frame_filenames'] = frame_filenames
|
|
318
354
|
to_return['frame_rate'] = frame_rate
|
|
@@ -328,7 +364,8 @@ def run_callback_on_frames_for_folder(input_video_folder,
|
|
|
328
364
|
every_n_frames=None,
|
|
329
365
|
verbose=False,
|
|
330
366
|
allow_empty_videos=False,
|
|
331
|
-
recursive=True
|
|
367
|
+
recursive=True,
|
|
368
|
+
files_to_process_relative=None):
|
|
332
369
|
"""
|
|
333
370
|
Calls the function frame_callback(np.array,image_id) on all (or selected) frames in
|
|
334
371
|
all videos in [input_video_folder].
|
|
@@ -336,8 +373,11 @@ def run_callback_on_frames_for_folder(input_video_folder,
|
|
|
336
373
|
Args:
|
|
337
374
|
input_video_folder (str): video folder to process
|
|
338
375
|
frame_callback (function): callback to run on frames, should take an np.array and a string and
|
|
339
|
-
return a single value. callback should expect
|
|
340
|
-
|
|
376
|
+
return a single value. callback should expect two arguments: (1) a numpy array with image
|
|
377
|
+
data, in the typical PIL image orientation/channel order, and (2) a string identifier
|
|
378
|
+
for the frame, typically something like "frame0006.jpg" (even though it's not a JPEG
|
|
379
|
+
image, this is just an identifier for the frame).
|
|
380
|
+
every_n_frames (int or float, optional): sample every Nth frame starting from the first frame;
|
|
341
381
|
if this is None or 1, every frame is processed. If this is a negative value, it's
|
|
342
382
|
interpreted as a sampling rate in seconds, which is rounded to the nearest frame
|
|
343
383
|
sampling rate.
|
|
@@ -345,43 +385,76 @@ def run_callback_on_frames_for_folder(input_video_folder,
|
|
|
345
385
|
allow_empty_videos (bool, optional): Just print a warning if a video appears to have no
|
|
346
386
|
frames (by default, this is an error).
|
|
347
387
|
recursive (bool, optional): recurse into [input_video_folder]
|
|
388
|
+
files_to_process_relative (list, optional): only process specific relative paths
|
|
348
389
|
|
|
349
390
|
Returns:
|
|
350
391
|
dict: dict with keys 'video_filenames' (list of str), 'frame_rates' (list of floats),
|
|
351
392
|
'results' (list of list of dicts). 'video_filenames' will contain *relative* filenames.
|
|
393
|
+
'results' is a list (one element per video) of lists (one element per frame) of whatever the
|
|
394
|
+
callback returns, typically (but not necessarily) dicts in the MD results format.
|
|
395
|
+
|
|
396
|
+
For failed videos, the frame rate will be represented by -1, and "results"
|
|
397
|
+
will be a dict with at least the key "failure".
|
|
352
398
|
"""
|
|
353
399
|
|
|
354
400
|
to_return = {'video_filenames':[],'frame_rates':[],'results':[]}
|
|
355
401
|
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
402
|
+
if files_to_process_relative is not None:
|
|
403
|
+
input_files_full_paths = \
|
|
404
|
+
[os.path.join(input_video_folder,fn) for fn in files_to_process_relative]
|
|
405
|
+
input_files_full_paths = [fn.replace('\\','/') for fn in input_files_full_paths]
|
|
406
|
+
else:
|
|
407
|
+
# Recursively enumerate video files
|
|
408
|
+
input_files_full_paths = find_videos(input_video_folder,
|
|
409
|
+
recursive=recursive,
|
|
410
|
+
convert_slashes=True,
|
|
411
|
+
return_relative_paths=False)
|
|
412
|
+
|
|
413
|
+
print('Processing {} videos from folder {}'.format(len(input_files_full_paths),input_video_folder))
|
|
362
414
|
|
|
363
415
|
if len(input_files_full_paths) == 0:
|
|
416
|
+
print('No videos to process')
|
|
364
417
|
return to_return
|
|
365
418
|
|
|
366
419
|
# Process each video
|
|
367
420
|
|
|
368
421
|
# video_fn_abs = input_files_full_paths[0]
|
|
369
|
-
for video_fn_abs in tqdm(input_files_full_paths):
|
|
370
|
-
|
|
371
|
-
frame_callback=frame_callback,
|
|
372
|
-
every_n_frames=every_n_frames,
|
|
373
|
-
verbose=verbose,
|
|
374
|
-
frames_to_process=None,
|
|
375
|
-
allow_empty_videos=allow_empty_videos)
|
|
376
|
-
|
|
377
|
-
"""
|
|
378
|
-
dict: dict with keys 'frame_filenames' (list), 'frame_rate' (float), 'results' (list).
|
|
379
|
-
'frame_filenames' are synthetic filenames (e.g. frame000000.jpg); 'results' are
|
|
380
|
-
in the same format used in the 'images' array in the MD results format.
|
|
381
|
-
"""
|
|
422
|
+
for video_fn_abs in tqdm(input_files_full_paths,desc=video_progress_bar_description):
|
|
423
|
+
|
|
382
424
|
video_filename_relative = os.path.relpath(video_fn_abs,input_video_folder)
|
|
383
425
|
video_filename_relative = video_filename_relative.replace('\\','/')
|
|
384
426
|
to_return['video_filenames'].append(video_filename_relative)
|
|
427
|
+
|
|
428
|
+
try:
|
|
429
|
+
|
|
430
|
+
# video_results is a dict with fields:
|
|
431
|
+
#
|
|
432
|
+
# frame_rate
|
|
433
|
+
#
|
|
434
|
+
# results (list of objects returned by the callback, typically dicts in the MD
|
|
435
|
+
# per-image format)
|
|
436
|
+
#
|
|
437
|
+
# frame_filenames (list of frame IDs, i.e. synthetic filenames)
|
|
438
|
+
video_results = run_callback_on_frames(input_video_file=video_fn_abs,
|
|
439
|
+
frame_callback=frame_callback,
|
|
440
|
+
every_n_frames=every_n_frames,
|
|
441
|
+
verbose=verbose,
|
|
442
|
+
frames_to_process=None,
|
|
443
|
+
allow_empty_videos=allow_empty_videos)
|
|
444
|
+
|
|
445
|
+
except Exception as e:
|
|
446
|
+
|
|
447
|
+
print('Warning: error processing video {}: {}'.format(
|
|
448
|
+
video_fn_abs,str(e)
|
|
449
|
+
))
|
|
450
|
+
to_return['frame_rates'].append(-1.0)
|
|
451
|
+
failure_result = {}
|
|
452
|
+
failure_result['failure'] = 'Failure processing video: {}'.format(str(e))
|
|
453
|
+
to_return['results'].append(failure_result)
|
|
454
|
+
continue
|
|
455
|
+
|
|
456
|
+
# ...try/except
|
|
457
|
+
|
|
385
458
|
to_return['frame_rates'].append(video_results['frame_rate'])
|
|
386
459
|
for r in video_results['results']:
|
|
387
460
|
assert r['file'].startswith('frame')
|
|
@@ -665,6 +738,25 @@ def video_to_frames(input_video_file,
|
|
|
665
738
|
# ...def video_to_frames(...)
|
|
666
739
|
|
|
667
740
|
|
|
741
|
+
def _video_to_frames_with_per_video_frames(args):
|
|
742
|
+
"""
|
|
743
|
+
Wrapper function to handle extracting a different list of frames for
|
|
744
|
+
each video in a multiprocessing context.
|
|
745
|
+
|
|
746
|
+
Takes a tuple of (relative_fn, frames_for_this_video, other_args),
|
|
747
|
+
where (other_args) contains the arguments that are the same for each
|
|
748
|
+
iteration.
|
|
749
|
+
"""
|
|
750
|
+
|
|
751
|
+
relative_fn, frames_for_this_video, other_args = args
|
|
752
|
+
(input_folder, output_folder_base, every_n_frames, overwrite, verbose,
|
|
753
|
+
quality, max_width, allow_empty_videos) = other_args
|
|
754
|
+
|
|
755
|
+
return _video_to_frames_for_folder(relative_fn, input_folder, output_folder_base,
|
|
756
|
+
every_n_frames, overwrite, verbose, quality, max_width,
|
|
757
|
+
frames_for_this_video, allow_empty_videos)
|
|
758
|
+
|
|
759
|
+
|
|
668
760
|
def _video_to_frames_for_folder(relative_fn,input_folder,output_folder_base,
|
|
669
761
|
every_n_frames,overwrite,verbose,quality,max_width,
|
|
670
762
|
frames_to_extract,allow_empty_videos):
|
|
@@ -708,7 +800,8 @@ def video_folder_to_frames(input_folder,
|
|
|
708
800
|
quality=None,
|
|
709
801
|
max_width=None,
|
|
710
802
|
frames_to_extract=None,
|
|
711
|
-
allow_empty_videos=False
|
|
803
|
+
allow_empty_videos=False,
|
|
804
|
+
relative_paths_to_process=None):
|
|
712
805
|
"""
|
|
713
806
|
For every video file in input_folder, creates a folder within output_folder_base, and
|
|
714
807
|
renders frames of that video to images in that folder.
|
|
@@ -721,9 +814,9 @@ def video_folder_to_frames(input_folder,
|
|
|
721
814
|
overwrite (bool, optional): whether to overwrite existing frame images
|
|
722
815
|
n_threads (int, optional): number of concurrent workers to use; set to <= 1 to disable
|
|
723
816
|
parallelism
|
|
724
|
-
every_n_frames (int, optional): sample every Nth frame starting from the first
|
|
725
|
-
if this is None or 1, every frame is extracted. If this is a negative value,
|
|
726
|
-
interpreted as a sampling rate in seconds, which is rounded to the nearest frame
|
|
817
|
+
every_n_frames (int or float, optional): sample every Nth frame starting from the first
|
|
818
|
+
frame; if this is None or 1, every frame is extracted. If this is a negative value,
|
|
819
|
+
it's interpreted as a sampling rate in seconds, which is rounded to the nearest frame
|
|
727
820
|
sampling rate. Mutually exclusive with frames_to_extract.
|
|
728
821
|
verbose (bool, optional): enable additional debug console output
|
|
729
822
|
parallelization_uses_threads (bool, optional): whether to use threads (True) or
|
|
@@ -731,14 +824,17 @@ def video_folder_to_frames(input_folder,
|
|
|
731
824
|
quality (int, optional): JPEG quality for frame output, from 0-100. Defaults
|
|
732
825
|
to the opencv default (typically 95).
|
|
733
826
|
max_width (int, optional): resize frames to be no wider than [max_width]
|
|
734
|
-
frames_to_extract (list of int, optional): extract this specific set of frames
|
|
735
|
-
each video; mutually exclusive with every_n_frames. If all values are beyond
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
827
|
+
frames_to_extract (int, list of int, or dict, optional): extract this specific set of frames
|
|
828
|
+
from each video; mutually exclusive with every_n_frames. If all values are beyond the
|
|
829
|
+
length of a video, no frames are extracted. Can also be a single int, specifying a single
|
|
830
|
+
frame number. In the special case where frames_to_extract is [], this function still
|
|
831
|
+
reads video frame rates and verifies that videos are readable, but no frames are
|
|
832
|
+
extracted. Can be a dict mapping relative paths to lists of frame numbers to extract different
|
|
833
|
+
frames from each video.
|
|
834
|
+
allow_empty_videos (bool, optional): just print a warning if a video appears to have no
|
|
741
835
|
frames (by default, this is an error).
|
|
836
|
+
relative_paths_to_process (list, optional): only process the relative paths on this
|
|
837
|
+
list
|
|
742
838
|
|
|
743
839
|
Returns:
|
|
744
840
|
tuple: a length-3 tuple containing:
|
|
@@ -748,16 +844,21 @@ def video_folder_to_frames(input_folder,
|
|
|
748
844
|
- list of video filenames
|
|
749
845
|
"""
|
|
750
846
|
|
|
751
|
-
#
|
|
752
|
-
if
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
847
|
+
# Enumerate video files if necessary
|
|
848
|
+
if relative_paths_to_process is None:
|
|
849
|
+
if verbose:
|
|
850
|
+
print('Enumerating videos in {}'.format(input_folder))
|
|
851
|
+
input_files_full_paths = find_videos(input_folder,recursive=recursive)
|
|
852
|
+
if verbose:
|
|
853
|
+
print('Found {} videos in folder {}'.format(len(input_files_full_paths),input_folder))
|
|
854
|
+
if len(input_files_full_paths) == 0:
|
|
855
|
+
return [],[],[]
|
|
856
|
+
|
|
857
|
+
input_files_relative_paths = [os.path.relpath(s,input_folder) for s in input_files_full_paths]
|
|
858
|
+
else:
|
|
859
|
+
input_files_relative_paths = relative_paths_to_process
|
|
860
|
+
input_files_full_paths = [os.path.join(input_folder,fn) for fn in input_files_relative_paths]
|
|
759
861
|
|
|
760
|
-
input_files_relative_paths = [os.path.relpath(s,input_folder) for s in input_files_full_paths]
|
|
761
862
|
input_files_relative_paths = [s.replace('\\','/') for s in input_files_relative_paths]
|
|
762
863
|
|
|
763
864
|
os.makedirs(output_folder_base,exist_ok=True)
|
|
@@ -766,10 +867,17 @@ def video_folder_to_frames(input_folder,
|
|
|
766
867
|
fs_by_video = []
|
|
767
868
|
|
|
768
869
|
if n_threads == 1:
|
|
870
|
+
|
|
769
871
|
# For each video
|
|
770
872
|
#
|
|
771
873
|
# input_fn_relative = input_files_relative_paths[0]
|
|
772
|
-
for input_fn_relative in tqdm(input_files_relative_paths):
|
|
874
|
+
for input_fn_relative in tqdm(input_files_relative_paths,desc='Video to frames'):
|
|
875
|
+
|
|
876
|
+
# If frames_to_extract is a dict, get the specific frames for this video
|
|
877
|
+
if isinstance(frames_to_extract, dict):
|
|
878
|
+
frames_for_this_video = frames_to_extract.get(input_fn_relative, [])
|
|
879
|
+
else:
|
|
880
|
+
frames_for_this_video = frames_to_extract
|
|
773
881
|
|
|
774
882
|
frame_filenames,fs = \
|
|
775
883
|
_video_to_frames_for_folder(input_fn_relative,
|
|
@@ -780,40 +888,69 @@ def video_folder_to_frames(input_folder,
|
|
|
780
888
|
verbose,
|
|
781
889
|
quality,
|
|
782
890
|
max_width,
|
|
783
|
-
|
|
891
|
+
frames_for_this_video,
|
|
784
892
|
allow_empty_videos)
|
|
785
893
|
frame_filenames_by_video.append(frame_filenames)
|
|
786
894
|
fs_by_video.append(fs)
|
|
895
|
+
|
|
787
896
|
else:
|
|
897
|
+
|
|
788
898
|
pool = None
|
|
789
899
|
results = None
|
|
790
900
|
try:
|
|
901
|
+
|
|
791
902
|
if parallelization_uses_threads:
|
|
792
903
|
print('Starting a worker pool with {} threads'.format(n_threads))
|
|
793
904
|
pool = ThreadPool(n_threads)
|
|
794
905
|
else:
|
|
795
906
|
print('Starting a worker pool with {} processes'.format(n_threads))
|
|
796
907
|
pool = Pool(n_threads)
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
908
|
+
|
|
909
|
+
if isinstance(frames_to_extract, dict):
|
|
910
|
+
|
|
911
|
+
# For the dict case, we need to extract different frames from each video.
|
|
912
|
+
|
|
913
|
+
# These arguments are the same for every iteration
|
|
914
|
+
other_args = (input_folder, output_folder_base, every_n_frames, overwrite,
|
|
915
|
+
verbose, quality, max_width, allow_empty_videos)
|
|
916
|
+
|
|
917
|
+
# The filename and list of frames to extract vary with each iteration
|
|
918
|
+
args_for_pool = [(relative_fn, frames_to_extract.get(relative_fn, []), other_args)
|
|
919
|
+
for relative_fn in input_files_relative_paths]
|
|
920
|
+
|
|
921
|
+
results = list(tqdm(pool.imap(_video_to_frames_with_per_video_frames, args_for_pool),
|
|
922
|
+
total=len(args_for_pool),desc='Video to frames'))
|
|
923
|
+
|
|
924
|
+
else:
|
|
925
|
+
|
|
926
|
+
process_video_with_options = partial(_video_to_frames_for_folder,
|
|
927
|
+
input_folder=input_folder,
|
|
928
|
+
output_folder_base=output_folder_base,
|
|
929
|
+
every_n_frames=every_n_frames,
|
|
930
|
+
overwrite=overwrite,
|
|
931
|
+
verbose=verbose,
|
|
932
|
+
quality=quality,
|
|
933
|
+
max_width=max_width,
|
|
934
|
+
frames_to_extract=frames_to_extract,
|
|
935
|
+
allow_empty_videos=allow_empty_videos)
|
|
936
|
+
results = list(tqdm(pool.imap(process_video_with_options, input_files_relative_paths),
|
|
937
|
+
total=len(input_files_relative_paths),desc='Video to frames'))
|
|
938
|
+
|
|
939
|
+
# ...if we need to pass different frames for each video
|
|
940
|
+
|
|
810
941
|
finally:
|
|
942
|
+
|
|
811
943
|
pool.close()
|
|
812
944
|
pool.join()
|
|
813
|
-
print(
|
|
945
|
+
print('Pool closed and joined for video processing')
|
|
946
|
+
|
|
947
|
+
# ...try/finally
|
|
948
|
+
|
|
814
949
|
frame_filenames_by_video = [x[0] for x in results]
|
|
815
950
|
fs_by_video = [x[1] for x in results]
|
|
816
951
|
|
|
952
|
+
# ...if we're working on a single thread vs. multiple workers
|
|
953
|
+
|
|
817
954
|
return frame_filenames_by_video,fs_by_video,input_files_full_paths
|
|
818
955
|
|
|
819
956
|
# ...def video_folder_to_frames(...)
|
|
@@ -901,8 +1038,7 @@ def frame_results_to_video_results(input_file,
|
|
|
901
1038
|
options.non_video_behavior))
|
|
902
1039
|
|
|
903
1040
|
# Attach video-specific fields to the output, specifically attach the frame
|
|
904
|
-
# number to both the video and each detection.
|
|
905
|
-
# canonical detection will end up in the video-level output file.
|
|
1041
|
+
# number to both the video and each detection.
|
|
906
1042
|
frame_number = _filename_to_frame_number(fn)
|
|
907
1043
|
im['frame_number'] = frame_number
|
|
908
1044
|
for detection in im['detections']:
|
|
@@ -505,7 +505,6 @@ def _smooth_classifications_for_list_of_detections(detections,
|
|
|
505
505
|
|
|
506
506
|
# If we're doing taxonomic processing, at this stage, don't turn children
|
|
507
507
|
# into parents; we'll likely turn parents into children in the next stage.
|
|
508
|
-
|
|
509
508
|
if process_taxonomic_rules:
|
|
510
509
|
|
|
511
510
|
most_common_category_description = \
|
|
@@ -612,10 +611,22 @@ def _smooth_classifications_for_list_of_detections(detections,
|
|
|
612
611
|
if len(category_description_candidate_child) == 0:
|
|
613
612
|
continue
|
|
614
613
|
|
|
614
|
+
# This handles a case that doesn't come up with "pure" SpeciesNet results;
|
|
615
|
+
# if two categories have different IDs but the same "clean" description, this
|
|
616
|
+
# means they're different common names for the same species, which we use
|
|
617
|
+
# for things like "white-tailed deer buck" and "white-tailed deer fawn".
|
|
618
|
+
#
|
|
619
|
+
# Currently we don't support smoothing those predictions, because it's not a
|
|
620
|
+
# parent/child relationship.
|
|
621
|
+
if category_description_candidate_child == \
|
|
622
|
+
category_description_this_classification:
|
|
623
|
+
continue
|
|
624
|
+
|
|
615
625
|
# As long as we're using "clean" descriptions, parent/child taxonomic
|
|
616
626
|
# relationships are defined by a substring relationship
|
|
617
627
|
is_child = category_description_this_classification in \
|
|
618
628
|
category_description_candidate_child
|
|
629
|
+
|
|
619
630
|
if not is_child:
|
|
620
631
|
continue
|
|
621
632
|
|
|
@@ -40,6 +40,8 @@ def combine_batch_output_files(input_files,
|
|
|
40
40
|
Merges the list of MD results files [input_files] into a single
|
|
41
41
|
dictionary, optionally writing the result to [output_file].
|
|
42
42
|
|
|
43
|
+
Always overwrites [output_file] if it exists.
|
|
44
|
+
|
|
43
45
|
Args:
|
|
44
46
|
input_files (list of str): paths to JSON detection files
|
|
45
47
|
output_file (str, optional): path to write merged JSON
|
|
@@ -1851,12 +1851,15 @@ def find_equivalent_threshold(results_a,
|
|
|
1851
1851
|
|
|
1852
1852
|
return confidence_values, images_above_threshold
|
|
1853
1853
|
|
|
1854
|
+
# ...def _get_confidence_values_for_results(...)
|
|
1855
|
+
|
|
1854
1856
|
confidence_values_a,images_above_threshold_a = \
|
|
1855
1857
|
_get_confidence_values_for_results(results_a['images'],
|
|
1856
1858
|
category_ids_to_consider_a,
|
|
1857
1859
|
threshold_a)
|
|
1858
1860
|
|
|
1859
|
-
#
|
|
1861
|
+
# Not necessary, but facilitates debugging
|
|
1862
|
+
confidence_values_a = sorted(confidence_values_a)
|
|
1860
1863
|
|
|
1861
1864
|
if verbose:
|
|
1862
1865
|
print('For result set A, considering {} of {} images'.format(
|
|
@@ -1869,13 +1872,29 @@ def find_equivalent_threshold(results_a,
|
|
|
1869
1872
|
if verbose:
|
|
1870
1873
|
print('For result set B, considering {} of {} images'.format(
|
|
1871
1874
|
len(confidence_values_b),len(results_b['images'])))
|
|
1875
|
+
|
|
1872
1876
|
confidence_values_b = sorted(confidence_values_b)
|
|
1873
1877
|
|
|
1878
|
+
# Find the threshold that produces the same fraction of detections for results_b
|
|
1874
1879
|
target_detection_fraction = len(confidence_values_a_above_threshold) / len(confidence_values_a)
|
|
1875
1880
|
|
|
1876
|
-
|
|
1881
|
+
# How many detections do we want in results_b?
|
|
1882
|
+
target_number_of_detections = round(len(confidence_values_b) * target_detection_fraction)
|
|
1883
|
+
|
|
1884
|
+
# How many non-detections do we want in results_b?
|
|
1885
|
+
target_number_of_non_detections = len(confidence_values_b) - target_number_of_detections
|
|
1886
|
+
detection_cutoff_index = max(target_number_of_non_detections,0)
|
|
1877
1887
|
threshold_b = confidence_values_b[detection_cutoff_index]
|
|
1878
1888
|
|
|
1889
|
+
confidence_values_b_above_threshold = [c for c in confidence_values_b if c >= threshold_b]
|
|
1890
|
+
confidence_values_b_above_reference_threshold = [c for c in confidence_values_b if c >= threshold_a]
|
|
1891
|
+
|
|
1892
|
+
# Special case: if the number of detections above the selected threshold is the same as the
|
|
1893
|
+
# number above the reference threshold, use the reference threshold
|
|
1894
|
+
if len(confidence_values_b_above_threshold) == len(confidence_values_b_above_reference_threshold):
|
|
1895
|
+
print('Detection count for reference threshold matches target threshold')
|
|
1896
|
+
threshold_b = threshold_a
|
|
1897
|
+
|
|
1879
1898
|
if verbose:
|
|
1880
1899
|
print('{} confidence values above threshold (A)'.format(
|
|
1881
1900
|
len(confidence_values_a_above_threshold)))
|