megadetector 10.0.2__py3-none-any.whl → 10.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/detection/process_video.py +120 -913
- megadetector/detection/pytorch_detector.py +572 -263
- megadetector/detection/run_detector_batch.py +525 -143
- megadetector/detection/run_md_and_speciesnet.py +1301 -0
- megadetector/detection/video_utils.py +240 -105
- megadetector/postprocessing/classification_postprocessing.py +12 -1
- megadetector/postprocessing/compare_batch_results.py +21 -2
- megadetector/postprocessing/merge_detections.py +16 -12
- megadetector/postprocessing/validate_batch_results.py +25 -2
- megadetector/tests/__init__.py +0 -0
- megadetector/tests/test_nms_synthetic.py +335 -0
- megadetector/utils/ct_utils.py +16 -5
- megadetector/utils/extract_frames_from_video.py +303 -0
- megadetector/utils/md_tests.py +578 -520
- megadetector/utils/wi_utils.py +20 -4
- megadetector/visualization/visualize_db.py +8 -22
- megadetector/visualization/visualize_detector_output.py +1 -1
- megadetector/visualization/visualize_video_output.py +607 -0
- {megadetector-10.0.2.dist-info → megadetector-10.0.3.dist-info}/METADATA +134 -135
- {megadetector-10.0.2.dist-info → megadetector-10.0.3.dist-info}/RECORD +23 -18
- {megadetector-10.0.2.dist-info → megadetector-10.0.3.dist-info}/licenses/LICENSE +0 -0
- {megadetector-10.0.2.dist-info → megadetector-10.0.3.dist-info}/top_level.txt +0 -0
- {megadetector-10.0.2.dist-info → megadetector-10.0.3.dist-info}/WHEEL +0 -0
|
@@ -30,7 +30,7 @@ default_fourcc = 'h264'
|
|
|
30
30
|
|
|
31
31
|
#%% Path utilities
|
|
32
32
|
|
|
33
|
-
VIDEO_EXTENSIONS = ('.mp4','.avi','.mpeg','.mpg','.mov','.mkv')
|
|
33
|
+
VIDEO_EXTENSIONS = ('.mp4','.avi','.mpeg','.mpg','.mov','.mkv','.flv')
|
|
34
34
|
|
|
35
35
|
def is_video_file(s,video_extensions=VIDEO_EXTENSIONS):
|
|
36
36
|
"""
|
|
@@ -200,11 +200,17 @@ def _add_frame_numbers_to_results(results):
|
|
|
200
200
|
"""
|
|
201
201
|
Given the 'images' list from a set of MD results that was generated on video frames,
|
|
202
202
|
add a 'frame_number' field to each image, and return the list, sorted by frame number.
|
|
203
|
+
Also modifies "results" in place.
|
|
203
204
|
|
|
204
205
|
Args:
|
|
205
206
|
results (list): list of image dicts
|
|
206
207
|
"""
|
|
207
208
|
|
|
209
|
+
# This indicates that this was a failure for a single video
|
|
210
|
+
if isinstance(results,dict):
|
|
211
|
+
assert 'failure' in results
|
|
212
|
+
return results
|
|
213
|
+
|
|
208
214
|
# Add video-specific fields to the results
|
|
209
215
|
for im in results:
|
|
210
216
|
fn = im['file']
|
|
@@ -228,8 +234,11 @@ def run_callback_on_frames(input_video_file,
|
|
|
228
234
|
Args:
|
|
229
235
|
input_video_file (str): video file to process
|
|
230
236
|
frame_callback (function): callback to run on frames, should take an np.array and a string and
|
|
231
|
-
return a single value. callback should expect
|
|
232
|
-
|
|
237
|
+
return a single value. callback should expect two arguments: (1) a numpy array with image
|
|
238
|
+
data, in the typical PIL image orientation/channel order, and (2) a string identifier
|
|
239
|
+
for the frame, typically something like "frame0006.jpg" (even though it's not a JPEG
|
|
240
|
+
image, this is just an identifier for the frame).
|
|
241
|
+
every_n_frames (int or float, optional): sample every Nth frame starting from the first frame;
|
|
233
242
|
if this is None or 1, every frame is processed. If this is a negative value, it's
|
|
234
243
|
interpreted as a sampling rate in seconds, which is rounded to the nearest frame sampling
|
|
235
244
|
rate. Mutually exclusive with frames_to_process.
|
|
@@ -243,8 +252,10 @@ def run_callback_on_frames(input_video_file,
|
|
|
243
252
|
|
|
244
253
|
Returns:
|
|
245
254
|
dict: dict with keys 'frame_filenames' (list), 'frame_rate' (float), 'results' (list).
|
|
246
|
-
'frame_filenames' are synthetic filenames (e.g. frame000000.jpg)
|
|
247
|
-
|
|
255
|
+
'frame_filenames' are synthetic filenames (e.g. frame000000.jpg). Elements in
|
|
256
|
+
'results' are whatever is returned by the callback, typically dicts in the same format used in
|
|
257
|
+
the 'images' array in the MD results format. [frame_filenames] and [results] both have
|
|
258
|
+
one element per processed frame.
|
|
248
259
|
"""
|
|
249
260
|
|
|
250
261
|
assert os.path.isfile(input_video_file), 'File {} not found'.format(input_video_file)
|
|
@@ -255,64 +266,88 @@ def run_callback_on_frames(input_video_file,
|
|
|
255
266
|
if (frames_to_process is not None) and (every_n_frames is not None):
|
|
256
267
|
raise ValueError('frames_to_process and every_n_frames are mutually exclusive')
|
|
257
268
|
|
|
258
|
-
vidcap =
|
|
259
|
-
n_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
|
|
260
|
-
frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
|
|
269
|
+
vidcap = None
|
|
261
270
|
|
|
262
|
-
|
|
263
|
-
print('Video {} contains {} frames at {} Hz'.format(input_video_file,n_frames,frame_rate))
|
|
271
|
+
try:
|
|
264
272
|
|
|
265
|
-
|
|
266
|
-
|
|
273
|
+
vidcap = cv2.VideoCapture(input_video_file)
|
|
274
|
+
n_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
|
|
275
|
+
frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
|
|
267
276
|
|
|
268
|
-
if (every_n_frames is not None) and (every_n_frames < 0):
|
|
269
|
-
every_n_seconds = abs(every_n_frames)
|
|
270
|
-
every_n_frames = int(every_n_seconds * frame_rate)
|
|
271
277
|
if verbose:
|
|
272
|
-
print('
|
|
273
|
-
every_n_seconds,every_n_frames))
|
|
278
|
+
print('Video {} contains {} frames at {} Hz'.format(input_video_file,n_frames,frame_rate))
|
|
274
279
|
|
|
275
|
-
|
|
276
|
-
|
|
280
|
+
frame_filenames = []
|
|
281
|
+
results = []
|
|
277
282
|
|
|
278
|
-
|
|
283
|
+
if (every_n_frames is not None):
|
|
279
284
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
+
if (every_n_frames < 0):
|
|
286
|
+
every_n_seconds = abs(every_n_frames)
|
|
287
|
+
every_n_frames = int(every_n_seconds * frame_rate)
|
|
288
|
+
if verbose:
|
|
289
|
+
print('Interpreting a time sampling rate of {} hz as a frame interval of {}'.format(
|
|
290
|
+
every_n_seconds,every_n_frames))
|
|
291
|
+
# 0 and 1 both mean "process every frame"
|
|
292
|
+
elif every_n_frames == 0:
|
|
293
|
+
every_n_frames = 1
|
|
294
|
+
elif every_n_frames > 0:
|
|
295
|
+
every_n_frames = int(every_n_frames)
|
|
285
296
|
|
|
286
|
-
if every_n_frames
|
|
287
|
-
|
|
288
|
-
|
|
297
|
+
# ...if every_n_frames was supplied
|
|
298
|
+
|
|
299
|
+
# frame_number = 0
|
|
300
|
+
for frame_number in range(0,n_frames):
|
|
301
|
+
|
|
302
|
+
success,image = vidcap.read()
|
|
289
303
|
|
|
290
|
-
|
|
291
|
-
|
|
304
|
+
if not success:
|
|
305
|
+
assert image is None
|
|
306
|
+
if verbose:
|
|
307
|
+
print('Read terminating at frame {} of {}'.format(frame_number,n_frames))
|
|
292
308
|
break
|
|
293
|
-
if frame_number not in frames_to_process:
|
|
294
|
-
continue
|
|
295
309
|
|
|
296
|
-
|
|
297
|
-
|
|
310
|
+
if every_n_frames is not None:
|
|
311
|
+
if (frame_number % every_n_frames) != 0:
|
|
312
|
+
continue
|
|
298
313
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
314
|
+
if frames_to_process is not None:
|
|
315
|
+
if frame_number > max(frames_to_process):
|
|
316
|
+
break
|
|
317
|
+
if frame_number not in frames_to_process:
|
|
318
|
+
continue
|
|
302
319
|
|
|
303
|
-
|
|
320
|
+
frame_filename_relative = _frame_number_to_filename(frame_number)
|
|
321
|
+
frame_filenames.append(frame_filename_relative)
|
|
304
322
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
print('Warning: found no frames in file {}'.format(input_video_file))
|
|
308
|
-
else:
|
|
309
|
-
raise Exception('Error: found no frames in file {}'.format(input_video_file))
|
|
323
|
+
# Convert from OpenCV conventions to PIL conventions
|
|
324
|
+
image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
|
310
325
|
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
326
|
+
# Run the callback
|
|
327
|
+
frame_results = frame_callback(image_np,frame_filename_relative)
|
|
328
|
+
|
|
329
|
+
results.append(frame_results)
|
|
330
|
+
|
|
331
|
+
# ...for each frame
|
|
332
|
+
|
|
333
|
+
if len(frame_filenames) == 0:
|
|
334
|
+
if allow_empty_videos:
|
|
335
|
+
print('Warning: found no frames in file {}'.format(input_video_file))
|
|
336
|
+
else:
|
|
337
|
+
raise Exception('Error: found no frames in file {}'.format(input_video_file))
|
|
338
|
+
|
|
339
|
+
if verbose:
|
|
340
|
+
print('\nProcessed {} of {} frames for {}'.format(
|
|
341
|
+
len(frame_filenames),n_frames,input_video_file))
|
|
342
|
+
|
|
343
|
+
finally:
|
|
344
|
+
|
|
345
|
+
if vidcap is not None:
|
|
346
|
+
try:
|
|
347
|
+
vidcap.release()
|
|
348
|
+
except Exception:
|
|
349
|
+
pass
|
|
314
350
|
|
|
315
|
-
vidcap.release()
|
|
316
351
|
to_return = {}
|
|
317
352
|
to_return['frame_filenames'] = frame_filenames
|
|
318
353
|
to_return['frame_rate'] = frame_rate
|
|
@@ -328,7 +363,8 @@ def run_callback_on_frames_for_folder(input_video_folder,
|
|
|
328
363
|
every_n_frames=None,
|
|
329
364
|
verbose=False,
|
|
330
365
|
allow_empty_videos=False,
|
|
331
|
-
recursive=True
|
|
366
|
+
recursive=True,
|
|
367
|
+
files_to_process_relative=None):
|
|
332
368
|
"""
|
|
333
369
|
Calls the function frame_callback(np.array,image_id) on all (or selected) frames in
|
|
334
370
|
all videos in [input_video_folder].
|
|
@@ -336,8 +372,11 @@ def run_callback_on_frames_for_folder(input_video_folder,
|
|
|
336
372
|
Args:
|
|
337
373
|
input_video_folder (str): video folder to process
|
|
338
374
|
frame_callback (function): callback to run on frames, should take an np.array and a string and
|
|
339
|
-
return a single value. callback should expect
|
|
340
|
-
|
|
375
|
+
return a single value. callback should expect two arguments: (1) a numpy array with image
|
|
376
|
+
data, in the typical PIL image orientation/channel order, and (2) a string identifier
|
|
377
|
+
for the frame, typically something like "frame0006.jpg" (even though it's not a JPEG
|
|
378
|
+
image, this is just an identifier for the frame).
|
|
379
|
+
every_n_frames (int or float, optional): sample every Nth frame starting from the first frame;
|
|
341
380
|
if this is None or 1, every frame is processed. If this is a negative value, it's
|
|
342
381
|
interpreted as a sampling rate in seconds, which is rounded to the nearest frame
|
|
343
382
|
sampling rate.
|
|
@@ -345,43 +384,76 @@ def run_callback_on_frames_for_folder(input_video_folder,
|
|
|
345
384
|
allow_empty_videos (bool, optional): Just print a warning if a video appears to have no
|
|
346
385
|
frames (by default, this is an error).
|
|
347
386
|
recursive (bool, optional): recurse into [input_video_folder]
|
|
387
|
+
files_to_process_relative (list, optional): only process specific relative paths
|
|
348
388
|
|
|
349
389
|
Returns:
|
|
350
390
|
dict: dict with keys 'video_filenames' (list of str), 'frame_rates' (list of floats),
|
|
351
391
|
'results' (list of list of dicts). 'video_filenames' will contain *relative* filenames.
|
|
392
|
+
'results' is a list (one element per video) of lists (one element per frame) of whatever the
|
|
393
|
+
callback returns, typically (but not necessarily) dicts in the MD results format.
|
|
394
|
+
|
|
395
|
+
For failed videos, the frame rate will be represented by -1, and "results"
|
|
396
|
+
will be a dict with at least the key "failure".
|
|
352
397
|
"""
|
|
353
398
|
|
|
354
399
|
to_return = {'video_filenames':[],'frame_rates':[],'results':[]}
|
|
355
400
|
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
401
|
+
if files_to_process_relative is not None:
|
|
402
|
+
input_files_full_paths = \
|
|
403
|
+
[os.path.join(input_video_folder,fn) for fn in files_to_process_relative]
|
|
404
|
+
input_files_full_paths = [fn.replace('\\','/') for fn in input_files_full_paths]
|
|
405
|
+
else:
|
|
406
|
+
# Recursively enumerate video files
|
|
407
|
+
input_files_full_paths = find_videos(input_video_folder,
|
|
408
|
+
recursive=recursive,
|
|
409
|
+
convert_slashes=True,
|
|
410
|
+
return_relative_paths=False)
|
|
411
|
+
|
|
412
|
+
print('Processing {} videos from folder {}'.format(len(input_files_full_paths),input_video_folder))
|
|
362
413
|
|
|
363
414
|
if len(input_files_full_paths) == 0:
|
|
415
|
+
print('No videos to process')
|
|
364
416
|
return to_return
|
|
365
417
|
|
|
366
418
|
# Process each video
|
|
367
419
|
|
|
368
420
|
# video_fn_abs = input_files_full_paths[0]
|
|
369
421
|
for video_fn_abs in tqdm(input_files_full_paths):
|
|
370
|
-
|
|
371
|
-
frame_callback=frame_callback,
|
|
372
|
-
every_n_frames=every_n_frames,
|
|
373
|
-
verbose=verbose,
|
|
374
|
-
frames_to_process=None,
|
|
375
|
-
allow_empty_videos=allow_empty_videos)
|
|
376
|
-
|
|
377
|
-
"""
|
|
378
|
-
dict: dict with keys 'frame_filenames' (list), 'frame_rate' (float), 'results' (list).
|
|
379
|
-
'frame_filenames' are synthetic filenames (e.g. frame000000.jpg); 'results' are
|
|
380
|
-
in the same format used in the 'images' array in the MD results format.
|
|
381
|
-
"""
|
|
422
|
+
|
|
382
423
|
video_filename_relative = os.path.relpath(video_fn_abs,input_video_folder)
|
|
383
424
|
video_filename_relative = video_filename_relative.replace('\\','/')
|
|
384
425
|
to_return['video_filenames'].append(video_filename_relative)
|
|
426
|
+
|
|
427
|
+
try:
|
|
428
|
+
|
|
429
|
+
# video_results is a dict with fields:
|
|
430
|
+
#
|
|
431
|
+
# frame_rate
|
|
432
|
+
#
|
|
433
|
+
# results (list of objects returned by the callback, typically dicts in the MD
|
|
434
|
+
# per-image format)
|
|
435
|
+
#
|
|
436
|
+
# frame_filenames (list of frame IDs, i.e. synthetic filenames)
|
|
437
|
+
video_results = run_callback_on_frames(input_video_file=video_fn_abs,
|
|
438
|
+
frame_callback=frame_callback,
|
|
439
|
+
every_n_frames=every_n_frames,
|
|
440
|
+
verbose=verbose,
|
|
441
|
+
frames_to_process=None,
|
|
442
|
+
allow_empty_videos=allow_empty_videos)
|
|
443
|
+
|
|
444
|
+
except Exception as e:
|
|
445
|
+
|
|
446
|
+
print('Warning: error processing video {}: {}'.format(
|
|
447
|
+
video_fn_abs,str(e)
|
|
448
|
+
))
|
|
449
|
+
to_return['frame_rates'].append(-1.0)
|
|
450
|
+
failure_result = {}
|
|
451
|
+
failure_result['failure'] = 'Failure processing video: {}'.format(str(e))
|
|
452
|
+
to_return['results'].append(failure_result)
|
|
453
|
+
continue
|
|
454
|
+
|
|
455
|
+
# ...try/except
|
|
456
|
+
|
|
385
457
|
to_return['frame_rates'].append(video_results['frame_rate'])
|
|
386
458
|
for r in video_results['results']:
|
|
387
459
|
assert r['file'].startswith('frame')
|
|
@@ -665,6 +737,25 @@ def video_to_frames(input_video_file,
|
|
|
665
737
|
# ...def video_to_frames(...)
|
|
666
738
|
|
|
667
739
|
|
|
740
|
+
def _video_to_frames_with_per_video_frames(args):
|
|
741
|
+
"""
|
|
742
|
+
Wrapper function to handle extracting a different list of frames for
|
|
743
|
+
each video in a multiprocessing context.
|
|
744
|
+
|
|
745
|
+
Takes a tuple of (relative_fn, frames_for_this_video, other_args),
|
|
746
|
+
where (other_args) contains the arguments that are the same for each
|
|
747
|
+
iteration.
|
|
748
|
+
"""
|
|
749
|
+
|
|
750
|
+
relative_fn, frames_for_this_video, other_args = args
|
|
751
|
+
(input_folder, output_folder_base, every_n_frames, overwrite, verbose,
|
|
752
|
+
quality, max_width, allow_empty_videos) = other_args
|
|
753
|
+
|
|
754
|
+
return _video_to_frames_for_folder(relative_fn, input_folder, output_folder_base,
|
|
755
|
+
every_n_frames, overwrite, verbose, quality, max_width,
|
|
756
|
+
frames_for_this_video, allow_empty_videos)
|
|
757
|
+
|
|
758
|
+
|
|
668
759
|
def _video_to_frames_for_folder(relative_fn,input_folder,output_folder_base,
|
|
669
760
|
every_n_frames,overwrite,verbose,quality,max_width,
|
|
670
761
|
frames_to_extract,allow_empty_videos):
|
|
@@ -708,7 +799,8 @@ def video_folder_to_frames(input_folder,
|
|
|
708
799
|
quality=None,
|
|
709
800
|
max_width=None,
|
|
710
801
|
frames_to_extract=None,
|
|
711
|
-
allow_empty_videos=False
|
|
802
|
+
allow_empty_videos=False,
|
|
803
|
+
relative_paths_to_process=None):
|
|
712
804
|
"""
|
|
713
805
|
For every video file in input_folder, creates a folder within output_folder_base, and
|
|
714
806
|
renders frames of that video to images in that folder.
|
|
@@ -721,9 +813,9 @@ def video_folder_to_frames(input_folder,
|
|
|
721
813
|
overwrite (bool, optional): whether to overwrite existing frame images
|
|
722
814
|
n_threads (int, optional): number of concurrent workers to use; set to <= 1 to disable
|
|
723
815
|
parallelism
|
|
724
|
-
every_n_frames (int, optional): sample every Nth frame starting from the first
|
|
725
|
-
if this is None or 1, every frame is extracted. If this is a negative value,
|
|
726
|
-
interpreted as a sampling rate in seconds, which is rounded to the nearest frame
|
|
816
|
+
every_n_frames (int or float, optional): sample every Nth frame starting from the first
|
|
817
|
+
frame; if this is None or 1, every frame is extracted. If this is a negative value,
|
|
818
|
+
it's interpreted as a sampling rate in seconds, which is rounded to the nearest frame
|
|
727
819
|
sampling rate. Mutually exclusive with frames_to_extract.
|
|
728
820
|
verbose (bool, optional): enable additional debug console output
|
|
729
821
|
parallelization_uses_threads (bool, optional): whether to use threads (True) or
|
|
@@ -731,14 +823,17 @@ def video_folder_to_frames(input_folder,
|
|
|
731
823
|
quality (int, optional): JPEG quality for frame output, from 0-100. Defaults
|
|
732
824
|
to the opencv default (typically 95).
|
|
733
825
|
max_width (int, optional): resize frames to be no wider than [max_width]
|
|
734
|
-
frames_to_extract (list of int, optional): extract this specific set of frames
|
|
735
|
-
each video; mutually exclusive with every_n_frames. If all values are beyond
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
826
|
+
frames_to_extract (int, list of int, or dict, optional): extract this specific set of frames
|
|
827
|
+
from each video; mutually exclusive with every_n_frames. If all values are beyond the
|
|
828
|
+
length of a video, no frames are extracted. Can also be a single int, specifying a single
|
|
829
|
+
frame number. In the special case where frames_to_extract is [], this function still
|
|
830
|
+
reads video frame rates and verifies that videos are readable, but no frames are
|
|
831
|
+
extracted. Can be a dict mapping relative paths to lists of frame numbers to extract different
|
|
832
|
+
frames from each video.
|
|
833
|
+
allow_empty_videos (bool, optional): just print a warning if a video appears to have no
|
|
741
834
|
frames (by default, this is an error).
|
|
835
|
+
relative_paths_to_process (list, optional): only process the relative paths on this
|
|
836
|
+
list
|
|
742
837
|
|
|
743
838
|
Returns:
|
|
744
839
|
tuple: a length-3 tuple containing:
|
|
@@ -748,16 +843,21 @@ def video_folder_to_frames(input_folder,
|
|
|
748
843
|
- list of video filenames
|
|
749
844
|
"""
|
|
750
845
|
|
|
751
|
-
#
|
|
752
|
-
if
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
846
|
+
# Enumerate video files if necessary
|
|
847
|
+
if relative_paths_to_process is None:
|
|
848
|
+
if verbose:
|
|
849
|
+
print('Enumerating videos in {}'.format(input_folder))
|
|
850
|
+
input_files_full_paths = find_videos(input_folder,recursive=recursive)
|
|
851
|
+
if verbose:
|
|
852
|
+
print('Found {} videos in folder {}'.format(len(input_files_full_paths),input_folder))
|
|
853
|
+
if len(input_files_full_paths) == 0:
|
|
854
|
+
return [],[],[]
|
|
855
|
+
|
|
856
|
+
input_files_relative_paths = [os.path.relpath(s,input_folder) for s in input_files_full_paths]
|
|
857
|
+
else:
|
|
858
|
+
input_files_relative_paths = relative_paths_to_process
|
|
859
|
+
input_files_full_paths = [os.path.join(input_folder,fn) for fn in input_files_relative_paths]
|
|
759
860
|
|
|
760
|
-
input_files_relative_paths = [os.path.relpath(s,input_folder) for s in input_files_full_paths]
|
|
761
861
|
input_files_relative_paths = [s.replace('\\','/') for s in input_files_relative_paths]
|
|
762
862
|
|
|
763
863
|
os.makedirs(output_folder_base,exist_ok=True)
|
|
@@ -766,11 +866,18 @@ def video_folder_to_frames(input_folder,
|
|
|
766
866
|
fs_by_video = []
|
|
767
867
|
|
|
768
868
|
if n_threads == 1:
|
|
869
|
+
|
|
769
870
|
# For each video
|
|
770
871
|
#
|
|
771
872
|
# input_fn_relative = input_files_relative_paths[0]
|
|
772
873
|
for input_fn_relative in tqdm(input_files_relative_paths):
|
|
773
874
|
|
|
875
|
+
# If frames_to_extract is a dict, get the specific frames for this video
|
|
876
|
+
if isinstance(frames_to_extract, dict):
|
|
877
|
+
frames_for_this_video = frames_to_extract.get(input_fn_relative, [])
|
|
878
|
+
else:
|
|
879
|
+
frames_for_this_video = frames_to_extract
|
|
880
|
+
|
|
774
881
|
frame_filenames,fs = \
|
|
775
882
|
_video_to_frames_for_folder(input_fn_relative,
|
|
776
883
|
input_folder,
|
|
@@ -780,40 +887,69 @@ def video_folder_to_frames(input_folder,
|
|
|
780
887
|
verbose,
|
|
781
888
|
quality,
|
|
782
889
|
max_width,
|
|
783
|
-
|
|
890
|
+
frames_for_this_video,
|
|
784
891
|
allow_empty_videos)
|
|
785
892
|
frame_filenames_by_video.append(frame_filenames)
|
|
786
893
|
fs_by_video.append(fs)
|
|
894
|
+
|
|
787
895
|
else:
|
|
896
|
+
|
|
788
897
|
pool = None
|
|
789
898
|
results = None
|
|
790
899
|
try:
|
|
900
|
+
|
|
791
901
|
if parallelization_uses_threads:
|
|
792
902
|
print('Starting a worker pool with {} threads'.format(n_threads))
|
|
793
903
|
pool = ThreadPool(n_threads)
|
|
794
904
|
else:
|
|
795
905
|
print('Starting a worker pool with {} processes'.format(n_threads))
|
|
796
906
|
pool = Pool(n_threads)
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
907
|
+
|
|
908
|
+
if isinstance(frames_to_extract, dict):
|
|
909
|
+
|
|
910
|
+
# For the dict case, we need to extract different frames from each video.
|
|
911
|
+
|
|
912
|
+
# These arguments are the same for every iteration
|
|
913
|
+
other_args = (input_folder, output_folder_base, every_n_frames, overwrite,
|
|
914
|
+
verbose, quality, max_width, allow_empty_videos)
|
|
915
|
+
|
|
916
|
+
# The filename and list of frames to extract vary with each iteration
|
|
917
|
+
args_for_pool = [(relative_fn, frames_to_extract.get(relative_fn, []), other_args)
|
|
918
|
+
for relative_fn in input_files_relative_paths]
|
|
919
|
+
|
|
920
|
+
results = list(tqdm(pool.imap(_video_to_frames_with_per_video_frames, args_for_pool),
|
|
921
|
+
total=len(args_for_pool)))
|
|
922
|
+
|
|
923
|
+
else:
|
|
924
|
+
|
|
925
|
+
process_video_with_options = partial(_video_to_frames_for_folder,
|
|
926
|
+
input_folder=input_folder,
|
|
927
|
+
output_folder_base=output_folder_base,
|
|
928
|
+
every_n_frames=every_n_frames,
|
|
929
|
+
overwrite=overwrite,
|
|
930
|
+
verbose=verbose,
|
|
931
|
+
quality=quality,
|
|
932
|
+
max_width=max_width,
|
|
933
|
+
frames_to_extract=frames_to_extract,
|
|
934
|
+
allow_empty_videos=allow_empty_videos)
|
|
935
|
+
results = list(tqdm(pool.imap(process_video_with_options, input_files_relative_paths),
|
|
936
|
+
total=len(input_files_relative_paths)))
|
|
937
|
+
|
|
938
|
+
# ...if we need to pass different frames for each video
|
|
939
|
+
|
|
810
940
|
finally:
|
|
941
|
+
|
|
811
942
|
pool.close()
|
|
812
943
|
pool.join()
|
|
813
|
-
print(
|
|
944
|
+
print('Pool closed and joined for video processing')
|
|
945
|
+
|
|
946
|
+
# ...try/finally
|
|
947
|
+
|
|
814
948
|
frame_filenames_by_video = [x[0] for x in results]
|
|
815
949
|
fs_by_video = [x[1] for x in results]
|
|
816
950
|
|
|
951
|
+
# ...if we're working on a single thread vs. multiple workers
|
|
952
|
+
|
|
817
953
|
return frame_filenames_by_video,fs_by_video,input_files_full_paths
|
|
818
954
|
|
|
819
955
|
# ...def video_folder_to_frames(...)
|
|
@@ -901,8 +1037,7 @@ def frame_results_to_video_results(input_file,
|
|
|
901
1037
|
options.non_video_behavior))
|
|
902
1038
|
|
|
903
1039
|
# Attach video-specific fields to the output, specifically attach the frame
|
|
904
|
-
# number to both the video and each detection.
|
|
905
|
-
# canonical detection will end up in the video-level output file.
|
|
1040
|
+
# number to both the video and each detection.
|
|
906
1041
|
frame_number = _filename_to_frame_number(fn)
|
|
907
1042
|
im['frame_number'] = frame_number
|
|
908
1043
|
for detection in im['detections']:
|
|
@@ -505,7 +505,6 @@ def _smooth_classifications_for_list_of_detections(detections,
|
|
|
505
505
|
|
|
506
506
|
# If we're doing taxonomic processing, at this stage, don't turn children
|
|
507
507
|
# into parents; we'll likely turn parents into children in the next stage.
|
|
508
|
-
|
|
509
508
|
if process_taxonomic_rules:
|
|
510
509
|
|
|
511
510
|
most_common_category_description = \
|
|
@@ -612,10 +611,22 @@ def _smooth_classifications_for_list_of_detections(detections,
|
|
|
612
611
|
if len(category_description_candidate_child) == 0:
|
|
613
612
|
continue
|
|
614
613
|
|
|
614
|
+
# This handles a case that doesn't come up with "pure" SpeciesNet results;
|
|
615
|
+
# if two categories have different IDs but the same "clean" description, this
|
|
616
|
+
# means they're different common names for the same species, which we use
|
|
617
|
+
# for things like "white-tailed deer buck" and "white-tailed deer fawn".
|
|
618
|
+
#
|
|
619
|
+
# Currently we don't support smoothing those predictions, because it's not a
|
|
620
|
+
# parent/child relationship.
|
|
621
|
+
if category_description_candidate_child == \
|
|
622
|
+
category_description_this_classification:
|
|
623
|
+
continue
|
|
624
|
+
|
|
615
625
|
# As long as we're using "clean" descriptions, parent/child taxonomic
|
|
616
626
|
# relationships are defined by a substring relationship
|
|
617
627
|
is_child = category_description_this_classification in \
|
|
618
628
|
category_description_candidate_child
|
|
629
|
+
|
|
619
630
|
if not is_child:
|
|
620
631
|
continue
|
|
621
632
|
|
|
@@ -1851,12 +1851,15 @@ def find_equivalent_threshold(results_a,
|
|
|
1851
1851
|
|
|
1852
1852
|
return confidence_values, images_above_threshold
|
|
1853
1853
|
|
|
1854
|
+
# ...def _get_confidence_values_for_results(...)
|
|
1855
|
+
|
|
1854
1856
|
confidence_values_a,images_above_threshold_a = \
|
|
1855
1857
|
_get_confidence_values_for_results(results_a['images'],
|
|
1856
1858
|
category_ids_to_consider_a,
|
|
1857
1859
|
threshold_a)
|
|
1858
1860
|
|
|
1859
|
-
#
|
|
1861
|
+
# Not necessary, but facilitates debugging
|
|
1862
|
+
confidence_values_a = sorted(confidence_values_a)
|
|
1860
1863
|
|
|
1861
1864
|
if verbose:
|
|
1862
1865
|
print('For result set A, considering {} of {} images'.format(
|
|
@@ -1869,13 +1872,29 @@ def find_equivalent_threshold(results_a,
|
|
|
1869
1872
|
if verbose:
|
|
1870
1873
|
print('For result set B, considering {} of {} images'.format(
|
|
1871
1874
|
len(confidence_values_b),len(results_b['images'])))
|
|
1875
|
+
|
|
1872
1876
|
confidence_values_b = sorted(confidence_values_b)
|
|
1873
1877
|
|
|
1878
|
+
# Find the threshold that produces the same fraction of detections for results_b
|
|
1874
1879
|
target_detection_fraction = len(confidence_values_a_above_threshold) / len(confidence_values_a)
|
|
1875
1880
|
|
|
1876
|
-
|
|
1881
|
+
# How many detections do we want in results_b?
|
|
1882
|
+
target_number_of_detections = round(len(confidence_values_b) * target_detection_fraction)
|
|
1883
|
+
|
|
1884
|
+
# How many non-detections do we want in results_b?
|
|
1885
|
+
target_number_of_non_detections = len(confidence_values_b) - target_number_of_detections
|
|
1886
|
+
detection_cutoff_index = max(target_number_of_non_detections,0)
|
|
1877
1887
|
threshold_b = confidence_values_b[detection_cutoff_index]
|
|
1878
1888
|
|
|
1889
|
+
confidence_values_b_above_threshold = [c for c in confidence_values_b if c >= threshold_b]
|
|
1890
|
+
confidence_values_b_above_reference_threshold = [c for c in confidence_values_b if c >= threshold_a]
|
|
1891
|
+
|
|
1892
|
+
# Special case: if the number of detections above the selected threshold is the same as the
|
|
1893
|
+
# number above the reference threshold, use the reference threshold
|
|
1894
|
+
if len(confidence_values_b_above_threshold) == len(confidence_values_b_above_reference_threshold):
|
|
1895
|
+
print('Detection count for reference threshold matches target threshold')
|
|
1896
|
+
threshold_b = threshold_a
|
|
1897
|
+
|
|
1879
1898
|
if verbose:
|
|
1880
1899
|
print('{} confidence values above threshold (A)'.format(
|
|
1881
1900
|
len(confidence_values_a_above_threshold)))
|