megadetector 10.0.2__py3-none-any.whl → 10.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (30)
  1. megadetector/data_management/animl_to_md.py +158 -0
  2. megadetector/data_management/zamba_to_md.py +188 -0
  3. megadetector/detection/process_video.py +165 -946
  4. megadetector/detection/pytorch_detector.py +575 -276
  5. megadetector/detection/run_detector_batch.py +629 -202
  6. megadetector/detection/run_md_and_speciesnet.py +1319 -0
  7. megadetector/detection/video_utils.py +243 -107
  8. megadetector/postprocessing/classification_postprocessing.py +12 -1
  9. megadetector/postprocessing/combine_batch_outputs.py +2 -0
  10. megadetector/postprocessing/compare_batch_results.py +21 -2
  11. megadetector/postprocessing/merge_detections.py +16 -12
  12. megadetector/postprocessing/separate_detections_into_folders.py +1 -1
  13. megadetector/postprocessing/subset_json_detector_output.py +1 -3
  14. megadetector/postprocessing/validate_batch_results.py +25 -2
  15. megadetector/tests/__init__.py +0 -0
  16. megadetector/tests/test_nms_synthetic.py +335 -0
  17. megadetector/utils/ct_utils.py +69 -5
  18. megadetector/utils/extract_frames_from_video.py +303 -0
  19. megadetector/utils/md_tests.py +583 -524
  20. megadetector/utils/path_utils.py +4 -15
  21. megadetector/utils/wi_utils.py +20 -4
  22. megadetector/visualization/visualization_utils.py +1 -1
  23. megadetector/visualization/visualize_db.py +8 -22
  24. megadetector/visualization/visualize_detector_output.py +7 -5
  25. megadetector/visualization/visualize_video_output.py +607 -0
  26. {megadetector-10.0.2.dist-info → megadetector-10.0.4.dist-info}/METADATA +134 -135
  27. {megadetector-10.0.2.dist-info → megadetector-10.0.4.dist-info}/RECORD +30 -23
  28. {megadetector-10.0.2.dist-info → megadetector-10.0.4.dist-info}/licenses/LICENSE +0 -0
  29. {megadetector-10.0.2.dist-info → megadetector-10.0.4.dist-info}/top_level.txt +0 -0
  30. {megadetector-10.0.2.dist-info → megadetector-10.0.4.dist-info}/WHEEL +0 -0
@@ -27,10 +27,11 @@ from megadetector.visualization import visualization_utils as vis_utils
27
27
 
28
28
  default_fourcc = 'h264'
29
29
 
30
+ video_progress_bar_description = 'Processing video'
30
31
 
31
32
  #%% Path utilities
32
33
 
33
- VIDEO_EXTENSIONS = ('.mp4','.avi','.mpeg','.mpg','.mov','.mkv')
34
+ VIDEO_EXTENSIONS = ('.mp4','.avi','.mpeg','.mpg','.mov','.mkv','.flv')
34
35
 
35
36
  def is_video_file(s,video_extensions=VIDEO_EXTENSIONS):
36
37
  """
@@ -200,11 +201,17 @@ def _add_frame_numbers_to_results(results):
200
201
  """
201
202
  Given the 'images' list from a set of MD results that was generated on video frames,
202
203
  add a 'frame_number' field to each image, and return the list, sorted by frame number.
204
+ Also modifies "results" in place.
203
205
 
204
206
  Args:
205
207
  results (list): list of image dicts
206
208
  """
207
209
 
210
+ # This indicates that this was a failure for a single video
211
+ if isinstance(results,dict):
212
+ assert 'failure' in results
213
+ return results
214
+
208
215
  # Add video-specific fields to the results
209
216
  for im in results:
210
217
  fn = im['file']
@@ -228,8 +235,11 @@ def run_callback_on_frames(input_video_file,
228
235
  Args:
229
236
  input_video_file (str): video file to process
230
237
  frame_callback (function): callback to run on frames, should take an np.array and a string and
231
- return a single value. callback should expect PIL-formatted (RGB) images.
232
- every_n_frames (float, optional): sample every Nth frame starting from the first frame;
238
+ return a single value. callback should expect two arguments: (1) a numpy array with image
239
+ data, in the typical PIL image orientation/channel order, and (2) a string identifier
240
+ for the frame, typically something like "frame0006.jpg" (even though it's not a JPEG
241
+ image, this is just an identifier for the frame).
242
+ every_n_frames (int or float, optional): sample every Nth frame starting from the first frame;
233
243
  if this is None or 1, every frame is processed. If this is a negative value, it's
234
244
  interpreted as a sampling rate in seconds, which is rounded to the nearest frame sampling
235
245
  rate. Mutually exclusive with frames_to_process.
@@ -243,8 +253,10 @@ def run_callback_on_frames(input_video_file,
243
253
 
244
254
  Returns:
245
255
  dict: dict with keys 'frame_filenames' (list), 'frame_rate' (float), 'results' (list).
246
- 'frame_filenames' are synthetic filenames (e.g. frame000000.jpg); 'results' are
247
- in the same format used in the 'images' array in the MD results format.
256
+ 'frame_filenames' are synthetic filenames (e.g. frame000000.jpg). Elements in
257
+ 'results' are whatever is returned by the callback, typically dicts in the same format used in
258
+ the 'images' array in the MD results format. [frame_filenames] and [results] both have
259
+ one element per processed frame.
248
260
  """
249
261
 
250
262
  assert os.path.isfile(input_video_file), 'File {} not found'.format(input_video_file)
@@ -255,64 +267,88 @@ def run_callback_on_frames(input_video_file,
255
267
  if (frames_to_process is not None) and (every_n_frames is not None):
256
268
  raise ValueError('frames_to_process and every_n_frames are mutually exclusive')
257
269
 
258
- vidcap = cv2.VideoCapture(input_video_file)
259
- n_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
260
- frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
270
+ vidcap = None
261
271
 
262
- if verbose:
263
- print('Video {} contains {} frames at {} Hz'.format(input_video_file,n_frames,frame_rate))
272
+ try:
264
273
 
265
- frame_filenames = []
266
- results = []
274
+ vidcap = cv2.VideoCapture(input_video_file)
275
+ n_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
276
+ frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
267
277
 
268
- if (every_n_frames is not None) and (every_n_frames < 0):
269
- every_n_seconds = abs(every_n_frames)
270
- every_n_frames = int(every_n_seconds * frame_rate)
271
278
  if verbose:
272
- print('Interpreting a time sampling rate of {} hz as a frame interval of {}'.format(
273
- every_n_seconds,every_n_frames))
279
+ print('Video {} contains {} frames at {} Hz'.format(input_video_file,n_frames,frame_rate))
274
280
 
275
- # frame_number = 0
276
- for frame_number in range(0,n_frames):
281
+ frame_filenames = []
282
+ results = []
277
283
 
278
- success,image = vidcap.read()
284
+ if (every_n_frames is not None):
279
285
 
280
- if not success:
281
- assert image is None
282
- if verbose:
283
- print('Read terminating at frame {} of {}'.format(frame_number,n_frames))
284
- break
286
+ if (every_n_frames < 0):
287
+ every_n_seconds = abs(every_n_frames)
288
+ every_n_frames = int(every_n_seconds * frame_rate)
289
+ if verbose:
290
+ print('Interpreting a time sampling rate of {} hz as a frame interval of {}'.format(
291
+ every_n_seconds,every_n_frames))
292
+ # 0 and 1 both mean "process every frame"
293
+ elif every_n_frames == 0:
294
+ every_n_frames = 1
295
+ elif every_n_frames > 0:
296
+ every_n_frames = int(every_n_frames)
285
297
 
286
- if every_n_frames is not None:
287
- if frame_number % every_n_frames != 0:
288
- continue
298
+ # ...if every_n_frames was supplied
299
+
300
+ # frame_number = 0
301
+ for frame_number in range(0,n_frames):
302
+
303
+ success,image = vidcap.read()
289
304
 
290
- if frames_to_process is not None:
291
- if frame_number > max(frames_to_process):
305
+ if not success:
306
+ assert image is None
307
+ if verbose:
308
+ print('Read terminating at frame {} of {}'.format(frame_number,n_frames))
292
309
  break
293
- if frame_number not in frames_to_process:
294
- continue
295
310
 
296
- frame_filename_relative = _frame_number_to_filename(frame_number)
297
- frame_filenames.append(frame_filename_relative)
311
+ if every_n_frames is not None:
312
+ if (frame_number % every_n_frames) != 0:
313
+ continue
298
314
 
299
- image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
300
- frame_results = frame_callback(image_np,frame_filename_relative)
301
- results.append(frame_results)
315
+ if frames_to_process is not None:
316
+ if frame_number > max(frames_to_process):
317
+ break
318
+ if frame_number not in frames_to_process:
319
+ continue
302
320
 
303
- # ...for each frame
321
+ frame_filename_relative = _frame_number_to_filename(frame_number)
322
+ frame_filenames.append(frame_filename_relative)
304
323
 
305
- if len(frame_filenames) == 0:
306
- if allow_empty_videos:
307
- print('Warning: found no frames in file {}'.format(input_video_file))
308
- else:
309
- raise Exception('Error: found no frames in file {}'.format(input_video_file))
324
+ # Convert from OpenCV conventions to PIL conventions
325
+ image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
310
326
 
311
- if verbose:
312
- print('\nProcessed {} of {} frames for {}'.format(
313
- len(frame_filenames),n_frames,input_video_file))
327
+ # Run the callback
328
+ frame_results = frame_callback(image_np,frame_filename_relative)
329
+
330
+ results.append(frame_results)
331
+
332
+ # ...for each frame
333
+
334
+ if len(frame_filenames) == 0:
335
+ if allow_empty_videos:
336
+ print('Warning: found no frames in file {}'.format(input_video_file))
337
+ else:
338
+ raise Exception('Error: found no frames in file {}'.format(input_video_file))
339
+
340
+ if verbose:
341
+ print('\nProcessed {} of {} frames for {}'.format(
342
+ len(frame_filenames),n_frames,input_video_file))
343
+
344
+ finally:
345
+
346
+ if vidcap is not None:
347
+ try:
348
+ vidcap.release()
349
+ except Exception:
350
+ pass
314
351
 
315
- vidcap.release()
316
352
  to_return = {}
317
353
  to_return['frame_filenames'] = frame_filenames
318
354
  to_return['frame_rate'] = frame_rate
@@ -328,7 +364,8 @@ def run_callback_on_frames_for_folder(input_video_folder,
328
364
  every_n_frames=None,
329
365
  verbose=False,
330
366
  allow_empty_videos=False,
331
- recursive=True):
367
+ recursive=True,
368
+ files_to_process_relative=None):
332
369
  """
333
370
  Calls the function frame_callback(np.array,image_id) on all (or selected) frames in
334
371
  all videos in [input_video_folder].
@@ -336,8 +373,11 @@ def run_callback_on_frames_for_folder(input_video_folder,
336
373
  Args:
337
374
  input_video_folder (str): video folder to process
338
375
  frame_callback (function): callback to run on frames, should take an np.array and a string and
339
- return a single value. callback should expect PIL-formatted (RGB) images.
340
- every_n_frames (int, optional): sample every Nth frame starting from the first frame;
376
+ return a single value. callback should expect two arguments: (1) a numpy array with image
377
+ data, in the typical PIL image orientation/channel order, and (2) a string identifier
378
+ for the frame, typically something like "frame0006.jpg" (even though it's not a JPEG
379
+ image, this is just an identifier for the frame).
380
+ every_n_frames (int or float, optional): sample every Nth frame starting from the first frame;
341
381
  if this is None or 1, every frame is processed. If this is a negative value, it's
342
382
  interpreted as a sampling rate in seconds, which is rounded to the nearest frame
343
383
  sampling rate.
@@ -345,43 +385,76 @@ def run_callback_on_frames_for_folder(input_video_folder,
345
385
  allow_empty_videos (bool, optional): Just print a warning if a video appears to have no
346
386
  frames (by default, this is an error).
347
387
  recursive (bool, optional): recurse into [input_video_folder]
388
+ files_to_process_relative (list, optional): only process specific relative paths
348
389
 
349
390
  Returns:
350
391
  dict: dict with keys 'video_filenames' (list of str), 'frame_rates' (list of floats),
351
392
  'results' (list of list of dicts). 'video_filenames' will contain *relative* filenames.
393
+ 'results' is a list (one element per video) of lists (one element per frame) of whatever the
394
+ callback returns, typically (but not necessarily) dicts in the MD results format.
395
+
396
+ For failed videos, the frame rate will be represented by -1, and "results"
397
+ will be a dict with at least the key "failure".
352
398
  """
353
399
 
354
400
  to_return = {'video_filenames':[],'frame_rates':[],'results':[]}
355
401
 
356
- # Recursively enumerate video files
357
- input_files_full_paths = find_videos(input_video_folder,
358
- recursive=recursive,
359
- convert_slashes=True,
360
- return_relative_paths=False)
361
- print('Found {} videos in folder {}'.format(len(input_files_full_paths),input_video_folder))
402
+ if files_to_process_relative is not None:
403
+ input_files_full_paths = \
404
+ [os.path.join(input_video_folder,fn) for fn in files_to_process_relative]
405
+ input_files_full_paths = [fn.replace('\\','/') for fn in input_files_full_paths]
406
+ else:
407
+ # Recursively enumerate video files
408
+ input_files_full_paths = find_videos(input_video_folder,
409
+ recursive=recursive,
410
+ convert_slashes=True,
411
+ return_relative_paths=False)
412
+
413
+ print('Processing {} videos from folder {}'.format(len(input_files_full_paths),input_video_folder))
362
414
 
363
415
  if len(input_files_full_paths) == 0:
416
+ print('No videos to process')
364
417
  return to_return
365
418
 
366
419
  # Process each video
367
420
 
368
421
  # video_fn_abs = input_files_full_paths[0]
369
- for video_fn_abs in tqdm(input_files_full_paths):
370
- video_results = run_callback_on_frames(input_video_file=video_fn_abs,
371
- frame_callback=frame_callback,
372
- every_n_frames=every_n_frames,
373
- verbose=verbose,
374
- frames_to_process=None,
375
- allow_empty_videos=allow_empty_videos)
376
-
377
- """
378
- dict: dict with keys 'frame_filenames' (list), 'frame_rate' (float), 'results' (list).
379
- 'frame_filenames' are synthetic filenames (e.g. frame000000.jpg); 'results' are
380
- in the same format used in the 'images' array in the MD results format.
381
- """
422
+ for video_fn_abs in tqdm(input_files_full_paths,desc=video_progress_bar_description):
423
+
382
424
  video_filename_relative = os.path.relpath(video_fn_abs,input_video_folder)
383
425
  video_filename_relative = video_filename_relative.replace('\\','/')
384
426
  to_return['video_filenames'].append(video_filename_relative)
427
+
428
+ try:
429
+
430
+ # video_results is a dict with fields:
431
+ #
432
+ # frame_rate
433
+ #
434
+ # results (list of objects returned by the callback, typically dicts in the MD
435
+ # per-image format)
436
+ #
437
+ # frame_filenames (list of frame IDs, i.e. synthetic filenames)
438
+ video_results = run_callback_on_frames(input_video_file=video_fn_abs,
439
+ frame_callback=frame_callback,
440
+ every_n_frames=every_n_frames,
441
+ verbose=verbose,
442
+ frames_to_process=None,
443
+ allow_empty_videos=allow_empty_videos)
444
+
445
+ except Exception as e:
446
+
447
+ print('Warning: error processing video {}: {}'.format(
448
+ video_fn_abs,str(e)
449
+ ))
450
+ to_return['frame_rates'].append(-1.0)
451
+ failure_result = {}
452
+ failure_result['failure'] = 'Failure processing video: {}'.format(str(e))
453
+ to_return['results'].append(failure_result)
454
+ continue
455
+
456
+ # ...try/except
457
+
385
458
  to_return['frame_rates'].append(video_results['frame_rate'])
386
459
  for r in video_results['results']:
387
460
  assert r['file'].startswith('frame')
@@ -665,6 +738,25 @@ def video_to_frames(input_video_file,
665
738
  # ...def video_to_frames(...)
666
739
 
667
740
 
741
+ def _video_to_frames_with_per_video_frames(args):
742
+ """
743
+ Wrapper function to handle extracting a different list of frames for
744
+ each video in a multiprocessing context.
745
+
746
+ Takes a tuple of (relative_fn, frames_for_this_video, other_args),
747
+ where (other_args) contains the arguments that are the same for each
748
+ iteration.
749
+ """
750
+
751
+ relative_fn, frames_for_this_video, other_args = args
752
+ (input_folder, output_folder_base, every_n_frames, overwrite, verbose,
753
+ quality, max_width, allow_empty_videos) = other_args
754
+
755
+ return _video_to_frames_for_folder(relative_fn, input_folder, output_folder_base,
756
+ every_n_frames, overwrite, verbose, quality, max_width,
757
+ frames_for_this_video, allow_empty_videos)
758
+
759
+
668
760
  def _video_to_frames_for_folder(relative_fn,input_folder,output_folder_base,
669
761
  every_n_frames,overwrite,verbose,quality,max_width,
670
762
  frames_to_extract,allow_empty_videos):
@@ -708,7 +800,8 @@ def video_folder_to_frames(input_folder,
708
800
  quality=None,
709
801
  max_width=None,
710
802
  frames_to_extract=None,
711
- allow_empty_videos=False):
803
+ allow_empty_videos=False,
804
+ relative_paths_to_process=None):
712
805
  """
713
806
  For every video file in input_folder, creates a folder within output_folder_base, and
714
807
  renders the frames of that video to images in that folder.
@@ -721,9 +814,9 @@ def video_folder_to_frames(input_folder,
721
814
  overwrite (bool, optional): whether to overwrite existing frame images
722
815
  n_threads (int, optional): number of concurrent workers to use; set to <= 1 to disable
723
816
  parallelism
724
- every_n_frames (int, optional): sample every Nth frame starting from the first frame;
725
- if this is None or 1, every frame is extracted. If this is a negative value, it's
726
- interpreted as a sampling rate in seconds, which is rounded to the nearest frame
817
+ every_n_frames (int or float, optional): sample every Nth frame starting from the first
818
+ frame; if this is None or 1, every frame is extracted. If this is a negative value,
819
+ it's interpreted as a sampling rate in seconds, which is rounded to the nearest frame
727
820
  sampling rate. Mutually exclusive with frames_to_extract.
728
821
  verbose (bool, optional): enable additional debug console output
729
822
  parallelization_uses_threads (bool, optional): whether to use threads (True) or
@@ -731,14 +824,17 @@ def video_folder_to_frames(input_folder,
731
824
  quality (int, optional): JPEG quality for frame output, from 0-100. Defaults
732
825
  to the opencv default (typically 95).
733
826
  max_width (int, optional): resize frames to be no wider than [max_width]
734
- frames_to_extract (list of int, optional): extract this specific set of frames from
735
- each video; mutually exclusive with every_n_frames. If all values are beyond
736
- the length of a video, no frames are extracted. Can also be a single int,
737
- specifying a single frame number. In the special case where frames_to_extract
738
- is [], this function still reads video frame rates and verifies that videos
739
- are readable, but no frames are extracted.
740
- allow_empty_videos (bool, optional): Just print a warning if a video appears to have no
827
+ frames_to_extract (int, list of int, or dict, optional): extract this specific set of frames
828
+ from each video; mutually exclusive with every_n_frames. If all values are beyond the
829
+ length of a video, no frames are extracted. Can also be a single int, specifying a single
830
+ frame number. In the special case where frames_to_extract is [], this function still
831
+ reads video frame rates and verifies that videos are readable, but no frames are
832
+ extracted. Can be a dict mapping relative paths to lists of frame numbers to extract different
833
+ frames from each video.
834
+ allow_empty_videos (bool, optional): just print a warning if a video appears to have no
741
835
  frames (by default, this is an error).
836
+ relative_paths_to_process (list, optional): only process the relative paths on this
837
+ list
742
838
 
743
839
  Returns:
744
840
  tuple: a length-3 tuple containing:
@@ -748,16 +844,21 @@ def video_folder_to_frames(input_folder,
748
844
  - list of video filenames
749
845
  """
750
846
 
751
- # Recursively enumerate video files
752
- if verbose:
753
- print('Enumerating videos in {}'.format(input_folder))
754
- input_files_full_paths = find_videos(input_folder,recursive=recursive)
755
- if verbose:
756
- print('Found {} videos in folder {}'.format(len(input_files_full_paths),input_folder))
757
- if len(input_files_full_paths) == 0:
758
- return [],[],[]
847
+ # Enumerate video files if necessary
848
+ if relative_paths_to_process is None:
849
+ if verbose:
850
+ print('Enumerating videos in {}'.format(input_folder))
851
+ input_files_full_paths = find_videos(input_folder,recursive=recursive)
852
+ if verbose:
853
+ print('Found {} videos in folder {}'.format(len(input_files_full_paths),input_folder))
854
+ if len(input_files_full_paths) == 0:
855
+ return [],[],[]
856
+
857
+ input_files_relative_paths = [os.path.relpath(s,input_folder) for s in input_files_full_paths]
858
+ else:
859
+ input_files_relative_paths = relative_paths_to_process
860
+ input_files_full_paths = [os.path.join(input_folder,fn) for fn in input_files_relative_paths]
759
861
 
760
- input_files_relative_paths = [os.path.relpath(s,input_folder) for s in input_files_full_paths]
761
862
  input_files_relative_paths = [s.replace('\\','/') for s in input_files_relative_paths]
762
863
 
763
864
  os.makedirs(output_folder_base,exist_ok=True)
@@ -766,10 +867,17 @@ def video_folder_to_frames(input_folder,
766
867
  fs_by_video = []
767
868
 
768
869
  if n_threads == 1:
870
+
769
871
  # For each video
770
872
  #
771
873
  # input_fn_relative = input_files_relative_paths[0]
772
- for input_fn_relative in tqdm(input_files_relative_paths):
874
+ for input_fn_relative in tqdm(input_files_relative_paths,desc='Video to frames'):
875
+
876
+ # If frames_to_extract is a dict, get the specific frames for this video
877
+ if isinstance(frames_to_extract, dict):
878
+ frames_for_this_video = frames_to_extract.get(input_fn_relative, [])
879
+ else:
880
+ frames_for_this_video = frames_to_extract
773
881
 
774
882
  frame_filenames,fs = \
775
883
  _video_to_frames_for_folder(input_fn_relative,
@@ -780,40 +888,69 @@ def video_folder_to_frames(input_folder,
780
888
  verbose,
781
889
  quality,
782
890
  max_width,
783
- frames_to_extract,
891
+ frames_for_this_video,
784
892
  allow_empty_videos)
785
893
  frame_filenames_by_video.append(frame_filenames)
786
894
  fs_by_video.append(fs)
895
+
787
896
  else:
897
+
788
898
  pool = None
789
899
  results = None
790
900
  try:
901
+
791
902
  if parallelization_uses_threads:
792
903
  print('Starting a worker pool with {} threads'.format(n_threads))
793
904
  pool = ThreadPool(n_threads)
794
905
  else:
795
906
  print('Starting a worker pool with {} processes'.format(n_threads))
796
907
  pool = Pool(n_threads)
797
- process_video_with_options = partial(_video_to_frames_for_folder,
798
- input_folder=input_folder,
799
- output_folder_base=output_folder_base,
800
- every_n_frames=every_n_frames,
801
- overwrite=overwrite,
802
- verbose=verbose,
803
- quality=quality,
804
- max_width=max_width,
805
- frames_to_extract=frames_to_extract,
806
- allow_empty_videos=allow_empty_videos)
807
- results = list(tqdm(pool.imap(
808
- partial(process_video_with_options),input_files_relative_paths),
809
- total=len(input_files_relative_paths)))
908
+
909
+ if isinstance(frames_to_extract, dict):
910
+
911
+ # For the dict case, we need to extract different frames from each video.
912
+
913
+ # These arguments are the same for every iteration
914
+ other_args = (input_folder, output_folder_base, every_n_frames, overwrite,
915
+ verbose, quality, max_width, allow_empty_videos)
916
+
917
+ # The filename and list of frames to extract vary with each iteration
918
+ args_for_pool = [(relative_fn, frames_to_extract.get(relative_fn, []), other_args)
919
+ for relative_fn in input_files_relative_paths]
920
+
921
+ results = list(tqdm(pool.imap(_video_to_frames_with_per_video_frames, args_for_pool),
922
+ total=len(args_for_pool),desc='Video to frames'))
923
+
924
+ else:
925
+
926
+ process_video_with_options = partial(_video_to_frames_for_folder,
927
+ input_folder=input_folder,
928
+ output_folder_base=output_folder_base,
929
+ every_n_frames=every_n_frames,
930
+ overwrite=overwrite,
931
+ verbose=verbose,
932
+ quality=quality,
933
+ max_width=max_width,
934
+ frames_to_extract=frames_to_extract,
935
+ allow_empty_videos=allow_empty_videos)
936
+ results = list(tqdm(pool.imap(process_video_with_options, input_files_relative_paths),
937
+ total=len(input_files_relative_paths),desc='Video to frames'))
938
+
939
+ # ...if we need to pass different frames for each video
940
+
810
941
  finally:
942
+
811
943
  pool.close()
812
944
  pool.join()
813
- print("Pool closed and joined for video processing")
945
+ print('Pool closed and joined for video processing')
946
+
947
+ # ...try/finally
948
+
814
949
  frame_filenames_by_video = [x[0] for x in results]
815
950
  fs_by_video = [x[1] for x in results]
816
951
 
952
+ # ...if we're working on a single thread vs. multiple workers
953
+
817
954
  return frame_filenames_by_video,fs_by_video,input_files_full_paths
818
955
 
819
956
  # ...def video_folder_to_frames(...)
@@ -901,8 +1038,7 @@ def frame_results_to_video_results(input_file,
901
1038
  options.non_video_behavior))
902
1039
 
903
1040
  # Attach video-specific fields to the output, specifically attach the frame
904
- # number to both the video and each detection. Only the frame number for the
905
- # canonical detection will end up in the video-level output file.
1041
+ # number to both the video and each detection.
906
1042
  frame_number = _filename_to_frame_number(fn)
907
1043
  im['frame_number'] = frame_number
908
1044
  for detection in im['detections']:
@@ -505,7 +505,6 @@ def _smooth_classifications_for_list_of_detections(detections,
505
505
 
506
506
  # If we're doing taxonomic processing, at this stage, don't turn children
507
507
  # into parents; we'll likely turn parents into children in the next stage.
508
-
509
508
  if process_taxonomic_rules:
510
509
 
511
510
  most_common_category_description = \
@@ -612,10 +611,22 @@ def _smooth_classifications_for_list_of_detections(detections,
612
611
  if len(category_description_candidate_child) == 0:
613
612
  continue
614
613
 
614
+ # This handles a case that doesn't come up with "pure" SpeciesNet results;
615
+ # if two categories have different IDs but the same "clean" description, this
616
+ # means they're different common names for the same species, which we use
617
+ # for things like "white-tailed deer buck" and "white-tailed deer fawn".
618
+ #
619
+ # Currently we don't support smoothing those predictions, because it's not a
620
+ # parent/child relationship.
621
+ if category_description_candidate_child == \
622
+ category_description_this_classification:
623
+ continue
624
+
615
625
  # As long as we're using "clean" descriptions, parent/child taxonomic
616
626
  # relationships are defined by a substring relationship
617
627
  is_child = category_description_this_classification in \
618
628
  category_description_candidate_child
629
+
619
630
  if not is_child:
620
631
  continue
621
632
 
@@ -40,6 +40,8 @@ def combine_batch_output_files(input_files,
40
40
  Merges the list of MD results files [input_files] into a single
41
41
  dictionary, optionally writing the result to [output_file].
42
42
 
43
+ Always overwrites [output_file] if it exists.
44
+
43
45
  Args:
44
46
  input_files (list of str): paths to JSON detection files
45
47
  output_file (str, optional): path to write merged JSON
@@ -1851,12 +1851,15 @@ def find_equivalent_threshold(results_a,
1851
1851
 
1852
1852
  return confidence_values, images_above_threshold
1853
1853
 
1854
+ # ...def _get_confidence_values_for_results(...)
1855
+
1854
1856
  confidence_values_a,images_above_threshold_a = \
1855
1857
  _get_confidence_values_for_results(results_a['images'],
1856
1858
  category_ids_to_consider_a,
1857
1859
  threshold_a)
1858
1860
 
1859
- # ...def _get_confidence_values_for_results(...)
1861
+ # Not necessary, but facilitates debugging
1862
+ confidence_values_a = sorted(confidence_values_a)
1860
1863
 
1861
1864
  if verbose:
1862
1865
  print('For result set A, considering {} of {} images'.format(
@@ -1869,13 +1872,29 @@ def find_equivalent_threshold(results_a,
1869
1872
  if verbose:
1870
1873
  print('For result set B, considering {} of {} images'.format(
1871
1874
  len(confidence_values_b),len(results_b['images'])))
1875
+
1872
1876
  confidence_values_b = sorted(confidence_values_b)
1873
1877
 
1878
+ # Find the threshold that produces the same fraction of detections for results_b
1874
1879
  target_detection_fraction = len(confidence_values_a_above_threshold) / len(confidence_values_a)
1875
1880
 
1876
- detection_cutoff_index = round((1.0-target_detection_fraction) * len(confidence_values_b))
1881
+ # How many detections do we want in results_b?
1882
+ target_number_of_detections = round(len(confidence_values_b) * target_detection_fraction)
1883
+
1884
+ # How many non-detections do we want in results_b?
1885
+ target_number_of_non_detections = len(confidence_values_b) - target_number_of_detections
1886
+ detection_cutoff_index = max(target_number_of_non_detections,0)
1877
1887
  threshold_b = confidence_values_b[detection_cutoff_index]
1878
1888
 
1889
+ confidence_values_b_above_threshold = [c for c in confidence_values_b if c >= threshold_b]
1890
+ confidence_values_b_above_reference_threshold = [c for c in confidence_values_b if c >= threshold_a]
1891
+
1892
+ # Special case: if the number of detections above the selected threshold is the same as the
1893
+ # number above the reference threshold, use the reference threshold
1894
+ if len(confidence_values_b_above_threshold) == len(confidence_values_b_above_reference_threshold):
1895
+ print('Detection count for reference threshold matches target threshold')
1896
+ threshold_b = threshold_a
1897
+
1879
1898
  if verbose:
1880
1899
  print('{} confidence values above threshold (A)'.format(
1881
1900
  len(confidence_values_a_above_threshold)))