megadetector 5.0.15__py3-none-any.whl → 5.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (29)
  1. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +387 -0
  2. megadetector/data_management/lila/generate_lila_per_image_labels.py +3 -3
  3. megadetector/data_management/lila/test_lila_metadata_urls.py +2 -2
  4. megadetector/data_management/remove_exif.py +61 -36
  5. megadetector/data_management/yolo_to_coco.py +25 -6
  6. megadetector/detection/process_video.py +259 -126
  7. megadetector/detection/pytorch_detector.py +13 -11
  8. megadetector/detection/run_detector.py +9 -2
  9. megadetector/detection/run_detector_batch.py +7 -0
  10. megadetector/detection/run_inference_with_yolov5_val.py +58 -10
  11. megadetector/detection/tf_detector.py +8 -2
  12. megadetector/detection/video_utils.py +201 -16
  13. megadetector/postprocessing/md_to_coco.py +31 -9
  14. megadetector/postprocessing/postprocess_batch_results.py +19 -3
  15. megadetector/postprocessing/subset_json_detector_output.py +22 -12
  16. megadetector/taxonomy_mapping/map_new_lila_datasets.py +3 -3
  17. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +2 -1
  18. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +1 -1
  19. megadetector/taxonomy_mapping/simple_image_download.py +5 -0
  20. megadetector/taxonomy_mapping/species_lookup.py +1 -1
  21. megadetector/utils/md_tests.py +196 -49
  22. megadetector/utils/path_utils.py +2 -2
  23. megadetector/utils/url_utils.py +7 -1
  24. megadetector/visualization/visualize_db.py +16 -0
  25. {megadetector-5.0.15.dist-info → megadetector-5.0.16.dist-info}/LICENSE +0 -0
  26. {megadetector-5.0.15.dist-info → megadetector-5.0.16.dist-info}/METADATA +2 -2
  27. {megadetector-5.0.15.dist-info → megadetector-5.0.16.dist-info}/RECORD +29 -28
  28. {megadetector-5.0.15.dist-info → megadetector-5.0.16.dist-info}/WHEEL +1 -1
  29. {megadetector-5.0.15.dist-info → megadetector-5.0.16.dist-info}/top_level.txt +0 -0
@@ -106,7 +106,9 @@ class YoloInferenceOptions:
106
106
 
107
107
  #: Image size to use; this is a single int, which in ultralytics's terminology means
108
108
  #: "scale the long side of the image to this size, and preserve aspect ratio".
109
- self.image_size = default_image_size_with_augmentation
109
+ #:
110
+ #: If None, will choose based on whether augmentation is enabled.
111
+ self.image_size = None
110
112
 
111
113
  #: Detections below this threshold will not be included in the output file
112
114
  self.conf_thres = '0.001'
@@ -276,10 +278,10 @@ def run_inference_with_yolo_val(options):
276
278
 
277
279
  if options.input_folder is not None:
278
280
  options.input_folder = options.input_folder.replace('\\','/')
281
+
279
282
 
280
-
281
283
  ##%% Other input handling
282
-
284
+
283
285
  if isinstance(options.yolo_category_id_to_name,str):
284
286
 
285
287
  assert os.path.isfile(options.yolo_category_id_to_name)
@@ -328,7 +330,9 @@ def run_inference_with_yolo_val(options):
328
330
  image_files_relative = None
329
331
  image_files_absolute = None
330
332
 
333
+ # If the caller just provided a folder, not a list of files...
331
334
  if options.image_filename_list is None:
335
+
332
336
  assert options.input_folder is not None and os.path.isdir(options.input_folder), \
333
337
  'Could not find input folder {}'.format(options.input_folder)
334
338
  image_files_relative = path_utils.find_images(options.input_folder,
@@ -337,18 +341,23 @@ def run_inference_with_yolo_val(options):
337
341
  convert_slashes=True)
338
342
  image_files_absolute = [os.path.join(options.input_folder,fn) for \
339
343
  fn in image_files_relative]
344
+
340
345
  else:
341
346
 
342
- if is_iterable(options.image_filename_list):
347
+ # If the caller provided a list of image files (rather than a filename pointing
348
+ # to a list of image files)...
349
+ if is_iterable(options.image_filename_list) and not isinstance(options.image_filename_list,str):
343
350
 
344
351
  image_files_relative = options.image_filename_list
345
352
 
353
+ # If the caller provided a filename pointing to a list of image files...
346
354
  else:
355
+
347
356
  assert isinstance(options.image_filename_list,str), \
348
357
  'Unrecognized image filename list object type: {}'.format(options.image_filename_list)
349
358
  assert os.path.isfile(options.image_filename_list), \
350
359
  'Could not find image filename list file: {}'.format(options.image_filename_list)
351
- ext = os.path.splitext(options.image_filename_list).lower()
360
+ ext = os.path.splitext(options.image_filename_list)[-1].lower()
352
361
  assert ext in ('.json','.txt'), \
353
362
  'Unrecognized image filename list file extension: {}'.format(options.image_filename_list)
354
363
  if ext == '.json':
@@ -364,8 +373,11 @@ def run_inference_with_yolo_val(options):
364
373
  # ...whether the image filename list was supplied as a list vs. a filename
365
374
 
366
375
  if options.input_folder is None:
376
+
367
377
  image_files_absolute = image_files_relative
378
+
368
379
  else:
380
+
369
381
  # The list should be relative filenames
370
382
  for fn in image_files_relative:
371
383
  assert not path_is_abs(fn), \
@@ -373,12 +385,14 @@ def run_inference_with_yolo_val(options):
373
385
 
374
386
  image_files_absolute = \
375
387
  [os.path.join(options.input_folder,fn) for fn in image_files_relative]
388
+
376
389
  for fn in image_files_absolute:
377
390
  assert os.path.isfile(fn), 'Could not find image file {}'.format(fn)
378
391
 
379
392
  # ...whether the caller supplied a list of filenames
380
393
 
381
394
  image_files_absolute = [fn.replace('\\','/') for fn in image_files_absolute]
395
+
382
396
  del image_files_relative
383
397
 
384
398
 
@@ -549,6 +563,7 @@ def run_inference_with_yolo_val(options):
549
563
  for i_image,image_fn in tqdm(enumerate(image_files_absolute),total=len(image_files_absolute)):
550
564
 
551
565
  ext = os.path.splitext(image_fn)[1]
566
+ image_fn_without_extension = os.path.splitext(image_fn)[0]
552
567
 
553
568
  # YOLO .json output identifies images by the base filename without the extension
554
569
  image_id = str(i_image).zfill(10)
@@ -557,12 +572,25 @@ def run_inference_with_yolo_val(options):
557
572
  symlink_full_path = os.path.join(symlink_folder_inner,symlink_name)
558
573
  link_full_paths.append(symlink_full_path)
559
574
 
575
+ # If annotation files exist, link those too; only useful if we're reading the computed
576
+ # mAP value, but it doesn't hurt.
577
+ annotation_fn = image_fn_without_extension + '.txt'
578
+ annotation_file_exists = False
579
+ if os.path.isfile(annotation_fn):
580
+ annotation_file_exists = True
581
+ annotation_symlink_name = image_id + '.txt'
582
+ annotation_symlink_full_path = os.path.join(symlink_folder_inner,annotation_symlink_name)
583
+
560
584
  try:
561
585
 
562
586
  if options.use_symlinks:
563
587
  path_utils.safe_create_link(image_fn,symlink_full_path)
588
+ if annotation_file_exists:
589
+ path_utils.safe_create_link(annotation_fn,annotation_symlink_full_path)
564
590
  else:
565
591
  shutil.copyfile(image_fn,symlink_full_path)
592
+ if annotation_file_exists:
593
+ shutil.copyfile(annotation_fn,annotation_symlink_full_path)
566
594
 
567
595
  except Exception as e:
568
596
 
@@ -648,7 +676,15 @@ def run_inference_with_yolo_val(options):
648
676
 
649
677
  ##%% Prepare Python command or YOLO CLI command
650
678
 
651
- image_size_string = str(round(options.image_size))
679
+ if options.image_size is None:
680
+ if options.augment:
681
+ image_size = default_image_size_with_augmentation
682
+ else:
683
+ image_size = default_image_size_with_no_augmentation
684
+ else:
685
+ image_size = options.image_size
686
+
687
+ image_size_string = str(round(image_size))
652
688
 
653
689
  if options.model_type == 'yolov5':
654
690
 
@@ -659,6 +695,9 @@ def run_inference_with_yolo_val(options):
659
695
  cmd += ' --device "{}" --save-json'.format(options.device_string)
660
696
  cmd += ' --project "{}" --name "{}" --exist-ok'.format(yolo_results_folder,'yolo_results')
661
697
 
698
+ # This is the NMS IoU threshold
699
+ # cmd += ' --iou-thres 0.6'
700
+
662
701
  if options.augment:
663
702
  cmd += ' --augment'
664
703
 
@@ -837,7 +876,7 @@ def run_inference_with_yolo_val(options):
837
876
  _clean_up_temporary_folders(options,
838
877
  symlink_folder,yolo_results_folder,
839
878
  symlink_folder_is_temp_folder,yolo_folder_is_temp_folder)
840
-
879
+
841
880
  # ...def run_inference_with_yolo_val()
842
881
 
843
882
 
@@ -856,7 +895,7 @@ def main():
856
895
  help='model file name')
857
896
  parser.add_argument(
858
897
  'input_folder',type=str,
859
- help='folder on which to recursively run the model')
898
+ help='folder on which to recursively run the model, or a .json or .txt file containing a list of absolute image paths')
860
899
  parser.add_argument(
861
900
  'output_file',type=str,
862
901
  help='.json file where output will be written')
@@ -967,7 +1006,15 @@ def main():
967
1006
 
968
1007
  if args.yolo_dataset_file is not None:
969
1008
  options.yolo_category_id_to_name = args.yolo_dataset_file
970
- del options.yolo_dataset_file
1009
+
1010
+ # The function convention is that input_folder should be None when we want to use a list of
1011
+ # absolute paths, but the CLI convention is that the required argument is always valid, whether
1012
+ # it's a folder or a list of absolute paths.
1013
+ if os.path.isfile(options.input_folder):
1014
+ assert options.image_filename_list is None, \
1015
+ 'image_filename_list should not be specified when input_folder is a file'
1016
+ options.image_filename_list = options.input_folder
1017
+ options.input_folder = None
971
1018
 
972
1019
  options.recursive = (not options.nonrecursive)
973
1020
  options.remove_symlink_folder = (not options.no_remove_symlink_folder)
@@ -980,6 +1027,7 @@ def main():
980
1027
  del options.no_remove_yolo_results_folder
981
1028
  del options.no_use_symlinks
982
1029
  del options.augment_enabled
1030
+ del options.yolo_dataset_file
983
1031
 
984
1032
  print(options.__dict__)
985
1033
 
@@ -1001,7 +1049,7 @@ if False:
1001
1049
  yolo_working_folder = r'c:\git\yolov5-tegus'
1002
1050
  dataset_file = r'g:\temp\dataset.yaml'
1003
1051
 
1004
- # This only impacts the output file name, it's not passed to the inference functio
1052
+ # This only impacts the output file name, it's not passed to the inference function
1005
1053
  job_name = 'yolo-inference-test'
1006
1054
 
1007
1055
  model_name = os.path.splitext(os.path.basename(model_filename))[0]
@@ -110,7 +110,10 @@ class TFDetector:
110
110
  Runs the detector on a single image.
111
111
  """
112
112
 
113
- np_im = np.asarray(image, np.uint8)
113
+ if isinstance(image,np.ndarray):
114
+ np_im = image
115
+ else:
116
+ np_im = np.asarray(image, np.uint8)
114
117
  im_w_batch_dim = np.expand_dims(np_im, axis=0)
115
118
 
116
119
  # need to change the above line to the following if supporting a batch size > 1 and resizing to the same size
@@ -136,7 +139,8 @@ class TFDetector:
136
139
  Runs the detector on an image.
137
140
 
138
141
  Args:
139
- image (Image): the PIL Image object on which we should run the detector
142
+ image (Image): the PIL Image object (or numpy array) on which we should run the detector, with
143
+ EXIF rotation already handled.
140
144
  image_id (str): a path to identify the image; will be in the "file" field of the output object
141
145
  detection_threshold (float): only detections above this threshold will be included in the return
142
146
  value
@@ -166,6 +170,7 @@ class TFDetector:
166
170
  result = { 'file': image_id }
167
171
 
168
172
  try:
173
+
169
174
  b_box, b_score, b_class = self._generate_detections_one_image(image)
170
175
 
171
176
  # our batch size is 1; need to loop the batch dim if supporting batch size > 1
@@ -190,6 +195,7 @@ class TFDetector:
190
195
  result['detections'] = detections_cur_image
191
196
 
192
197
  except Exception as e:
198
+
193
199
  result['failure'] = FAILURE_INFER
194
200
  print('TFDetector: image {} failed during inference: {}'.format(image_id, str(e)))
195
201
 
@@ -88,14 +88,14 @@ def find_videos(dirname,
88
88
  else:
89
89
  files = glob.glob(os.path.join(dirname, '*.*'))
90
90
 
91
+ files = [fn for fn in files if os.path.isfile(fn)]
92
+
91
93
  if return_relative_paths:
92
94
  files = [os.path.relpath(fn,dirname) for fn in files]
93
95
 
94
96
  if convert_slashes:
95
97
  files = [fn.replace('\\', '/') for fn in files]
96
98
 
97
- files = [fn for fn in files if os.path.isfile(fn)]
98
-
99
99
  return find_video_strings(files)
100
100
 
101
101
 
@@ -210,6 +210,181 @@ def _add_frame_numbers_to_results(results):
210
210
  im['frame_number'] = frame_number
211
211
 
212
212
 
213
+ def run_callback_on_frames(input_video_file,
214
+ frame_callback,
215
+ every_n_frames=None,
216
+ verbose=False,
217
+ frames_to_process=None,
218
+ allow_empty_videos=False):
219
+ """
220
+ Calls the function frame_callback(np.array,image_id) on all (or selected) frames in
221
+ [input_video_file].
222
+
223
+ Args:
224
+ input_video_file (str): video file to process
225
+ frame_callback (function): callback to run on frames, should take an np.array and a string and
226
+ return a single value. callback should expect PIL-formatted (RGB) images.
227
+ every_n_frames (int, optional): sample every Nth frame starting from the first frame;
228
+ if this is None or 1, every frame is processed. Mutually exclusive with
229
+ frames_to_process.
230
+ verbose (bool, optional): enable additional debug console output
231
+ frames_to_process (list of int, optional): process this specific set of frames;
232
+ mutually exclusive with every_n_frames. If all values are beyond the length
233
+ of the video, no frames are extracted. Can also be a single int, specifying
234
+ a single frame number.
235
+ allow_empty_videos (bool, optional): Just print a warning if a video appears to have no
236
+ frames (by default, this is an error).
237
+
238
+ Returns:
239
+ dict: dict with keys 'frame_filenames' (list), 'frame_rate' (float), 'results' (list).
240
+ 'frame_filenames' are synthetic filenames (e.g. frame000000.jpg); 'results' are
241
+ the values returned by frame_callback (one per processed frame), typically in the same format used in the 'images' array in the MD results format.
242
+ """
243
+
244
+ assert os.path.isfile(input_video_file), 'File {} not found'.format(input_video_file)
245
+
246
+ if isinstance(frames_to_process,int):
247
+ frames_to_process = [frames_to_process]
248
+
249
+ if (frames_to_process is not None) and (every_n_frames is not None):
250
+ raise ValueError('frames_to_process and every_n_frames are mutually exclusive')
251
+
252
+ vidcap = cv2.VideoCapture(input_video_file)
253
+ n_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
254
+ frame_rate = vidcap.get(cv2.CAP_PROP_FPS)
255
+
256
+ if verbose:
257
+ print('Video {} contains {} frames at {} Hz'.format(input_video_file,n_frames,frame_rate))
258
+
259
+ frame_filenames = []
260
+ results = []
261
+
262
+ # frame_number = 0
263
+ for frame_number in range(0,n_frames):
264
+
265
+ success,image = vidcap.read()
266
+
267
+ if not success:
268
+ assert image is None
269
+ if verbose:
270
+ print('Read terminating at frame {} of {}'.format(frame_number,n_frames))
271
+ break
272
+
273
+ if every_n_frames is not None:
274
+ if frame_number % every_n_frames != 0:
275
+ continue
276
+
277
+ if frames_to_process is not None:
278
+ if frame_number > max(frames_to_process):
279
+ break
280
+ if frame_number not in frames_to_process:
281
+ continue
282
+
283
+ frame_filename_relative = _frame_number_to_filename(frame_number)
284
+ frame_filenames.append(frame_filename_relative)
285
+
286
+ image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
287
+ frame_results = frame_callback(image_np,frame_filename_relative)
288
+ results.append(frame_results)
289
+
290
+ # ...for each frame
291
+
292
+ if len(frame_filenames) == 0:
293
+ if allow_empty_videos:
294
+ print('Warning: found no frames in file {}'.format(input_video_file))
295
+ else:
296
+ raise Exception('Error: found no frames in file {}'.format(input_video_file))
297
+
298
+ if verbose:
299
+ print('\nProcessed {} of {} frames for {}'.format(
300
+ len(frame_filenames),n_frames,input_video_file))
301
+
302
+ vidcap.release()
303
+ to_return = {}
304
+ to_return['frame_filenames'] = frame_filenames
305
+ to_return['frame_rate'] = frame_rate
306
+ to_return['results'] = results
307
+
308
+ return to_return
309
+
310
+ # ...def run_callback_on_frames(...)
311
+
312
+
313
+ def run_callback_on_frames_for_folder(input_video_folder,
314
+ frame_callback,
315
+ every_n_frames=None,
316
+ verbose=False,
317
+ allow_empty_videos=False,
318
+ recursive=True):
319
+ """
320
+ Calls the function frame_callback(np.array,image_id) on all (or selected) frames in
321
+ all videos in [input_video_folder].
322
+
323
+ Args:
324
+ input_video_folder (str): video folder to process
325
+ frame_callback (function): callback to run on frames, should take an np.array and a string and
326
+ return a single value. callback should expect PIL-formatted (RGB) images.
327
+ every_n_frames (int, optional): sample every Nth frame starting from the first frame;
328
+ if this is None or 1, every frame is processed.
329
+ verbose (bool, optional): enable additional debug console output
330
+ allow_empty_videos (bool, optional): Just print a warning if a video appears to have no
331
+ frames (by default, this is an error).
332
+ recursive (bool, optional): recurse into [input_video_folder]
333
+
334
+ Returns:
335
+ dict: dict with keys 'video_filenames' (list), 'frame_rates' (list of floats), 'results' (list of per-video result lists).
336
+ video_filenames will be *relative* filenames.
337
+ """
338
+
339
+ to_return = {'video_filenames':[],'frame_rates':[],'results':[]}
340
+
341
+ # Recursively enumerate video files
342
+ input_files_full_paths = find_videos(input_video_folder,
343
+ recursive=recursive,
344
+ convert_slashes=True,
345
+ return_relative_paths=False)
346
+ print('Found {} videos in folder {}'.format(len(input_files_full_paths),input_video_folder))
347
+
348
+ if len(input_files_full_paths) == 0:
349
+ return to_return
350
+
351
+ # Process each video
352
+
353
+ # video_fn_abs = input_files_full_paths[0]
354
+ for video_fn_abs in tqdm(input_files_full_paths):
355
+ video_results = run_callback_on_frames(input_video_file=video_fn_abs,
356
+ frame_callback=frame_callback,
357
+ every_n_frames=every_n_frames,
358
+ verbose=verbose,
359
+ frames_to_process=None,
360
+ allow_empty_videos=allow_empty_videos)
361
+
362
+ """
363
+ dict: dict with keys 'frame_filenames' (list), 'frame_rate' (float), 'results' (list).
364
+ 'frame_filenames' are synthetic filenames (e.g. frame000000.jpg); 'results' are
365
+ in the same format used in the 'images' array in the MD results format.
366
+ """
367
+ video_filename_relative = os.path.relpath(video_fn_abs,input_video_folder)
368
+ video_filename_relative = video_filename_relative.replace('\\','/')
369
+ to_return['video_filenames'].append(video_filename_relative)
370
+ to_return['frame_rates'].append(video_results['frame_rate'])
371
+ for r in video_results['results']:
372
+ assert r['file'].startswith('frame')
373
+ r['file'] = video_filename_relative + '/' + r['file']
374
+ to_return['results'].append(video_results['results'])
375
+
376
+ # ...for each video
377
+
378
+ n_videos = len(input_files_full_paths)
379
+ assert len(to_return['video_filenames']) == n_videos
380
+ assert len(to_return['frame_rates']) == n_videos
381
+ assert len(to_return['results']) == n_videos
382
+
383
+ return to_return
384
+
385
+ # ...def run_callback_on_frames_for_folder(...)
386
+
387
+
213
388
  def video_to_frames(input_video_file,
214
389
  output_folder,
215
390
  overwrite=True,
@@ -220,7 +395,7 @@ def video_to_frames(input_video_file,
220
395
  frames_to_extract=None,
221
396
  allow_empty_videos=False):
222
397
  """
223
- Renders frames from [input_video_file] to a .jpg in [output_folder].
398
+ Renders frames from [input_video_file] to .jpg files in [output_folder].
224
399
 
225
400
  With help from:
226
401
 
@@ -341,7 +516,7 @@ def video_to_frames(input_video_file,
341
516
  # ...if we need to check whether to skip this video entirely
342
517
 
343
518
  if verbose:
344
- print('Reading {} frames at {} Hz from {}'.format(n_frames,Fs,input_video_file))
519
+ print('Video {} contains {} frames at {} Hz'.format(input_video_file,n_frames,Fs))
345
520
 
346
521
  frame_filenames = []
347
522
 
@@ -410,8 +585,8 @@ def video_to_frames(input_video_file,
410
585
 
411
586
  # ...if we need to deal with resizing
412
587
 
413
- frame_filename = _frame_number_to_filename(frame_number)
414
- frame_filename = os.path.join(output_folder,frame_filename)
588
+ frame_filename_relative = _frame_number_to_filename(frame_number)
589
+ frame_filename = os.path.join(output_folder,frame_filename_relative)
415
590
  frame_filenames.append(frame_filename)
416
591
 
417
592
  if overwrite == False and os.path.isfile(frame_filename):
@@ -441,9 +616,13 @@ def video_to_frames(input_video_file,
441
616
  except Exception as e:
442
617
  print('Error on frame {} of {}: {}'.format(frame_number,n_frames,str(e)))
443
618
 
619
+ # ...for each frame
620
+
444
621
  if len(frame_filenames) == 0:
445
- raise Exception('Error: found no frames in file {}'.format(
446
- input_video_file))
622
+ if allow_empty_videos:
623
+ print('Warning: found no frames in file {}'.format(input_video_file))
624
+ else:
625
+ raise Exception('Error: found no frames in file {}'.format(input_video_file))
447
626
 
448
627
  if verbose:
449
628
  print('\nExtracted {} of {} frames for {}'.format(
@@ -457,7 +636,7 @@ def video_to_frames(input_video_file,
457
636
 
458
637
  def _video_to_frames_for_folder(relative_fn,input_folder,output_folder_base,
459
638
  every_n_frames,overwrite,verbose,quality,max_width,
460
- frames_to_extract):
639
+ frames_to_extract,allow_empty_videos):
461
640
  """
462
641
  Internal function to call video_to_frames for a single video in the context of
463
642
  video_folder_to_frames; makes sure the right output folder exists, then calls
@@ -474,10 +653,15 @@ def _video_to_frames_for_folder(relative_fn,input_folder,output_folder_base,
474
653
 
475
654
  # Render frames
476
655
  # input_video_file = input_fn_absolute; output_folder = output_folder_video
477
- frame_filenames,fs = video_to_frames(input_fn_absolute,output_folder_video,
478
- overwrite=overwrite,every_n_frames=every_n_frames,
479
- verbose=verbose,quality=quality,max_width=max_width,
480
- frames_to_extract=frames_to_extract)
656
+ frame_filenames,fs = video_to_frames(input_fn_absolute,
657
+ output_folder_video,
658
+ overwrite=overwrite,
659
+ every_n_frames=every_n_frames,
660
+ verbose=verbose,
661
+ quality=quality,
662
+ max_width=max_width,
663
+ frames_to_extract=frames_to_extract,
664
+ allow_empty_videos=allow_empty_videos)
481
665
 
482
666
  return frame_filenames,fs
483
667
 
@@ -487,7 +671,7 @@ def video_folder_to_frames(input_folder, output_folder_base,
487
671
  n_threads=1, every_n_frames=None,
488
672
  verbose=False, parallelization_uses_threads=True,
489
673
  quality=None, max_width=None,
490
- frames_to_extract=None):
674
+ frames_to_extract=None, allow_empty_videos=False):
491
675
  """
492
676
  For every video file in input_folder, creates a folder within output_folder_base, and
493
677
  renders the frames of that video to images in that folder.
@@ -545,7 +729,7 @@ def video_folder_to_frames(input_folder, output_folder_base,
545
729
  frame_filenames,fs = \
546
730
  _video_to_frames_for_folder(input_fn_relative,input_folder,output_folder_base,
547
731
  every_n_frames,overwrite,verbose,quality,max_width,
548
- frames_to_extract)
732
+ frames_to_extract,allow_empty_videos)
549
733
  frame_filenames_by_video.append(frame_filenames)
550
734
  fs_by_video.append(fs)
551
735
  else:
@@ -563,7 +747,8 @@ def video_folder_to_frames(input_folder, output_folder_base,
563
747
  verbose=verbose,
564
748
  quality=quality,
565
749
  max_width=max_width,
566
- frames_to_extract=frames_to_extract)
750
+ frames_to_extract=frames_to_extract,
751
+ allow_empty_videos=allow_empty_videos)
567
752
  results = list(tqdm(pool.imap(
568
753
  partial(process_video_with_options),input_files_relative_paths),
569
754
  total=len(input_files_relative_paths)))
@@ -31,11 +31,18 @@ def md_to_coco(md_results_file,
31
31
  validate_image_sizes=False,
32
32
  info=None,
33
33
  preserve_nonstandard_metadata=True,
34
- include_failed_images=True):
34
+ include_failed_images=True,
35
+ include_annotations_without_bounding_boxes=True,
36
+ empty_category_id='0'):
35
37
  """
36
38
  "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
37
39
  this is an opinionated transformation that requires a confidence threshold.
38
40
 
41
+ The default confidence threshold is not 0; the assumption is that by default, you are
42
+ going to treat the resulting COCO file as a set of labels. If you are using the resulting COCO
43
+ file to evaluate a detector, you likely want a default confidence threshold of 0. Confidence
44
+ values will be written to the semi-standard "score" field for each annotation.
45
+
39
46
  A folder of images is required if width and height information are not available
40
47
  in the MD results file.
41
48
 
@@ -54,8 +61,13 @@ def md_to_coco(md_results_file,
54
61
  preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
55
62
  semi-standard "score" field in each annotation, and any random fields present in each image's data
56
63
  (e.g. EXIF metadata) will be propagated to COCO output
57
- include_failed_images (boo, optional): if this is True, failed images will be propagated to COCO output
64
+ include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
58
65
  with a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
66
+ include_annotations_without_bounding_boxes (bool, optional): if this is True, annotations with
67
+ only class labels (no bounding boxes) will be included in the output. If this is False, empty
68
+ images will be represented with no annotations.
69
+ empty_category_id (str, optional): category ID reserved for the 'empty' class, should not be
70
+ attached to any bounding boxes
59
71
 
60
72
  Returns:
61
73
  dict: the COCO data dict, identical to what's written to [coco_output_file] if [coco_output_file]
@@ -67,6 +79,8 @@ def md_to_coco(md_results_file,
67
79
 
68
80
  coco_images = []
69
81
  coco_annotations = []
82
+
83
+ print('Converting MD results to COCO...')
70
84
 
71
85
  # im = md_results['images'][0]
72
86
  for im in tqdm(md_results['images']):
@@ -129,13 +143,13 @@ def md_to_coco(md_results_file,
129
143
  coco_category_id = int(md_category_id)
130
144
  ann['category_id'] = coco_category_id
131
145
 
132
- # In very esoteric cases, we use the empty category (0) in MD-formatted output files
133
- if md_category_id != '0':
146
+ if md_category_id != empty_category_id:
134
147
 
135
148
  assert 'bbox' in detection,\
136
149
  'Oops: non-empty category with no bbox in {}'.format(im['file'])
137
150
 
138
151
  ann['bbox'] = detection['bbox']
152
+
139
153
  # MegaDetector: [x,y,width,height] (normalized, origin upper-left)
140
154
  # COCO: [x,y,width,height] (absolute, origin upper-left)
141
155
  ann['bbox'][0] = ann['bbox'][0] * coco_im['width']
@@ -144,13 +158,19 @@ def md_to_coco(md_results_file,
144
158
  ann['bbox'][3] = ann['bbox'][3] * coco_im['height']
145
159
 
146
160
  else:
147
-
148
- print('Warning: empty category annotation in file {}'.format(im['file']))
161
+
162
+ # In very esoteric cases, we use the empty category (0) in MD-formatted output files
163
+ print('Warning: empty category ({}) used for annotation in file {}'.format(
164
+ empty_category_id,im['file']))
165
+ pass
149
166
 
150
167
  if preserve_nonstandard_metadata:
151
- ann['conf'] = detection['conf']
152
-
153
- coco_annotations.append(ann)
168
+ # "Score" is a semi-standard string here, recognized by at least pycocotools
169
+ # ann['conf'] = detection['conf']
170
+ ann['score'] = detection['conf']
171
+
172
+ if 'bbox' in ann or include_annotations_without_bounding_boxes:
173
+ coco_annotations.append(ann)
154
174
 
155
175
  # ...for each detection
156
176
 
@@ -176,6 +196,8 @@ def md_to_coco(md_results_file,
176
196
  'name':md_results['detection_categories'][md_category_id]}
177
197
  output_dict['categories'].append(coco_category)
178
198
 
199
+ print('Writing COCO output file...')
200
+
179
201
  if coco_output_file is not None:
180
202
  with open(coco_output_file,'w') as f:
181
203
  json.dump(output_dict,f,indent=1)
@@ -949,6 +949,13 @@ def process_batch_results(options):
949
949
  f'negative, {n_positive} positive, {n_unknown} unknown, '
950
950
  f'{n_ambiguous} ambiguous')
951
951
 
952
+ if n_positive == 0:
953
+ print('\n*** Warning: no positives found in ground truth, analysis won\'t be very meaningful ***\n')
954
+ if n_negative == 0:
955
+ print('\n*** Warning: no negatives found in ground truth, analysis won\'t be very meaningful ***\n')
956
+ if n_ambiguous > 0:
957
+ print('\n*** Warning: {} images with ambiguous positive/negative status found in ground truth ***\n'.format(
958
+ n_ambiguous))
952
959
 
953
960
  ##%% Load detection (and possibly classification) results
954
961
 
@@ -1095,25 +1102,34 @@ def process_batch_results(options):
1095
1102
 
1096
1103
  ##%% Detection evaluation: compute precision/recall
1097
1104
 
1098
- # numpy array of detection probabilities
1105
+ # numpy array of maximum confidence values
1099
1106
  p_detection = detections_df['max_detection_conf'].values
1100
- n_detections = len(p_detection)
1107
+ n_detection_values = len(p_detection)
1101
1108
 
1102
1109
  # numpy array of bools (0.0/1.0), and -1 as null value
1103
- gt_detections = np.zeros(n_detections, dtype=float)
1110
+ gt_detections = np.zeros(n_detection_values, dtype=float)
1104
1111
 
1112
+ n_positive = 0
1113
+ n_negative = 0
1114
+
1105
1115
  for i_detection, fn in enumerate(detector_files):
1116
+
1106
1117
  image_id = ground_truth_indexed_db.filename_to_id[fn]
1107
1118
  image = ground_truth_indexed_db.image_id_to_image[image_id]
1108
1119
  detection_status = image['_detection_status']
1109
1120
 
1110
1121
  if detection_status == DetectionStatus.DS_NEGATIVE:
1111
1122
  gt_detections[i_detection] = 0.0
1123
+ n_negative += 1
1112
1124
  elif detection_status == DetectionStatus.DS_POSITIVE:
1113
1125
  gt_detections[i_detection] = 1.0
1126
+ n_positive += 1
1114
1127
  else:
1115
1128
  gt_detections[i_detection] = -1.0
1116
1129
 
1130
+ print('Of {} ground truth values, found {} positives and {} negatives'.format(
1131
+ len(detections_df),n_positive,n_negative))
1132
+
1117
1133
  # Don't include ambiguous/unknown ground truth in precision/recall analysis
1118
1134
  b_valid_ground_truth = gt_detections >= 0.0
1119
1135