megadetector 5.0.19-py3-none-any.whl → 5.0.20-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (23)
  1. megadetector/data_management/importers/bellevue_to_json.py +0 -1
  2. megadetector/data_management/importers/osu-small-animals-to-json.py +364 -0
  3. megadetector/data_management/lila/generate_lila_per_image_labels.py +1 -1
  4. megadetector/data_management/lila/get_lila_annotation_counts.py +2 -0
  5. megadetector/data_management/lila/lila_common.py +28 -12
  6. megadetector/data_management/lila/test_lila_metadata_urls.py +17 -8
  7. megadetector/data_management/read_exif.py +73 -0
  8. megadetector/detection/process_video.py +84 -16
  9. megadetector/detection/run_detector.py +36 -13
  10. megadetector/detection/run_detector_batch.py +104 -15
  11. megadetector/detection/run_inference_with_yolov5_val.py +20 -23
  12. megadetector/detection/video_utils.py +60 -37
  13. megadetector/taxonomy_mapping/map_new_lila_datasets.py +8 -3
  14. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +3 -2
  15. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -1
  16. megadetector/utils/ct_utils.py +20 -0
  17. megadetector/utils/md_tests.py +50 -6
  18. {megadetector-5.0.19.dist-info → megadetector-5.0.20.dist-info}/METADATA +2 -2
  19. {megadetector-5.0.19.dist-info → megadetector-5.0.20.dist-info}/RECORD +22 -22
  20. {megadetector-5.0.19.dist-info → megadetector-5.0.20.dist-info}/WHEEL +1 -1
  21. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +0 -677
  22. {megadetector-5.0.19.dist-info → megadetector-5.0.20.dist-info}/LICENSE +0 -0
  23. {megadetector-5.0.19.dist-info → megadetector-5.0.20.dist-info}/top_level.txt +0 -0
@@ -5,12 +5,10 @@ process_video.py
 Splits a video (or folder of videos) into frames, runs the frames through run_detector_batch.py,
 and optionally stitches together results into a new video with detection boxes.
 
-Operates by separating the video into frames, typically sampling every Nth frame, and writing
-those frames to disk, before running MD. This approach clearly has a downside: it requires
-a bunch more disk space, compared to extracting frames and running MD on them without ever
-writing them to disk. The upside, though, is that this approach allows you to run repeat
-detection elimination after running MegaDetector, and it allows allows more efficient re-use
-of frames if you end up running MD more than once, or running multiple versions of MD.
+When possible, video processing happens in memory, without writing intermediate frames to disk.
+If the caller requests that frames be saved, frames are written before processing, and the MD
+results correspond to the frames that were written to disk (which simplifies, for example,
+repeat detection elimination).
 
 """
 
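
The docstring change reflects the new default: frames are processed in memory unless the caller asks for them on disk. A minimal usage sketch (class, function, and option names are taken from this diff; the video path is hypothetical):

    from megadetector.detection.process_video import ProcessVideoOptions, process_video

    options = ProcessVideoOptions()
    options.input_video_file = '/data/videos/clip.mp4'  # hypothetical path
    options.model_file = 'MDV5A'
    options.frame_sample = 10  # run MD on every 10th frame

    # With default options, frames stay in memory; setting
    # options.keep_extracted_frames = True restores the old write-to-disk
    # behavior, e.g. to support repeat detection elimination.
    results = process_video(options)
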
@@ -36,6 +34,7 @@ from megadetector.detection.video_utils import run_callback_on_frames
 from megadetector.detection.video_utils import run_callback_on_frames_for_folder
 from megadetector.detection.video_utils import frames_to_video
 from megadetector.detection.video_utils import frame_results_to_video_results
+from megadetector.detection.video_utils import FrameToVideoOptions
 from megadetector.detection.video_utils import _add_frame_numbers_to_results
 from megadetector.detection.video_utils import video_folder_to_frames
 from megadetector.detection.video_utils import default_fourcc
@@ -133,13 +132,16 @@ class ProcessVideoOptions:
 
 #: Sample every Nth frame; set to None (default) or 1 to sample every frame. Typically
 #: we sample down to around 3 fps, so for typical 30 fps videos, frame_sample=10 is a
-#: typical value. Mutually exclusive with [frames_to_extract].
+#: typical value. Mutually exclusive with [frames_to_extract] and [time_sample].
 self.frame_sample = None
 
 #: Extract a specific set of frames (list of ints, or a single int). Mutually exclusive with
-#: [frame_sample].
+#: [frame_sample] and [time_sample].
 self.frames_to_extract = None
 
+# Sample frames every N seconds. Mutally exclusive with [frame_sample] and [frames_to_extract].
+self.time_sample = None
+
 #: Number of workers to use for parallelization; set to <= 1 to disable parallelization
 self.n_cores = 1
 
@@ -172,11 +174,34 @@ class ProcessVideoOptions:
 #: frame from each video, but a video only has 50 frames.
 self.allow_empty_videos = False
 
+#: When processing a folder of videos, should we include just a single representative
+#: frame result for each video (default), or every frame that was processed?
+self.include_all_processed_frames = False
+
 # ...class ProcessVideoOptions
 
 
 #%% Functions
 
+def _validate_video_options(options):
+    """
+    Consistency checking for ProcessVideoOptions objects.
+    """
+
+    n_sampling_options_configured = 0
+    if options.frame_sample is not None:
+        n_sampling_options_configured += 1
+    if options.time_sample is not None:
+        n_sampling_options_configured += 1
+    if options.frames_to_extract is not None:
+        n_sampling_options_configured += 1
+
+    if n_sampling_options_configured > 1:
+        raise ValueError('frame_sample, time_sample, and frames_to_extract are mutually exclusive')
+
+    return True
+
+
 def _select_temporary_output_folders(options):
     """
     Choose folders in system temp space for writing temporary frames. Does not create folders,
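
A quick sketch of what the new helper enforces (the ValueError text is taken verbatim from the code above; importing the private helper is just for illustration):

    from megadetector.detection.process_video import ProcessVideoOptions, _validate_video_options

    options = ProcessVideoOptions()
    options.frame_sample = 10
    options.time_sample = 2.0

    try:
        _validate_video_options(options)
    except ValueError as e:
        # frame_sample, time_sample, and frames_to_extract are mutually exclusive
        print(e)
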
@@ -330,12 +355,19 @@ def process_video(options):
     dict: frame-level MegaDetector results, identical to what's in the output .json file
 """
 
+# Check for incompatible options
+_validate_video_options(options)
+
 if options.output_json_file is None:
     options.output_json_file = options.input_video_file + '.json'
 
 if options.render_output_video and (options.output_video_file is None):
     options.output_video_file = options.input_video_file + '.detections.mp4'
 
+if options.time_sample is not None:
+    raise ValueError('Time-based sampling is not supported when processing a single video; ' + \
+                     'consider processing a folder, or using frame_sample')
+
 if options.model_file == 'no_detection' and not options.keep_extracted_frames:
     print('Warning: you asked for no detection, but did not specify keep_extracted_frames, this is a no-op')
     return
@@ -461,7 +493,8 @@ def process_video(options):
 if options.render_output_video:
 
-    # Render detections to images
+    ## Render detections to images
+
     if (caller_provided_rendering_output_folder):
         rendering_output_dir = options.frame_rendering_folder
     else:
@@ -475,16 +508,24 @@ def process_video(options):
         images_dir=frame_output_folder,
         confidence_threshold=options.rendering_confidence_threshold)
 
-    # Combine into a video
+
+    ## Choose the frame rate at which we should render the output video
+
     if options.rendering_fs is not None:
         rendering_fs = options.rendering_fs
-    elif options.frame_sample is None:
+    elif options.frame_sample is None and options.time_sample is None:
         rendering_fs = Fs
-    else:
+    elif options.frame_sample is not None:
+        assert options.time_sample is None
         # If the original video was 30fps and we sampled every 10th frame,
         # render at 3fps
         rendering_fs = Fs / options.frame_sample
+    elif options.time_sample is not None:
+        rendering_fs = options.time_sample
 
+
+    ## Render the output video
+
     print('Rendering {} frames to {} at {} fps (original video {} fps)'.format(
         len(detected_frame_files), options.output_video_file,rendering_fs,Fs))
     frames_to_video(detected_frame_files,
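
Worked numbers for the frame-rate selection above, assuming a 30 fps source:

    Fs = 30.0  # original video frame rate

    # frame_sample=10: every 10th frame was processed, so rendering at
    # 30 / 10 = 3 fps preserves real-time playback speed
    rendering_fs = Fs / 10  # -> 3.0

    # time_sample=2: one frame was sampled every ~2 seconds, and the code
    # above renders at 2 fps, i.e. the value of time_sample itself
    rendering_fs = 2.0
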
@@ -499,6 +540,7 @@ def process_video(options):
 
 
 ## (Optionally) delete the extracted frames
+
 _clean_up_extracted_frames(options, frame_output_folder, frame_filenames)
 
 # ...process_video()
@@ -521,6 +563,9 @@ def process_video_folder(options):
 
 ## Validate options
 
+# Check for incompatible options
+_validate_video_options(options)
+
 assert os.path.isdir(options.input_video_file), \
     '{} is not a folder'.format(options.input_video_file)
 
@@ -548,6 +593,11 @@ def process_video_folder(options):
 image_file_names = None
 video_filename_to_fs = {}
 
+if options.time_sample is not None:
+    every_n_frames_param = -1 * options.time_sample
+else:
+    every_n_frames_param = options.frame_sample
+
 # Run MD in memory if we don't need to generate frames
 #
 # Currently if we're generating an output video, we need to generate frames on disk first.
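
In other words, time-based sampling is threaded through the existing every_n_frames parameter using a sign convention: non-negative values mean "every Nth frame", and negative values tell video_utils to treat the magnitude as a sampling interval in seconds. A hypothetical helper capturing the same logic:

    def _every_n_frames_param(frame_sample, time_sample):
        # Hypothetical mirror of the logic above; a negative value signals
        # that the magnitude is a sampling interval in seconds rather than
        # a frame count.
        if time_sample is not None:
            return -1 * time_sample
        return frame_sample

    assert _every_n_frames_param(frame_sample=10, time_sample=None) == 10
    assert _every_n_frames_param(frame_sample=None, time_sample=2.5) == -2.5
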
@@ -572,7 +622,7 @@ def process_video_folder(options):
 
 md_results = run_callback_on_frames_for_folder(input_video_folder=options.input_video_file,
                                                frame_callback=frame_callback,
-                                               every_n_frames=options.frame_sample,
+                                               every_n_frames=every_n_frames_param,
                                                verbose=options.verbose)
 
 video_results = md_results['results']
@@ -679,8 +729,13 @@ def process_video_folder(options):
 
 ## Convert frame-level results to video-level results
 
+frame_to_video_options = FrameToVideoOptions()
+frame_to_video_options.include_all_processed_frames = options.include_all_processed_frames
+
 print('Converting frame-level results to video-level results')
-frame_results_to_video_results(frames_json,video_json,
+frame_results_to_video_results(frames_json,
+                               video_json,
+                               options=frame_to_video_options,
                                video_filename_to_frame_rate=video_filename_to_fs)
 
 
@@ -1088,10 +1143,17 @@ def main():
                     'a folder. Default {}.'.format(default_options.n_cores))
 
 parser.add_argument('--frame_sample', type=int,
-                    default=None, help='process every Nth frame (defaults to every frame)')
+                    default=None, help='process every Nth frame (defaults to every frame), mutually exclusive '\
+                    'with --frames_to_extract and --time_sample.')
 
 parser.add_argument('--frames_to_extract', nargs='+', type=int,
-                    default=None, help='extract specific frames (one or more ints)')
+                    default=None, help='extract specific frames (one or more ints), mutually exclusive '\
+                    'with --frame_sample and --time_sample.')
+
+parser.add_argument('--time_sample', type=float,
+                    default=None, help='process frames every N seconds; this is converted to a '\
+                    'frame sampling rate, so it may not be exactly the requested interval in seconds. '\
+                    'mutually exclusive with --frame_sample and --frames_to_extract.')
 
 parser.add_argument('--quality', type=int,
                     default=default_options.quality,
@@ -1127,6 +1189,12 @@ def main():
                     action='store_true',
                     help='Enable image augmentation')
 
+parser.add_argument('--include_all_processed_frames',
+                    action='store_true',
+                    help='When processing a folder of videos, this flag indicates that the output '\
+                    'should include results for every frame that was processed, rather than just '\
+                    'one representative frame for each detection category per video.')
+
 parser.add_argument('--allow_empty_videos',
                     action='store_true',
                     help='By default, videos with no retrievable frames cause an error, this makes it a warning')
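
Putting the new flags together, a hypothetical folder-level invocation (the entry point and positional arguments are assumptions; the flags are the ones added in this hunk):

    # Sample one frame every ~2 seconds from each video in a folder, and
    # report every processed frame rather than one representative frame
    python process_video.py MDV5A /data/videos \
        --time_sample 2 \
        --include_all_processed_frames
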
@@ -356,22 +356,29 @@ def is_gpu_available(model_file):
         pass
     return gpu_available
 else:
-    raise ValueError('Unrecognized model file extension for model {}'.format(model_file))
+    raise ValueError('Model {} does not have a recognized extension and is not a known model name'.\
+                     format(model_file))
 
 
-def load_detector(model_file, force_cpu=False):
+def load_detector(model_file, force_cpu=False, force_model_download=False):
     r"""
     Loads a TF or PT detector, depending on the extension of model_file.
 
     Args:
         model_file (str): model filename, e.g. c:/x/z/md_v5a.0.0.pt
+        force_cpu (bool, optional): force the model to run on the CPU even if a GPU
+            is available
+        force_model_download (bool, optional): force downloading the model file if
+            a named model (e.g. "MDV5A") is supplied, even if the local file already
+            exists
 
     Returns:
         object: loaded detector object
     """
 
     # Possibly automatically download the model
-    model_file = try_download_known_detector(model_file)
+    model_file = try_download_known_detector(model_file,
+                                             force_download=force_model_download)
 
     start_time = time.time()
     if model_file.endswith('.pb'):
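
Usage of the extended signature, as a sketch:

    from megadetector.detection.run_detector import load_detector

    # Re-download MDV5A even if a cached copy exists, then load it on the CPU
    detector = load_detector('MDV5A', force_cpu=True, force_model_download=True)
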
@@ -402,7 +409,8 @@ def load_and_run_detector(model_file,
                           box_expansion=DEFAULT_BOX_EXPANSION,
                           image_size=None,
                           label_font_size=DEFAULT_LABEL_FONT_SIZE,
-                          augment=False
+                          augment=False,
+                          force_model_download=False
                           ):
     r"""
     Loads and runs a detector on target images, and visualizes the results.
@@ -424,6 +432,9 @@ def load_and_run_detector(model_file,
         label_font_size (float, optional): font size to use for displaying class names
             and confidence values in the rendered images
         augment (bool, optional): enable (implementation-specific) image augmentation
+        force_model_download (bool, optional): force downloading the model file if
+            a named model (e.g. "MDV5A") is supplied, even if the local file already
+            exists
     """
 
     if len(image_file_names) == 0:
@@ -431,7 +442,7 @@ def load_and_run_detector(model_file,
         return
 
     # Possibly automatically download the model
-    model_file = try_download_known_detector(model_file)
+    model_file = try_download_known_detector(model_file, force_download=force_model_download)
 
     print('GPU available: {}'.format(is_gpu_available(model_file)))
 
@@ -581,7 +592,7 @@ def download_model(model_name,force_download=False):
 
 Args:
     model_name (str): a known model string, e.g. "MDV5A"
-    force_download (bool, optional): whether download the model even if the local target
+    force_download (bool, optional): whether to download the model even if the local target
        file already exists
 """
 
@@ -597,17 +608,18 @@ def download_model(model_name,force_download=False):
        os.chmod(model_tempdir,0o777)
    except Exception:
        pass
-   if model_name not in downloadable_models:
+   if model_name.upper() not in downloadable_models:
        print('Unrecognized downloadable model {}'.format(model_name))
        return None
-   url = downloadable_models[model_name]
+   url = downloadable_models[model_name.upper()]
    destination_filename = os.path.join(model_tempdir,url.split('/')[-1])
    local_file = download_url(url, destination_filename=destination_filename, progress_updater=None,
                              force_download=force_download, verbose=True)
+   print('Model {} available at {}'.format(model_name,local_file))
    return local_file
 
 
-def try_download_known_detector(detector_file):
+def try_download_known_detector(detector_file,force_download=False):
    """
    Checks whether detector_file is really the name of a known model, in which case we will
    either read the actual filename from the corresponding environment variable or download
@@ -616,13 +628,15 @@ def try_download_known_detector(detector_file):
    Args:
        detector_file (str): a known model string (e.g. "MDV5A"), or any other string (in which
            case this function is a no-op)
+       force_download (bool, optional): whether to download the model even if the local target
+           file already exists
 
    Returns:
        str: the local filename to which the model was downloaded, or the same string that
            was passed in, if it's not recognized as a well-known model name
    """
 
-   if detector_file in downloadable_models:
+   if detector_file.upper() in downloadable_models:
        if detector_file in os.environ:
            fn = os.environ[detector_file]
            print('Reading MD location from environment variable {}: {}'.format(
@@ -630,7 +644,7 @@ def try_download_known_detector(detector_file):
            detector_file = fn
        else:
            print('Downloading model {}'.format(detector_file))
-           detector_file = download_model(detector_file)
+           detector_file = download_model(detector_file,force_download=force_download)
    return detector_file
 
 
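
With the .upper() calls, name lookup in downloadable_models becomes case-insensitive; a sketch:

    from megadetector.detection.run_detector import try_download_known_detector

    # 'mdv5a' and 'MDV5A' now resolve to the same downloadable_models entry;
    # note that the environment-variable check ('if detector_file in
    # os.environ') still uses the string exactly as supplied.
    fn = try_download_known_detector('mdv5a')
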
@@ -725,6 +739,12 @@ def main():
         .format(DETECTION_FILENAME_INSERT) + \
         'This option disables that behavior.'))
 
+parser.add_argument(
+    '--force_model_download',
+    action='store_true',
+    help=('If a named model (e.g. "MDV5A") is supplied, force a download of that model even if the ' +\
+          'local file already exists.'))
+
 if len(sys.argv[1:]) == 0:
     parser.print_help()
     parser.exit()
@@ -733,7 +753,8 @@ def main():
 
 # If the specified detector file is really the name of a known model, find
 # (and possibly download) that model
-args.detector_file = try_download_known_detector(args.detector_file)
+args.detector_file = try_download_known_detector(args.detector_file,
+                                                 force_download=args.force_model_download)
 
 assert os.path.exists(args.detector_file), 'detector file {} does not exist'.format(
     args.detector_file)
@@ -774,7 +795,9 @@ def main():
     crop_images=args.crop,
     image_size=args.image_size,
     label_font_size=args.label_font_size,
-    augment=args.augment)
+    augment=args.augment,
+    # Don't download the model *again*
+    force_model_download=False)
 
 if __name__ == '__main__':
     main()
@@ -510,7 +510,8 @@ def load_and_run_detector_batch(model_file,
                                 include_image_size=False,
                                 include_image_timestamp=False,
                                 include_exif_data=False,
-                                augment=False):
+                                augment=False,
+                                force_model_download=False):
     """
     Load a model file and run it on a list of images.
 
@@ -539,6 +540,9 @@ def load_and_run_detector_batch(model_file,
     include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
     include_exif_data (bool, optional): should we include EXIF data in the output for each image?
     augment (bool, optional): enable image augmentation
+    force_model_download (bool, optional): force downloading the model file if
+        a named model (e.g. "MDV5A") is supplied, even if the local file already
+        exists
 
 Returns:
     results: list of dicts; each dict represents detections on one image
@@ -599,7 +603,7 @@ def load_and_run_detector_batch(model_file,
 
 already_processed = set([i['file'] for i in results])
 
-model_file = try_download_known_detector(model_file)
+model_file = try_download_known_detector(model_file, force_download=force_model_download)
 
 print('GPU available: {}'.format(is_gpu_available(model_file)))
 
@@ -902,8 +906,7 @@ def write_results_to_file(results,
 # If the caller supplied the entire "info" struct
 else:
 
-    if detector_file is not None:
-
+    if detector_file is not None:
         print('Warning (write_results_to_file): info struct and detector file ' + \
               'supplied, ignoring detector file')
 
@@ -1145,8 +1148,21 @@ def main():
     type=str,
     default='overwrite',
     help='What should we do if the output file exists? overwrite/skip/error (default overwrite)'
-    )
-
+    )
+parser.add_argument(
+    '--force_model_download',
+    action='store_true',
+    help=('If a named model (e.g. "MDV5A") is supplied, force a download of that model even if the ' +\
+          'local file already exists.'))
+parser.add_argument(
+    '--previous_results_file',
+    type=str,
+    default=None,
+    help=('If supplied, this should point to a previous .json results file; any results in that ' +\
+          'file will be transferred to the output file without reprocessing those images. Useful ' +\
+          'for "updating" a set of results when you may have added new images to a folder you\'ve ' +\
+          'already processed. Only supported when using relative paths.'))
+
 if len(sys.argv[1:]) == 0:
     parser.print_help()
     parser.exit()
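
For example, to process only the images added to a folder since a previous run (paths hypothetical; the positional-argument order is an assumption, and per the assertions added below, --output_relative_filenames is required):

    python run_detector_batch.py MDV5A /data/camera_traps /data/results_new.json \
        --output_relative_filenames \
        --previous_results_file /data/results_old.json
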
@@ -1155,7 +1171,8 @@ def main():
 
 # If the specified detector file is really the name of a known model, find
 # (and possibly download) that model
-args.detector_file = try_download_known_detector(args.detector_file)
+args.detector_file = try_download_known_detector(args.detector_file,
+                                                 force_download=args.force_model_download)
 
 assert os.path.exists(args.detector_file), \
     'detector file {} does not exist'.format(args.detector_file)
@@ -1167,7 +1184,9 @@ def main():
 assert os.path.isdir(args.image_file), \
     f'Could not find folder {args.image_file}, must supply a folder when ' + \
     '--output_relative_filenames is set'
-
+if args.previous_results_file is not None:
+    assert os.path.isdir(args.image_file) and args.output_relative_filenames, \
+        "Can only process previous results when using relative paths"
 if os.path.exists(args.output_file):
     if args.overwrite_handling == 'overwrite':
         print('Warning: output file {} already exists and will be overwritten'.format(
@@ -1193,8 +1212,8 @@ def main():
 
 # Load the checkpoint if available
 #
-# Relative file names are only output at the end; all file paths in the checkpoint are
-# still absolute paths.
+# File paths in the checkpoint are always absolute paths; conversion to relative paths
+# happens below (if necessary).
 if args.resume_from_checkpoint is not None:
     if args.resume_from_checkpoint == 'auto':
         checkpoint_files = os.listdir(output_dir)
@@ -1225,7 +1244,7 @@ def main():
 else:
     results = []
 
-# Find the images to score; images can be a directory, may need to recurse
+# Find the images to process; images can be a directory, may need to recurse
 if os.path.isdir(args.image_file):
     image_file_names = path_utils.find_images(args.image_file, args.recursive)
     if len(image_file_names) > 0:
@@ -1240,7 +1259,7 @@ def main():
         return
 
 # A json list of image paths
-elif os.path.isfile(args.image_file) and  args.image_file.endswith('.json'):
+elif os.path.isfile(args.image_file) and args.image_file.endswith('.json'):
     with open(args.image_file) as f:
         image_file_names = json.load(f)
     print('Loaded {} image filenames from .json list file {}'.format(
@@ -1263,10 +1282,62 @@ def main():
     raise ValueError('image_file specified is not a directory, a json list, or an image file, '
                      '(or does not have recognizable extensions).')
 
+# At this point, regardless of how they were specified, [image_file_names] is a list of
+# absolute image paths.
 assert len(image_file_names) > 0, 'Specified image_file does not point to valid image files'
+
+# Convert to forward slashes to facilitate comparison with previous results
+image_file_names = [fn.replace('\\','/') for fn in image_file_names]
+
+# We can head off many problems related to incorrect command line formulation if we confirm
+# that one image exists before proceeding. The use of the first image for this test is
+# arbitrary.
 assert os.path.exists(image_file_names[0]), \
     'The first image to be processed does not exist at {}'.format(image_file_names[0])
 
+# Possibly load results from a previous pass
+previous_results = None
+
+if args.previous_results_file is not None:
+
+    assert os.path.isfile(args.previous_results_file), \
+        'Could not find previous results file {}'.format(args.previous_results_file)
+    with open(args.previous_results_file,'r') as f:
+        previous_results = json.load(f)
+
+    assert previous_results['detection_categories'] == run_detector.DEFAULT_DETECTOR_LABEL_MAP, \
+        "Can't merge previous results when those results use a different set of detection categories"
+
+    print('Loaded previous results for {} images from {}'.format(
+        len(previous_results['images']), args.previous_results_file))
+
+    # Convert previous result filenames to absolute paths if necessary
+    #
+    # We asserted above to make sure that we are using relative paths and processing a
+    # folder, but just to be super-clear...
+    assert os.path.isdir(args.image_file)
+
+    previous_image_files_set = set()
+    for im in previous_results['images']:
+        assert not os.path.isabs(im['file']), \
+            "When processing previous results, relative paths are required"
+        fn_abs = os.path.join(args.image_file,im['file']).replace('\\','/')
+        # Absolute paths are expected at the final output stage below
+        im['file'] = fn_abs
+        previous_image_files_set.add(fn_abs)
+
+    image_file_names_to_keep = []
+    for fn_abs in image_file_names:
+        if fn_abs not in previous_image_files_set:
+            image_file_names_to_keep.append(fn_abs)
+
+    print('Based on previous results file, processing {} of {} images'.format(
+        len(image_file_names_to_keep), len(image_file_names)))
+
+    image_file_names = image_file_names_to_keep
+
+# ...if we're handling previous results
+
 # Test that we can write to the output_file's dir if checkpointing requested
 if args.checkpoint_frequency != -1:
 
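
The forward-slash normalization matters because the skip logic is exact string matching against a set; a worked example of the canonicalization (values hypothetical):

    import os

    image_folder = 'c:/data/camera_traps'
    relative_file = 'site01\\IMG_0001.JPG'  # as stored in the previous results

    fn_abs = os.path.join(image_folder, relative_file).replace('\\', '/')
    # -> 'c:/data/camera_traps/site01/IMG_0001.JPG', which now matches the
    # normalized entries in image_file_names, so this image is skipped
    # rather than re-processed
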
@@ -1322,7 +1393,9 @@ def main():
     include_image_size=args.include_image_size,
     include_image_timestamp=args.include_image_timestamp,
     include_exif_data=args.include_exif_data,
-    augment=args.augment)
+    augment=args.augment,
+    # Don't download the model *again*
+    force_model_download=False)
 
 elapsed = time.time() - start_time
 images_per_second = len(results) / elapsed
@@ -1330,10 +1403,26 @@ def main():
     len(results),humanfriendly.format_timespan(elapsed),images_per_second))
 
 relative_path_base = None
+
+# We asserted above to make sure that if output_relative_filenames is set,
+# args.image_file is a folder, but we'll double-check for clarity.
 if args.output_relative_filenames:
+    assert os.path.isdir(args.image_file)
     relative_path_base = args.image_file
-write_results_to_file(results, args.output_file, relative_path_base=relative_path_base,
-                      detector_file=args.detector_file,include_max_conf=args.include_max_conf)
+
+# Merge results from a previous file if necessary
+if previous_results is not None:
+    previous_filenames_set = set([im['file'] for im in previous_results['images']])
+    new_filenames_set = set([im['file'] for im in results])
+    assert len(previous_filenames_set.intersection(new_filenames_set)) == 0, \
+        'Previous results handling error: redundant image filenames'
+    results.extend(previous_results['images'])
+
+write_results_to_file(results,
+                      args.output_file,
+                      relative_path_base=relative_path_base,
+                      detector_file=args.detector_file,
+                      include_max_conf=args.include_max_conf)
 
 if checkpoint_path and os.path.isfile(checkpoint_path):
     os.remove(checkpoint_path)
@@ -788,38 +788,35 @@ def run_inference_with_yolo_val(options):
 yolo_read_failures = []
 
 for line in yolo_console_output:
-    # Lines look like:
+
+    #
+    # Lines indicating read failures look like:
     #
     # For ultralytics val:
     #
-    # val: WARNING ⚠️ /a/b/c/d.jpg: ignoring corrupt image/label: [Errno 13] Permission denied: '/a/b/c/d.jpg'
     # line = "val: WARNING ⚠️ /a/b/c/d.jpg: ignoring corrupt image/label: [Errno 13] Permission denied: '/a/b/c/d.jpg'"
     #
     # For yolov5 val.py:
     #
-    # test: WARNING: a/b/c/d.jpg: ignoring corrupt image/label: cannot identify image file '/a/b/c/d.jpg'
     # line = "test: WARNING: a/b/c/d.jpg: ignoring corrupt image/label: cannot identify image file '/a/b/c/d.jpg'"
-    if 'cannot identify image file' in line:
-        tokens = line.split('cannot identify image file')
-        image_name = tokens[-1].strip()
-        assert image_name[0] == "'" and image_name [-1] == "'"
-        image_name = image_name[1:-1]
-        yolo_read_failures.append(image_name)
-    elif 'ignoring corrupt image/label' in line:
-        assert 'WARNING' in line
-        if '⚠️' in line:
-            assert line.startswith('val'), \
-                'Unrecognized line in YOLO output: {}'.format(line)
-            tokens = line.split('ignoring corrupt image/label')
-            image_name = tokens[0].split('⚠️')[-1].strip()
-        else:
-            assert line.startswith('test'), \
-                'Unrecognized line in YOLO output: {}'.format(line)
-            tokens = line.split('ignoring corrupt image/label')
-            image_name = tokens[0].split('WARNING:')[-1].strip()
-        assert image_name.endswith(':')
-        image_name = image_name[0:-1]
+    #
+    # In both cases, when we are using symlinks, the first filename is the symlink name, the
+    # second filename is the target, e.g.:
+    #
+    # line = "test: WARNING: /tmp/md_to_yolo/md_to_yolo_xyz/symlinks/xyz/0000000004.jpg: ignoring corrupt image/label: cannot identify image file '/tmp/md-tests/md-test-images/corrupt-images/real-file.jpg'"
+    #
+    line = line.replace('⚠️',':')
+    if 'ignoring corrupt image/label' in line:
+
+        tokens = line.split('ignoring corrupt image/label')
+        assert len(tokens) == 2
+        tokens = tokens[0].split(':',maxsplit=3)
+        assert len(tokens) == 4
+        assert 'warning' in tokens[1].lower()
+        image_name = tokens[2].strip()
         yolo_read_failures.append(image_name)
+
+# ...for each line in the console output
 
 # image_file = yolo_read_failures[0]
 for image_file in yolo_read_failures:
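
The rewritten parser can be exercised standalone on a yolov5-style line like the samples in the comment above (same tokenization as the new code; the paths are placeholders):

    line = ("test: WARNING: /tmp/symlinks/0000000004.jpg: ignoring corrupt "
            "image/label: cannot identify image file '/tmp/images/real.jpg'")

    line = line.replace('⚠️', ':')  # normalizes the ultralytics variant
    if 'ignoring corrupt image/label' in line:
        tokens = line.split('ignoring corrupt image/label')
        assert len(tokens) == 2
        tokens = tokens[0].split(':', maxsplit=3)
        # tokens == ['test', ' WARNING', ' /tmp/symlinks/0000000004.jpg', ' ']
        assert 'warning' in tokens[1].lower()
        image_name = tokens[2].strip()  # the symlink name, not the target
        print(image_name)  # /tmp/symlinks/0000000004.jpg
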