megadetector-5.0.21-py3-none-any.whl → megadetector-5.0.22-py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (36):
  1. megadetector/data_management/cct_json_utils.py +143 -7
  2. megadetector/data_management/cct_to_md.py +12 -5
  3. megadetector/data_management/databases/integrity_check_json_db.py +83 -77
  4. megadetector/data_management/importers/raic_csv_to_md_results.py +416 -0
  5. megadetector/data_management/importers/zamba_results_to_md_results.py +1 -2
  6. megadetector/data_management/lila/create_lila_test_set.py +25 -11
  7. megadetector/data_management/lila/download_lila_subset.py +9 -2
  8. megadetector/data_management/lila/generate_lila_per_image_labels.py +3 -2
  9. megadetector/data_management/lila/test_lila_metadata_urls.py +5 -1
  10. megadetector/data_management/read_exif.py +10 -14
  11. megadetector/data_management/rename_images.py +1 -1
  12. megadetector/detection/process_video.py +14 -3
  13. megadetector/detection/pytorch_detector.py +15 -3
  14. megadetector/detection/run_detector.py +4 -3
  15. megadetector/detection/run_inference_with_yolov5_val.py +121 -13
  16. megadetector/detection/video_utils.py +21 -10
  17. megadetector/postprocessing/classification_postprocessing.py +1 -1
  18. megadetector/postprocessing/compare_batch_results.py +931 -142
  19. megadetector/postprocessing/detector_calibration.py +243 -45
  20. megadetector/postprocessing/md_to_coco.py +85 -20
  21. megadetector/postprocessing/postprocess_batch_results.py +0 -1
  22. megadetector/postprocessing/validate_batch_results.py +65 -15
  23. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -12
  24. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  25. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -1
  26. megadetector/utils/ct_utils.py +64 -2
  27. megadetector/utils/md_tests.py +1 -1
  28. megadetector/utils/path_utils.py +14 -7
  29. megadetector/utils/process_utils.py +9 -3
  30. megadetector/utils/write_html_image_list.py +5 -1
  31. megadetector/visualization/visualization_utils.py +211 -87
  32. {megadetector-5.0.21.dist-info → megadetector-5.0.22.dist-info}/LICENSE +0 -0
  33. {megadetector-5.0.21.dist-info → megadetector-5.0.22.dist-info}/METADATA +143 -142
  34. {megadetector-5.0.21.dist-info → megadetector-5.0.22.dist-info}/RECORD +36 -35
  35. {megadetector-5.0.21.dist-info → megadetector-5.0.22.dist-info}/WHEEL +1 -1
  36. {megadetector-5.0.21.dist-info → megadetector-5.0.22.dist-info}/top_level.txt +0 -0
@@ -665,7 +665,7 @@ def process_video_folder(options):
665
665
  recursive=options.recursive,
666
666
  overwrite=(not options.reuse_frames_if_available),
667
667
  n_threads=options.n_cores,
668
- every_n_frames=options.frame_sample,
668
+ every_n_frames=every_n_frames_param,
669
669
  verbose=options.verbose,
670
670
  quality=options.quality,
671
671
  max_width=options.max_width,
@@ -702,11 +702,15 @@ def process_video_folder(options):
702
702
 
703
703
  if options.reuse_results_if_available and \
704
704
  os.path.isfile(frames_json):
705
+
705
706
  print('Bypassing inference, loading results from {}'.format(frames_json))
706
707
  with open(frames_json,'r') as f:
707
708
  results = json.load(f)
709
+
708
710
  else:
711
+
709
712
  print('Running MegaDetector')
713
+
710
714
  results = run_detector_batch.load_and_run_detector_batch(
711
715
  options.model_file,
712
716
  image_file_names,
@@ -724,6 +728,8 @@ def process_video_folder(options):
724
728
  frames_json,
725
729
  relative_path_base=frame_output_folder,
726
730
  detector_file=options.model_file)
731
+
732
+ # ...if we're re-using existing results / running MD
727
733
 
728
734
  # ...if we're running MD on in-memory frames vs. extracting frames to disk
729
735
 
@@ -917,9 +923,14 @@ if False:
917
923
  #%% Process a folder of videos
918
924
 
919
925
  model_file = 'MDV5A'
920
- input_dir = r'g:\temp\test-videos'
926
+ # input_dir = r'g:\temp\test-videos'
921
927
  # input_dir = r'G:\temp\md-test-package\md-test-images\video-samples'
928
+ input_dir = os.path.expanduser('~/AppData/Local/Temp/md-tests/md-test-images/video-samples')
929
+ assert os.path.isdir(input_dir)
930
+
922
931
  output_base = r'g:\temp\video_test'
932
+ os.makedirs(output_base,exist_ok=True)
933
+
923
934
  frame_folder = os.path.join(output_base,'frames')
924
935
  rendering_folder = os.path.join(output_base,'rendered-frames')
925
936
  output_json_file = os.path.join(output_base,'video-test.json')
@@ -949,7 +960,7 @@ if False:
949
960
  options.force_extracted_frame_folder_deletion = False
950
961
  options.force_rendered_frame_folder_deletion = False
951
962
  options.fourcc = 'mp4v'
952
- options.force_on_disk_frame_extraction = True
963
+ options.force_on_disk_frame_extraction = False
953
964
  # options.rendering_confidence_threshold = 0.15
954
965
 
955
966
  cmd = options_to_command(options); print(cmd)
@@ -162,10 +162,22 @@ class PTDetector:
162
162
  use_map_location = (device != 'mps')
163
163
 
164
164
  if use_map_location:
165
- checkpoint = torch.load(model_pt_path, map_location=device)
165
+ try:
166
+ checkpoint = torch.load(model_pt_path, map_location=device, weights_only=False)
167
+ except Exception as e:
168
+ if "'weights_only' is an invalid keyword" in str(e):
169
+ checkpoint = torch.load(model_pt_path, map_location=device)
170
+ else:
171
+ raise
166
172
  else:
167
- checkpoint = torch.load(model_pt_path)
168
-
173
+ try:
174
+ checkpoint = torch.load(model_pt_path, weights_only=False)
175
+ except Exception as e:
176
+ if "'weights_only' is an invalid keyword" in str(e):
177
+ checkpoint = torch.load(model_pt_path)
178
+ else:
179
+ raise
180
+
169
181
  # Compatibility fix that allows us to load older YOLOv5 models with
170
182
  # newer versions of YOLOv5/PT
171
183
  for m in checkpoint['model'].modules():
@@ -365,7 +365,8 @@ def load_detector(model_file, force_cpu=False, force_model_download=False):
365
365
  Loads a TF or PT detector, depending on the extension of model_file.
366
366
 
367
367
  Args:
368
- model_file (str): model filename, e.g. c:/x/z/md_v5a.0.0.pt
368
+ model_file (str): model filename (e.g. c:/x/z/md_v5a.0.0.pt) or known model
369
+ name (e.g. "MDV5A")
369
370
  force_cpu (bool, optional): force the model to run on the CPU even if a GPU
370
371
  is available
371
372
  force_model_download (bool, optional): force downloading the model file if
@@ -380,6 +381,8 @@ def load_detector(model_file, force_cpu=False, force_model_download=False):
380
381
  model_file = try_download_known_detector(model_file,
381
382
  force_download=force_model_download)
382
383
 
384
+ print('GPU available: {}'.format(is_gpu_available(model_file)))
385
+
383
386
  start_time = time.time()
384
387
  if model_file.endswith('.pb'):
385
388
  from megadetector.detection.tf_detector import TFDetector
@@ -444,8 +447,6 @@ def load_and_run_detector(model_file,
444
447
  # Possibly automatically download the model
445
448
  model_file = try_download_known_detector(model_file, force_download=force_model_download)
446
449
 
447
- print('GPU available: {}'.format(is_gpu_available(model_file)))
448
-
449
450
  detector = load_detector(model_file)
450
451
 
451
452
  detection_results = []
@@ -121,7 +121,7 @@ class YoloInferenceOptions:
121
121
  self.device_string = '0'
122
122
 
123
123
  #: Should we enable test-time augmentation?
124
- self.augment = True
124
+ self.augment = False
125
125
 
126
126
  #: Should we enable half-precision inference?
127
127
  self.half_precision_enabled = None
@@ -183,6 +183,11 @@ class YoloInferenceOptions:
183
183
  #: Maximum number of images to run in a single chunk
184
184
  self.checkpoint_frequency = None
185
185
 
186
+ #: By default, if we're creating symlinks to images, we append a unique job ID to the
187
+ #: symlink folder. If the caller is 100% sure that the symlink folder can be re-used
188
+ #: across calls, this can be set to False.
189
+ self.append_job_id_to_symlink_folder = True
190
+
186
191
  # ...def __init__()
187
192
 
188
193
  # ...YoloInferenceOptions()
@@ -228,7 +233,10 @@ def run_inference_with_yolo_val(options):
228
233
 
229
234
  for k in options.__dict__.keys():
230
235
  if k not in default_options.__dict__:
231
- print('Warning: unexpected variable {} in options object'.format(k))
236
+ # Print warnings about unexpected variables, except for things like
237
+ # "no_append_job_id_to_symlink_folder", which just negate existing objects
238
+ if not k.startswith('no_'):
239
+ print('Warning: unexpected variable {} in options object'.format(k))
232
240
 
233
241
  if options.model_type == 'yolov8':
234
242
 
@@ -318,8 +326,12 @@ def run_inference_with_yolo_val(options):
318
326
  yolo_results_folder = os.path.join(temporary_folder,'yolo_results')
319
327
  yolo_folder_is_temp_folder = True
320
328
 
321
- # Attach a GUID to the symlink folder, regardless of whether we created it
322
- symlink_folder_inner = os.path.join(symlink_folder,job_id)
329
+ if options.append_job_id_to_symlink_folder:
330
+ # Attach a GUID to the symlink folder, regardless of whether we created it
331
+ symlink_folder_inner = os.path.join(symlink_folder,job_id)
332
+ else:
333
+ print('Re-using existing symlink folder {}'.format(symlink_folder))
334
+ symlink_folder_inner = symlink_folder
323
335
 
324
336
  os.makedirs(symlink_folder_inner,exist_ok=True)
325
337
  os.makedirs(yolo_results_folder,exist_ok=True)
@@ -771,7 +783,7 @@ def run_inference_with_yolo_val(options):
771
783
 
772
784
  if options.save_yolo_debug_output:
773
785
 
774
- with open(os.path.join(yolo_results_folder,'yolo_console_output.txt'),'w') as f:
786
+ with open(os.path.join(yolo_results_folder,'yolo_console_output.txt'),'w',encoding='utf-8') as f:
775
787
  for s in yolo_console_output:
776
788
  f.write(s + '\n')
777
789
  with open(os.path.join(yolo_results_folder,'image_id_to_file.json'),'w') as f:
@@ -805,17 +817,37 @@ def run_inference_with_yolo_val(options):
805
817
  #
806
818
  # line = "test: WARNING: /tmp/md_to_yolo/md_to_yolo_xyz/symlinks/xyz/0000000004.jpg: ignoring corrupt image/label: cannot identify image file '/tmp/md-tests/md-test-images/corrupt-images/real-file.jpg'"
807
819
  #
820
+ # Windows example:
821
+ #
822
+ # line = "test: WARNING: g:\\temp\\md-test-images\\corrupt-images\\irfanview-can-still-read-me-caltech_camera_traps_5a0e37cc-23d2-11e8-a6a3-ec086b02610b.jpg: ignoring corrupt image/label: cannot identify image file 'g:\\\\temp\\\\md-test-images\\\\corrupt-images\\\\irfanview-can-still-read-me-caltech_camera_traps_5a0e37cc-23d2-11e8-a6a3-ec086b02610b.jpg'"
823
+ #
824
+
808
825
  line = line.replace('⚠️',':')
809
826
  if 'ignoring corrupt image/label' in line:
810
827
 
811
- tokens = line.split('ignoring corrupt image/label')
812
- assert len(tokens) == 2
813
- tokens = tokens[0].split(':',maxsplit=3)
814
- assert len(tokens) == 4
828
+ line_tokens = line.split('ignoring corrupt image/label')
829
+ assert len(line_tokens) == 2
830
+
831
+ tokens = line_tokens[0].split(':') # ,maxsplit=3)
832
+ tokens = [s.strip() for s in tokens]
833
+
834
+ # ['test', ' WARNING', ' a/b/c/d.jpg', ' ']
835
+ assert len(tokens[-1]) == 0
836
+ tokens = tokens[:-1]
815
837
  assert 'warning' in tokens[1].lower()
816
- image_name = tokens[2].strip()
838
+
839
+ if len(tokens) == 3:
840
+ image_name = tokens[2].strip()
841
+ else:
842
+ # Windows filenames have one extra colon
843
+ assert len(tokens) == 4
844
+ assert len(tokens[2]) == 1
845
+ image_name = ':'.join(tokens[2:4])
846
+
817
847
  yolo_read_failures.append(image_name)
818
848
 
849
+ # ...if this line indicated a corrupt image
850
+
819
851
  # ...for each line in the console output
820
852
 
821
853
  # image_file = yolo_read_failures[0]
@@ -960,10 +992,12 @@ def main():
960
992
  '--checkpoint_frequency', default=options.checkpoint_frequency, type=int,
961
993
  help='break the job into chunks with no more than this many images (default {})'.format(
962
994
  options.checkpoint_frequency))
963
-
995
+ parser.add_argument(
996
+ '--no_append_job_id_to_symlink_folder', action='store_true',
997
+ help="don't append a unique job ID to the symlink folder name")
964
998
  parser.add_argument(
965
999
  '--nonrecursive', action='store_true',
966
- help='Disable recursive folder processing')
1000
+ help='disable recursive folder processing')
967
1001
 
968
1002
  parser.add_argument(
969
1003
  '--preview_yolo_command_only', action='store_true',
@@ -1014,6 +1048,7 @@ def main():
1014
1048
  options.input_folder = None
1015
1049
 
1016
1050
  options.recursive = (not options.nonrecursive)
1051
+ options.append_job_id_to_symlink_folder = (not options.no_append_job_id_to_symlink_folder)
1017
1052
  options.remove_symlink_folder = (not options.no_remove_symlink_folder)
1018
1053
  options.remove_yolo_results_folder = (not options.no_remove_yolo_results_folder)
1019
1054
  options.use_symlinks = (not options.no_use_symlinks)
@@ -1038,6 +1073,80 @@ if __name__ == '__main__':
1038
1073
 
1039
1074
  if False:
1040
1075
 
1076
+
1077
+ #%% Debugging
1078
+
1079
+ input_folder = r'g:\temp\md-test-images'
1080
+ model_filename = 'MDV5A'
1081
+ output_folder = r'g:\temp\yolo-test-out'
1082
+ yolo_working_folder = r'c:\git\yolov5-md'
1083
+ dataset_file = r"g:\temp\md-test-images\dataset.yaml"
1084
+ job_name = 'yolo-debug'
1085
+ symlink_folder = os.path.join(output_folder,'symlinks')
1086
+ yolo_results_folder = os.path.join(output_folder,'yolo_results')
1087
+ model_name = os.path.splitext(os.path.basename(model_filename))[0]
1088
+
1089
+ output_file = os.path.join(output_folder,'{}_{}-md_format.json'.format(
1090
+ job_name,model_name))
1091
+
1092
+ options = YoloInferenceOptions()
1093
+
1094
+ options.yolo_working_folder = yolo_working_folder
1095
+ options.input_folder = input_folder
1096
+ options.output_file = output_file
1097
+
1098
+ options.yolo_category_id_to_name = dataset_file
1099
+ options.augment = False
1100
+ options.conf_thres = '0.001'
1101
+ options.batch_size = 1
1102
+ options.device_string = '0'
1103
+ options.unique_id_strategy = 'auto'
1104
+ options.overwrite_handling = 'overwrite'
1105
+
1106
+ if options.augment:
1107
+ options.image_size = round(1280 * 1.3)
1108
+ else:
1109
+ options.image_size = 1280
1110
+
1111
+ options.model_filename = model_filename
1112
+
1113
+ options.yolo_results_folder = yolo_results_folder # os.path.join(output_folder + 'yolo_results')
1114
+ options.symlink_folder = symlink_folder # os.path.join(output_folder,'symlinks')
1115
+ options.use_symlinks = False
1116
+
1117
+ options.remove_symlink_folder = True
1118
+ options.remove_yolo_results_folder = True
1119
+
1120
+ options.checkpoint_frequency = None
1121
+
1122
+ cmd = f'python run_inference_with_yolov5_val.py {model_filename} {input_folder} ' + \
1123
+ f'{output_file} --yolo_working_folder {yolo_working_folder} ' + \
1124
+ f' --image_size {options.image_size} --conf_thres {options.conf_thres} ' + \
1125
+ f' --batch_size {options.batch_size} ' + \
1126
+ f' --symlink_folder {options.symlink_folder} --yolo_results_folder {options.yolo_results_folder} ' + \
1127
+ f' --yolo_dataset_file {options.yolo_category_id_to_name} ' + \
1128
+ f' --unique_id_strategy {options.unique_id_strategy} --overwrite_handling {options.overwrite_handling}'
1129
+
1130
+ if not options.remove_symlink_folder:
1131
+ cmd += ' --no_remove_symlink_folder'
1132
+ if not options.remove_yolo_results_folder:
1133
+ cmd += ' --no_remove_yolo_results_folder'
1134
+ if options.checkpoint_frequency is not None:
1135
+ cmd += f' --checkpoint_frequency {options.checkpoint_frequency}'
1136
+ if not options.use_symlinks:
1137
+ cmd += ' --no_use_symlinks'
1138
+ if not options.augment:
1139
+ cmd += ' --augment_enabled 0'
1140
+
1141
+ print(cmd)
1142
+ execute_in_python = False
1143
+ if execute_in_python:
1144
+ run_inference_with_yolo_val(options)
1145
+ else:
1146
+ import clipboard; clipboard.copy(cmd)
1147
+
1148
+
1149
+
1041
1150
  #%% Run inference on a folder
1042
1151
 
1043
1152
  input_folder = r'g:\temp\tegu-val-mini'.replace('\\','/')
@@ -1129,4 +1238,3 @@ if False:
1129
1238
  run_inference_with_yolo_val(options)
1130
1239
  else:
1131
1240
  import clipboard; clipboard.copy(cmd)
1132
-
@@ -30,7 +30,7 @@ default_fourcc = 'h264'
30
30
 
31
31
  #%% Path utilities
32
32
 
33
- VIDEO_EXTENSIONS = ('.mp4','.avi','.mpeg','.mpg')
33
+ VIDEO_EXTENSIONS = ('.mp4','.avi','.mpeg','.mpg','.mov','.mkv')
34
34
 
35
35
  def is_video_file(s,video_extensions=VIDEO_EXTENSIONS):
36
36
  """
@@ -229,7 +229,7 @@ def run_callback_on_frames(input_video_file,
229
229
  frame_callback (function): callback to run on frames, should take an np.array and a string and
230
230
  return a single value. callback should expect PIL-formatted (RGB) images.
231
231
  every_n_frames (float, optional): sample every Nth frame starting from the first frame;
232
- if this is None or 1, every frame is processed. If this is a negative value, that's
232
+ if this is None or 1, every frame is processed. If this is a negative value, it's
233
233
  interpreted as a sampling rate in seconds, which is rounded to the nearest frame sampling
234
234
  rate. Mutually exclusive with frames_to_process.
235
235
  verbose (bool, optional): enable additional debug console output
@@ -264,12 +264,12 @@ def run_callback_on_frames(input_video_file,
264
264
  frame_filenames = []
265
265
  results = []
266
266
 
267
- if every_n_frames is not None and every_n_frames < 0:
267
+ if (every_n_frames is not None) and (every_n_frames < 0):
268
268
  every_n_seconds = abs(every_n_frames)
269
269
  every_n_frames = int(every_n_seconds * frame_rate)
270
270
  if verbose:
271
271
  print('Interpreting a time sampling rate of {} hz as a frame interval of {}'.format(
272
- every_n_seconds,every_n_frames))
272
+ every_n_seconds,every_n_frames))
273
273
 
274
274
  # frame_number = 0
275
275
  for frame_number in range(0,n_frames):
@@ -337,7 +337,9 @@ def run_callback_on_frames_for_folder(input_video_folder,
337
337
  frame_callback (function): callback to run on frames, should take an np.array and a string and
338
338
  return a single value. callback should expect PIL-formatted (RGB) images.
339
339
  every_n_frames (int, optional): sample every Nth frame starting from the first frame;
340
- if this is None or 1, every frame is processed.
340
+ if this is None or 1, every frame is processed. If this is a negative value, it's
341
+ interpreted as a sampling rate in seconds, which is rounded to the nearest frame
342
+ sampling rate.
341
343
  verbose (bool, optional): enable additional debug console output
342
344
  allow_empty_videos (bool, optional): Just print a warning if a video appears to have no
343
345
  frames (by default, this is an error).
@@ -418,8 +420,9 @@ def video_to_frames(input_video_file,
418
420
  output_folder (str): folder to put frame images in
419
421
  overwrite (bool, optional): whether to overwrite existing frame images
420
422
  every_n_frames (int, optional): sample every Nth frame starting from the first frame;
421
- if this is None or 1, every frame is extracted. Mutually exclusive with
422
- frames_to_extract.
423
+ if this is None or 1, every frame is extracted. If this is a negative value, it's
424
+ interpreted as a sampling rate in seconds, which is rounded to the nearest frame sampling
425
+ rate. Mutually exclusive with frames_to_extract.
423
426
  verbose (bool, optional): enable additional debug console output
424
427
  quality (int, optional): JPEG quality for frame output, from 0-100. Defaults
425
428
  to the opencv default (typically 95).
@@ -452,6 +455,13 @@ def video_to_frames(input_video_file,
452
455
  n_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
453
456
  Fs = vidcap.get(cv2.CAP_PROP_FPS)
454
457
 
458
+ if (every_n_frames is not None) and (every_n_frames < 0):
459
+ every_n_seconds = abs(every_n_frames)
460
+ every_n_frames = int(every_n_seconds * Fs)
461
+ if verbose:
462
+ print('Interpreting a time sampling rate of {} hz as a frame interval of {}'.format(
463
+ every_n_seconds,every_n_frames))
464
+
455
465
  # If we're not over-writing, check whether all frame images already exist
456
466
  if overwrite == False:
457
467
 
@@ -569,7 +579,7 @@ def video_to_frames(input_video_file,
569
579
  break
570
580
 
571
581
  if every_n_frames is not None:
572
- if frame_number % every_n_frames != 0:
582
+ if (frame_number % every_n_frames) != 0:
573
583
  continue
574
584
 
575
585
  if frames_to_extract is not None:
@@ -703,8 +713,9 @@ def video_folder_to_frames(input_folder,
703
713
  n_threads (int, optional): number of concurrent workers to use; set to <= 1 to disable
704
714
  parallelism
705
715
  every_n_frames (int, optional): sample every Nth frame starting from the first frame;
706
- if this is None or 1, every frame is extracted. Mutually exclusive with
707
- frames_to_extract.
716
+ if this is None or 1, every frame is extracted. If this is a negative value, it's
717
+ interpreted as a sampling rate in seconds, which is rounded to the nearest frame sampling
718
+ rate. Mutually exclusive with frames_to_extract.
708
719
  verbose (bool, optional): enable additional debug console output
709
720
  parallelization_uses_threads (bool, optional): whether to use threads (True) or
710
721
  processes (False) for parallelization; ignored if n_threads <= 1
@@ -713,4 +713,4 @@ def smooth_classification_results_sequence_level(md_results,
713
713
 
714
714
  return md_results
715
715
 
716
- # ...smooth_classification_results_sequence_level(...)
716
+ # ...smooth_classification_results_sequence_level(...)