megadetector 5.0.29-py3-none-any.whl → 10.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (95)
  1. megadetector/classification/efficientnet/model.py +8 -8
  2. megadetector/classification/efficientnet/utils.py +6 -5
  3. megadetector/classification/prepare_classification_script_mc.py +3 -3
  4. megadetector/data_management/annotations/annotation_constants.py +0 -1
  5. megadetector/data_management/camtrap_dp_to_coco.py +34 -1
  6. megadetector/data_management/cct_json_utils.py +2 -2
  7. megadetector/data_management/coco_to_yolo.py +22 -5
  8. megadetector/data_management/databases/add_width_and_height_to_db.py +85 -12
  9. megadetector/data_management/databases/combine_coco_camera_traps_files.py +2 -2
  10. megadetector/data_management/databases/integrity_check_json_db.py +29 -15
  11. megadetector/data_management/generate_crops_from_cct.py +50 -1
  12. megadetector/data_management/labelme_to_coco.py +4 -2
  13. megadetector/data_management/labelme_to_yolo.py +82 -2
  14. megadetector/data_management/lila/generate_lila_per_image_labels.py +276 -18
  15. megadetector/data_management/lila/get_lila_annotation_counts.py +5 -3
  16. megadetector/data_management/lila/lila_common.py +3 -0
  17. megadetector/data_management/lila/test_lila_metadata_urls.py +15 -5
  18. megadetector/data_management/mewc_to_md.py +5 -0
  19. megadetector/data_management/ocr_tools.py +4 -3
  20. megadetector/data_management/read_exif.py +20 -5
  21. megadetector/data_management/remap_coco_categories.py +66 -4
  22. megadetector/data_management/remove_exif.py +50 -1
  23. megadetector/data_management/rename_images.py +3 -3
  24. megadetector/data_management/resize_coco_dataset.py +563 -95
  25. megadetector/data_management/yolo_output_to_md_output.py +131 -2
  26. megadetector/data_management/yolo_to_coco.py +140 -5
  27. megadetector/detection/change_detection.py +4 -3
  28. megadetector/detection/pytorch_detector.py +60 -22
  29. megadetector/detection/run_detector.py +225 -25
  30. megadetector/detection/run_detector_batch.py +42 -16
  31. megadetector/detection/run_inference_with_yolov5_val.py +12 -2
  32. megadetector/detection/run_tiled_inference.py +1 -0
  33. megadetector/detection/video_utils.py +53 -24
  34. megadetector/postprocessing/add_max_conf.py +4 -0
  35. megadetector/postprocessing/categorize_detections_by_size.py +1 -1
  36. megadetector/postprocessing/classification_postprocessing.py +55 -20
  37. megadetector/postprocessing/combine_batch_outputs.py +3 -2
  38. megadetector/postprocessing/compare_batch_results.py +64 -10
  39. megadetector/postprocessing/convert_output_format.py +12 -8
  40. megadetector/postprocessing/create_crop_folder.py +137 -10
  41. megadetector/postprocessing/load_api_results.py +26 -8
  42. megadetector/postprocessing/md_to_coco.py +4 -4
  43. megadetector/postprocessing/md_to_labelme.py +18 -7
  44. megadetector/postprocessing/merge_detections.py +5 -0
  45. megadetector/postprocessing/postprocess_batch_results.py +6 -3
  46. megadetector/postprocessing/remap_detection_categories.py +55 -2
  47. megadetector/postprocessing/render_detection_confusion_matrix.py +9 -6
  48. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  49. megadetector/taxonomy_mapping/map_new_lila_datasets.py +3 -4
  50. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +40 -19
  51. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +1 -1
  52. megadetector/taxonomy_mapping/species_lookup.py +123 -41
  53. megadetector/utils/ct_utils.py +133 -113
  54. megadetector/utils/md_tests.py +93 -13
  55. megadetector/utils/path_utils.py +137 -107
  56. megadetector/utils/split_locations_into_train_val.py +2 -2
  57. megadetector/utils/string_utils.py +7 -7
  58. megadetector/utils/url_utils.py +81 -58
  59. megadetector/utils/wi_utils.py +46 -17
  60. megadetector/visualization/plot_utils.py +13 -9
  61. megadetector/visualization/render_images_with_thumbnails.py +2 -1
  62. megadetector/visualization/visualization_utils.py +94 -46
  63. megadetector/visualization/visualize_db.py +36 -9
  64. megadetector/visualization/visualize_detector_output.py +4 -4
  65. {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/METADATA +135 -135
  66. megadetector-10.0.0.dist-info/RECORD +139 -0
  67. {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  68. {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
  69. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  70. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  71. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -438
  72. megadetector/api/batch_processing/api_core/server.py +0 -294
  73. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  74. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  75. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  76. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  77. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  78. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  79. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  80. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  81. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  82. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  83. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  84. megadetector/api/synchronous/__init__.py +0 -0
  85. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  86. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  87. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  88. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  89. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  90. megadetector/api/synchronous/api_core/tests/load_test.py +0 -109
  91. megadetector/utils/azure_utils.py +0 -178
  92. megadetector/utils/sas_blob_utils.py +0 -513
  93. megadetector-5.0.29.dist-info/RECORD +0 -163
  94. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  95. {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/WHEEL +0 -0
megadetector/detection/video_utils.py

@@ -431,7 +431,9 @@ def video_to_frames(input_video_file,
         frames_to_extract (list of int, optional): extract this specific set of frames;
             mutually exclusive with every_n_frames. If all values are beyond the length
             of the video, no frames are extracted. Can also be a single int, specifying
-            a single frame number.
+            a single frame number. In the special case where frames_to_extract
+            is [], this function still reads video frame rates and verifies that videos
+            are readable, but no frames are extracted.
         allow_empty_videos (bool, optional): Just print a warning if a video appears to have no
             frames (by default, this is an error).

@@ -450,7 +452,10 @@ def video_to_frames(input_video_file,
     if (frames_to_extract is not None) and (every_n_frames is not None):
         raise ValueError('frames_to_extract and every_n_frames are mutually exclusive')

-    os.makedirs(output_folder,exist_ok=True)
+    bypass_extraction = ((frames_to_extract is not None) and (len(frames_to_extract) == 0))
+
+    if not bypass_extraction:
+        os.makedirs(output_folder,exist_ok=True)

     vidcap = cv2.VideoCapture(input_video_file)
     n_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

@@ -464,7 +469,7 @@ def video_to_frames(input_video_file,
                 every_n_seconds,every_n_frames))

     # If we're not over-writing, check whether all frame images already exist
-    if not overwrite:
+    if (not overwrite) and (not bypass_extraction):

         missing_frame_number = None
         missing_frame_filename = None

@@ -514,7 +519,6 @@ def video_to_frames(input_video_file,

         # When specific frames are requested, if anything is missing, reprocess the video
         if (frames_to_extract is not None) and (missing_frame_number is not None):
-
             pass

         # If no frames are missing, or only frames very close to the end of the video are "missing",

@@ -572,6 +576,10 @@ def video_to_frames(input_video_file,
     # for frame_number in tqdm(range(0,n_frames)):
     for frame_number in range(0,n_frames):

+        # Special handling for the case where we're just doing dummy reads
+        if bypass_extraction:
+            break
+
         success,image = vidcap.read()
         if not success:
             assert image is None

@@ -643,9 +651,9 @@ def video_to_frames(input_video_file,

     if len(frame_filenames) == 0:
         if allow_empty_videos:
-            print('Warning: found no frames in file {}'.format(input_video_file))
+            print('Warning: no frames extracted from file {}'.format(input_video_file))
         else:
-            raise Exception('Error: found no frames in file {}'.format(input_video_file))
+            raise Exception('Error: no frames extracted from file {}'.format(input_video_file))

     if verbose:
         print('\nExtracted {} of {} frames for {}'.format(
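
Taken together, the video_to_frames hunks above add a probe-only mode: passing frames_to_extract=[] verifies that a video is readable and reads its frame rate without extracting any frames. A minimal sketch of how a caller might exercise this; the positional argument order and the (frame_filenames, fs) return layout are assumptions based on the folder-level helper later in this diff, and the paths are hypothetical:

    from megadetector.detection.video_utils import video_to_frames

    # Probe-only mode: no frames are written. allow_empty_videos=True avoids
    # the "no frames extracted" error path shown above, since zero frames is
    # the expected outcome here.
    frame_filenames, fs = video_to_frames('camera01/clip.mp4',
                                          'unused_output_folder',
                                          frames_to_extract=[],
                                          allow_empty_videos=True)
    assert len(frame_filenames) == 0
    print('Frame rate: {}'.format(fs))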
@@ -726,7 +734,9 @@ def video_folder_to_frames(input_folder,
         frames_to_extract (list of int, optional): extract this specific set of frames from
             each video; mutually exclusive with every_n_frames. If all values are beyond
             the length of a video, no frames are extracted. Can also be a single int,
-            specifying a single frame number.
+            specifying a single frame number. In the special case where frames_to_extract
+            is [], this function still reads video frame rates and verifies that videos
+            are readable, but no frames are extracted.
         allow_empty_videos (bool, optional): Just print a warning if a video appears to have no
             frames (by default, this is an error).

@@ -762,9 +772,16 @@ def video_folder_to_frames(input_folder,
         for input_fn_relative in tqdm(input_files_relative_paths):

             frame_filenames,fs = \
-                _video_to_frames_for_folder(input_fn_relative,input_folder,output_folder_base,
-                                            every_n_frames,overwrite,verbose,quality,max_width,
-                                            frames_to_extract,allow_empty_videos)
+                _video_to_frames_for_folder(input_fn_relative,
+                                            input_folder,
+                                            output_folder_base,
+                                            every_n_frames,
+                                            overwrite,
+                                            verbose,
+                                            quality,
+                                            max_width,
+                                            frames_to_extract,
+                                            allow_empty_videos)
             frame_filenames_by_video.append(frame_filenames)
             fs_by_video.append(fs)
     else:

@@ -778,15 +795,15 @@ def video_folder_to_frames(input_folder,
         print('Starting a worker pool with {} processes'.format(n_threads))
         pool = Pool(n_threads)
         process_video_with_options = partial(_video_to_frames_for_folder,
-                                                 input_folder=input_folder,
-                                                 output_folder_base=output_folder_base,
-                                                 every_n_frames=every_n_frames,
-                                                 overwrite=overwrite,
-                                                 verbose=verbose,
-                                                 quality=quality,
-                                                 max_width=max_width,
-                                                 frames_to_extract=frames_to_extract,
-                                                 allow_empty_videos=allow_empty_videos)
+                                             input_folder=input_folder,
+                                             output_folder_base=output_folder_base,
+                                             every_n_frames=every_n_frames,
+                                             overwrite=overwrite,
+                                             verbose=verbose,
+                                             quality=quality,
+                                             max_width=max_width,
+                                             frames_to_extract=frames_to_extract,
+                                             allow_empty_videos=allow_empty_videos)
         results = list(tqdm(pool.imap(
             partial(process_video_with_options),input_files_relative_paths),
             total=len(input_files_relative_paths)))

@@ -822,6 +839,9 @@ class FrameToVideoOptions:
         #: video; can be 'error' or 'skip_with_warning'
         self.non_video_behavior = 'error'

+        #: Are frame rates required?
+        self.frame_rates_are_required = False
+

 def frame_results_to_video_results(input_file,
                                    output_file,

@@ -839,13 +859,18 @@ def frame_results_to_video_results(input_file,
         output_file (str): the .json file to which we should write video-level results
         options (FrameToVideoOptions, optional): parameters for converting frame-level results
             to video-level results, see FrameToVideoOptions for details
-        video_filename_to_frame_rate (dict): maps (relative) video path names to frame rates,
-            used only to populate the output file
+        video_filename_to_frame_rate (dict, optional): maps (relative) video path names to frame
+            rates, used only to populate the output file
     """

     if options is None:
         options = FrameToVideoOptions()

+    if options.frame_rates_are_required:
+        assert video_filename_to_frame_rate is not None, \
+            'You specified that frame rates are required, but you did not ' + \
+            'supply video_filename_to_frame_rate'
+
     # Load results
     with open(input_file,'r') as f:
         input_data = json.load(f)

@@ -902,9 +927,13 @@ def frame_results_to_video_results(input_file,
         im_out = {}
         im_out['file'] = video_name

-        if (video_filename_to_frame_rate is not None) and \
-           (video_name in video_filename_to_frame_rate):
-            im_out['frame_rate'] = video_filename_to_frame_rate[video_name]
+        if (video_filename_to_frame_rate is not None):
+
+            if options.frame_rates_are_required:
+                assert video_name in video_filename_to_frame_rate, \
+                    'Could not determine frame rate for {}'.format(video_name)
+            if video_name in video_filename_to_frame_rate:
+                im_out['frame_rate'] = video_filename_to_frame_rate[video_name]

         # Find all detections for this video
         all_detections_this_video = []
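
The new frame_rates_are_required option turns missing frame-rate information into a hard assertion failure rather than a silently absent 'frame_rate' field. A sketch, using the parameter names from the docstring above (file names are hypothetical):

    from megadetector.detection.video_utils import \
        FrameToVideoOptions, frame_results_to_video_results

    options = FrameToVideoOptions()
    options.frame_rates_are_required = True

    # With frame_rates_are_required set, omitting video_filename_to_frame_rate,
    # or omitting any video from the dict, now triggers an assertion.
    frame_results_to_video_results(
        'frame_level_results.json',
        'video_level_results.json',
        options=options,
        video_filename_to_frame_rate={'camera01/clip.mp4': 29.97})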
megadetector/postprocessing/add_max_conf.py

@@ -26,6 +26,10 @@ from megadetector.utils import ct_utils
 def add_max_conf(input_file,output_file):
     """
     Add maximum confidence values to [input_file] and write the results to [output_file].
+
+    Args:
+        input_file (str): MD-formatted .json file to which we should add maxconf values
+        output_file (str): output .json file
     """

     assert os.path.isfile(input_file), "Can't find input file {}".format(input_file)
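
Usage is a single call, per the signature shown above (paths hypothetical):

    from megadetector.postprocessing.add_max_conf import add_max_conf

    # Adds a maximum-confidence value to each image in an MD-formatted results file
    add_max_conf('md_results.json', 'md_results_with_maxconf.json')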
megadetector/postprocessing/categorize_detections_by_size.py

@@ -50,7 +50,7 @@ def categorize_detections_by_size(input_file,output_file=None,options=None):
     Args:
         input_file (str): file to process
         output_file (str, optional): optional output file
-        options (SizeCategorizationOptions): categorization parameters
+        options (SizeCategorizationOptions, optional): categorization parameters

     Returns:
         dict: data loaded from [input_file], with the new size-based categories.
megadetector/postprocessing/classification_postprocessing.py

@@ -121,9 +121,17 @@ class ClassificationSmoothingOptions:
         #: if this is True, we'll make a copy of the input dict before modifying.
         self.modify_in_place = False

+        #: Only include these categories in the smoothing process (None to use all categories)
+        self.detection_category_names_to_smooth = ['animal']
+
         #: Debug options
         self.break_at_image = None

+        ## Populated internally
+
+        #: #: Only include these categories in the smoothing process (None to use all categories)
+        self._detection_category_ids_to_smooth = None
+

 #%% Utility functions

@@ -149,6 +157,23 @@ def _sort_images_by_time(images):
     return sorted(images, key = lambda im: im['datetime'])


+def _detection_is_relevant_for_smoothing(det,options):
+    """
+    Determine whether [det] has classifications that might be meaningful for smoothing.
+    """
+
+    if ('classifications' not in det) or \
+       (det['conf'] < options.detection_confidence_threshold):
+        return False
+
+    # Ignore non-smoothed categories
+    if (options._detection_category_ids_to_smooth is not None) and \
+       (det['category'] not in options._detection_category_ids_to_smooth):
+        return False
+
+    return True
+
+
 def count_detections_by_classification_category(detections,options=None):
     """
     Count the number of instances of each classification category in the detections list
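
A small illustration of the new relevance filter; the detection dicts are hypothetical, and _detection_category_ids_to_smooth is set directly only for demonstration (in normal use it's populated from detection_category_names_to_smooth by _prepare_results_for_smoothing). This also assumes the default detection_confidence_threshold is below 0.9:

    from megadetector.postprocessing.classification_postprocessing import \
        ClassificationSmoothingOptions, _detection_is_relevant_for_smoothing

    options = ClassificationSmoothingOptions()
    options._detection_category_ids_to_smooth = ['1']  # e.g. '1' == 'animal'

    # Classification entries are [category_id, confidence] pairs
    animal_det = {'category': '1', 'conf': 0.9, 'classifications': [['10', 0.8]]}
    vehicle_det = {'category': '3', 'conf': 0.9, 'classifications': [['10', 0.8]]}

    assert _detection_is_relevant_for_smoothing(animal_det, options)
    assert not _detection_is_relevant_for_smoothing(vehicle_det, options)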
@@ -159,7 +184,7 @@ def count_detections_by_classification_category(detections,options=None):
     Only processes the top classification for each detection.

     Args:
-        detections: detections list
+        detections (list of dict): detections list
        options (ClassificationSmoothingOptions, optional): see ClassificationSmoothingOptions

     Returns:

@@ -175,11 +200,13 @@ def count_detections_by_classification_category(detections,options=None):
     category_to_count = defaultdict(int)

     for det in detections:
-        if ('classifications' in det) and (det['conf'] >= options.detection_confidence_threshold):
-            # assert len(det['classifications']) == 1
-            c = det['classifications'][0]
-            if c[1] >= options.classification_confidence_threshold:
-                category_to_count[c[0]] += 1
+
+        if not _detection_is_relevant_for_smoothing(det,options):
+            continue
+
+        c = det['classifications'][0]
+        if c[1] >= options.classification_confidence_threshold:
+            category_to_count[c[0]] += 1

     category_to_count = {k: v for k, v in sorted(category_to_count.items(),
                                                  key=lambda item: item[1],

@@ -233,6 +260,8 @@ def _prepare_results_for_smoothing(input_file,options):
     Load results from [input_file] if necessary, prepare category descriptions
     for smoothing. Adds pre-smoothing descriptions to every image if the options
     say we're supposed to do that.
+
+    May modify some fields in [options].
     """

     if isinstance(input_file,str):

@@ -256,6 +285,16 @@ def _prepare_results_for_smoothing(input_file,options):
         if s in category_name_to_id:
             other_category_ids.append(category_name_to_id[s])

+    # Possibly update the list of category IDs we should smooth
+    if options.detection_category_names_to_smooth is None:
+        options._detection_category_ids_to_smooth = None
+    else:
+        detection_category_id_to_name = d['detection_categories']
+        detection_category_name_to_id = invert_dictionary(detection_category_id_to_name)
+        options._detection_category_ids_to_smooth = []
+        for category_name in options.detection_category_names_to_smooth:
+            options._detection_category_ids_to_smooth.append(detection_category_name_to_id[category_name])
+
     # Before we do anything else, get rid of everything but the top classification
     # for each detection, and remove the 'classifications' field from detections with
     # no classifications.

@@ -283,8 +322,9 @@ def _prepare_results_for_smoothing(input_file,options):
     # ...for each image


-    ## Clean up classification descriptions so we can test taxonomic relationships
-    ## by substring testing.
+    ## Clean up classification descriptions...
+
+    # ...so we can test taxonomic relationships by substring testing.

     classification_descriptions_clean = None
     classification_descriptions = None

@@ -395,8 +435,7 @@ def _smooth_classifications_for_list_of_detections(detections,

     for det in detections:

-        if ('classifications' not in det) or \
-           (det['conf'] < options.detection_confidence_threshold):
+        if not _detection_is_relevant_for_smoothing(det,options):
             continue

         assert len(det['classifications']) == 1

@@ -450,8 +489,7 @@ def _smooth_classifications_for_list_of_detections(detections,
     # i_det = 0; det = detections[i_det]
     for i_det,det in enumerate(detections):

-        if ('classifications' not in det) or \
-           (det['conf'] < options.detection_confidence_threshold):
+        if not _detection_is_relevant_for_smoothing(det,options):
             continue

         assert len(det['classifications']) == 1

@@ -532,8 +570,7 @@ def _smooth_classifications_for_list_of_detections(detections,
     # det = detections[3]
     for det in detections:

-        if ('classifications' not in det) or \
-           (det['conf'] < options.detection_confidence_threshold):
+        if not _detection_is_relevant_for_smoothing(det,options):
             continue

         assert len(det['classifications']) == 1

@@ -660,8 +697,7 @@ def _smooth_classifications_for_list_of_detections(detections,
     # det = detections[0]
     for det in detections:

-        if ('classifications' not in det) or \
-           (det['conf'] < options.detection_confidence_threshold):
+        if not _detection_is_relevant_for_smoothing(det,options):
             continue

         assert len(det['classifications']) == 1

@@ -720,7 +756,6 @@ def _smooth_classifications_for_list_of_detections(detections,

     # ...if the dominant category is legit and we have taxonomic information available

-
     return {'n_other_classifications_changed_this_image':n_other_classifications_changed_this_image,
             'n_detections_flipped_this_image':n_detections_flipped_this_image,
             'n_taxonomic_changes_this_image':n_taxonomic_changes_this_image,

@@ -894,8 +929,8 @@ def smooth_classification_results_sequence_level(input_file,

     Args:
         input_file (str or dict): MegaDetector-formatted classification results file to smooth
-            (or already-loaded results). If you supply a dict, it's modified in place by default, but
-            a copy can be forced by setting options.modify_in_place=False.
+            (or already-loaded results). If you supply a dict, it's copied by default, but
+            in-place modification is supported via options.modify_in_place.
         cct_sequence_information (str, dict, or list): COCO Camera Traps file containing sequence IDs for
             each image (or an already-loaded CCT-formatted dict, or just the 'images' list from a CCT dict).
         output_file (str, optional): .json file to write smoothed results
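
A usage sketch for the copy-by-default behavior described above; the options keyword and all file names are assumptions consistent with this docstring:

    from megadetector.postprocessing.classification_postprocessing import \
        ClassificationSmoothingOptions, smooth_classification_results_sequence_level

    options = ClassificationSmoothingOptions()
    options.modify_in_place = False  # the default: the input dict is copied

    smoothed_results = smooth_classification_results_sequence_level(
        'md_results_with_classifications.json',
        'cct_with_sequence_ids.json',
        output_file='md_results_smoothed.json',
        options=options)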
@@ -1074,7 +1109,7 @@ def restrict_to_taxa_list(taxa_list,
             For example, if only a single felid species is allowed, should other
             felid predictions be mapped to that species, as opposed to being mapped
             to the family?
-        add_pre_restriction_description (bool, optional): should we add a new metadata
+        add_pre_filtering_description (bool, optional): should we add a new metadata
             field that summarizes each image's classifications prior to taxonomic
             restriction?
     """
megadetector/postprocessing/combine_batch_outputs.py

@@ -43,8 +43,9 @@ def combine_batch_output_files(input_files,
     Args:
         input_files (list of str): paths to JSON detection files
         output_file (str, optional): path to write merged JSON
-        require_uniqueness (bool): whether to require that the images in
+        require_uniqueness (bool, optional): whether to require that the images in
             each list of images be unique
+        verbose (bool, optional): enable additional debug output

     Returns:
         dict: merged dictionaries loaded from [input_files], identical to what's

@@ -80,7 +81,7 @@ def combine_batch_output_dictionaries(input_dicts, require_uniqueness=True):
     Args:
         input_dicts (list of dicts): list of dicts in which each dict represents the
             contents of a MD output file
-        require_uniqueness (bool): whether to require that the images in
+        require_uniqueness (bool, optional): whether to require that the images in
             each input dict be unique; if this is True and image filenames are
             not unique, an error is raised.
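
A sketch of the merge entry point with the newly documented flags (paths hypothetical):

    from megadetector.postprocessing.combine_batch_outputs import \
        combine_batch_output_files

    merged = combine_batch_output_files(
        ['results_part1.json', 'results_part2.json'],
        output_file='results_combined.json',
        require_uniqueness=True,
        verbose=True)
    print('Merged results contain {} images'.format(len(merged['images'])))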
megadetector/postprocessing/compare_batch_results.py

@@ -32,6 +32,7 @@ Operates in one of three modes, depending on whether ground truth labels/boxes a
 import json
 import os
+import re
 import random
 import copy
 import urllib

@@ -207,6 +208,9 @@ class BatchComparisonOptions:
         #: output page?
         self.parse_link_paths = True

+        #: Should we include a TOC? TOC is always omitted if <=2 comparisons are performed.
+        self.include_toc = True
+

 # ...class BatchComparisonOptions

@@ -235,6 +239,12 @@ class PairwiseBatchComparisonResults:
         #: Values are dicts with fields 'im_a', 'im_b', 'sort_conf', and 'im_gt'
         self.categories_to_image_pairs = None

+        #: Short identifier for this comparison
+        self.comparison_short_name = None
+
+        #: Friendly identifier for this comparison
+        self.comparison_friendly_name = None
+

 # ...class PairwiseBatchComparisonResults

@@ -254,7 +264,7 @@ class BatchComparisonResults:
 # ...class BatchComparisonResults


-main_page_style_header = """<head>
+main_page_style_header = """<head><title>Results comparison</title>
 <style type="text/css">
 a { text-decoration: none; }
 body { font-family: segoe ui, calibri, "trebuchet ms", verdana, arial, sans-serif; }

@@ -375,7 +385,7 @@ def _render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):

     try:
         font = ImageFont.truetype('arial.ttf', 25)
-    except IOError:
+    except OSError:
         font = ImageFont.load_default()

     draw = ImageDraw.Draw(im)

@@ -1456,14 +1466,35 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     try:
         pool.close()
         pool.join()
-        print("Pool closed and joined for comparisong rendering")
+        print("Pool closed and joined for comparison rendering")
     except Exception:
         pass

     ##%% Write the top-level HTML file content

     html_output_string = ''

-    html_output_string += '<p>Comparing <b>{}</b> (A, {}) to <b>{}</b> (B, {})</p>'.format(
+    def _sanitize_id_name(s, lower=True):
+        """
+        Remove characters in [s] that are not allowed in HTML id attributes
+        """
+
+        s = re.sub(r'[^a-zA-Z0-9_-]', '', s)
+        s = re.sub(r'^[^a-zA-Z]*', '', s)
+        if lower:
+            s = s.lower()
+        return s
+
+    comparison_short_name = '{}_vs_{}'.format(
+        _sanitize_id_name(pairwise_options.results_description_a),
+        _sanitize_id_name(pairwise_options.results_description_b))
+
+    comparison_friendly_name = '{} vs {}'.format(
+        pairwise_options.results_description_a,
+        pairwise_options.results_description_b
+    )
+
+    html_output_string += '<p id="{}">Comparing <b>{}</b> (A, {}) to <b>{}</b> (B, {})</p>'.format(
+        comparison_short_name,
         pairwise_options.results_description_a,color_string_a.lower(),
         pairwise_options.results_description_b,color_string_b.lower())
     html_output_string += '<div class="contentdiv">\n'
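
The nested _sanitize_id_name helper drives the new anchor IDs; reproducing its logic standalone shows the transformation on a hypothetical pair of result descriptions:

    import re

    def _sanitize_id_name(s, lower=True):
        # Same logic as the helper added in this hunk
        s = re.sub(r'[^a-zA-Z0-9_-]', '', s)
        s = re.sub(r'^[^a-zA-Z]*', '', s)
        if lower:
            s = s.lower()
        return s

    print(_sanitize_id_name('MDv5a (conf 0.2)'))
    # mdv5aconf02
    print('{}_vs_{}'.format(_sanitize_id_name('MDv5a (conf 0.2)'),
                            _sanitize_id_name('MDv1000-redwood')))
    # mdv5aconf02_vs_mdv1000-redwood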
@@ -1515,6 +1546,8 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):

     pairwise_results = PairwiseBatchComparisonResults()

+    pairwise_results.comparison_short_name = comparison_short_name
+    pairwise_results.comparison_friendly_name = comparison_friendly_name
     pairwise_results.html_content = html_output_string
     pairwise_results.pairwise_options = pairwise_options
     pairwise_results.categories_to_image_pairs = categories_to_image_pairs

@@ -1555,20 +1588,32 @@ def compare_batch_results(options):
     all_pairwise_results = []

     # i_comparison = 0; pairwise_options = pairwise_options_list[i_comparison]
-
     for i_comparison,pairwise_options in enumerate(pairwise_options_list):
+
         print('Running comparison {} of {}'.format(i_comparison,n_comparisons))
         pairwise_results = \
             _pairwise_compare_batch_results(options,i_comparison,pairwise_options)
         html_content += pairwise_results.html_content
         all_pairwise_results.append(pairwise_results)

+    # ...for each pairwise comparison
+
     html_output_string = main_page_header
     job_name_string = ''
     if len(options.job_name) > 0:
         job_name_string = ' for {}'.format(options.job_name)
     html_output_string += '<h2>Comparison of results{}</h2>\n'.format(
         job_name_string)
+
+    if options.include_toc and (len(pairwise_options_list) > 2):
+        toc_string = '<p><b>Contents</b></p>\n'
+        toc_string += '<div class="contentdiv">\n'
+        for r in all_pairwise_results:
+            toc_string += '<a href="#{}">{}</a><br/>'.format(r.comparison_short_name,
+                                                             r.comparison_friendly_name)
+        toc_string += '</div>\n'
+        html_output_string += toc_string
+
     html_output_string += html_content
     html_output_string += main_page_footer
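
The TOC only renders when more than two pairwise comparisons are present. A hedged sketch of driving that path through n_way_comparison, whose call signature here matches its uses later in this diff (other required option fields, such as an output folder, are omitted; filenames are hypothetical):

    from megadetector.postprocessing.compare_batch_results import \
        BatchComparisonOptions, n_way_comparison

    options = BatchComparisonOptions()
    options.include_toc = True  # the default; set False to suppress the TOC

    # Three files produce three pairwise comparisons, so the TOC is included
    filenames = ['results_a.json', 'results_b.json', 'results_c.json']
    detection_thresholds = [0.15, 0.15, 0.15]

    results = n_way_comparison(filenames,
                               options,
                               detection_thresholds,
                               rendering_thresholds=None)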
@@ -1832,9 +1877,12 @@ def find_equivalent_threshold(results_a,
     threshold_b = confidence_values_b[detection_cutoff_index]

     if verbose:
-        print('{} confidence values above threshold (A)'.format(len(confidence_values_a_above_threshold)))
-        confidence_values_b_above_threshold = [c for c in confidence_values_b if c >= threshold_b]
-        print('{} confidence values above threshold (B)'.format(len(confidence_values_b_above_threshold)))
+        print('{} confidence values above threshold (A)'.format(
+            len(confidence_values_a_above_threshold)))
+        confidence_values_b_above_threshold = \
+            [c for c in confidence_values_b if c >= threshold_b]
+        print('{} confidence values above threshold (B)'.format(
+            len(confidence_values_b_above_threshold)))

     return threshold_b

@@ -1868,7 +1916,10 @@ if False:
     detection_thresholds = [0.15,0.15]
     rendering_thresholds = None

-    results = n_way_comparison(filenames,options,detection_thresholds,rendering_thresholds=rendering_thresholds)
+    results = n_way_comparison(filenames,
+                               options,
+                               detection_thresholds,
+                               rendering_thresholds=rendering_thresholds)

     from megadetector.utils.path_utils import open_file
     open_file(results.html_output_file)

@@ -1980,7 +2031,10 @@ def main(): # noqa
     if args.use_processes:
         options.parallelize_rendering_with_threads = False

-    results = n_way_comparison(args.results_files,options,args.detection_thresholds,args.rendering_thresholds)
+    results = n_way_comparison(args.results_files,
+                               options,
+                               args.detection_thresholds,
+                               args.rendering_thresholds)

     if args.open_results:
         path_utils.open_file(results.html_output_file)
  path_utils.open_file(results.html_output_file)
@@ -51,12 +51,12 @@ def convert_json_to_csv(input_path,
51
51
  [input_path].csv
52
52
  min_confidence (float, optional): the minimum-confidence detection we should include
53
53
  in the "detections" column; has no impact on the other columns
54
- omit_bounding_boxes (bool): whether to leave out the json-formatted bounding boxes
55
- that make up the "detections" column, which are not generally useful for someone who
56
- wants to consume this data as a .csv file
54
+ omit_bounding_boxes (bool, optional): whether to leave out the json-formatted bounding
55
+ boxes that make up the "detections" column, which are not generally useful for someone
56
+ who wants to consume this data as a .csv file
57
57
  output_encoding (str, optional): encoding to use for the .csv file
58
- overwrite (bool): whether to overwrite an existing .csv file; if this is False and the
59
- output file exists, no-ops and returns
58
+ overwrite (bool, optional): whether to overwrite an existing .csv file; if this is False and
59
+ the output file exists, no-ops and returns
60
60
 
61
61
  """
62
62
 
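
A sketch of the documented defaults; keyword names come from the docstring above, and the exact signature is an assumption:

    from megadetector.postprocessing.convert_output_format import convert_json_to_csv

    # Writes md_results.csv next to the input; detections below min_confidence
    # are left out of the "detections" column, and bounding boxes are kept
    convert_json_to_csv('md_results.json',
                        min_confidence=0.1,
                        omit_bounding_boxes=False,
                        overwrite=True)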
@@ -230,8 +230,8 @@ def convert_csv_to_json(input_path,output_path=None,overwrite=True):
         input_path (str): .csv filename to convert to .json
         output_path (str, optional): the output .json file to generate; if this is None, uses
             [input_path].json
-        overwrite (bool): whether to overwrite an existing .json file; if this is False and the
-            output file exists, no-ops and returns
+        overwrite (bool, optional): whether to overwrite an existing .json file; if this is
+            False and the output file exists, no-ops and returns

     """

@@ -365,7 +365,11 @@ if False:

 #%% Command-line driver

-def main(): # noqa
+def main():
+    """
+    Command-line driver for convert_output_format(), which converts
+    json <--> csv.
+    """

     parser = argparse.ArgumentParser()
     parser.add_argument('input_path',type=str,