megadetector 5.0.19__py3-none-any.whl → 5.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (34)
  1. megadetector/data_management/importers/bellevue_to_json.py +0 -1
  2. megadetector/data_management/importers/osu-small-animals-to-json.py +364 -0
  3. megadetector/data_management/lila/generate_lila_per_image_labels.py +1 -1
  4. megadetector/data_management/lila/get_lila_annotation_counts.py +2 -0
  5. megadetector/data_management/lila/lila_common.py +28 -12
  6. megadetector/data_management/lila/test_lila_metadata_urls.py +17 -8
  7. megadetector/data_management/read_exif.py +73 -0
  8. megadetector/data_management/yolo_output_to_md_output.py +18 -5
  9. megadetector/detection/process_video.py +84 -16
  10. megadetector/detection/run_detector.py +36 -13
  11. megadetector/detection/run_detector_batch.py +104 -15
  12. megadetector/detection/run_inference_with_yolov5_val.py +20 -23
  13. megadetector/detection/video_utils.py +79 -44
  14. megadetector/postprocessing/combine_api_outputs.py +1 -1
  15. megadetector/postprocessing/detector_calibration.py +367 -0
  16. megadetector/postprocessing/md_to_coco.py +2 -1
  17. megadetector/postprocessing/postprocess_batch_results.py +32 -20
  18. megadetector/postprocessing/validate_batch_results.py +118 -58
  19. megadetector/taxonomy_mapping/map_new_lila_datasets.py +8 -3
  20. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +3 -2
  21. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -1
  22. megadetector/utils/ct_utils.py +20 -0
  23. megadetector/utils/md_tests.py +63 -17
  24. megadetector/utils/path_utils.py +139 -30
  25. megadetector/utils/write_html_image_list.py +16 -5
  26. megadetector/visualization/visualization_utils.py +126 -23
  27. megadetector/visualization/visualize_db.py +104 -63
  28. {megadetector-5.0.19.dist-info → megadetector-5.0.21.dist-info}/METADATA +2 -2
  29. {megadetector-5.0.19.dist-info → megadetector-5.0.21.dist-info}/RECORD +32 -32
  30. {megadetector-5.0.19.dist-info → megadetector-5.0.21.dist-info}/WHEEL +1 -1
  31. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  32. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +0 -677
  33. {megadetector-5.0.19.dist-info → megadetector-5.0.21.dist-info}/LICENSE +0 -0
  34. {megadetector-5.0.19.dist-info → megadetector-5.0.21.dist-info}/top_level.txt +0 -0
megadetector/detection/run_inference_with_yolov5_val.py

@@ -788,38 +788,35 @@ def run_inference_with_yolo_val(options):
     yolo_read_failures = []
 
     for line in yolo_console_output:
-        # Lines look like:
+
+        #
+        # Lines indicating read failures look like:
         #
         # For ultralytics val:
         #
-        # val: WARNING ⚠️ /a/b/c/d.jpg: ignoring corrupt image/label: [Errno 13] Permission denied: '/a/b/c/d.jpg'
         # line = "val: WARNING ⚠️ /a/b/c/d.jpg: ignoring corrupt image/label: [Errno 13] Permission denied: '/a/b/c/d.jpg'"
         #
         # For yolov5 val.py:
         #
-        # test: WARNING: a/b/c/d.jpg: ignoring corrupt image/label: cannot identify image file '/a/b/c/d.jpg'
         # line = "test: WARNING: a/b/c/d.jpg: ignoring corrupt image/label: cannot identify image file '/a/b/c/d.jpg'"
-        if 'cannot identify image file' in line:
-            tokens = line.split('cannot identify image file')
-            image_name = tokens[-1].strip()
-            assert image_name[0] == "'" and image_name[-1] == "'"
-            image_name = image_name[1:-1]
-            yolo_read_failures.append(image_name)
-        elif 'ignoring corrupt image/label' in line:
-            assert 'WARNING' in line
-            if '⚠️' in line:
-                assert line.startswith('val'), \
-                    'Unrecognized line in YOLO output: {}'.format(line)
-                tokens = line.split('ignoring corrupt image/label')
-                image_name = tokens[0].split('⚠️')[-1].strip()
-            else:
-                assert line.startswith('test'), \
-                    'Unrecognized line in YOLO output: {}'.format(line)
-                tokens = line.split('ignoring corrupt image/label')
-                image_name = tokens[0].split('WARNING:')[-1].strip()
-            assert image_name.endswith(':')
-            image_name = image_name[0:-1]
+        #
+        # In both cases, when we are using symlinks, the first filename is the symlink name, the
+        # second filename is the target, e.g.:
+        #
+        # line = "test: WARNING: /tmp/md_to_yolo/md_to_yolo_xyz/symlinks/xyz/0000000004.jpg: ignoring corrupt image/label: cannot identify image file '/tmp/md-tests/md-test-images/corrupt-images/real-file.jpg'"
+        #
+        line = line.replace('⚠️',':')
+        if 'ignoring corrupt image/label' in line:
+
+            tokens = line.split('ignoring corrupt image/label')
+            assert len(tokens) == 2
+            tokens = tokens[0].split(':',maxsplit=3)
+            assert len(tokens) == 4
+            assert 'warning' in tokens[1].lower()
+            image_name = tokens[2].strip()
 
             yolo_read_failures.append(image_name)
+
+    # ...for each line in the console output
 
     # image_file = yolo_read_failures[0]
     for image_file in yolo_read_failures:
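The rewrite above normalizes the ultralytics '⚠️' marker to a ':' so both warning formats share one parsing path. A minimal standalone sketch of that logic, using one of the sample warning lines from the code comments above:

    # Sketch of the new parsing logic, applied to a yolov5-style warning line
    line = "test: WARNING: a/b/c/d.jpg: ignoring corrupt image/label: cannot identify image file '/a/b/c/d.jpg'"
    line = line.replace('⚠️',':')
    if 'ignoring corrupt image/label' in line:
        tokens = line.split('ignoring corrupt image/label')
        # tokens[0] is "test: WARNING: a/b/c/d.jpg: "
        tokens = tokens[0].split(':',maxsplit=3)
        # tokens is ['test', ' WARNING', ' a/b/c/d.jpg', ' ']
        image_name = tokens[2].strip()
        print(image_name)  # prints "a/b/c/d.jpg"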
megadetector/detection/video_utils.py

@@ -228,9 +228,10 @@ def run_callback_on_frames(input_video_file,
         input_video_file (str): video file to process
         frame_callback (function): callback to run on frames, should take an np.array and a string and
             return a single value. callback should expect PIL-formatted (RGB) images.
-        every_n_frames (int, optional): sample every Nth frame starting from the first frame;
-            if this is None or 1, every frame is processed. Mutually exclusive with
-            frames_to_process.
+        every_n_frames (float, optional): sample every Nth frame starting from the first frame;
+            if this is None or 1, every frame is processed. If this is a negative value, that's
+            interpreted as a sampling rate in seconds, which is rounded to the nearest frame sampling
+            rate. Mutually exclusive with frames_to_process.
         verbose (bool, optional): enable additional debug console output
         frames_to_process (list of int, optional): process this specific set of frames;
             mutually exclusive with every_n_frames. If all values are beyond the length
@@ -263,6 +264,13 @@ def run_callback_on_frames(input_video_file,
     frame_filenames = []
     results = []
 
+    if every_n_frames is not None and every_n_frames < 0:
+        every_n_seconds = abs(every_n_frames)
+        every_n_frames = int(every_n_seconds * frame_rate)
+        if verbose:
+            print('Interpreting a time sampling rate of {} hz as a frame interval of {}'.format(
+                every_n_seconds,every_n_frames))
+
     # frame_number = 0
     for frame_number in range(0,n_frames):
 
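Concretely, under the sign convention added above, a negative every_n_frames value is a sampling period in seconds. A hedged sketch of the arithmetic, assuming a 30 fps video:

    # every_n_frames = -2 means "one sample every two seconds"; at a
    # 30 fps frame rate, that becomes a frame interval of 60.
    frame_rate = 30.0
    every_n_frames = -2.0
    if every_n_frames is not None and every_n_frames < 0:
        every_n_seconds = abs(every_n_frames)
        every_n_frames = int(every_n_seconds * frame_rate)
    assert every_n_frames == 60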
@@ -670,12 +678,18 @@ def _video_to_frames_for_folder(relative_fn,input_folder,output_folder_base,
     return frame_filenames,fs
 
 
-def video_folder_to_frames(input_folder, output_folder_base,
-                           recursive=True, overwrite=True,
-                           n_threads=1, every_n_frames=None,
-                           verbose=False, parallelization_uses_threads=True,
-                           quality=None, max_width=None,
-                           frames_to_extract=None, allow_empty_videos=False):
+def video_folder_to_frames(input_folder,
+                           output_folder_base,
+                           recursive=True,
+                           overwrite=True,
+                           n_threads=1,
+                           every_n_frames=None,
+                           verbose=False,
+                           parallelization_uses_threads=True,
+                           quality=None,
+                           max_width=None,
+                           frames_to_extract=None,
+                           allow_empty_videos=False):
     """
     For every video file in input_folder, creates a folder within output_folder_base, and
     renders frame of that video to images in that folder.
@@ -701,6 +715,8 @@ def video_folder_to_frames(input_folder, output_folder_base,
             each video; mutually exclusive with every_n_frames. If all values are beyond
             the length of a video, no frames are extracted. Can also be a single int,
             specifying a single frame number.
+        allow_empty_videos (bool, optional): Just print a warning if a video appears to have no
+            frames (by default, this is an error).
 
     Returns:
         tuple: a length-3 tuple containing:
@@ -711,8 +727,11 @@ def video_folder_to_frames(input_folder, output_folder_base,
     """
 
     # Recursively enumerate video files
+    if verbose:
+        print('Enumerating videos in {}'.format(input_folder))
     input_files_full_paths = find_videos(input_folder,recursive=recursive)
-    print('Found {} videos in folder {}'.format(len(input_files_full_paths),input_folder))
+    if verbose:
+        print('Found {} videos in folder {}'.format(len(input_files_full_paths),input_folder))
     if len(input_files_full_paths) == 0:
         return [],[],[]
 
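With the reformatted signature and the new allow_empty_videos parameter, a typical call looks like the sketch below; the folder paths are placeholders, and the import path assumes these functions live in megadetector.detection.video_utils (per the file list above):

    from megadetector.detection.video_utils import video_folder_to_frames

    # Extract every 10th frame from each video under a (placeholder) input
    # folder; with allow_empty_videos=True, a video with no frames produces
    # a warning rather than an error. As of this version, the enumeration
    # progress messages print only when verbose=True.
    frame_filenames, frame_rates, video_filenames = video_folder_to_frames(
        input_folder='/data/videos',
        output_folder_base='/data/frames',
        recursive=True,
        n_threads=4,
        every_n_frames=10,
        verbose=True,
        allow_empty_videos=True)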
@@ -776,6 +795,10 @@ class FrameToVideoOptions:
         #: for the whole video, i.e. "1" means "use the confidence value from the highest-confidence frame"
         self.nth_highest_confidence = 1
 
+        #: Should we include just a single representative frame result for each video (default), or
+        #: every frame that was processed?
+        self.include_all_processed_frames = False
+
         #: What to do if a file referred to in a .json results file appears not to be a
         #: video; can be 'error' or 'skip_with_warning'
         self.non_video_behavior = 'error'
@@ -803,7 +826,7 @@ def frame_results_to_video_results(input_file,
 
     if options is None:
         options = FrameToVideoOptions()
-
+
     # Load results
     with open(input_file,'r') as f:
         input_data = json.load(f)
@@ -856,47 +879,58 @@ def frame_results_to_video_results(input_file,
     # video_name = list(video_to_frame_info.keys())[0]
     for video_name in tqdm(video_to_frame_info):
 
-        frames = video_to_frame_info[video_name]
-
-        all_detections_this_video = []
-
-        # frame = frames[0]
-        for frame in frames:
-            if ('detections' in frame) and (frame['detections'] is not None):
-                all_detections_this_video.extend(frame['detections'])
-
-        # At most one detection for each category for the whole video
-        canonical_detections = []
-
-        # category_id = list(detection_categories.keys())[0]
-        for category_id in detection_categories:
-
-            category_detections = [det for det in all_detections_this_video if \
-                                   det['category'] == category_id]
-
-            # Find the nth-highest-confidence video to choose a confidence value
-            if len(category_detections) >= options.nth_highest_confidence:
-
-                category_detections_by_confidence = sorted(category_detections,
-                                                           key = lambda i: i['conf'],reverse=True)
-                canonical_detection = category_detections_by_confidence[options.nth_highest_confidence-1]
-                canonical_detections.append(canonical_detection)
-
         # Prepare the output representation for this video
         im_out = {}
         im_out['file'] = video_name
-        im_out['detections'] = canonical_detections
 
         if (video_filename_to_frame_rate is not None) and \
            (video_name in video_filename_to_frame_rate):
             im_out['frame_rate'] = video_filename_to_frame_rate[video_name]
 
-        # 'max_detection_conf' is no longer included in output files by default
-        if False:
-            im_out['max_detection_conf'] = 0
-            if len(canonical_detections) > 0:
-                confidences = [d['conf'] for d in canonical_detections]
-                im_out['max_detection_conf'] = max(confidences)
+        # Find all detections for this video
+        all_detections_this_video = []
+
+        frames = video_to_frame_info[video_name]
+
+        # frame = frames[0]
+        for frame in frames:
+            if ('detections' in frame) and (frame['detections'] is not None):
+                all_detections_this_video.extend(frame['detections'])
+
+        # Should we keep detections for all frames?
+        if (options.include_all_processed_frames):
+
+            im_out['detections'] = all_detections_this_video
+
+        # ...or should we keep just a canonical detection for each category?
+        else:
+
+            canonical_detections = []
+
+            # category_id = list(detection_categories.keys())[0]
+            for category_id in detection_categories:
+
+                category_detections = [det for det in all_detections_this_video if \
+                                       det['category'] == category_id]
+
+                # Find the nth-highest-confidence video to choose a confidence value
+                if len(category_detections) >= options.nth_highest_confidence:
+
+                    category_detections_by_confidence = sorted(category_detections,
+                                                               key = lambda i: i['conf'],reverse=True)
+                    canonical_detection = category_detections_by_confidence[options.nth_highest_confidence-1]
+                    canonical_detections.append(canonical_detection)
+
+            im_out['detections'] = canonical_detections
+
+            # 'max_detection_conf' is no longer included in output files by default
+            if False:
+                im_out['max_detection_conf'] = 0
+                if len(canonical_detections) > 0:
+                    confidences = [d['conf'] for d in canonical_detections]
+                    im_out['max_detection_conf'] = max(confidences)
+
+        # ...if we're keeping output for all frames / canonical frames
 
         output_images.append(im_out)
 
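The new include_all_processed_frames option selects the first branch above, so per-frame detections are preserved in the video-level output rather than being collapsed to one canonical detection per category. A hedged usage sketch (the filenames are placeholders, and it is assumed that the second positional argument of frame_results_to_video_results is the output file):

    from megadetector.detection.video_utils import \
        FrameToVideoOptions, frame_results_to_video_results

    # Keep results for every processed frame (new in 5.0.21), rather than
    # one representative detection per category per video
    options = FrameToVideoOptions()
    options.include_all_processed_frames = True
    frame_results_to_video_results('frame_level_results.json',
                                   'video_level_results.json',
                                   options=options)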
@@ -951,6 +985,7 @@ if False:
     results_file = r'results.json'
     confidence_threshold = 0.75
 
+
     #%% Load detector output
 
     with open(results_file,'r') as f:
megadetector/postprocessing/combine_api_outputs.py

@@ -192,7 +192,7 @@ def combine_api_shard_files(input_files, output_file=None):
 
     Args:
         input_files (list of str): files to merge
-        output_file (str, optiona): file to which we should write merged results
+        output_file (str, optional): file to which we should write merged results
 
     Returns:
         dict: merged results
megadetector/postprocessing/detector_calibration.py

@@ -0,0 +1,367 @@
+"""
+
+detector_calibration.py
+
+Tools for comparing/calibrating confidence values from detectors, particularly different
+versions of MegaDetector.
+
+"""
+
+#%% Constants and imports
+
+import random
+
+from tqdm import tqdm
+from enum import IntEnum
+from collections import defaultdict
+
+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+
+from megadetector.postprocessing.validate_batch_results import \
+    validate_batch_results, ValidateBatchResultsOptions
+from megadetector.utils.ct_utils import get_iou
+
+
+#%% Classes
+
+class CalibrationOptions:
+    """
+    Options controlling comparison/calibration behavior.
+    """
+
+    def __init__(self):
+
+        #: IoU threshold used for determining whether two detections are the same
+        #:
+        #: When multiple detections match, we will only use the highest-matching IoU.
+        self.iou_threshold = 0.75
+
+        #: Minimum confidence threshold to consider for calibration (should be lower than
+        #: the lowest value you would use in realistic situations)
+        self.confidence_threshold = 0.025
+
+        #: Should we populate the data_a and data_b fields in the return value?
+        self.return_data = False
+
+        #: Model name to use in printouts and plots for result set A
+        self.model_name_a = 'model_a'
+
+        #: Model name to use in printouts and plots for result set B
+        self.model_name_b = 'model_b'
+
+        #: Maximum number of samples to use for plotting or calibration per category,
+        #: or None to use all paired values.
+        self.max_samples_per_category = None
+
+        #: List of category IDs to use for plotting comparisons, or None to plot
+        #: all categories.
+        self.categories_to_plot = None
+
+        #: Optionally map category ID to name in plot labels
+        self.category_id_to_name = None
+
+# ...class CalibrationOptions
+
+class ConfidenceMatchColumns(IntEnum):
+
+    COLUMN_CONF_A = 0
+    COLUMN_CONF_B = 1
+    COLUMN_CONF_IOU = 2
+    COLUMN_CONF_I_IMAGE = 3
+    COLUMN_CONF_CATEGORY_ID = 4
+
+class CalibrationResults:
+    """
+    Results of a model-to-model comparison.
+    """
+
+    def __init__(self):
+
+        #: List of tuples: [conf_a, conf_b, iou, i_image, category_id]
+        self.confidence_matches = []
+
+        #: Populated with the data loaded from json_filename_a if options.return_data is True
+        self.data_a = None
+
+        #: Populated with the data loaded from json_filename_b if options.return_data is True
+        self.data_b = None
+
+# ...class CalibrationResults
+
+
+#%% Calibration functions
+
+def compare_model_confidence_values(json_filename_a,json_filename_b,options=None):
+    """
+    Compare confidence values across two .json results files. Compares only detections that
+    can be matched by IoU, i.e., does not do anything with detections that only appear in one file.
+
+    Args:
+        json_filename_a (str or dict): filename containing results from the first model to be compared;
+            should refer to the same images as [json_filename_b]. Can also be a loaded results dict.
+        json_filename_b (str or dict): filename containing results from the second model to be compared;
+            should refer to the same images as [json_filename_a]. Can also be a loaded results dict.
+        options (CalibrationOptions, optional): all the parameters used to control this process, see
+            CalibrationOptions for details
+
+    Returns:
+        CalibrationResults: description of the comparison results
+    """
+
+    ## Option handling
+
+    if options is None:
+        options = CalibrationOptions()
+
+    validation_options = ValidateBatchResultsOptions()
+    validation_options.return_data = True
+
+    if isinstance(json_filename_a,str):
+        results_a = validate_batch_results(json_filename_a,options=validation_options)
+        assert len(results_a['validation_results']['errors']) == 0
+    else:
+        assert isinstance(json_filename_a,dict)
+        results_a = json_filename_a
+
+    if isinstance(json_filename_b,str):
+        results_b = validate_batch_results(json_filename_b,options=validation_options)
+        assert len(results_b['validation_results']['errors']) == 0
+    else:
+        assert isinstance(json_filename_b,dict)
+        results_b = json_filename_b
+
+
+    ## Make sure these results sets are comparable
+
+    image_filenames_a = [im['file'] for im in results_a['images']]
+    image_filenames_b = [im['file'] for im in results_b['images']]
+
+    assert set(image_filenames_a) == set(image_filenames_b), \
+        'Cannot calibrate non-matching image sets'
+
+    categories_a = results_a['detection_categories']
+    categories_b = results_b['detection_categories']
+    assert set(categories_a.keys()) == set(categories_b.keys())
+    for k in categories_a.keys():
+        assert categories_a[k] == categories_b[k], 'Category mismatch'
+
+
+    ## Compare detections
+
+    image_filename_b_to_im = {}
+    for im in results_b['images']:
+        image_filename_b_to_im[im['file']] = im
+
+    n_detections_a = 0
+    n_detections_a_queried = 0
+    n_detections_a_matched = 0
+
+    confidence_matches = []
+
+    # For each image
+    # im_a = results_a['images'][0]
+    for i_image,im_a in tqdm(enumerate(results_a['images']),total=len(results_a['images'])):
+
+        fn = im_a['file']
+        im_b = image_filename_b_to_im[fn]
+
+        if 'detections' not in im_a or im_a['detections'] is None:
+            continue
+        if 'detections' not in im_b or im_b['detections'] is None:
+            continue
+
+        # For each detection in result set A...
+        #
+        # det_a = im_a['detections'][0]
+        for det_a in im_a['detections']:
+
+            n_detections_a += 1
+
+            conf_a = det_a['conf']
+            category_id = det_a['category']
+
+            # Is this above threshold?
+            if conf_a < options.confidence_threshold:
+                continue
+
+            n_detections_a_queried += 1
+
+            bbox_a = det_a['bbox']
+
+            best_iou = None
+            best_iou_conf = None
+
+            # For each detection in result set B...
+            #
+            # det_b = im_b['detections'][0]
+            for det_b in im_b['detections']:
+
+                # Is this the same category?
+                if det_b['category'] != category_id:
+                    continue
+
+                conf_b = det_b['conf']
+
+                # Is this above threshold?
+                if conf_b < options.confidence_threshold:
+                    continue
+
+                bbox_b = det_b['bbox']
+
+                iou = get_iou(bbox_a,bbox_b)
+
+                # Is this an adequate IoU to consider?
+                if iou < options.iou_threshold:
+                    continue
+
+                # Is this the best match so far?
+                if best_iou is None or iou > best_iou:
+                    best_iou = iou
+                    best_iou_conf = conf_b
+
+            # ...for each detection in im_b
+
+            if best_iou is not None:
+                n_detections_a_matched += 1
+                conf_result = [conf_a,best_iou_conf,best_iou,i_image,category_id]
+                confidence_matches.append(conf_result)
+
+        # ...for each detection in im_a
+
+    # ...for each image in result set A
+
+    print('\nOf {} detections in result set A, queried {}, matched {}'.format(
+        n_detections_a,n_detections_a_queried,n_detections_a_matched))
+    assert len(confidence_matches) == n_detections_a_matched
+
+    calibration_results = CalibrationResults()
+    calibration_results.confidence_matches = confidence_matches
+
+    if options.return_data:
+        calibration_results.data_a = results_a
+        calibration_results.data_b = results_b
+
+    return calibration_results
+
+# ...def compare_model_confidence_values(...)
+
+
+#%% Plotting functions
+
+def plot_matched_confidence_values(calibration_results,output_filename,options=None):
+    """
+    Given a set of paired confidence values for matching detections (from
+    compare_model_confidence_values), plot histograms of those pairs for each
+    detection category.
+
+    Args:
+        calibration_results (CalibrationResults): output from a call to
+            compare_model_confidence_values, containing paired confidence
+            values for two sets of detection results.
+        output_filename (str): filename to write the plot (.png or .jpg)
+        options (CalibrationOptions, optional): plotting options, see
+            CalibrationOptions for details.
+    """
+
+    fig_w = 12
+    fig_h = 8
+    n_hist_bins = 80
+
+    if options is None:
+        options = CalibrationOptions()
+
+    # Find matched confidence pairs for each category ID
+    category_to_matches = defaultdict(list)
+
+    confidence_matches = calibration_results.confidence_matches
+    for m in confidence_matches:
+        category_id = m[ConfidenceMatchColumns.COLUMN_CONF_CATEGORY_ID]
+        category_to_matches[category_id].append(m)
+
+    # Optionally sample matches
+    category_to_samples = defaultdict(list)
+
+    for i_category,category_id in enumerate(category_to_matches.keys()):
+
+        matches_this_category = category_to_matches[category_id]
+
+        if (options.max_samples_per_category is None) or \
+           (len(matches_this_category) <= options.max_samples_per_category):
+            category_to_samples[category_id] = matches_this_category
+        else:
+            assert len(matches_this_category) > options.max_samples_per_category
+            category_to_samples[category_id] = random.sample(matches_this_category,options.max_samples_per_category)
+
+    del category_to_matches
+    del confidence_matches
+
+    categories_to_plot = list(category_to_samples.keys())
+
+    if options.categories_to_plot is not None:
+        categories_to_plot = [category_id for category_id in categories_to_plot if\
+                              category_id in options.categories_to_plot]
+
+    n_subplots = len(categories_to_plot)
+
+    plt.ioff()
+
+    fig = matplotlib.figure.Figure(figsize=(fig_w, fig_h), tight_layout=True)
+    # fig,axes = plt.subplots(nrows=n_subplots,ncols=1)
+
+    axes = fig.subplots(n_subplots, 1)
+
+    # i_category = 0; category_id = categories_to_plot[i_category]
+    for i_category,category_id in enumerate(categories_to_plot):
+
+        ax = axes[i_category]
+
+        category_string = category_id
+        if options.category_id_to_name is not None and \
+           category_id in options.category_id_to_name:
+            category_string = options.category_id_to_name[category_id]
+
+        samples_this_category = category_to_samples[category_id]
+        x = [m[0] for m in samples_this_category]
+        y = [m[1] for m in samples_this_category]
+
+        weights_a = np.ones_like(x)/float(len(x))
+        weights_b = np.ones_like(y)/float(len(y))
+        ax.hist(x,histtype='step',bins=n_hist_bins,density=False,color='red',weights=weights_a)
+        ax.hist(y,histtype='step',bins=n_hist_bins,density=False,color='blue',weights=weights_b)
+        ax.legend([options.model_name_a,options.model_name_b])
+        ax.set_ylabel(category_string)
+        # plt.tight_layout()
+
+        # I experimented with heat maps, but they weren't very informative.
+        # Leaving this code here in case I revisit. Note to self: scatter plots
+        # were a disaster.
+        if False:
+            heatmap, xedges, yedges = np.histogram2d(x, y, bins=30)
+            extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
+            plt.imshow(heatmap.T, extent=extent, origin='lower', norm='log')
+
+    # ...for each category for which we need to generate a histogram
+
+    plt.close(fig)
+    fig.savefig(output_filename,dpi=100)
+
+# ...def plot_matched_confidence_values(...)
+
+
+#%% Interactive driver(s)
+
+if False:
+
+    #%%
+
+    options = ValidateBatchResultsOptions()
+    # json_filename = r'g:\temp\format.json'
+    # json_filename = r'g:\temp\test-videos\video_results.json'
+    json_filename = r'g:\temp\test-videos\image_results.json'
+    options.check_image_existence = True
+    options.relative_path_base = r'g:\temp\test-videos'
+    validate_batch_results(json_filename,options)
+
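Putting the new module's two entry points together, a hedged usage sketch; the result filenames and model labels are placeholders, and both files must contain results for the same image set:

    from megadetector.postprocessing.detector_calibration import \
        CalibrationOptions, compare_model_confidence_values, \
        plot_matched_confidence_values

    options = CalibrationOptions()
    options.model_name_a = 'model_a_v5'   # placeholder label used in plot legends
    options.model_name_b = 'model_b_v5'   # placeholder label used in plot legends
    options.max_samples_per_category = 5000

    # Match detections by IoU across the two result sets, then plot
    # per-category confidence histograms for the matched pairs
    results = compare_model_confidence_values('results_model_a.json',
                                              'results_model_b.json',
                                              options=options)
    plot_matched_confidence_values(results,'confidence_comparison.png',options=options)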
megadetector/postprocessing/md_to_coco.py

@@ -41,7 +41,8 @@ def md_to_coco(md_results_file,
     The default confidence threshold is not 0; the assumption is that by default, you are
     going to treat the resulting COCO file as a set of labels. If you are using the resulting COCO
     file to evaluate a detector, you likely want a default confidence threshold of 0. Confidence
-    values will be written to the semi-standard "score" field for each image
+    values will be written to the semi-standard "score" field for each image if
+    preserve_nonstandard_metadata is True.
 
     A folder of images is required if width and height information are not available
     in the MD results file.
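A sketch of the interaction documented above, for the detector-evaluation case; md_to_coco and preserve_nonstandard_metadata are named in the diff, but the input filename is a placeholder and the confidence_threshold parameter name is an assumption about this signature:

    from megadetector.postprocessing.md_to_coco import md_to_coco

    # For evaluation (rather than labeling), keep all detections with a
    # threshold of 0, and keep per-image "score" values via
    # preserve_nonstandard_metadata
    md_to_coco('md_results.json',                  # placeholder filename
               confidence_threshold=0.0,           # assumed parameter name
               preserve_nonstandard_metadata=True)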