megadetector-5.0.20-py3-none-any.whl → megadetector-5.0.22-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (41)
  1. megadetector/data_management/cct_json_utils.py +143 -7
  2. megadetector/data_management/cct_to_md.py +12 -5
  3. megadetector/data_management/databases/integrity_check_json_db.py +83 -77
  4. megadetector/data_management/importers/osu-small-animals-to-json.py +4 -4
  5. megadetector/data_management/importers/raic_csv_to_md_results.py +416 -0
  6. megadetector/data_management/importers/zamba_results_to_md_results.py +1 -2
  7. megadetector/data_management/lila/create_lila_test_set.py +25 -11
  8. megadetector/data_management/lila/download_lila_subset.py +9 -2
  9. megadetector/data_management/lila/generate_lila_per_image_labels.py +3 -2
  10. megadetector/data_management/lila/test_lila_metadata_urls.py +5 -1
  11. megadetector/data_management/read_exif.py +10 -14
  12. megadetector/data_management/rename_images.py +1 -1
  13. megadetector/data_management/yolo_output_to_md_output.py +18 -5
  14. megadetector/detection/process_video.py +14 -3
  15. megadetector/detection/pytorch_detector.py +15 -3
  16. megadetector/detection/run_detector.py +4 -3
  17. megadetector/detection/run_inference_with_yolov5_val.py +121 -13
  18. megadetector/detection/video_utils.py +40 -17
  19. megadetector/postprocessing/classification_postprocessing.py +1 -1
  20. megadetector/postprocessing/combine_api_outputs.py +1 -1
  21. megadetector/postprocessing/compare_batch_results.py +931 -142
  22. megadetector/postprocessing/detector_calibration.py +565 -0
  23. megadetector/postprocessing/md_to_coco.py +85 -19
  24. megadetector/postprocessing/postprocess_batch_results.py +32 -21
  25. megadetector/postprocessing/validate_batch_results.py +174 -64
  26. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -12
  27. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  28. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -1
  29. megadetector/utils/ct_utils.py +64 -2
  30. megadetector/utils/md_tests.py +15 -13
  31. megadetector/utils/path_utils.py +153 -37
  32. megadetector/utils/process_utils.py +9 -3
  33. megadetector/utils/write_html_image_list.py +21 -6
  34. megadetector/visualization/visualization_utils.py +329 -102
  35. megadetector/visualization/visualize_db.py +104 -63
  36. {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/LICENSE +0 -0
  37. {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/METADATA +143 -142
  38. {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/RECORD +40 -39
  39. {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/WHEEL +1 -1
  40. {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/top_level.txt +0 -0
  41. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
@@ -3,7 +3,8 @@
3
3
  md_to_coco.py
4
4
 
5
5
  "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
6
- this is an opinionated transformation that requires a confidence threshold.
6
+ this is an opinionated transformation that requires a confidence threshold for most
7
+ applications.
7
8
 
8
9
  Does not currently handle classification information.
9
10
 
@@ -18,6 +19,7 @@ import uuid
18
19
  from tqdm import tqdm
19
20
 
20
21
  from megadetector.visualization import visualization_utils as vis_utils
22
+ from megadetector.utils.path_utils import insert_before_extension
21
23
 
22
24
  default_confidence_threshold = 0.15
23
25
 
@@ -33,23 +35,29 @@ def md_to_coco(md_results_file,
33
35
  preserve_nonstandard_metadata=True,
34
36
  include_failed_images=True,
35
37
  include_annotations_without_bounding_boxes=True,
36
- empty_category_id='0'):
38
+ empty_category_id='0',
39
+ overwrite_behavior='skip',
40
+ verbose=True,
41
+ image_filename_to_size=None):
37
42
  """
38
43
  "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
39
- this is an opinionated transformation that requires a confidence threshold.
44
+ this is an opinionated transformation that typically requires a confidence threshold.
40
45
 
41
46
  The default confidence threshold is not 0; the assumption is that by default, you are
42
47
  going to treat the resulting COCO file as a set of labels. If you are using the resulting COCO
43
- file to evaluate a detector, you likely want a default confidence threshold of 0. Confidence
44
- values will be written to the semi-standard "score" field for each image
48
+ file to *evaluate* a detector, rather than as a set of labels, you likely want a
49
+ confidence threshold of 0. Confidence values will be written to the semi-standard "score"
50
+ field for each image (regardless of the threshold) if preserve_nonstandard_metadata is True.
45
51
 
46
52
  A folder of images is required if width and height information are not available
47
53
  in the MD results file.
48
54
 
49
55
  Args:
50
- md_results_file (str): MD results .json file to convert to COCO format
56
+ md_results_file (str): MD results .json file to convert to COCO
57
+ format
51
58
  coco_output_file (str, optional): COCO .json file to write; if this is None, we'll return
52
- a COCO-formatted dict, but won't write it to disk
59
+ a COCO-formatted dict, but won't write it to disk. If this is 'auto', we'll write to
60
+ [md_results_file_without_extension].coco.json.
53
61
  image_folder (str, optional): folder of images, required if 'width' and 'height' are not
54
62
  present in the MD results file (they are not required by the format)
55
63
  confidence_threshold (float, optional): boxes below this confidence threshold will not be
@@ -59,8 +67,8 @@ def md_to_coco(md_results_file,
59
67
  info (dict, optional): arbitrary metadata to include in an "info" field in the COCO-formatted
60
68
  output
61
69
  preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
62
- non-standard "conf" field in each annotation, and any random fields present in each image's data
63
- (e.g. EXIF metadata) will be propagated to COCO output
70
+ non-standard "score" field in each annotation, and any random fields present in each image's
71
+ data (e.g. EXIF metadata) will be propagated to COCO output
64
72
  include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
65
73
  with a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
66
74
  include_annotations_without_bounding_boxes (bool, optional): if this is True, annotations with
@@ -68,22 +76,62 @@ def md_to_coco(md_results_file,
68
76
  images will be represented with no annotations.
69
77
  empty_category_id (str, optional): category ID reserved for the 'empty' class, should not be
70
78
  attached to any bounding boxes
79
+ overwrite_behavior (str, optional): determines behavior if the output file exists ('skip' to skip conversion,
80
+ 'overwrite' to overwrite the existing file, 'error' to raise an error, 'skip_if_valid' to skip conversion
81
+ if the .json file appears to be intact (does not verify COCO formatting, just intact-.json-ness))
82
+ verbose (bool, optional): enable debug output, including the progress bar,
83
+ image_filename_to_size (dict, optional): dictionary mapping relative image paths to (w,h) tuples. Reading
84
+ image sizes is the slowest step, so if you need to convert many results files at once for the same
85
+ set of images, things will be gobs faster if you read the image sizes in advance and pass them in
86
+ via this argument. The format used here is the same format output by parallel_get_image_sizes().
71
87
 
72
88
  Returns:
73
89
  dict: the COCO data dict, identical to what's written to [coco_output_file] if [coco_output_file]
74
90
  is not None.
75
91
  """
92
+
93
+ assert isinstance(md_results_file,str)
94
+ assert os.path.isfile(md_results_file), \
95
+ 'MD results file {} does not exist'.format(md_results_file)
76
96
 
97
+ if coco_output_file == 'auto':
98
+ coco_output_file = insert_before_extension(md_results_file,'coco')
99
+
100
+ if coco_output_file is not None:
101
+ if os.path.isfile(coco_output_file):
102
+ if overwrite_behavior == 'skip':
103
+ print('Skipping conversion of {}, output file {} exists'.format(
104
+ md_results_file,coco_output_file))
105
+ return None
106
+ elif overwrite_behavior == 'skip_if_valid':
107
+ output_file_is_valid = True
108
+ try:
109
+ with open(coco_output_file,'r') as f:
110
+ _ = json.load(f)
111
+ except Exception:
112
+ print('COCO file {} is invalid, proceeding with conversion'.format(
113
+ coco_output_file))
114
+ output_file_is_valid = False
115
+ if output_file_is_valid:
116
+ print('Skipping conversion of {}, output file {} exists and is valid'.format(
117
+ md_results_file,coco_output_file))
118
+ return None
119
+ elif overwrite_behavior == 'overwrite':
120
+ pass
121
+ elif overwrite_behavior == 'error':
122
+ raise ValueError('Output file {} exists'.format(coco_output_file))
123
+
77
124
  with open(md_results_file,'r') as f:
78
125
  md_results = json.load(f)
79
126
 
80
127
  coco_images = []
81
128
  coco_annotations = []
82
129
 
83
- print('Converting MD results to COCO...')
130
+ print('Converting MD results file {} to COCO file {}...'.format(
131
+ md_results_file, coco_output_file))
84
132
 
85
133
  # im = md_results['images'][0]
86
- for im in tqdm(md_results['images']):
134
+ for im in tqdm(md_results['images'],disable=(not verbose)):
87
135
 
88
136
  coco_im = {}
89
137
  coco_im['id'] = im['file']
@@ -101,18 +149,36 @@ def md_to_coco(md_results_file,
101
149
  h = None
102
150
 
103
151
  if ('width' not in im) or ('height' not in im) or validate_image_sizes:
104
- if image_folder is None:
105
- raise ValueError('Must provide an image folder when height/width need to be read from images')
106
- image_file_abs = os.path.join(image_folder,im['file'])
107
- pil_im = vis_utils.open_image(image_file_abs)
108
- w = pil_im.width
109
- h = pil_im.height
152
+ if (image_folder is None) and (image_filename_to_size is None):
153
+ raise ValueError('Must provide an image folder or a size mapping when height/width need to be read from images')
154
+
155
+ w = None; h = None
156
+
157
+ if image_filename_to_size is not None:
158
+
159
+ if im['file'] not in image_filename_to_size:
160
+ print('Warning: file {} not in image size mapping dict, reading from file'.format(im['file']))
161
+ else:
162
+ image_size = image_filename_to_size[im['file']]
163
+ if image_size is not None:
164
+ assert len(image_size) == 2
165
+ w = image_size[0]
166
+ h = image_size[1]
167
+
168
+ if w is None:
169
+
170
+ image_file_abs = os.path.join(image_folder,im['file'])
171
+ pil_im = vis_utils.open_image(image_file_abs)
172
+ w = pil_im.width
173
+ h = pil_im.height
174
+
110
175
  if validate_image_sizes:
111
176
  if 'width' in im:
112
177
  assert im['width'] == w, 'Width mismatch for image {}'.format(im['file'])
113
178
  if 'height' in im:
114
179
  assert im['height'] == h, 'Height mismatch for image {}'.format(im['file'])
115
180
  else:
181
+
116
182
  w = im['width']
117
183
  h = im['height']
118
184
 
@@ -202,9 +268,9 @@ def md_to_coco(md_results_file,
202
268
  with open(coco_output_file,'w') as f:
203
269
  json.dump(output_dict,f,indent=1)
204
270
 
205
- return output_dict
271
+ return output_dict
206
272
 
207
- # def md_to_coco(...)
273
+ # ...def md_to_coco(...)
208
274
 
209
275
 
210
276
  #%% Interactive driver
@@ -92,16 +92,18 @@ class PostProcessingOptions:
92
92
  #: Optional .json file containing ground truth information
93
93
  self.ground_truth_json_file = ''
94
94
 
95
- #: Classes we'll treat as negative
95
+ #: List of classes we'll treat as negative (defaults to "empty", typically includes
96
+ #: classes like "blank", "misfire", etc.).
96
97
  #:
97
98
  #: Include the token "#NO_LABELS#" to indicate that an image with no annotations
98
99
  #: should be considered empty.
99
100
  self.negative_classes = DEFAULT_NEGATIVE_CLASSES
100
101
 
101
- #: Classes we'll treat as neither positive nor negative
102
+ #: List of classes we'll treat as neither positive nor negative (defaults to
103
+ #: "unknown", typically includes classes like "unidentifiable").
102
104
  self.unlabeled_classes = DEFAULT_UNKNOWN_CLASSES
103
105
 
104
- #: A list of output sets that we should count, but not render images for.
106
+ #: List of output sets that we should count, but not render images for.
105
107
  #:
106
108
  #: Typically used to preview sets with lots of empties, where you don't want to
107
109
  #: subset but also don't want to render 100,000 empty images.
@@ -198,11 +200,16 @@ class PostProcessingOptions:
198
200
 
199
201
  #: When classification results are present, should be sort alphabetically by class name (False)
200
202
  #: or in descending order by frequency (True)?
201
- self.sort_classification_results_by_count = False
203
+ self.sort_classification_results_by_count = False
202
204
 
203
205
  #: Should we split individual pages up into smaller pages if there are more than
204
206
  #: N images?
205
207
  self.max_figures_per_html_file = None
208
+
209
+ #: Footer text for the index page
210
+ # self.footer_text = '<br/><p style="font-size:80%;">Preview page created with the <a href="{}">MegaDetector Python package</a>.</p>'.\
211
+ # format('https://megadetector.readthedocs.io')
212
+ self.footer_text = ''
206
213
 
207
214
  # ...__init__()
208
215
 
@@ -590,6 +597,7 @@ def _prepare_html_subpages(images_html, output_dir, options=None):
590
597
  html_image_list_options = {}
591
598
  html_image_list_options['maxFiguresPerHtmlFile'] = options.max_figures_per_html_file
592
599
  html_image_list_options['headerHtml'] = '<h1>{}</h1>'.format(res.upper())
600
+ html_image_list_options['pageTitle'] = '{}'.format(res.lower())
593
601
 
594
602
  # Don't write empty pages
595
603
  if len(array) == 0:
@@ -762,7 +770,7 @@ def _render_image_no_gt(file_info,detection_categories_to_results_name,
762
770
  if len(rendered_image_html_info) > 0:
763
771
 
764
772
  image_result = [[res, rendered_image_html_info]]
765
-
773
+ classes_rendered_this_image = set()
766
774
  max_conf = 0
767
775
 
768
776
  for det in detections:
@@ -782,11 +790,14 @@ def _render_image_no_gt(file_info,detection_categories_to_results_name,
782
790
  # confidence threshold
783
791
  if (options.classification_confidence_threshold < 0) or \
784
792
  (top1_class_score >= options.classification_confidence_threshold):
785
- image_result.append(['class_{}'.format(top1_class_name),
786
- rendered_image_html_info])
793
+ class_string = 'class_{}'.format(top1_class_name)
787
794
  else:
788
- image_result.append(['class_unreliable',
795
+ class_string = 'class_unreliable'
796
+
797
+ if class_string not in classes_rendered_this_image:
798
+ image_result.append([class_string,
789
799
  rendered_image_html_info])
800
+ classes_rendered_this_image.add(class_string)
790
801
 
791
802
  # ...if this detection has classification info
792
803
 
@@ -887,7 +898,6 @@ def _render_image_with_gt(file_info,ground_truth_indexed_db,
887
898
  #%% Main function
888
899
 
889
900
  def process_batch_results(options):
890
-
891
901
  """
892
902
  Given a .json or .csv file containing MD results, do one or more of the following:
893
903
 
@@ -1083,7 +1093,8 @@ def process_batch_results(options):
1083
1093
 
1084
1094
  output_html_file = ''
1085
1095
 
1086
- style_header = """<head>
1096
+ style_header = """<head>
1097
+ <title>Detection results preview</title>
1087
1098
  <style type="text/css">
1088
1099
  a { text-decoration: none; }
1089
1100
  body { font-family: segoe ui, calibri, "trebuchet ms", verdana, arial, sans-serif; }
@@ -1424,7 +1435,7 @@ def process_batch_results(options):
1424
1435
  else:
1425
1436
  confidence_threshold_string = str(options.confidence_threshold)
1426
1437
 
1427
- index_page = """<html>
1438
+ index_page = """<html>
1428
1439
  {}
1429
1440
  <body>
1430
1441
  <h2>Evaluation</h2>
@@ -1509,7 +1520,7 @@ def process_batch_results(options):
1509
1520
  index_page += '</div>'
1510
1521
 
1511
1522
  # Close body and html tags
1512
- index_page += '</body></html>'
1523
+ index_page += '{}</body></html>'.format(options.footer_text)
1513
1524
  output_html_file = os.path.join(output_dir, 'index.html')
1514
1525
  with open(output_html_file, 'w') as f:
1515
1526
  f.write(index_page)
@@ -1529,7 +1540,6 @@ def process_batch_results(options):
1529
1540
  # for each category
1530
1541
  images_html = collections.defaultdict(list)
1531
1542
 
1532
-
1533
1543
  # Add default entries by accessing them for the first time
1534
1544
 
1535
1545
  # Maps sorted tuples of detection category IDs (string ints) - e.g. ("1"), ("1", "4", "7") - to
@@ -1637,14 +1647,15 @@ def process_batch_results(options):
1637
1647
  files_to_render), total=len(files_to_render)))
1638
1648
  else:
1639
1649
  for file_info in tqdm(files_to_render):
1640
- rendering_results.append(_render_image_no_gt(file_info,
1641
- detection_categories_to_results_name,
1642
- detection_categories,
1643
- classification_categories,
1644
- options=options))
1650
+ rendering_result = _render_image_no_gt(file_info,
1651
+ detection_categories_to_results_name,
1652
+ detection_categories,
1653
+ classification_categories,
1654
+ options=options)
1655
+ rendering_results.append(rendering_result)
1645
1656
 
1646
- elapsed = time.time() - start_time
1647
-
1657
+ elapsed = time.time() - start_time
1658
+
1648
1659
  # Do we have classification results in addition to detection results?
1649
1660
  has_classification_info = False
1650
1661
 
@@ -1793,7 +1804,7 @@ def process_batch_results(options):
1793
1804
  cname, cname.lower(), ccount)
1794
1805
  index_page += '</div>\n'
1795
1806
 
1796
- index_page += '</body></html>'
1807
+ index_page += '{}</body></html>'.format(options.footer_text)
1797
1808
  output_html_file = os.path.join(output_dir, 'index.html')
1798
1809
  with open(output_html_file, 'w') as f:
1799
1810
  f.write(index_page)
@@ -15,8 +15,10 @@ import sys
15
15
  import json
16
16
  import argparse
17
17
 
18
+ from tqdm import tqdm
19
+
18
20
  from megadetector.detection.video_utils import is_video_file
19
- from megadetector.utils.ct_utils import args_to_object
21
+ from megadetector.utils.ct_utils import args_to_object, is_list_sorted # noqa
20
22
 
21
23
  typical_info_fields = ['detector','detection_completion_time',
22
24
  'classifier','classification_completion_time',
@@ -42,11 +44,16 @@ class ValidateBatchResultsOptions:
42
44
  #:
43
45
  #: If None, assumes absolute paths.
44
46
  self.relative_path_base = None
47
+
48
+ #: Should we return the loaded data, or just the validation results?
49
+ self.return_data = False
50
+
51
+ #: Enable additional debug output
52
+ self.verbose = False
45
53
 
46
54
  # ...class ValidateBatchResultsOptions
47
55
 
48
56
 
49
-
50
57
  #%% Main function
51
58
 
52
59
  def validate_batch_results(json_filename,options=None):
@@ -55,88 +62,181 @@ def validate_batch_results(json_filename,options=None):
55
62
 
56
63
  Args:
57
64
  json_filename (str): the filename to validate
58
- options (ValidateBatchResultsOptions, optionsl): all the parameters used to control this
65
+ options (ValidateBatchResultsOptions, optional): all the parameters used to control this
59
66
  process, see ValidateBatchResultsOptions for details
60
67
 
61
68
  Returns:
62
- bool: reserved; currently always errors or returns True.
69
+ dict: a dict with a field called "validation_results", which is itself a dict. The reason
70
+ it's a dict inside a dict is that if return_data is True, the outer dict also contains all
71
+ the loaded data. The "validation_results" dict contains fields called "errors", "warnings",
72
+ and "filename". "errors" and "warnings" are lists of strings, although "errors" will never
73
+ be longer than N=1, since validation fails at the first error.
74
+
75
+
63
76
  """
64
77
 
65
78
  if options is None:
66
79
  options = ValidateBatchResultsOptions()
67
80
 
81
+ if options.verbose:
82
+ print('Loading results from {}'.format(json_filename))
83
+
68
84
  with open(json_filename,'r') as f:
69
85
  d = json.load(f)
70
86
 
71
- ## Info validation
87
+ validation_results = {}
88
+ validation_results['filename'] = json_filename
89
+ validation_results['warnings'] = []
90
+ validation_results['errors'] = []
72
91
 
73
- assert 'info' in d
74
- info = d['info']
92
+ if not isinstance(d,dict):
93
+
94
+ validation_results['errors'].append('Input data is not a dict')
95
+ to_return = {}
96
+ to_return['validation_results'] = validation_results
97
+ return to_return
75
98
 
76
- assert isinstance(info,dict)
77
- assert 'format_version' in info
78
- format_version = float(info['format_version'])
79
- assert format_version >= 1.3, 'This validator can only be used with format version 1.3 or later'
99
+ try:
100
+
101
+ ## Info validation
102
+
103
+ if not 'info' in d:
104
+ raise ValueError('Input does not contain info field')
80
105
 
81
- print('Validating a .json results file with format version {}'.format(format_version))
82
-
83
- ## Category validation
84
-
85
- assert 'detection_categories' in d
86
- for k in d['detection_categories'].keys():
87
- # Categories should be string-formatted ints
88
- assert isinstance(k,str)
89
- _ = int(k)
90
- assert isinstance(d['detection_categories'][k],str)
106
+ info = d['info']
107
+
108
+ if not isinstance(info,dict):
109
+ raise ValueError('Input contains invalid info field')
110
+
111
+ if 'format_version' not in info :
112
+ raise ValueError('Input does not specify format version')
113
+
114
+ format_version = float(info['format_version'])
115
+ if format_version < 1.3:
116
+ raise ValueError('This validator can only be used with format version 1.3 or later')
91
117
 
92
- if 'classification_categories' in d:
93
- for k in d['classification_categories'].keys():
94
- # Categories should be string-formatted ints
95
- assert isinstance(k,str)
118
+
119
+ ## Category validation
120
+
121
+ if 'detection_categories' not in d:
122
+ raise ValueError('Input does not contain detection_categories field')
123
+
124
+ for k in d['detection_categories'].keys():
125
+ # Category ID should be string-formatted ints
126
+ if not isinstance(k,str):
127
+ raise ValueError('Invalid detection category ID: {}'.format(k))
96
128
  _ = int(k)
97
- assert isinstance(d['classification_categories'][k],str)
98
-
99
-
100
- ## Image validation
101
-
102
- assert 'images' in d
103
- assert isinstance(d['images'],list)
104
-
105
- # im = d['images'][0]
106
- for im in d['images']:
129
+ if not isinstance(d['detection_categories'][k],str):
130
+ raise ValueError('Invalid detection category name: {}'.format(
131
+ d['detection_categories'][k]))
132
+
133
+ if 'classification_categories' in d:
134
+ for k in d['classification_categories'].keys():
135
+ # Categories should be string-formatted ints
136
+ if not isinstance(k,str):
137
+ raise ValueError('Invalid classification category ID: {}'.format(k))
138
+ _ = int(k)
139
+ if not isinstance(d['classification_categories'][k],str):
140
+ raise ValueError('Invalid classification category name: {}'.format(
141
+ d['classification_categories'][k]))
107
142
 
108
- assert isinstance(im,dict)
109
- assert 'file' in im
110
143
 
111
- file = im['file']
144
+ ## Image validation
112
145
 
113
- if options.check_image_existence:
114
- if options.relative_path_base is None:
115
- file_abs = file
116
- else:
117
- file_abs = os.path.join(options.relative_path_base,file)
118
- assert os.path.isfile(file_abs), 'Cannot find file {}'.format(file_abs)
146
+ if 'images' not in d:
147
+ raise ValueError('images field not present')
148
+ if not isinstance(d['images'],list):
149
+ raise ValueError('Invalid images field')
150
+
151
+ if options.verbose:
152
+ print('Validating images')
153
+
154
+ # im = d['images'][0]
155
+ for i_im,im in tqdm(enumerate(d['images']),total=len(d['images']),disable=(not options.verbose)):
119
156
 
120
- if 'detections' not in im or im['detections'] is None:
121
- assert 'failure' in im and isinstance(im['failure'],str)
122
- else:
123
- assert isinstance(im['detections'],list)
157
+ if not isinstance(im,dict):
158
+ raise ValueError('Invalid image at index {}'.format(i_im))
159
+ if 'file' not in im:
160
+ raise ValueError('Image without filename at index {}'.format(i_im))
161
+
162
+ file = im['file']
124
163
 
125
- if is_video_file(im['file']) and (format_version >= 1.4):
126
- assert 'frame_rate' in im
127
164
  if 'detections' in im and im['detections'] is not None:
128
165
  for det in im['detections']:
129
- assert 'frame_number' in det
166
+ assert 'category' in det, 'Image {} has a detection with no category'.format(file)
167
+ assert 'conf' in det, 'Image {} has a detection with no confidence'.format(file)
168
+ assert isinstance(det['conf'],float), \
169
+ 'Image {} has an illegal confidence value'.format(file)
170
+ assert 'bbox' in det, 'Image {} has a detection with no box'.format(file)
171
+ assert det['category'] in d['detection_categories'], \
172
+ 'Image {} has a detection with an unmapped category {}'.format(
173
+ file,det['category'])
174
+
175
+ if options.check_image_existence:
176
+
177
+ if options.relative_path_base is None:
178
+ file_abs = file
179
+ else:
180
+ file_abs = os.path.join(options.relative_path_base,file)
181
+ if not os.path.isfile(file_abs):
182
+ raise ValueError('Cannot find file {}'.format(file_abs))
183
+
184
+ if 'failure' in im:
185
+ if im['failure'] is not None:
186
+ if not isinstance(im['failure'],str):
187
+ raise ValueError('Image {} has an illegal [failure] value: {}'.format(
188
+ im['file'],str(im['failure'])))
189
+ if 'detections' not in im:
190
+ s = 'Image {} has a failure value, should also have a null detections array'.format(
191
+ im['file'])
192
+ validation_results['warnings'].append(s)
193
+ elif im['detections'] is not None:
194
+ raise ValueError('Image {} has a failure value but a non-null detections array'.format(
195
+ im['file']))
196
+ else:
197
+ if not isinstance(im['detections'],list):
198
+ raise ValueError('Invalid detections list for image {}'.format(im['file']))
199
+
200
+ if is_video_file(im['file']) and (format_version >= 1.4):
201
+
202
+ if 'frame_rate' not in im:
203
+ raise ValueError('Video without frame rate: {}'.format(im['file']))
204
+ if im['frame_rate'] < 0:
205
+ raise ValueError('Video with illegal frame rate {}: {}'.format(
206
+ str(im['frame_rate']),im['file']))
207
+ if 'detections' in im and im['detections'] is not None:
208
+ for det in im['detections']:
209
+ if 'frame_number' not in det:
210
+ raise ValueError('Frame without frame number in video {}'.format(
211
+ im['file']))
212
+ frame_numbers = [det['frame_number'] for det in im['detections']] # noqa
213
+ # assert is_list_sorted(frame_numbers)
214
+
215
+ # ...for each image
130
216
 
131
- # ...for each image
132
217
 
218
+ ## Validation of other keys
219
+
220
+ for k in d.keys():
221
+ if (k not in typical_keys) and (k not in required_keys):
222
+ validation_results['warnings'].append(
223
+ 'Warning: non-standard key {} present at file level'.format(k))
224
+
225
+ except Exception as e:
226
+
227
+ validation_results['errors'].append(str(e))
228
+
229
+ # ...try/except
133
230
 
134
- ## Checking on other keys
231
+ if options.return_data:
232
+ to_return = d
233
+ else:
234
+ to_return = {}
135
235
 
136
- for k in d.keys():
137
- if k not in typical_keys and k not in required_keys:
138
- print('Warning: non-standard key {} present at file level'.format(k))
139
-
236
+ to_return['validation_results'] = validation_results
237
+
238
+ return to_return
239
+
140
240
  # ...def validate_batch_results(...)
141
241
 
142
242
 
@@ -144,15 +244,25 @@ def validate_batch_results(json_filename,options=None):
144
244
 
145
245
  if False:
146
246
 
147
- #%%
247
+ #%% Validate all .json files in the MD test suite
248
+
249
+ from megadetector.utils.path_utils import recursive_file_list
250
+ filenames = recursive_file_list(os.path.expanduser('~/AppData/Local/Temp/md-tests'))
251
+ filenames = [fn for fn in filenames if fn.endswith('.json')]
252
+ filenames = [fn for fn in filenames if 'detectionIndex' not in fn]
148
253
 
149
254
  options = ValidateBatchResultsOptions()
150
- # json_filename = r'g:\temp\format.json'
151
- # json_filename = r'g:\temp\test-videos\video_results.json'
152
- json_filename = r'g:\temp\test-videos\image_results.json'
153
- options.check_image_existence = True
154
- options.relative_path_base = r'g:\temp\test-videos'
155
- validate_batch_results(json_filename,options)
255
+ options.check_image_existence = False
256
+ options.relative_path_base = None # r'g:\temp\test-videos'
257
+
258
+ for json_filename in filenames:
259
+ results = validate_batch_results(json_filename,options)
260
+ if len(results['validation_results']['warnings']) > 0:
261
+ print('Warnings in file {}:'.format(json_filename))
262
+ for s in results['validation_results']['warnings']:
263
+ print(s)
264
+ print('')
265
+ assert len(results['validation_results']['errors']) == 0
156
266
 
157
267
 
158
268
  #%% Command-line driver