megadetector 5.0.20__py3-none-any.whl → 5.0.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/data_management/cct_json_utils.py +143 -7
- megadetector/data_management/cct_to_md.py +12 -5
- megadetector/data_management/databases/integrity_check_json_db.py +83 -77
- megadetector/data_management/importers/osu-small-animals-to-json.py +4 -4
- megadetector/data_management/importers/raic_csv_to_md_results.py +416 -0
- megadetector/data_management/importers/zamba_results_to_md_results.py +1 -2
- megadetector/data_management/lila/create_lila_test_set.py +25 -11
- megadetector/data_management/lila/download_lila_subset.py +9 -2
- megadetector/data_management/lila/generate_lila_per_image_labels.py +3 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +5 -1
- megadetector/data_management/read_exif.py +10 -14
- megadetector/data_management/rename_images.py +1 -1
- megadetector/data_management/yolo_output_to_md_output.py +18 -5
- megadetector/detection/process_video.py +14 -3
- megadetector/detection/pytorch_detector.py +15 -3
- megadetector/detection/run_detector.py +4 -3
- megadetector/detection/run_inference_with_yolov5_val.py +121 -13
- megadetector/detection/video_utils.py +40 -17
- megadetector/postprocessing/classification_postprocessing.py +1 -1
- megadetector/postprocessing/combine_api_outputs.py +1 -1
- megadetector/postprocessing/compare_batch_results.py +931 -142
- megadetector/postprocessing/detector_calibration.py +565 -0
- megadetector/postprocessing/md_to_coco.py +85 -19
- megadetector/postprocessing/postprocess_batch_results.py +32 -21
- megadetector/postprocessing/validate_batch_results.py +174 -64
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -12
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -1
- megadetector/utils/ct_utils.py +64 -2
- megadetector/utils/md_tests.py +15 -13
- megadetector/utils/path_utils.py +153 -37
- megadetector/utils/process_utils.py +9 -3
- megadetector/utils/write_html_image_list.py +21 -6
- megadetector/visualization/visualization_utils.py +329 -102
- megadetector/visualization/visualize_db.py +104 -63
- {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/LICENSE +0 -0
- {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/METADATA +143 -142
- {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/RECORD +40 -39
- {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/WHEEL +1 -1
- {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/top_level.txt +0 -0
- megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
md_to_coco.py
|
|
4
4
|
|
|
5
5
|
"Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
|
|
6
|
-
this is an opinionated transformation that requires a confidence threshold
|
|
6
|
+
this is an opinionated transformation that requires a confidence threshold for most
|
|
7
|
+
applications.
|
|
7
8
|
|
|
8
9
|
Does not currently handle classification information.
|
|
9
10
|
|
|
@@ -18,6 +19,7 @@ import uuid
|
|
|
18
19
|
from tqdm import tqdm
|
|
19
20
|
|
|
20
21
|
from megadetector.visualization import visualization_utils as vis_utils
|
|
22
|
+
from megadetector.utils.path_utils import insert_before_extension
|
|
21
23
|
|
|
22
24
|
default_confidence_threshold = 0.15
|
|
23
25
|
|
|
@@ -33,23 +35,29 @@ def md_to_coco(md_results_file,
|
|
|
33
35
|
preserve_nonstandard_metadata=True,
|
|
34
36
|
include_failed_images=True,
|
|
35
37
|
include_annotations_without_bounding_boxes=True,
|
|
36
|
-
empty_category_id='0'
|
|
38
|
+
empty_category_id='0',
|
|
39
|
+
overwrite_behavior='skip',
|
|
40
|
+
verbose=True,
|
|
41
|
+
image_filename_to_size=None):
|
|
37
42
|
"""
|
|
38
43
|
"Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
|
|
39
|
-
this is an opinionated transformation that requires a confidence threshold.
|
|
44
|
+
this is an opinionated transformation that typically requires a confidence threshold.
|
|
40
45
|
|
|
41
46
|
The default confidence threshold is not 0; the assumption is that by default, you are
|
|
42
47
|
going to treat the resulting COCO file as a set of labels. If you are using the resulting COCO
|
|
43
|
-
file to evaluate a detector,
|
|
44
|
-
values will be written to the semi-standard "score"
|
|
48
|
+
file to *evaluate* a detector, rather than as a set of labels, you likely want a
|
|
49
|
+
confidence threshold of 0. Confidence values will be written to the semi-standard "score"
|
|
50
|
+
field for each image (regardless of the threshold) if preserve_nonstandard_metadata is True.
|
|
45
51
|
|
|
46
52
|
A folder of images is required if width and height information are not available
|
|
47
53
|
in the MD results file.
|
|
48
54
|
|
|
49
55
|
Args:
|
|
50
|
-
md_results_file (str): MD results .json file to convert to COCO
|
|
56
|
+
md_results_file (str): MD results .json file to convert to COCO
|
|
57
|
+
format
|
|
51
58
|
coco_output_file (str, optional): COCO .json file to write; if this is None, we'll return
|
|
52
|
-
a COCO-formatted dict, but won't write it to disk
|
|
59
|
+
a COCO-formatted dict, but won't write it to disk. If this is 'auto', we'll write to
|
|
60
|
+
[md_results_file_without_extension].coco.json.
|
|
53
61
|
image_folder (str, optional): folder of images, required if 'width' and 'height' are not
|
|
54
62
|
present in the MD results file (they are not required by the format)
|
|
55
63
|
confidence_threshold (float, optional): boxes below this confidence threshold will not be
|
|
@@ -59,8 +67,8 @@ def md_to_coco(md_results_file,
|
|
|
59
67
|
info (dict, optional): arbitrary metadata to include in an "info" field in the COCO-formatted
|
|
60
68
|
output
|
|
61
69
|
preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
|
|
62
|
-
non-standard "
|
|
63
|
-
(e.g. EXIF metadata) will be propagated to COCO output
|
|
70
|
+
non-standard "score" field in each annotation, and any random fields present in each image's
|
|
71
|
+
data (e.g. EXIF metadata) will be propagated to COCO output
|
|
64
72
|
include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
|
|
65
73
|
with a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
|
|
66
74
|
include_annotations_without_bounding_boxes (bool, optional): if this is True, annotations with
|
|
@@ -68,22 +76,62 @@ def md_to_coco(md_results_file,
|
|
|
68
76
|
images will be represented with no annotations.
|
|
69
77
|
empty_category_id (str, optional): category ID reserved for the 'empty' class, should not be
|
|
70
78
|
attached to any bounding boxes
|
|
79
|
+
overwrite_behavior (str, optional): determines behavior if the output file exists ('skip' to skip conversion,
|
|
80
|
+
'overwrite' to overwrite the existing file, 'error' to raise an error, 'skip_if_valid' to skip conversion
|
|
81
|
+
if the .json file appears to be intact (does not verify COCO formatting, just intact-.json-ness))
|
|
82
|
+
verbose (bool, optional): enable debug output, including the progress bar
|
|
83
|
+
image_filename_to_size (dict, optional): dictionary mapping relative image paths to (w,h) tuples. Reading
|
|
84
|
+
image sizes is the slowest step, so if you need to convert many results files at once for the same
|
|
85
|
+
set of images, things will be gobs faster if you read the image sizes in advance and pass them in
|
|
86
|
+
via this argument. The format used here is the same format output by parallel_get_image_sizes().
|
|
71
87
|
|
|
72
88
|
Returns:
|
|
73
89
|
dict: the COCO data dict, identical to what's written to [coco_output_file] if [coco_output_file]
|
|
74
90
|
is not None.
|
|
75
91
|
"""
|
|
92
|
+
|
|
93
|
+
assert isinstance(md_results_file,str)
|
|
94
|
+
assert os.path.isfile(md_results_file), \
|
|
95
|
+
'MD results file {} does not exist'.format(md_results_file)
|
|
76
96
|
|
|
97
|
+
if coco_output_file == 'auto':
|
|
98
|
+
coco_output_file = insert_before_extension(md_results_file,'coco')
|
|
99
|
+
|
|
100
|
+
if coco_output_file is not None:
|
|
101
|
+
if os.path.isfile(coco_output_file):
|
|
102
|
+
if overwrite_behavior == 'skip':
|
|
103
|
+
print('Skipping conversion of {}, output file {} exists'.format(
|
|
104
|
+
md_results_file,coco_output_file))
|
|
105
|
+
return None
|
|
106
|
+
elif overwrite_behavior == 'skip_if_valid':
|
|
107
|
+
output_file_is_valid = True
|
|
108
|
+
try:
|
|
109
|
+
with open(coco_output_file,'r') as f:
|
|
110
|
+
_ = json.load(f)
|
|
111
|
+
except Exception:
|
|
112
|
+
print('COCO file {} is invalid, proceeding with conversion'.format(
|
|
113
|
+
coco_output_file))
|
|
114
|
+
output_file_is_valid = False
|
|
115
|
+
if output_file_is_valid:
|
|
116
|
+
print('Skipping conversion of {}, output file {} exists and is valid'.format(
|
|
117
|
+
md_results_file,coco_output_file))
|
|
118
|
+
return None
|
|
119
|
+
elif overwrite_behavior == 'overwrite':
|
|
120
|
+
pass
|
|
121
|
+
elif overwrite_behavior == 'error':
|
|
122
|
+
raise ValueError('Output file {} exists'.format(coco_output_file))
|
|
123
|
+
|
|
77
124
|
with open(md_results_file,'r') as f:
|
|
78
125
|
md_results = json.load(f)
|
|
79
126
|
|
|
80
127
|
coco_images = []
|
|
81
128
|
coco_annotations = []
|
|
82
129
|
|
|
83
|
-
print('Converting MD results to COCO...'
|
|
130
|
+
print('Converting MD results file {} to COCO file {}...'.format(
|
|
131
|
+
md_results_file, coco_output_file))
|
|
84
132
|
|
|
85
133
|
# im = md_results['images'][0]
|
|
86
|
-
for im in tqdm(md_results['images']):
|
|
134
|
+
for im in tqdm(md_results['images'],disable=(not verbose)):
|
|
87
135
|
|
|
88
136
|
coco_im = {}
|
|
89
137
|
coco_im['id'] = im['file']
|
|
@@ -101,18 +149,36 @@ def md_to_coco(md_results_file,
|
|
|
101
149
|
h = None
|
|
102
150
|
|
|
103
151
|
if ('width' not in im) or ('height' not in im) or validate_image_sizes:
|
|
104
|
-
if image_folder is None:
|
|
105
|
-
raise ValueError('Must provide an image folder when height/width need to be read from images')
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
152
|
+
if (image_folder is None) and (image_filename_to_size is None):
|
|
153
|
+
raise ValueError('Must provide an image folder or a size mapping when height/width need to be read from images')
|
|
154
|
+
|
|
155
|
+
w = None; h = None
|
|
156
|
+
|
|
157
|
+
if image_filename_to_size is not None:
|
|
158
|
+
|
|
159
|
+
if im['file'] not in image_filename_to_size:
|
|
160
|
+
print('Warning: file {} not in image size mapping dict, reading from file'.format(im['file']))
|
|
161
|
+
else:
|
|
162
|
+
image_size = image_filename_to_size[im['file']]
|
|
163
|
+
if image_size is not None:
|
|
164
|
+
assert len(image_size) == 2
|
|
165
|
+
w = image_size[0]
|
|
166
|
+
h = image_size[1]
|
|
167
|
+
|
|
168
|
+
if w is None:
|
|
169
|
+
|
|
170
|
+
image_file_abs = os.path.join(image_folder,im['file'])
|
|
171
|
+
pil_im = vis_utils.open_image(image_file_abs)
|
|
172
|
+
w = pil_im.width
|
|
173
|
+
h = pil_im.height
|
|
174
|
+
|
|
110
175
|
if validate_image_sizes:
|
|
111
176
|
if 'width' in im:
|
|
112
177
|
assert im['width'] == w, 'Width mismatch for image {}'.format(im['file'])
|
|
113
178
|
if 'height' in im:
|
|
114
179
|
assert im['height'] == h, 'Height mismatch for image {}'.format(im['file'])
|
|
115
180
|
else:
|
|
181
|
+
|
|
116
182
|
w = im['width']
|
|
117
183
|
h = im['height']
|
|
118
184
|
|
|
@@ -202,9 +268,9 @@ def md_to_coco(md_results_file,
|
|
|
202
268
|
with open(coco_output_file,'w') as f:
|
|
203
269
|
json.dump(output_dict,f,indent=1)
|
|
204
270
|
|
|
205
|
-
return output_dict
|
|
271
|
+
return output_dict
|
|
206
272
|
|
|
207
|
-
# def md_to_coco(...)
|
|
273
|
+
# ...def md_to_coco(...)
|
|
208
274
|
|
|
209
275
|
|
|
210
276
|
#%% Interactive driver
|
|
@@ -92,16 +92,18 @@ class PostProcessingOptions:
|
|
|
92
92
|
#: Optional .json file containing ground truth information
|
|
93
93
|
self.ground_truth_json_file = ''
|
|
94
94
|
|
|
95
|
-
#:
|
|
95
|
+
#: List of classes we'll treat as negative (defaults to "empty", typically includes
|
|
96
|
+
#: classes like "blank", "misfire", etc.).
|
|
96
97
|
#:
|
|
97
98
|
#: Include the token "#NO_LABELS#" to indicate that an image with no annotations
|
|
98
99
|
#: should be considered empty.
|
|
99
100
|
self.negative_classes = DEFAULT_NEGATIVE_CLASSES
|
|
100
101
|
|
|
101
|
-
#:
|
|
102
|
+
#: List of classes we'll treat as neither positive nor negative (defaults to
|
|
103
|
+
#: "unknown", typically includes classes like "unidentifiable").
|
|
102
104
|
self.unlabeled_classes = DEFAULT_UNKNOWN_CLASSES
|
|
103
105
|
|
|
104
|
-
#:
|
|
106
|
+
#: List of output sets that we should count, but not render images for.
|
|
105
107
|
#:
|
|
106
108
|
#: Typically used to preview sets with lots of empties, where you don't want to
|
|
107
109
|
#: subset but also don't want to render 100,000 empty images.
|
|
@@ -198,11 +200,16 @@ class PostProcessingOptions:
|
|
|
198
200
|
|
|
199
201
|
#: When classification results are present, should we sort alphabetically by class name (False)
|
|
200
202
|
#: or in descending order by frequency (True)?
|
|
201
|
-
self.sort_classification_results_by_count = False
|
|
203
|
+
self.sort_classification_results_by_count = False
|
|
202
204
|
|
|
203
205
|
#: Should we split individual pages up into smaller pages if there are more than
|
|
204
206
|
#: N images?
|
|
205
207
|
self.max_figures_per_html_file = None
|
|
208
|
+
|
|
209
|
+
#: Footer text for the index page
|
|
210
|
+
# self.footer_text = '<br/><p style="font-size:80%;">Preview page created with the <a href="{}">MegaDetector Python package</a>.</p>'.\
|
|
211
|
+
# format('https://megadetector.readthedocs.io')
|
|
212
|
+
self.footer_text = ''
|
|
206
213
|
|
|
207
214
|
# ...__init__()
|
|
208
215
|
|
|
@@ -590,6 +597,7 @@ def _prepare_html_subpages(images_html, output_dir, options=None):
|
|
|
590
597
|
html_image_list_options = {}
|
|
591
598
|
html_image_list_options['maxFiguresPerHtmlFile'] = options.max_figures_per_html_file
|
|
592
599
|
html_image_list_options['headerHtml'] = '<h1>{}</h1>'.format(res.upper())
|
|
600
|
+
html_image_list_options['pageTitle'] = '{}'.format(res.lower())
|
|
593
601
|
|
|
594
602
|
# Don't write empty pages
|
|
595
603
|
if len(array) == 0:
|
|
@@ -762,7 +770,7 @@ def _render_image_no_gt(file_info,detection_categories_to_results_name,
|
|
|
762
770
|
if len(rendered_image_html_info) > 0:
|
|
763
771
|
|
|
764
772
|
image_result = [[res, rendered_image_html_info]]
|
|
765
|
-
|
|
773
|
+
classes_rendered_this_image = set()
|
|
766
774
|
max_conf = 0
|
|
767
775
|
|
|
768
776
|
for det in detections:
|
|
@@ -782,11 +790,14 @@ def _render_image_no_gt(file_info,detection_categories_to_results_name,
|
|
|
782
790
|
# confidence threshold
|
|
783
791
|
if (options.classification_confidence_threshold < 0) or \
|
|
784
792
|
(top1_class_score >= options.classification_confidence_threshold):
|
|
785
|
-
|
|
786
|
-
rendered_image_html_info])
|
|
793
|
+
class_string = 'class_{}'.format(top1_class_name)
|
|
787
794
|
else:
|
|
788
|
-
|
|
795
|
+
class_string = 'class_unreliable'
|
|
796
|
+
|
|
797
|
+
if class_string not in classes_rendered_this_image:
|
|
798
|
+
image_result.append([class_string,
|
|
789
799
|
rendered_image_html_info])
|
|
800
|
+
classes_rendered_this_image.add(class_string)
|
|
790
801
|
|
|
791
802
|
# ...if this detection has classification info
|
|
792
803
|
|
|
@@ -887,7 +898,6 @@ def _render_image_with_gt(file_info,ground_truth_indexed_db,
|
|
|
887
898
|
#%% Main function
|
|
888
899
|
|
|
889
900
|
def process_batch_results(options):
|
|
890
|
-
|
|
891
901
|
"""
|
|
892
902
|
Given a .json or .csv file containing MD results, do one or more of the following:
|
|
893
903
|
|
|
@@ -1083,7 +1093,8 @@ def process_batch_results(options):
|
|
|
1083
1093
|
|
|
1084
1094
|
output_html_file = ''
|
|
1085
1095
|
|
|
1086
|
-
style_header = """<head>
|
|
1096
|
+
style_header = """<head>
|
|
1097
|
+
<title>Detection results preview</title>
|
|
1087
1098
|
<style type="text/css">
|
|
1088
1099
|
a { text-decoration: none; }
|
|
1089
1100
|
body { font-family: segoe ui, calibri, "trebuchet ms", verdana, arial, sans-serif; }
|
|
@@ -1424,7 +1435,7 @@ def process_batch_results(options):
|
|
|
1424
1435
|
else:
|
|
1425
1436
|
confidence_threshold_string = str(options.confidence_threshold)
|
|
1426
1437
|
|
|
1427
|
-
index_page = """<html>
|
|
1438
|
+
index_page = """<html>
|
|
1428
1439
|
{}
|
|
1429
1440
|
<body>
|
|
1430
1441
|
<h2>Evaluation</h2>
|
|
@@ -1509,7 +1520,7 @@ def process_batch_results(options):
|
|
|
1509
1520
|
index_page += '</div>'
|
|
1510
1521
|
|
|
1511
1522
|
# Close body and html tags
|
|
1512
|
-
index_page += '</body></html>'
|
|
1523
|
+
index_page += '{}</body></html>'.format(options.footer_text)
|
|
1513
1524
|
output_html_file = os.path.join(output_dir, 'index.html')
|
|
1514
1525
|
with open(output_html_file, 'w') as f:
|
|
1515
1526
|
f.write(index_page)
|
|
@@ -1529,7 +1540,6 @@ def process_batch_results(options):
|
|
|
1529
1540
|
# for each category
|
|
1530
1541
|
images_html = collections.defaultdict(list)
|
|
1531
1542
|
|
|
1532
|
-
|
|
1533
1543
|
# Add default entries by accessing them for the first time
|
|
1534
1544
|
|
|
1535
1545
|
# Maps sorted tuples of detection category IDs (string ints) - e.g. ("1"), ("1", "4", "7") - to
|
|
@@ -1637,14 +1647,15 @@ def process_batch_results(options):
|
|
|
1637
1647
|
files_to_render), total=len(files_to_render)))
|
|
1638
1648
|
else:
|
|
1639
1649
|
for file_info in tqdm(files_to_render):
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
|
-
|
|
1650
|
+
rendering_result = _render_image_no_gt(file_info,
|
|
1651
|
+
detection_categories_to_results_name,
|
|
1652
|
+
detection_categories,
|
|
1653
|
+
classification_categories,
|
|
1654
|
+
options=options)
|
|
1655
|
+
rendering_results.append(rendering_result)
|
|
1645
1656
|
|
|
1646
|
-
elapsed = time.time() - start_time
|
|
1647
|
-
|
|
1657
|
+
elapsed = time.time() - start_time
|
|
1658
|
+
|
|
1648
1659
|
# Do we have classification results in addition to detection results?
|
|
1649
1660
|
has_classification_info = False
|
|
1650
1661
|
|
|
@@ -1793,7 +1804,7 @@ def process_batch_results(options):
|
|
|
1793
1804
|
cname, cname.lower(), ccount)
|
|
1794
1805
|
index_page += '</div>\n'
|
|
1795
1806
|
|
|
1796
|
-
index_page += '</body></html>'
|
|
1807
|
+
index_page += '{}</body></html>'.format(options.footer_text)
|
|
1797
1808
|
output_html_file = os.path.join(output_dir, 'index.html')
|
|
1798
1809
|
with open(output_html_file, 'w') as f:
|
|
1799
1810
|
f.write(index_page)
|
|
@@ -15,8 +15,10 @@ import sys
|
|
|
15
15
|
import json
|
|
16
16
|
import argparse
|
|
17
17
|
|
|
18
|
+
from tqdm import tqdm
|
|
19
|
+
|
|
18
20
|
from megadetector.detection.video_utils import is_video_file
|
|
19
|
-
from megadetector.utils.ct_utils import args_to_object
|
|
21
|
+
from megadetector.utils.ct_utils import args_to_object, is_list_sorted # noqa
|
|
20
22
|
|
|
21
23
|
typical_info_fields = ['detector','detection_completion_time',
|
|
22
24
|
'classifier','classification_completion_time',
|
|
@@ -42,11 +44,16 @@ class ValidateBatchResultsOptions:
|
|
|
42
44
|
#:
|
|
43
45
|
#: If None, assumes absolute paths.
|
|
44
46
|
self.relative_path_base = None
|
|
47
|
+
|
|
48
|
+
#: Should we return the loaded data, or just the validation results?
|
|
49
|
+
self.return_data = False
|
|
50
|
+
|
|
51
|
+
#: Enable additional debug output
|
|
52
|
+
self.verbose = False
|
|
45
53
|
|
|
46
54
|
# ...class ValidateBatchResultsOptions
|
|
47
55
|
|
|
48
56
|
|
|
49
|
-
|
|
50
57
|
#%% Main function
|
|
51
58
|
|
|
52
59
|
def validate_batch_results(json_filename,options=None):
|
|
@@ -55,88 +62,181 @@ def validate_batch_results(json_filename,options=None):
|
|
|
55
62
|
|
|
56
63
|
Args:
|
|
57
64
|
json_filename (str): the filename to validate
|
|
58
|
-
options (ValidateBatchResultsOptions,
|
|
65
|
+
options (ValidateBatchResultsOptions, optional): all the parameters used to control this
|
|
59
66
|
process, see ValidateBatchResultsOptions for details
|
|
60
67
|
|
|
61
68
|
Returns:
|
|
62
|
-
|
|
69
|
+
dict: a dict with a field called "validation_results", which is itself a dict. The reason
|
|
70
|
+
it's a dict inside a dict is that if return_data is True, the outer dict also contains all
|
|
71
|
+
the loaded data. The "validation_results" dict contains fields called "errors", "warnings",
|
|
72
|
+
and "filename". "errors" and "warnings" are lists of strings, although "errors" will never
|
|
73
|
+
be longer than N=1, since validation fails at the first error.
|
|
74
|
+
|
|
75
|
+
|
|
63
76
|
"""
|
|
64
77
|
|
|
65
78
|
if options is None:
|
|
66
79
|
options = ValidateBatchResultsOptions()
|
|
67
80
|
|
|
81
|
+
if options.verbose:
|
|
82
|
+
print('Loading results from {}'.format(json_filename))
|
|
83
|
+
|
|
68
84
|
with open(json_filename,'r') as f:
|
|
69
85
|
d = json.load(f)
|
|
70
86
|
|
|
71
|
-
|
|
87
|
+
validation_results = {}
|
|
88
|
+
validation_results['filename'] = json_filename
|
|
89
|
+
validation_results['warnings'] = []
|
|
90
|
+
validation_results['errors'] = []
|
|
72
91
|
|
|
73
|
-
|
|
74
|
-
|
|
92
|
+
if not isinstance(d,dict):
|
|
93
|
+
|
|
94
|
+
validation_results['errors'].append('Input data is not a dict')
|
|
95
|
+
to_return = {}
|
|
96
|
+
to_return['validation_results'] = validation_results
|
|
97
|
+
return to_return
|
|
75
98
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
99
|
+
try:
|
|
100
|
+
|
|
101
|
+
## Info validation
|
|
102
|
+
|
|
103
|
+
if not 'info' in d:
|
|
104
|
+
raise ValueError('Input does not contain info field')
|
|
80
105
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
106
|
+
info = d['info']
|
|
107
|
+
|
|
108
|
+
if not isinstance(info,dict):
|
|
109
|
+
raise ValueError('Input contains invalid info field')
|
|
110
|
+
|
|
111
|
+
if 'format_version' not in info :
|
|
112
|
+
raise ValueError('Input does not specify format version')
|
|
113
|
+
|
|
114
|
+
format_version = float(info['format_version'])
|
|
115
|
+
if format_version < 1.3:
|
|
116
|
+
raise ValueError('This validator can only be used with format version 1.3 or later')
|
|
91
117
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
118
|
+
|
|
119
|
+
## Category validation
|
|
120
|
+
|
|
121
|
+
if 'detection_categories' not in d:
|
|
122
|
+
raise ValueError('Input does not contain detection_categories field')
|
|
123
|
+
|
|
124
|
+
for k in d['detection_categories'].keys():
|
|
125
|
+
# Category IDs should be string-formatted ints
|
|
126
|
+
if not isinstance(k,str):
|
|
127
|
+
raise ValueError('Invalid detection category ID: {}'.format(k))
|
|
96
128
|
_ = int(k)
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
129
|
+
if not isinstance(d['detection_categories'][k],str):
|
|
130
|
+
raise ValueError('Invalid detection category name: {}'.format(
|
|
131
|
+
d['detection_categories'][k]))
|
|
132
|
+
|
|
133
|
+
if 'classification_categories' in d:
|
|
134
|
+
for k in d['classification_categories'].keys():
|
|
135
|
+
# Categories should be string-formatted ints
|
|
136
|
+
if not isinstance(k,str):
|
|
137
|
+
raise ValueError('Invalid classification category ID: {}'.format(k))
|
|
138
|
+
_ = int(k)
|
|
139
|
+
if not isinstance(d['classification_categories'][k],str):
|
|
140
|
+
raise ValueError('Invalid classification category name: {}'.format(
|
|
141
|
+
d['classification_categories'][k]))
|
|
107
142
|
|
|
108
|
-
assert isinstance(im,dict)
|
|
109
|
-
assert 'file' in im
|
|
110
143
|
|
|
111
|
-
|
|
144
|
+
## Image validation
|
|
112
145
|
|
|
113
|
-
if
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
146
|
+
if 'images' not in d:
|
|
147
|
+
raise ValueError('images field not present')
|
|
148
|
+
if not isinstance(d['images'],list):
|
|
149
|
+
raise ValueError('Invalid images field')
|
|
150
|
+
|
|
151
|
+
if options.verbose:
|
|
152
|
+
print('Validating images')
|
|
153
|
+
|
|
154
|
+
# im = d['images'][0]
|
|
155
|
+
for i_im,im in tqdm(enumerate(d['images']),total=len(d['images']),disable=(not options.verbose)):
|
|
119
156
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
157
|
+
if not isinstance(im,dict):
|
|
158
|
+
raise ValueError('Invalid image at index {}'.format(i_im))
|
|
159
|
+
if 'file' not in im:
|
|
160
|
+
raise ValueError('Image without filename at index {}'.format(i_im))
|
|
161
|
+
|
|
162
|
+
file = im['file']
|
|
124
163
|
|
|
125
|
-
if is_video_file(im['file']) and (format_version >= 1.4):
|
|
126
|
-
assert 'frame_rate' in im
|
|
127
164
|
if 'detections' in im and im['detections'] is not None:
|
|
128
165
|
for det in im['detections']:
|
|
129
|
-
assert '
|
|
166
|
+
assert 'category' in det, 'Image {} has a detection with no category'.format(file)
|
|
167
|
+
assert 'conf' in det, 'Image {} has a detection with no confidence'.format(file)
|
|
168
|
+
assert isinstance(det['conf'],float), \
|
|
169
|
+
'Image {} has an illegal confidence value'.format(file)
|
|
170
|
+
assert 'bbox' in det, 'Image {} has a detection with no box'.format(file)
|
|
171
|
+
assert det['category'] in d['detection_categories'], \
|
|
172
|
+
'Image {} has a detection with an unmapped category {}'.format(
|
|
173
|
+
file,det['category'])
|
|
174
|
+
|
|
175
|
+
if options.check_image_existence:
|
|
176
|
+
|
|
177
|
+
if options.relative_path_base is None:
|
|
178
|
+
file_abs = file
|
|
179
|
+
else:
|
|
180
|
+
file_abs = os.path.join(options.relative_path_base,file)
|
|
181
|
+
if not os.path.isfile(file_abs):
|
|
182
|
+
raise ValueError('Cannot find file {}'.format(file_abs))
|
|
183
|
+
|
|
184
|
+
if 'failure' in im:
|
|
185
|
+
if im['failure'] is not None:
|
|
186
|
+
if not isinstance(im['failure'],str):
|
|
187
|
+
raise ValueError('Image {} has an illegal [failure] value: {}'.format(
|
|
188
|
+
im['file'],str(im['failure'])))
|
|
189
|
+
if 'detections' not in im:
|
|
190
|
+
s = 'Image {} has a failure value, should also have a null detections array'.format(
|
|
191
|
+
im['file'])
|
|
192
|
+
validation_results['warnings'].append(s)
|
|
193
|
+
elif im['detections'] is not None:
|
|
194
|
+
raise ValueError('Image {} has a failure value but a non-null detections array'.format(
|
|
195
|
+
im['file']))
|
|
196
|
+
else:
|
|
197
|
+
if not isinstance(im['detections'],list):
|
|
198
|
+
raise ValueError('Invalid detections list for image {}'.format(im['file']))
|
|
199
|
+
|
|
200
|
+
if is_video_file(im['file']) and (format_version >= 1.4):
|
|
201
|
+
|
|
202
|
+
if 'frame_rate' not in im:
|
|
203
|
+
raise ValueError('Video without frame rate: {}'.format(im['file']))
|
|
204
|
+
if im['frame_rate'] < 0:
|
|
205
|
+
raise ValueError('Video with illegal frame rate {}: {}'.format(
|
|
206
|
+
str(im['frame_rate']),im['file']))
|
|
207
|
+
if 'detections' in im and im['detections'] is not None:
|
|
208
|
+
for det in im['detections']:
|
|
209
|
+
if 'frame_number' not in det:
|
|
210
|
+
raise ValueError('Frame without frame number in video {}'.format(
|
|
211
|
+
im['file']))
|
|
212
|
+
frame_numbers = [det['frame_number'] for det in im['detections']] # noqa
|
|
213
|
+
# assert is_list_sorted(frame_numbers)
|
|
214
|
+
|
|
215
|
+
# ...for each image
|
|
130
216
|
|
|
131
|
-
# ...for each image
|
|
132
217
|
|
|
218
|
+
## Validation of other keys
|
|
219
|
+
|
|
220
|
+
for k in d.keys():
|
|
221
|
+
if (k not in typical_keys) and (k not in required_keys):
|
|
222
|
+
validation_results['warnings'].append(
|
|
223
|
+
'Warning: non-standard key {} present at file level'.format(k))
|
|
224
|
+
|
|
225
|
+
except Exception as e:
|
|
226
|
+
|
|
227
|
+
validation_results['errors'].append(str(e))
|
|
228
|
+
|
|
229
|
+
# ...try/except
|
|
133
230
|
|
|
134
|
-
|
|
231
|
+
if options.return_data:
|
|
232
|
+
to_return = d
|
|
233
|
+
else:
|
|
234
|
+
to_return = {}
|
|
135
235
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
236
|
+
to_return['validation_results'] = validation_results
|
|
237
|
+
|
|
238
|
+
return to_return
|
|
239
|
+
|
|
140
240
|
# ...def validate_batch_results(...)
|
|
141
241
|
|
|
142
242
|
|
|
@@ -144,15 +244,25 @@ def validate_batch_results(json_filename,options=None):
|
|
|
144
244
|
|
|
145
245
|
if False:
|
|
146
246
|
|
|
147
|
-
#%%
|
|
247
|
+
#%% Validate all .json files in the MD test suite
|
|
248
|
+
|
|
249
|
+
from megadetector.utils.path_utils import recursive_file_list
|
|
250
|
+
filenames = recursive_file_list(os.path.expanduser('~/AppData/Local/Temp/md-tests'))
|
|
251
|
+
filenames = [fn for fn in filenames if fn.endswith('.json')]
|
|
252
|
+
filenames = [fn for fn in filenames if 'detectionIndex' not in fn]
|
|
148
253
|
|
|
149
254
|
options = ValidateBatchResultsOptions()
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
255
|
+
options.check_image_existence = False
|
|
256
|
+
options.relative_path_base = None # r'g:\temp\test-videos'
|
|
257
|
+
|
|
258
|
+
for json_filename in filenames:
|
|
259
|
+
results = validate_batch_results(json_filename,options)
|
|
260
|
+
if len(results['validation_results']['warnings']) > 0:
|
|
261
|
+
print('Warnings in file {}:'.format(json_filename))
|
|
262
|
+
for s in results['validation_results']['warnings']:
|
|
263
|
+
print(s)
|
|
264
|
+
print('')
|
|
265
|
+
assert len(results['validation_results']['errors']) == 0
|
|
156
266
|
|
|
157
267
|
|
|
158
268
|
#%% Command-line driver
|