megadetector 10.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/__init__.py +0 -0
- megadetector/api/__init__.py +0 -0
- megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
- megadetector/classification/__init__.py +0 -0
- megadetector/classification/aggregate_classifier_probs.py +108 -0
- megadetector/classification/analyze_failed_images.py +227 -0
- megadetector/classification/cache_batchapi_outputs.py +198 -0
- megadetector/classification/create_classification_dataset.py +626 -0
- megadetector/classification/crop_detections.py +516 -0
- megadetector/classification/csv_to_json.py +226 -0
- megadetector/classification/detect_and_crop.py +853 -0
- megadetector/classification/efficientnet/__init__.py +9 -0
- megadetector/classification/efficientnet/model.py +415 -0
- megadetector/classification/efficientnet/utils.py +608 -0
- megadetector/classification/evaluate_model.py +520 -0
- megadetector/classification/identify_mislabeled_candidates.py +152 -0
- megadetector/classification/json_to_azcopy_list.py +63 -0
- megadetector/classification/json_validator.py +696 -0
- megadetector/classification/map_classification_categories.py +276 -0
- megadetector/classification/merge_classification_detection_output.py +509 -0
- megadetector/classification/prepare_classification_script.py +194 -0
- megadetector/classification/prepare_classification_script_mc.py +228 -0
- megadetector/classification/run_classifier.py +287 -0
- megadetector/classification/save_mislabeled.py +110 -0
- megadetector/classification/train_classifier.py +827 -0
- megadetector/classification/train_classifier_tf.py +725 -0
- megadetector/classification/train_utils.py +323 -0
- megadetector/data_management/__init__.py +0 -0
- megadetector/data_management/animl_to_md.py +161 -0
- megadetector/data_management/annotations/__init__.py +0 -0
- megadetector/data_management/annotations/annotation_constants.py +33 -0
- megadetector/data_management/camtrap_dp_to_coco.py +270 -0
- megadetector/data_management/cct_json_utils.py +566 -0
- megadetector/data_management/cct_to_md.py +184 -0
- megadetector/data_management/cct_to_wi.py +293 -0
- megadetector/data_management/coco_to_labelme.py +284 -0
- megadetector/data_management/coco_to_yolo.py +702 -0
- megadetector/data_management/databases/__init__.py +0 -0
- megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
- megadetector/data_management/databases/integrity_check_json_db.py +528 -0
- megadetector/data_management/databases/subset_json_db.py +195 -0
- megadetector/data_management/generate_crops_from_cct.py +200 -0
- megadetector/data_management/get_image_sizes.py +164 -0
- megadetector/data_management/labelme_to_coco.py +559 -0
- megadetector/data_management/labelme_to_yolo.py +349 -0
- megadetector/data_management/lila/__init__.py +0 -0
- megadetector/data_management/lila/create_lila_blank_set.py +556 -0
- megadetector/data_management/lila/create_lila_test_set.py +187 -0
- megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
- megadetector/data_management/lila/download_lila_subset.py +182 -0
- megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
- megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
- megadetector/data_management/lila/get_lila_image_counts.py +112 -0
- megadetector/data_management/lila/lila_common.py +319 -0
- megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
- megadetector/data_management/mewc_to_md.py +344 -0
- megadetector/data_management/ocr_tools.py +873 -0
- megadetector/data_management/read_exif.py +964 -0
- megadetector/data_management/remap_coco_categories.py +195 -0
- megadetector/data_management/remove_exif.py +156 -0
- megadetector/data_management/rename_images.py +194 -0
- megadetector/data_management/resize_coco_dataset.py +663 -0
- megadetector/data_management/speciesnet_to_md.py +41 -0
- megadetector/data_management/wi_download_csv_to_coco.py +247 -0
- megadetector/data_management/yolo_output_to_md_output.py +594 -0
- megadetector/data_management/yolo_to_coco.py +876 -0
- megadetector/data_management/zamba_to_md.py +188 -0
- megadetector/detection/__init__.py +0 -0
- megadetector/detection/change_detection.py +840 -0
- megadetector/detection/process_video.py +479 -0
- megadetector/detection/pytorch_detector.py +1451 -0
- megadetector/detection/run_detector.py +1267 -0
- megadetector/detection/run_detector_batch.py +2159 -0
- megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
- megadetector/detection/run_md_and_speciesnet.py +1494 -0
- megadetector/detection/run_tiled_inference.py +1038 -0
- megadetector/detection/tf_detector.py +209 -0
- megadetector/detection/video_utils.py +1379 -0
- megadetector/postprocessing/__init__.py +0 -0
- megadetector/postprocessing/add_max_conf.py +72 -0
- megadetector/postprocessing/categorize_detections_by_size.py +166 -0
- megadetector/postprocessing/classification_postprocessing.py +1752 -0
- megadetector/postprocessing/combine_batch_outputs.py +249 -0
- megadetector/postprocessing/compare_batch_results.py +2110 -0
- megadetector/postprocessing/convert_output_format.py +403 -0
- megadetector/postprocessing/create_crop_folder.py +629 -0
- megadetector/postprocessing/detector_calibration.py +570 -0
- megadetector/postprocessing/generate_csv_report.py +522 -0
- megadetector/postprocessing/load_api_results.py +223 -0
- megadetector/postprocessing/md_to_coco.py +428 -0
- megadetector/postprocessing/md_to_labelme.py +351 -0
- megadetector/postprocessing/md_to_wi.py +41 -0
- megadetector/postprocessing/merge_detections.py +392 -0
- megadetector/postprocessing/postprocess_batch_results.py +2077 -0
- megadetector/postprocessing/remap_detection_categories.py +226 -0
- megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
- megadetector/postprocessing/separate_detections_into_folders.py +795 -0
- megadetector/postprocessing/subset_json_detector_output.py +964 -0
- megadetector/postprocessing/top_folders_to_bottom.py +238 -0
- megadetector/postprocessing/validate_batch_results.py +332 -0
- megadetector/taxonomy_mapping/__init__.py +0 -0
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +213 -0
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
- megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
- megadetector/taxonomy_mapping/simple_image_download.py +224 -0
- megadetector/taxonomy_mapping/species_lookup.py +1008 -0
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
- megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
- megadetector/tests/__init__.py +0 -0
- megadetector/tests/test_nms_synthetic.py +335 -0
- megadetector/utils/__init__.py +0 -0
- megadetector/utils/ct_utils.py +1857 -0
- megadetector/utils/directory_listing.py +199 -0
- megadetector/utils/extract_frames_from_video.py +307 -0
- megadetector/utils/gpu_test.py +125 -0
- megadetector/utils/md_tests.py +2072 -0
- megadetector/utils/path_utils.py +2832 -0
- megadetector/utils/process_utils.py +172 -0
- megadetector/utils/split_locations_into_train_val.py +237 -0
- megadetector/utils/string_utils.py +234 -0
- megadetector/utils/url_utils.py +825 -0
- megadetector/utils/wi_platform_utils.py +968 -0
- megadetector/utils/wi_taxonomy_utils.py +1759 -0
- megadetector/utils/write_html_image_list.py +239 -0
- megadetector/visualization/__init__.py +0 -0
- megadetector/visualization/plot_utils.py +309 -0
- megadetector/visualization/render_images_with_thumbnails.py +243 -0
- megadetector/visualization/visualization_utils.py +1940 -0
- megadetector/visualization/visualize_db.py +630 -0
- megadetector/visualization/visualize_detector_output.py +479 -0
- megadetector/visualization/visualize_video_output.py +705 -0
- megadetector-10.0.13.dist-info/METADATA +134 -0
- megadetector-10.0.13.dist-info/RECORD +147 -0
- megadetector-10.0.13.dist-info/WHEEL +5 -0
- megadetector-10.0.13.dist-info/licenses/LICENSE +19 -0
- megadetector-10.0.13.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
top_folders_to_bottom.py
|
|
4
|
+
|
|
5
|
+
Given a base folder with files like:
|
|
6
|
+
|
|
7
|
+
* A/1/2/a.jpg
|
|
8
|
+
* B/3/4/b.jpg
|
|
9
|
+
|
|
10
|
+
...moves the top-level folders to the bottom in a new output folder, i.e., creates:
|
|
11
|
+
|
|
12
|
+
* 1/2/A/a.jpg
|
|
13
|
+
* 3/4/B/b.jpg
|
|
14
|
+
|
|
15
|
+
In practice, this is used to make this:
|
|
16
|
+
|
|
17
|
+
animal/camera01/image01.jpg
|
|
18
|
+
|
|
19
|
+
...look like:
|
|
20
|
+
|
|
21
|
+
camera01/animal/image01.jpg
|
|
22
|
+
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
#%% Constants and imports
|
|
26
|
+
|
|
27
|
+
import os
|
|
28
|
+
import sys
|
|
29
|
+
import shutil
|
|
30
|
+
import argparse
|
|
31
|
+
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
from tqdm import tqdm
|
|
34
|
+
|
|
35
|
+
from functools import partial
|
|
36
|
+
from multiprocessing.pool import ThreadPool
|
|
37
|
+
|
|
38
|
+
from megadetector.utils.path_utils import path_is_abs
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
#%% Classes
|
|
42
|
+
|
|
43
|
+
class TopFoldersToBottomOptions:
    """
    Parameter bundle consumed by top_folders_to_bottom()
    """

    def __init__(self, input_folder, output_folder, copy=True, n_threads=1, overwrite=False):

        #: True to copy files into the output folder, False to move them there
        self.copy = copy

        #: Size of the worker thread pool; values of 1 or less process files serially
        self.n_threads = n_threads

        #: Folder whose contents are re-organized
        self.input_folder = input_folder

        #: Folder that receives the re-organized files
        self.output_folder = output_folder

        #: When False, an output file that already exists is treated as an error
        self.overwrite = overwrite
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
#%% Main functions
|
|
72
|
+
|
|
73
|
+
def _process_file(relative_filename,options,execute=True):
    """
    Map one input file to its re-organized location, optionally copying or moving it there.

    The first path component of [relative_filename] is relocated so that it becomes the
    immediate parent of the file, e.g. "A/1/2/a.jpg" becomes "1/2/A/a.jpg".

    Args:
        relative_filename (str): forward-slash-delimited path, relative to
            options.input_folder, containing at least one folder component
        options (TopFoldersToBottomOptions): supplies input_folder, output_folder,
            copy, and overwrite
        execute (bool, optional): if False, only compute the output path, without
            touching the filesystem

    Returns:
        str: options.output_folder joined with the re-ordered relative path

    Raises:
        AssertionError: if [relative_filename] contains no '/', contains a backslash, or
            is an absolute path; or if [execute] is True, options.overwrite is False,
            and the output file already exists
    """

    assert ('/' in relative_filename) and \
        ('\\' not in relative_filename) and \
        (not path_is_abs(relative_filename))

    # Relocate the top-level folder so it sits just above the filename
    tokens = relative_filename.split('/')
    topmost_folder = tokens.pop(0)
    tokens.insert(len(tokens)-1,topmost_folder)

    # Find file/folder names
    output_relative_path = '/'.join(tokens)
    output_relative_folder = '/'.join(tokens[0:-1])

    output_absolute_folder = os.path.join(options.output_folder,output_relative_folder)
    output_absolute_path = os.path.join(options.output_folder,output_relative_path)

    if execute:

        os.makedirs(output_absolute_folder,exist_ok=True)

        input_absolute_path = os.path.join(options.input_folder,relative_filename)

        # Raise explicitly, rather than via an assert statement, so this guard still runs
        # when Python is started with -O (which strips assert statements).  The exception
        # type stays AssertionError so existing callers see the same error.
        if (not options.overwrite) and os.path.isfile(output_absolute_path):
            raise AssertionError(
                'Error: output file {} exists'.format(output_absolute_path))

        # Move or copy
        if options.copy:
            shutil.copy(input_absolute_path, output_absolute_path)
        else:
            shutil.move(input_absolute_path, output_absolute_path)

    return output_absolute_path
|
|
108
|
+
|
|
109
|
+
# ...def _process_file()
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def top_folders_to_bottom(options):
    """
    Re-organize a folder tree so that each top-level folder becomes the innermost folder.

    Given an input folder with files like:

    * A/1/2/a.jpg
    * B/3/4/b.jpg

    ...writes these files to the output folder:

    * 1/2/A/a.jpg
    * 3/4/B/b.jpg

    For example, this turns:

    animal/camera01/image01.jpg

    ...into:

    camera01/animal/image01.jpg

    Files that sit directly in the input folder (i.e., not inside any subfolder) are
    skipped with a warning.  Files are copied or moved according to options.copy.

    Args:
        options (TopFoldersToBottomOptions): See TopFoldersToBottomOptions for parameter details.

    """

    os.makedirs(options.output_folder,exist_ok=True)

    # List every non-folder entry below the input folder
    print('Enumerating files...')
    input_paths = [str(p) for p in Path(options.input_folder).rglob('*') if not p.is_dir()]
    print('Enumerated {} files'.format(len(input_paths)))

    # Express each path relative to the input folder, with forward slashes, and set
    # aside anything that isn't inside a subfolder
    nested_files = []
    n_base_files = 0
    for input_path in input_paths:
        relative_path = os.path.relpath(input_path,options.input_folder).replace('\\','/')
        if '/' in relative_path:
            nested_files.append(relative_path)
        else:
            n_base_files += 1

    if n_base_files > 0:
        print('Warning: ignoring {} files in the base folder'.format(n_base_files))

    # Dry run: compute every output path, and verify that no two inputs collide
    planned_outputs = [_process_file(fn, options, execute=False) for fn in nested_files]
    assert len(set(planned_outputs)) == len(planned_outputs),\
        "Error: input filenames don't map to unique output filenames"

    # Serial case
    if options.n_threads <= 1:
        for fn in tqdm(nested_files):
            _process_file(fn,options)
        return

    # Parallel case
    print('Starting a pool with {} threads'.format(options.n_threads))
    pool = ThreadPool(options.n_threads)
    try:

        def _process_with_options(fn):
            return _process_file(fn, options=options)

        # Drain the iterator so every file is processed and the progress bar advances
        for _ in tqdm(pool.imap(_process_with_options, nested_files), total=len(nested_files)):
            pass

    finally:
        pool.close()
        pool.join()
        print('Pool closed and join for folder inversion')
|
|
183
|
+
|
|
184
|
+
# ...def top_folders_to_bottom(...)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
#%% Interactive driver
|
|
188
|
+
|
|
189
|
+
if False:

    # This block never executes (the condition is a literal False); the "#%%" markers
    # below split it into cells, presumably so each one can be run by hand from an
    # editor that understands them.
    pass

    #%%

    # Example (machine-specific) input/output folders
    input_folder = r"G:\temp\output"
    output_folder = r"G:\temp\output-inverted"
    options = TopFoldersToBottomOptions(input_folder,output_folder,copy=True,n_threads=10)

    #%%

    top_folders_to_bottom(options)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
#%% Command-line driver
|
|
205
|
+
|
|
206
|
+
# python top_folders_to_bottom.py "g:\temp\separated_images" "g:\temp\separated_images_inverted" --n_threads 10
|
|
207
|
+
|
|
208
|
+
def main(): # noqa
    """
    Command-line entry point: parse arguments, then run top_folders_to_bottom().
    Prints usage and exits if no arguments are supplied.
    """

    arg_parser = argparse.ArgumentParser()

    # Positional arguments
    arg_parser.add_argument('input_folder', type=str, help='Input image folder')
    arg_parser.add_argument('output_folder', type=str, help='Output image folder')

    # Optional arguments
    arg_parser.add_argument(
        '--copy',
        action='store_true',
        help='Copy images, instead of moving (moving is the default)')
    arg_parser.add_argument(
        '--overwrite',
        action='store_true',
        help='Allow image overwrite (default=False)')
    arg_parser.add_argument(
        '--n_threads',
        type=int,
        default=1,
        help='Number of threads to use for parallel operation (default=1)')

    # With no arguments at all, show usage rather than an argparse error
    if not sys.argv[1:]:
        arg_parser.print_help()
        arg_parser.exit()

    cli_args = arg_parser.parse_args()

    # Package the parsed arguments as an options object
    options = TopFoldersToBottomOptions(
        cli_args.input_folder,
        cli_args.output_folder,
        copy=cli_args.copy,
        n_threads=cli_args.n_threads,
        overwrite=cli_args.overwrite)

    top_folders_to_bottom(options)
|
|
236
|
+
|
|
237
|
+
if __name__ == '__main__':
|
|
238
|
+
main()
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
validate_batch_results.py
|
|
4
|
+
|
|
5
|
+
Given a .json file containing MD results, validate that it's compliant with the format spec:
|
|
6
|
+
|
|
7
|
+
https://lila.science/megadetector-output-format
|
|
8
|
+
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
#%% Constants and imports
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
import sys
|
|
15
|
+
import json
|
|
16
|
+
import argparse
|
|
17
|
+
|
|
18
|
+
from tqdm import tqdm
|
|
19
|
+
|
|
20
|
+
from megadetector.detection.video_utils import is_video_file
|
|
21
|
+
from megadetector.utils.ct_utils import args_to_object, is_list_sorted # noqa
|
|
22
|
+
|
|
23
|
+
# Field names presumably expected inside the "info" dict of a results file (this list
# is not referenced elsewhere in the visible code; confirm against other callers)
typical_info_fields = [
    'detector',
    'detection_completion_time',
    'classifier',
    'classification_completion_time',
    'detection_metadata',
    'classifier_metadata',
]

# Top-level keys that are never reported as non-standard
required_keys = [
    'info',
    'images',
    'detection_categories',
]

# Additional top-level keys that are also never reported as non-standard
typical_keys = [
    'classification_categories',
    'classification_category_descriptions',
]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
#%% Classes
|
|
39
|
+
|
|
40
|
+
class ValidateBatchResultsOptions:
    """
    Settings that govern how validate_batch_results() behaves
    """

    def __init__(self):

        #: Confirm that every image listed in the results file exists on disk?  When
        #: this is True and the file stores relative paths, relative_path_base must
        #: also be set.
        self.check_image_existence = False

        #: Folder that image paths are joined to when check_image_existence is True.
        #:
        #: If None, image paths are used as-is.
        self.relative_path_base = None

        #: Include the loaded file contents in the returned dict, alongside the
        #: validation results?
        self.return_data = False

        #: Print progress information while validating?
        self.verbose = False

        #: Re-raise the first validation error, rather than recording it in the results?
        self.raise_errors = False
|
|
64
|
+
|
|
65
|
+
# ...class ValidateBatchResultsOptions
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
#%% Main function
|
|
69
|
+
|
|
70
|
+
def validate_batch_results(json_filename,options=None):
    """
    Verify that [json_filename] is a valid MD output file.  Validation stops at the first
    error; that error is recorded in the returned results, or re-raised if
    options.raise_errors is True.

    Args:
        json_filename (str): the filename to validate
        options (ValidateBatchResultsOptions, optional): all the parameters used to control this
            process, see ValidateBatchResultsOptions for details

    Returns:
        dict: a dict with a field called "validation_results", which is itself a dict.  The reason
        it's a dict inside a dict is that if return_data is True, the outer dict also contains all
        the loaded data.  The "validation_results" dict contains fields called "errors", "warnings",
        and "filename".  "errors" and "warnings" are lists of strings, although "errors" will never
        be longer than N=1, since validation fails at the first error.

    Raises:
        Exception: only if options.raise_errors is True, in which case the first validation
            failure propagates to the caller (typically a ValueError or AssertionError).
            Failures to open or parse the file itself always propagate.
    """

    if options is None:
        options = ValidateBatchResultsOptions()

    if options.verbose:
        print('Loading results from {}'.format(json_filename))

    with open(json_filename,'r') as f:
        d = json.load(f)

    validation_results = {}
    validation_results['filename'] = json_filename
    validation_results['warnings'] = []
    validation_results['errors'] = []

    if not isinstance(d,dict):

        validation_results['errors'].append('Input data is not a dict')
        to_return = {}
        to_return['validation_results'] = validation_results
        return to_return

    try:

        ## Info validation

        if 'info' not in d:
            raise ValueError('Input does not contain info field')

        info = d['info']

        if not isinstance(info,dict):
            raise ValueError('Input contains invalid info field')

        if 'format_version' not in info :
            raise ValueError('Input does not specify format version')

        format_version = float(info['format_version'])
        if format_version < 1.3:
            raise ValueError('This validator can only be used with format version 1.3 or later')


        ## Category validation

        if 'detection_categories' not in d:
            raise ValueError('Input does not contain detection_categories field')

        for k in d['detection_categories'].keys():
            # Category ID should be string-formatted ints
            if not isinstance(k,str):
                raise ValueError('Invalid detection category ID: {}'.format(k))
            # Raises ValueError if the ID is not an integer string
            _ = int(k)
            if not isinstance(d['detection_categories'][k],str):
                raise ValueError('Invalid detection category name: {}'.format(
                    d['detection_categories'][k]))

        if 'classification_categories' in d:
            for k in d['classification_categories'].keys():
                # Categories should be string-formatted ints
                if not isinstance(k,str):
                    raise ValueError('Invalid classification category ID: {}'.format(k))
                # Raises ValueError if the ID is not an integer string
                _ = int(k)
                if not isinstance(d['classification_categories'][k],str):
                    raise ValueError('Invalid classification category name: {}'.format(
                        d['classification_categories'][k]))


        ## Image validation

        if 'images' not in d:
            raise ValueError('images field not present')
        if not isinstance(d['images'],list):
            raise ValueError('Invalid images field')

        if options.verbose:
            print('Validating images')

        # im = d['images'][0]
        for i_im,im in tqdm(enumerate(d['images']),total=len(d['images']),disable=(not options.verbose)):

            if not isinstance(im,dict):
                raise ValueError('Invalid image at index {}'.format(i_im))
            if 'file' not in im:
                raise ValueError('Image without filename at index {}'.format(i_im))

            file = im['file']

            if 'detections' in im and im['detections'] is not None:

                for det in im['detections']:

                    assert 'category' in det, 'Image {} has a detection with no category'.format(file)
                    assert 'conf' in det, 'Image {} has a detection with no confidence'.format(file)

                    # JSON does not distinguish ints from floats, so a confidence written
                    # as 1 or 0 loads as an int; accept it, as is already done for
                    # classification confidences below.  Booleans are still rejected.
                    conf = det['conf']
                    assert isinstance(conf,(int,float)) and (not isinstance(conf,bool)), \
                        'Image {} has an illegal confidence value'.format(file)

                    assert 'bbox' in det, 'Image {} has a detection with no box'.format(file)
                    assert det['category'] in d['detection_categories'], \
                        'Image {} has a detection with an unmapped category {}'.format(
                            file,det['category'])

                    if 'classifications' in det and det['classifications'] is not None:
                        for c in det['classifications']:
                            assert isinstance(c[0],str), \
                                'Image {} has an illegal classification category: {}'.format(file,c[0])
                            try:
                                _ = int(c[0])
                            except Exception:
                                raise ValueError('Image {} has an illegal classification category: {}'.format(
                                    file,c[0]))
                            # Include a message so a failure here doesn't get recorded as
                            # an empty error string
                            assert isinstance(c[1],float) or isinstance(c[1], int), \
                                'Image {} has an illegal classification confidence value: {}'.format(
                                    file,c[1])

                # ...for each detection

            # ...if this image has a detections field

            if options.check_image_existence:

                if options.relative_path_base is None:
                    file_abs = file
                else:
                    file_abs = os.path.join(options.relative_path_base,file)
                if not os.path.isfile(file_abs):
                    raise ValueError('Cannot find file {}'.format(file_abs))

            if 'failure' in im:
                if im['failure'] is not None:
                    if not isinstance(im['failure'],str):
                        raise ValueError('Image {} has an illegal [failure] value: {}'.format(
                            im['file'],str(im['failure'])))
                    if 'detections' not in im:
                        s = 'Image {} has a failure value, should also have a null detections array'.format(
                            im['file'])
                        validation_results['warnings'].append(s)
                    elif im['detections'] is not None:
                        raise ValueError('Image {} has a failure value but a non-null detections array'.format(
                            im['file']))
            else:
                # Use .get() so a missing "detections" field is reported with the same
                # descriptive message as a malformed one, rather than as a bare KeyError
                if not isinstance(im.get('detections'),list):
                    raise ValueError('Invalid detections list for image {}'.format(im['file']))

            if is_video_file(im['file']) and (format_version >= 1.5):

                if 'frames_processed' not in im:
                    raise ValueError('Video without frames_processed field: {}'.format(im['file']))

            if is_video_file(im['file']) and (format_version >= 1.4):

                if 'frame_rate' not in im:
                    raise ValueError('Video without frame rate: {}'.format(im['file']))
                if im['frame_rate'] < 0:
                    # A negative frame rate is tolerated only when the entry has a
                    # failure field
                    if 'failure' not in im:
                        raise ValueError('Video with illegal frame rate {}: {}'.format(
                            str(im['frame_rate']),im['file']))
                if 'detections' in im and im['detections'] is not None:
                    for det in im['detections']:
                        if 'frame_number' not in det:
                            raise ValueError('Frame without frame number in video {}'.format(
                                im['file']))

        # ...for each image


        ## Validation of other keys

        for k in d.keys():
            if (k not in typical_keys) and (k not in required_keys):
                validation_results['warnings'].append(
                    'Warning: non-standard key {} present at file level'.format(k))

    except Exception as e:

        if options.raise_errors:
            raise
        else:
            validation_results['errors'].append(str(e))

    # ...try/except

    # When returning data, the validation results are added to the loaded dict itself
    if options.return_data:
        to_return = d
    else:
        to_return = {}

    to_return['validation_results'] = validation_results

    return to_return
|
|
275
|
+
|
|
276
|
+
# ...def validate_batch_results(...)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
#%% Interactive driver(s)
|
|
280
|
+
|
|
281
|
+
if False:

    #%% Validate all .json files in the MD test suite

    # This block never executes (the condition is a literal False); it is a "#%%" cell,
    # presumably meant to be run by hand from an editor that understands such cells.

    from megadetector.utils.path_utils import recursive_file_list
    filenames = recursive_file_list(os.path.expanduser('~/AppData/Local/Temp/md-tests'))
    filenames = [fn for fn in filenames if fn.endswith('.json')]
    # Skip files whose path contains "detectionIndex"
    filenames = [fn for fn in filenames if 'detectionIndex' not in fn]

    options = ValidateBatchResultsOptions()
    options.check_image_existence = False
    options.relative_path_base = None # r'g:\temp\test-videos'

    for json_filename in filenames:
        results = validate_batch_results(json_filename,options)
        if len(results['validation_results']['warnings']) > 0:
            print('Warnings in file {}:'.format(json_filename))
            for s in results['validation_results']['warnings']:
                print(s)
            print('')
        # Warnings are tolerated, errors are not
        assert len(results['validation_results']['errors']) == 0
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
#%% Command-line driver
|
|
305
|
+
|
|
306
|
+
def main(): # noqa
    """
    Command-line entry point: validate one results file and report what was found.

    Prints usage and exits if no arguments are supplied.  Otherwise prints any warnings
    and errors produced by validate_batch_results(), and exits with status 1 if the file
    has errors.
    """

    options = ValidateBatchResultsOptions()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'json_filename',
        help='path to .json file containing MegaDetector results')
    parser.add_argument(
        '--check_image_existence', action='store_true',
        help='check that all images referred to in the results file exist')
    parser.add_argument(
        '--relative_path_base', default=None,
        help='if --check_image_existence is specified and paths are relative, use this as the base folder')
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # Presumably copies the parsed arguments onto the matching options attributes;
    # confirm against args_to_object() in ct_utils
    args_to_object(args, options)

    results = validate_batch_results(args.json_filename,options)

    # By default, validate_batch_results() records problems rather than raising them,
    # so they have to be reported here; otherwise an invalid file would produce no
    # output at all.
    validation_results = results['validation_results']
    warnings = validation_results['warnings']
    errors = validation_results['errors']

    if len(warnings) > 0:
        print('Warnings in file {}:'.format(args.json_filename))
        for s in warnings:
            print(s)
        print('')

    if len(errors) > 0:
        print('Errors in file {}:'.format(args.json_filename))
        for s in errors:
            print(s)
        # Non-zero exit status so scripts can detect an invalid file
        sys.exit(1)

    print('No errors found in file {}'.format(args.json_filename))
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
if __name__ == '__main__':
|
|
332
|
+
main()
|
|
File without changes
|