megadetector 10.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/__init__.py +0 -0
- megadetector/api/__init__.py +0 -0
- megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
- megadetector/classification/__init__.py +0 -0
- megadetector/classification/aggregate_classifier_probs.py +108 -0
- megadetector/classification/analyze_failed_images.py +227 -0
- megadetector/classification/cache_batchapi_outputs.py +198 -0
- megadetector/classification/create_classification_dataset.py +626 -0
- megadetector/classification/crop_detections.py +516 -0
- megadetector/classification/csv_to_json.py +226 -0
- megadetector/classification/detect_and_crop.py +853 -0
- megadetector/classification/efficientnet/__init__.py +9 -0
- megadetector/classification/efficientnet/model.py +415 -0
- megadetector/classification/efficientnet/utils.py +608 -0
- megadetector/classification/evaluate_model.py +520 -0
- megadetector/classification/identify_mislabeled_candidates.py +152 -0
- megadetector/classification/json_to_azcopy_list.py +63 -0
- megadetector/classification/json_validator.py +696 -0
- megadetector/classification/map_classification_categories.py +276 -0
- megadetector/classification/merge_classification_detection_output.py +509 -0
- megadetector/classification/prepare_classification_script.py +194 -0
- megadetector/classification/prepare_classification_script_mc.py +228 -0
- megadetector/classification/run_classifier.py +287 -0
- megadetector/classification/save_mislabeled.py +110 -0
- megadetector/classification/train_classifier.py +827 -0
- megadetector/classification/train_classifier_tf.py +725 -0
- megadetector/classification/train_utils.py +323 -0
- megadetector/data_management/__init__.py +0 -0
- megadetector/data_management/animl_to_md.py +161 -0
- megadetector/data_management/annotations/__init__.py +0 -0
- megadetector/data_management/annotations/annotation_constants.py +33 -0
- megadetector/data_management/camtrap_dp_to_coco.py +270 -0
- megadetector/data_management/cct_json_utils.py +566 -0
- megadetector/data_management/cct_to_md.py +184 -0
- megadetector/data_management/cct_to_wi.py +293 -0
- megadetector/data_management/coco_to_labelme.py +284 -0
- megadetector/data_management/coco_to_yolo.py +702 -0
- megadetector/data_management/databases/__init__.py +0 -0
- megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
- megadetector/data_management/databases/integrity_check_json_db.py +528 -0
- megadetector/data_management/databases/subset_json_db.py +195 -0
- megadetector/data_management/generate_crops_from_cct.py +200 -0
- megadetector/data_management/get_image_sizes.py +164 -0
- megadetector/data_management/labelme_to_coco.py +559 -0
- megadetector/data_management/labelme_to_yolo.py +349 -0
- megadetector/data_management/lila/__init__.py +0 -0
- megadetector/data_management/lila/create_lila_blank_set.py +556 -0
- megadetector/data_management/lila/create_lila_test_set.py +187 -0
- megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
- megadetector/data_management/lila/download_lila_subset.py +182 -0
- megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
- megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
- megadetector/data_management/lila/get_lila_image_counts.py +112 -0
- megadetector/data_management/lila/lila_common.py +319 -0
- megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
- megadetector/data_management/mewc_to_md.py +344 -0
- megadetector/data_management/ocr_tools.py +873 -0
- megadetector/data_management/read_exif.py +964 -0
- megadetector/data_management/remap_coco_categories.py +195 -0
- megadetector/data_management/remove_exif.py +156 -0
- megadetector/data_management/rename_images.py +194 -0
- megadetector/data_management/resize_coco_dataset.py +663 -0
- megadetector/data_management/speciesnet_to_md.py +41 -0
- megadetector/data_management/wi_download_csv_to_coco.py +247 -0
- megadetector/data_management/yolo_output_to_md_output.py +594 -0
- megadetector/data_management/yolo_to_coco.py +876 -0
- megadetector/data_management/zamba_to_md.py +188 -0
- megadetector/detection/__init__.py +0 -0
- megadetector/detection/change_detection.py +840 -0
- megadetector/detection/process_video.py +479 -0
- megadetector/detection/pytorch_detector.py +1451 -0
- megadetector/detection/run_detector.py +1267 -0
- megadetector/detection/run_detector_batch.py +2159 -0
- megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
- megadetector/detection/run_md_and_speciesnet.py +1494 -0
- megadetector/detection/run_tiled_inference.py +1038 -0
- megadetector/detection/tf_detector.py +209 -0
- megadetector/detection/video_utils.py +1379 -0
- megadetector/postprocessing/__init__.py +0 -0
- megadetector/postprocessing/add_max_conf.py +72 -0
- megadetector/postprocessing/categorize_detections_by_size.py +166 -0
- megadetector/postprocessing/classification_postprocessing.py +1752 -0
- megadetector/postprocessing/combine_batch_outputs.py +249 -0
- megadetector/postprocessing/compare_batch_results.py +2110 -0
- megadetector/postprocessing/convert_output_format.py +403 -0
- megadetector/postprocessing/create_crop_folder.py +629 -0
- megadetector/postprocessing/detector_calibration.py +570 -0
- megadetector/postprocessing/generate_csv_report.py +522 -0
- megadetector/postprocessing/load_api_results.py +223 -0
- megadetector/postprocessing/md_to_coco.py +428 -0
- megadetector/postprocessing/md_to_labelme.py +351 -0
- megadetector/postprocessing/md_to_wi.py +41 -0
- megadetector/postprocessing/merge_detections.py +392 -0
- megadetector/postprocessing/postprocess_batch_results.py +2077 -0
- megadetector/postprocessing/remap_detection_categories.py +226 -0
- megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
- megadetector/postprocessing/separate_detections_into_folders.py +795 -0
- megadetector/postprocessing/subset_json_detector_output.py +964 -0
- megadetector/postprocessing/top_folders_to_bottom.py +238 -0
- megadetector/postprocessing/validate_batch_results.py +332 -0
- megadetector/taxonomy_mapping/__init__.py +0 -0
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +213 -0
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
- megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
- megadetector/taxonomy_mapping/simple_image_download.py +224 -0
- megadetector/taxonomy_mapping/species_lookup.py +1008 -0
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
- megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
- megadetector/tests/__init__.py +0 -0
- megadetector/tests/test_nms_synthetic.py +335 -0
- megadetector/utils/__init__.py +0 -0
- megadetector/utils/ct_utils.py +1857 -0
- megadetector/utils/directory_listing.py +199 -0
- megadetector/utils/extract_frames_from_video.py +307 -0
- megadetector/utils/gpu_test.py +125 -0
- megadetector/utils/md_tests.py +2072 -0
- megadetector/utils/path_utils.py +2832 -0
- megadetector/utils/process_utils.py +172 -0
- megadetector/utils/split_locations_into_train_val.py +237 -0
- megadetector/utils/string_utils.py +234 -0
- megadetector/utils/url_utils.py +825 -0
- megadetector/utils/wi_platform_utils.py +968 -0
- megadetector/utils/wi_taxonomy_utils.py +1759 -0
- megadetector/utils/write_html_image_list.py +239 -0
- megadetector/visualization/__init__.py +0 -0
- megadetector/visualization/plot_utils.py +309 -0
- megadetector/visualization/render_images_with_thumbnails.py +243 -0
- megadetector/visualization/visualization_utils.py +1940 -0
- megadetector/visualization/visualize_db.py +630 -0
- megadetector/visualization/visualize_detector_output.py +479 -0
- megadetector/visualization/visualize_video_output.py +705 -0
- megadetector-10.0.13.dist-info/METADATA +134 -0
- megadetector-10.0.13.dist-info/RECORD +147 -0
- megadetector-10.0.13.dist-info/WHEEL +5 -0
- megadetector-10.0.13.dist-info/licenses/LICENSE +19 -0
- megadetector-10.0.13.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
integrity_check_json_db.py
|
|
4
|
+
|
|
5
|
+
Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file, specifically:
|
|
6
|
+
|
|
7
|
+
* Verifies that required fields are present and have the right types
|
|
8
|
+
* Verifies that annotations refer to valid images
|
|
9
|
+
* Verifies that annotations refer to valid categories
|
|
10
|
+
* Verifies that image, category, and annotation IDs are unique
|
|
11
|
+
* Optionally checks file existence
|
|
12
|
+
* Finds un-annotated images
|
|
13
|
+
* Finds unused categories
|
|
14
|
+
* Prints a list of categories sorted by count
|
|
15
|
+
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
#%% Constants and environment
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import json
|
|
22
|
+
import os
|
|
23
|
+
import sys
|
|
24
|
+
|
|
25
|
+
from functools import partial
|
|
26
|
+
from multiprocessing.pool import Pool, ThreadPool
|
|
27
|
+
from operator import itemgetter
|
|
28
|
+
from tqdm import tqdm
|
|
29
|
+
|
|
30
|
+
from megadetector.visualization.visualization_utils import open_image
|
|
31
|
+
from megadetector.utils import ct_utils
|
|
32
|
+
from megadetector.utils.path_utils import find_images
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
#%% Classes
|
|
36
|
+
|
|
37
|
+
class IntegrityCheckOptions:
    """
    Settings bundle consumed by integrity_check_json_db().

    Every field is a plain instance attribute assigned in __init__; callers create an
    instance and overwrite whichever fields they need before passing it along.
    """

    def __init__(self):

        # NOTE: the assignment order below is visible to users, because
        # integrity_check_json_db() prints options.__dict__ in verbose mode.

        #: Folder that the file names stored in the .json file are relative to
        self.baseDir = ''

        #: Compare the width/height stored in the .json file against the image files?
        self.bCheckImageSizes = False

        #: Verify that every image listed in the .json file is present on disk?
        self.bCheckImageExistence = False

        #: Look in [baseDir] for image files that the .json file never mentions?
        self.bFindUnusedImages = False

        #: Treat a missing 'location' field on an image record as an error?
        self.bRequireLocation = True

        #: Debugging aid: maximum number of image records to process (values <= 0 mean no cap)
        self.iMaxNumImages = -1

        #: Worker count for parallel checks; values <= 1 turn parallelization off
        self.nThreads = 10

        #: Parallelize with threads (True) or with processes (False)
        self.parallelizeWithThreads = True

        #: Print progress and statistics while checking
        self.verbose = True

        #: Accept integer image/annotation IDs (COCO uses ints, CCT files use strings)
        self.allowIntIDs = False

        #: Fail if the database has no 'info' field
        self.requireInfo = False
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
#%% Functions
|
|
79
|
+
|
|
80
|
+
def _check_image_existence_and_size(image,options=None):
|
|
81
|
+
"""
|
|
82
|
+
Validate the image represented in the CCT image dict [image], which should have fields:
|
|
83
|
+
|
|
84
|
+
* file_name
|
|
85
|
+
* width
|
|
86
|
+
* height
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
image (dict): image to validate
|
|
90
|
+
options (IntegrityCheckOptions): parameters impacting validation
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
str: None if this image passes validation, otherwise an error string
|
|
94
|
+
"""
|
|
95
|
+
|
|
96
|
+
if options is None:
|
|
97
|
+
options = IntegrityCheckOptions()
|
|
98
|
+
|
|
99
|
+
assert options.bCheckImageExistence
|
|
100
|
+
|
|
101
|
+
file_path = os.path.join(options.baseDir,image['file_name'])
|
|
102
|
+
if not os.path.isfile(file_path):
|
|
103
|
+
s = 'Image path {} does not exist'.format(file_path)
|
|
104
|
+
return s
|
|
105
|
+
|
|
106
|
+
if options.bCheckImageSizes:
|
|
107
|
+
if not ('height' in image and 'width' in image):
|
|
108
|
+
s = 'Missing image size in {}'.format(file_path)
|
|
109
|
+
return s
|
|
110
|
+
|
|
111
|
+
# width, height = Image.open(file_path).size
|
|
112
|
+
try:
|
|
113
|
+
pil_im = open_image(file_path)
|
|
114
|
+
width,height = pil_im.size
|
|
115
|
+
pil_im.close()
|
|
116
|
+
except Exception as e:
|
|
117
|
+
s = 'Error opening {}: {}'.format(file_path,str(e))
|
|
118
|
+
return s
|
|
119
|
+
if (not (width == image['width'] and height == image['height'])):
|
|
120
|
+
s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
|
|
121
|
+
image['id'], file_path, image['width'], image['height'], width, height)
|
|
122
|
+
return s
|
|
123
|
+
|
|
124
|
+
return None
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def integrity_check_json_db(json_file, options=None):
    """
    Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file:

    * Verifies that required fields are present and have the right types
    * Verifies that image, category, and annotation IDs are unique
    * Verifies that annotations refer to valid images and categories
    * Optionally checks image existence and image sizes on disk
    * Optionally finds image files under the base folder that the database never uses
    * Counts un-annotated images, unused categories, and annotations per category

    Structural problems (missing fields, wrong types, duplicate IDs, dangling references)
    are reported by failing assert statements, i.e. as AssertionError; note that assert
    statements are skipped when Python runs with -O.  Problems with the image files
    themselves do not raise; they are collected in error_info['validation_errors'].

    Side effects on the arguments:

    * A '_count' field (number of annotations) is written to every category and to every
      image record that is processed
    * Backslashes in image['file_name'] are replaced with forward slashes
    * options.bCheckImageExistence is forced to True when options.bCheckImageSizes is True,
      and options.baseDir is set to '' when it is None

    Args:
        json_file (str): filename to validate, or an already-loaded dict
        options (IntegrityCheckOptions, optional): see IntegrityCheckOptions

    Returns:
        tuple: tuple containing:
            - sorted_categories (list): the category dicts from [json_file], sorted by
              annotation count (most frequent first)
            - data (dict): the data loaded from [json_file]
            - error_info (dict): specific validation errors, with keys 'unused_files'
              (list of relative paths) and 'validation_errors' (list of
              (filename,error_string) tuples)

    Raises:
        ValueError: if [json_file] is neither a str nor a dict
        AssertionError: if any structural check fails
    """

    if options is None:
        options = IntegrityCheckOptions()

    # Size checking requires opening every file, so it implies existence checking
    if options.bCheckImageSizes:
        options.bCheckImageExistence = True

    if options.verbose:
        print(options.__dict__)

    if options.baseDir is None:
        options.baseDir = ''

    base_dir = options.baseDir


    ##%% Read .json file if necessary, integrity-check fields

    if isinstance(json_file,dict):

        data = json_file

    elif isinstance(json_file,str):

        assert os.path.isfile(json_file), '.json file {} does not exist'.format(json_file)

        if options.verbose:
            print('Reading .json {} with base dir [{}]...'.format(
                json_file,base_dir))

        with open(json_file,'r') as f:
            data = json.load(f)

    else:

        raise ValueError('Illegal value for json_file')

    images = data['images']
    annotations = data['annotations']
    categories = data['categories']

    if options.requireInfo:
        assert 'info' in data, 'No info struct in database'

    if len(base_dir) > 0:
        assert os.path.isdir(base_dir), \
            'Base directory {} does not exist'.format(base_dir)


    ##%% Build dictionaries, checking ID uniqueness and internal validity as we go

    image_id_to_image = {}
    ann_id_to_ann = {}
    category_id_to_category = {}
    category_name_to_category = {}
    image_location_set = set()

    if options.verbose:
        print('Checking categories...')

    for cat in tqdm(categories):

        # Confirm that required fields are present
        assert 'name' in cat, 'Category record without a name field'
        assert 'id' in cat, 'Category record without an id field'

        assert isinstance(cat['id'],int), \
            'Illegal category ID type: [{}]'.format(str(cat['id']))
        assert isinstance(cat['name'],str), \
            'Illegal category name type [{}]'.format(str(cat['name']))

        category_id = cat['id']
        category_name = cat['name']

        # Confirm ID uniqueness
        assert category_id not in category_id_to_category, \
            'Category ID {} is used more than once'.format(category_id)
        category_id_to_category[category_id] = cat
        cat['_count'] = 0

        assert category_name not in category_name_to_category, \
            'Category name {} is used more than once'.format(category_name)
        category_name_to_category[category_name] = cat

    # ...for each category

    if options.verbose:
        print('\nChecking image records...')

    # IDs of image records dropped by the iMaxNumImages cap.  Annotations that point at
    # these images are skipped later, rather than being reported as references to
    # non-existent images; without this, the cap fails on any annotated database.
    trimmed_image_ids = set()

    if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:

        if options.verbose:
            print('Trimming image list to {}'.format(options.iMaxNumImages))

        for dropped_image in images[options.iMaxNumImages:]:
            # Dropped records are not validated, so only remember well-formed IDs
            if isinstance(dropped_image,dict) and \
               isinstance(dropped_image.get('id',None),(str,int)):
                trimmed_image_ids.add(dropped_image['id'])

        images = images[0:options.iMaxNumImages]

    image_paths_in_json = set()

    sequences = set()

    # image = images[0]
    for image in tqdm(images):

        image['_count'] = 0

        # Confirm that required fields are present
        assert 'file_name' in image, 'Image record without a file_name field'
        assert 'id' in image, 'Image record without an id field'

        # Check the type before calling str methods on the file name, so that a bad
        # type is reported through the assertion rather than as an AttributeError
        assert isinstance(image['file_name'],str), 'Illegal image filename type'

        image['file_name'] = image['file_name'].replace('\\','/')

        image_paths_in_json.add(image['file_name'])

        if options.allowIntIDs:
            assert isinstance(image['id'],str) or isinstance(image['id'],int), \
                'Illegal image ID type'
        else:
            assert isinstance(image['id'],str), 'Illegal image ID type'

        image_id = image['id']

        # Confirm ID uniqueness
        assert image_id not in image_id_to_image, 'Duplicate image ID {}'.format(image_id)

        image_id_to_image[image_id] = image

        # Width and height must be supplied together or not at all
        if 'height' in image:
            assert 'width' in image, 'Image with height but no width: {}'.format(image['id'])

        if 'width' in image:
            assert 'height' in image, 'Image with width but no height: {}'.format(image['id'])

        if options.bRequireLocation:
            assert 'location' in image, 'No location available for: {}'.format(image['id'])

        if 'location' in image:
            # We previously supported ints here; this should be strings now
            assert isinstance(image['location'], str), 'Illegal image location type'
            image_location_set.add(image['location'])

        if 'seq_id' in image:
            sequences.add(image['seq_id'])

        # 'seq_id' is the only accepted spelling of the sequence identifier
        assert not ('sequence_id' in image or 'sequence' in image), 'Illegal sequence identifier'

    # ...for each image

    unused_files = []

    image_paths_relative = None

    # Are we checking for unused images?
    if (len(base_dir) > 0) and options.bFindUnusedImages:

        if options.verbose:
            print('\nEnumerating images...')

        image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)

        for fn_relative in image_paths_relative:
            if fn_relative not in image_paths_in_json:
                unused_files.append(fn_relative)

    # List of (filename,error_string) tuples
    validation_errors = []

    # If we're checking image existence but not image size, we don't need to read the images
    if options.bCheckImageExistence and not options.bCheckImageSizes:

        # Re-use the folder listing from the unused-image scan if we already have it
        if image_paths_relative is None:
            image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)

        image_paths_relative_set = set(image_paths_relative)

        for im in images:
            if im['file_name'] not in image_paths_relative_set:
                validation_errors.append((im['file_name'],'not found in relative path list'))

    # If we're checking image size, we need to read the images
    if options.bCheckImageSizes:

        if len(base_dir) == 0:
            print('Warning: checking image sizes without a base directory, assuming "."')

        if options.verbose:
            print('Checking image existence and/or image sizes...')

        if options.nThreads is not None and options.nThreads > 1:

            if options.parallelizeWithThreads:
                worker_string = 'threads'
            else:
                worker_string = 'processes'

            if options.verbose:
                print('Starting a pool of {} {}'.format(options.nThreads,worker_string))

            if options.parallelizeWithThreads:
                pool = ThreadPool(options.nThreads)
            else:
                pool = Pool(options.nThreads)

            try:
                # imap() yields results in input order, so results[i] belongs to images[i]
                results = list(tqdm(pool.imap(
                    partial(_check_image_existence_and_size,options=options), images),
                    total=len(images)))
            finally:
                # Always release the workers, even if a check raised
                pool.close()
                pool.join()
                if options.verbose:
                    print('Pool closed and joined for image size checks')

        else:

            results = []
            for im in tqdm(images):
                results.append(_check_image_existence_and_size(im,options))

        for i_image,result in enumerate(results):
            if result is not None:
                validation_errors.append((images[i_image]['file_name'],result))

    # ...if we're checking image sizes

    if options.verbose:
        print('{} validation errors (of {})'.format(len(validation_errors),len(images)))
        print('Checking annotations...')

    n_boxes = 0
    n_annotations_skipped = 0

    for ann in tqdm(annotations):

        # Confirm that required fields are present
        assert 'image_id' in ann, 'Annotation record without an image_id field'
        assert 'id' in ann, 'Annotation record without an id field'
        assert 'category_id' in ann, 'Annotation record without a category_id field'

        if options.allowIntIDs:
            assert isinstance(ann['id'],str) or isinstance(ann['id'],int), \
                'Illegal annotation ID type'
            assert isinstance(ann['image_id'],str) or isinstance(ann['image_id'],int), \
                'Illegal annotation image ID type'
        else:
            assert isinstance(ann['id'],str), 'Illegal annotation ID type'
            assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'

        assert isinstance(ann['category_id'],int), 'Illegal annotation category ID type'

        if 'bbox' in ann:
            n_boxes += 1

        ann_id = ann['id']

        # Confirm ID uniqueness
        assert ann_id not in ann_id_to_ann, 'Duplicate annotation ID {}'.format(ann_id)
        ann_id_to_ann[ann_id] = ann

        # Confirm validity
        assert ann['category_id'] in category_id_to_category, \
            'Category {} not found in category list'.format(ann['category_id'])

        # Annotations for images removed by the iMaxNumImages cap are not errors; they
        # are left out of the per-image and per-category counts
        if (ann['image_id'] not in image_id_to_image) and \
           (ann['image_id'] in trimmed_image_ids):
            n_annotations_skipped += 1
            continue

        assert ann['image_id'] in image_id_to_image, \
            'Image ID {} referred to by annotation {}, not available'.format(
                ann['image_id'],ann['id'])

        image_id_to_image[ann['image_id']]['_count'] += 1
        category_id_to_category[ann['category_id']]['_count'] +=1

    # ...for each annotation

    if options.verbose and n_annotations_skipped > 0:
        print('Skipped {} annotations that refer to trimmed images'.format(
            n_annotations_skipped))

    sorted_categories = sorted(categories, key=itemgetter('_count'), reverse=True)


    ##%% Print statistics

    if options.verbose:

        # Find un-annotated images and multi-annotation images
        n_unannotated = 0
        n_multi_annotated = 0

        for image in images:
            if image['_count'] == 0:
                n_unannotated += 1
            elif image['_count'] > 1:
                n_multi_annotated += 1

        print('\nFound {} unannotated images, {} images with multiple annotations'.format(
            n_unannotated,n_multi_annotated))

        if (len(base_dir) > 0) and options.bFindUnusedImages:
            print('Found {} unused image files'.format(len(unused_files)))

        n_unused_categories = 0

        # Find unused categories
        for cat in categories:
            if cat['_count'] == 0:
                print('Unused category: {}'.format(cat['name']))
                n_unused_categories += 1

        print('Found {} unused categories'.format(n_unused_categories))

        sequence_string = 'no sequence info'
        if len(sequences) > 0:
            sequence_string = '{} sequences'.format(len(sequences))

        print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
            len(images),len(annotations),n_boxes,len(categories),sequence_string))

        if len(image_location_set) > 0:
            print('DB contains images from {} locations\n'.format(len(image_location_set)))

        print('Categories and annotation (not image) counts:\n')

        for cat in sorted_categories:
            print('{:6} {}'.format(cat['_count'],cat['name']))

        print('')

    # ...if we're printing statistics

    error_info = {}
    error_info['unused_files'] = unused_files
    error_info['validation_errors'] = validation_errors

    return sorted_categories, data, error_info

# ...def integrity_check_json_db()
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
#%% Command-line driver
|
|
467
|
+
|
|
468
|
+
def main(): # noqa
    """
    Command-line entry point: parse arguments, copy them onto an IntegrityCheckOptions
    object, and run integrity_check_json_db() on the requested file.

    Prints the usage text and exits when called without any arguments.
    """

    parser = argparse.ArgumentParser()

    # The database to validate
    parser.add_argument(
        'json_file',
        type=str,
        help='COCO-formatted .json file to validate')

    # On/off switches for the optional checks
    parser.add_argument(
        '--bCheckImageSizes',
        action='store_true',
        help=('Validate image size, requires baseDir to be specified. '
              'Implies existence checking.'))
    parser.add_argument(
        '--bCheckImageExistence',
        action='store_true',
        help='Validate image existence, requires baseDir to be specified')
    parser.add_argument(
        '--bFindUnusedImages',
        action='store_true',
        help=("Check for images in baseDir that aren't in the database, "
              'requires baseDir to be specified'))

    parser.add_argument(
        '--baseDir',
        action='store',
        type=str,
        default='',
        help='Base directory for images')
    parser.add_argument(
        '--bAllowNoLocation',
        action='store_true',
        help='Disable errors when no location is specified for an image')
    parser.add_argument(
        '--iMaxNumImages',
        action='store',
        type=int,
        default=-1,
        help='Cap on total number of images to check')
    parser.add_argument(
        '--nThreads',
        action='store',
        type=int,
        default=10,
        help=('Number of threads (only relevant when verifying image '
              'sizes and/or existence)'))

    # With nothing on the command line, show usage rather than an argparse error
    if len(sys.argv) <= 1:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # The command line exposes the negated form of the bRequireLocation option
    args.bRequireLocation = (not args.bAllowNoLocation)

    options = IntegrityCheckOptions()
    ct_utils.args_to_object(args, options)

    integrity_check_json_db(args.json_file,options)


if __name__ == '__main__':
    main()
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
#%% Interactive driver(s)
|
|
506
|
+
|
|
507
|
+
# Scratch cell for interactive use in an IDE; the "if False" guard means none of this
# runs when the module is imported or executed as a script.
if False:

    #%%

    # Equivalent command line:
    #
    # python integrity_check_json_db.py ~/data/ena24.json --baseDir ~/data/ENA24 --bAllowNoLocation

    # Integrity-check .json files for LILA
    json_files = [os.path.expanduser('~/data/ena24.json')]

    options = IntegrityCheckOptions()
    options.baseDir = os.path.expanduser('~/data/ENA24')
    options.bCheckImageSizes = False
    options.bFindUnusedImages = True
    options.bRequireLocation = False

    # Uncomment to cap the number of images while debugging
    # options.iMaxNumImages = 10

    for json_file in json_files:
        sorted_categories,data,_ = integrity_check_json_db(json_file, options)
|