megadetector 10.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +701 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +563 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +192 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +665 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +984 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2172 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1604 -0
  81. megadetector/detection/run_tiled_inference.py +1044 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1943 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2140 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +231 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2872 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1766 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1973 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +498 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.15.dist-info/METADATA +115 -0
  144. megadetector-10.0.15.dist-info/RECORD +147 -0
  145. megadetector-10.0.15.dist-info/WHEEL +5 -0
  146. megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.15.dist-info/top_level.txt +1 -0
@@ -0,0 +1,563 @@
1
+ """
2
+
3
+ integrity_check_json_db.py
4
+
5
+ Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file, specifically:
6
+
7
+ * Verifies that required fields are present and have the right types
8
+ * Verifies that annotations refer to valid images
9
+ * Verifies that annotations refer to valid categories
10
+ * Verifies that image, category, and annotation IDs are unique
11
+ * Optionally checks file existence
12
+ * Finds un-annotated images
13
+ * Finds unused categories
14
+ * Prints a list of categories sorted by count
15
+
16
+ """
17
+
18
+ #%% Constants and environment
19
+
20
+ import argparse
21
+ import json
22
+ import os
23
+ import sys
24
+
25
+ from functools import partial
26
+ from multiprocessing.pool import Pool, ThreadPool
27
+ from operator import itemgetter
28
+ from tqdm import tqdm
29
+
30
+ from megadetector.visualization.visualization_utils import open_image
31
+ from megadetector.utils import ct_utils
32
+ from megadetector.utils.path_utils import find_images
33
+
34
+
35
+ #%% Classes and environment
36
+
37
class IntegrityCheckOptions:
    """
    Options controlling the behavior of integrity_check_json_db().

    All fields are plain attributes with defaults set in __init__; callers
    typically construct an instance and override a handful of fields.
    """

    def __init__(self):

        # --- Filesystem options ---

        #: Image folder; filenames in the .json file should be relative to this folder
        self.baseDir = ''

        #: Should we validate the image sizes?
        self.bCheckImageSizes = False

        #: Should we check that all the images in the .json file exist on disk?
        self.bCheckImageExistence = False

        #: Should we search [baseDir] for images that are not used in the .json file?
        self.bFindUnusedImages = False

        # --- Content-validation options ---

        #: Should we require that all images in the .json file have a 'location' field?
        self.bRequireLocation = True

        #: Allow integer-valued image and annotation IDs (COCO uses this, CCT files use strings)
        self.allowIntIDs = False

        #: If True, error if the 'info' field is not present
        self.requireInfo = False

        #: Validate that boxes have positive width/height values, can be 'error',
        #: 'warning', or None
        self.validateBoxes = None

        # --- Execution options ---

        #: For debugging, limit the number of images we'll process
        self.iMaxNumImages = -1

        #: Number of threads to use for parallelization, set to <= 1 to disable parallelization
        self.nThreads = 10

        #: Whether to use threads (rather than processes) for parallelization
        self.parallelizeWithThreads = True

        #: Enable additional debug output
        self.verbose = True
80
+
81
+
82
+ #%% Functions
83
+
84
+ def _check_image_existence_and_size(image,options=None):
85
+ """
86
+ Validate the image represented in the CCT image dict [image], which should have fields:
87
+
88
+ * file_name
89
+ * width
90
+ * height
91
+
92
+ Args:
93
+ image (dict): image to validate
94
+ options (IntegrityCheckOptions): parameters impacting validation
95
+
96
+ Returns:
97
+ str: None if this image passes validation, otherwise an error string
98
+ """
99
+
100
+ if options is None:
101
+ options = IntegrityCheckOptions()
102
+
103
+ assert options.bCheckImageExistence
104
+
105
+ file_path = os.path.join(options.baseDir,image['file_name'])
106
+ if not os.path.isfile(file_path):
107
+ s = 'Image path {} does not exist'.format(file_path)
108
+ return s
109
+
110
+ if options.bCheckImageSizes:
111
+ if not ('height' in image and 'width' in image):
112
+ s = 'Missing image size in {}'.format(file_path)
113
+ return s
114
+
115
+ # width, height = Image.open(file_path).size
116
+ try:
117
+ pil_im = open_image(file_path)
118
+ width,height = pil_im.size
119
+ pil_im.close()
120
+ except Exception as e:
121
+ s = 'Error opening {}: {}'.format(file_path,str(e))
122
+ return s
123
+ if (not (width == image['width'] and height == image['height'])):
124
+ s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
125
+ image['id'], file_path, image['width'], image['height'], width, height)
126
+ return s
127
+
128
+ return None
129
+
130
+
131
def integrity_check_json_db(json_file, options=None):
    """
    Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file; see
    module header comment for a list of the validation steps.

    Most integrity failures are raised as AssertionError; per-file issues (missing
    files, size mismatches, soft box-validation warnings) are accumulated and
    returned in [error_info] instead.

    Args:
        json_file (str): filename to validate, or an already-loaded dict
        options (IntegrityCheckOptions, optional): see IntegrityCheckOptions

    Returns:
        tuple: tuple containing:
            - sorted_categories (dict): list of categories used in [json_file], sorted by frequency
            - data (dict): the data loaded from [json_file]
            - error_info (dict): specific validation errors
    """

    if options is None:
        options = IntegrityCheckOptions()

    # Size-checking requires opening each image, which implies existence-checking
    if options.bCheckImageSizes:
        options.bCheckImageExistence = True

    if options.verbose:
        print(options.__dict__)

    if options.baseDir is None:
        options.baseDir = ''

    base_dir = options.baseDir


    ##%% Read .json file if necessary, integrity-check fields

    if isinstance(json_file,dict):

        # Caller passed already-loaded data; validate it in place
        data = json_file

    elif isinstance(json_file,str):

        assert os.path.isfile(json_file), '.json file {} does not exist'.format(json_file)

        if options.verbose:
            print('Reading .json {} with base dir [{}]...'.format(
                json_file,base_dir))

        with open(json_file,'r') as f:
            data = json.load(f)

    else:

        raise ValueError('Illegal value for json_file')

    images = data['images']
    annotations = data['annotations']
    categories = data['categories']

    if options.requireInfo:
        assert 'info' in data, 'No info struct in database'

    if len(base_dir) > 0:
        assert os.path.isdir(base_dir), \
            'Base directory {} does not exist'.format(base_dir)


    ##%% Build dictionaries, checking ID uniqueness and internal validity as we go

    image_id_to_image = {}
    ann_id_to_ann = {}
    category_id_to_category = {}
    category_name_to_category = {}
    image_location_set = set()

    if options.verbose:
        print('Checking categories...')

    for cat in tqdm(categories):

        # Confirm that required fields are present
        assert 'name' in cat
        assert 'id' in cat

        # Category IDs are always ints, even in CCT files where image/annotation
        # IDs are strings
        assert isinstance(cat['id'],int), \
            'Illegal category ID type: [{}]'.format(str(cat['id']))
        assert isinstance(cat['name'],str), \
            'Illegal category name type [{}]'.format(str(cat['name']))

        category_id = cat['id']
        category_name = cat['name']

        # Confirm ID uniqueness
        assert category_id not in category_id_to_category, \
            'Category ID {} is used more than once'.format(category_id)
        category_id_to_category[category_id] = cat

        # Annotation counts are accumulated in the annotation loop below; note
        # that this mutates the caller's category dicts
        cat['_count'] = 0

        assert category_name not in category_name_to_category, \
            'Category name {} is used more than once'.format(category_name)
        category_name_to_category[category_name] = cat

    # ...for each category

    if options.verbose:
        print('\nChecking image records...')

    if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:

        if options.verbose:
            print('Trimming image list to {}'.format(options.iMaxNumImages))
        images = images[0:options.iMaxNumImages]

    image_paths_in_json = set()

    sequences = set()

    # image = images[0]
    for image in tqdm(images):

        # Annotation count for this image; accumulated in the annotation loop below
        image['_count'] = 0

        # Confirm that required fields are present
        assert 'file_name' in image
        assert 'id' in image

        # Normalize path separators (mutates the caller's image dicts)
        image['file_name'] = image['file_name'].replace('\\','/')

        image_paths_in_json.add(image['file_name'])

        assert isinstance(image['file_name'],str), 'Illegal image filename type'

        if options.allowIntIDs:
            assert isinstance(image['id'],str) or isinstance(image['id'],int), \
                'Illegal image ID type'
        else:
            assert isinstance(image['id'],str), 'Illegal image ID type'

        image_id = image['id']

        # Confirm ID uniqueness
        assert image_id not in image_id_to_image, 'Duplicate image ID {}'.format(image_id)

        image_id_to_image[image_id] = image

        # Width and height are optional, but must appear together
        if 'height' in image:
            assert 'width' in image, 'Image with height but no width: {}'.format(image['id'])

        if 'width' in image:
            assert 'height' in image, 'Image with width but no height: {}'.format(image['id'])

        if options.bRequireLocation:
            assert 'location' in image, 'No location available for: {}'.format(image['id'])

        if 'location' in image:
            # We previously supported ints here; this should be strings now
            # assert isinstance(image['location'], str) or isinstance(image['location'], int), \
            #    'Illegal image location type'
            assert isinstance(image['location'], str)
            image_location_set.add(image['location'])

        if 'seq_id' in image:
            sequences.add(image['seq_id'])

        # 'seq_id' is the only legal field name for sequence information
        assert not ('sequence_id' in image or 'sequence' in image), 'Illegal sequence identifier'

    # ...for each image

    unused_files = []

    image_paths_relative = None

    # Are we checking for unused images?
    if (len(base_dir) > 0) and options.bFindUnusedImages:

        if options.verbose:
            print('\nEnumerating images...')

        image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)

        for fn_relative in image_paths_relative:
            if fn_relative not in image_paths_in_json:
                unused_files.append(fn_relative)

    # List of (filename,error_string) tuples
    validation_errors = []

    # If we're checking image existence but not image size, we don't need to read the images
    if options.bCheckImageExistence and not options.bCheckImageSizes:

        # Re-use the enumeration from the unused-image check if we have it
        if image_paths_relative is None:
            image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)

        image_paths_relative_set = set(image_paths_relative)

        for im in images:
            if im['file_name'] not in image_paths_relative_set:
                validation_errors.append((im['file_name'],'not found in relative path list'))

    # If we're checking image size, we need to read the images
    if options.bCheckImageSizes:

        if len(base_dir) == 0:
            print('Warning: checking image sizes without a base directory, assuming "."')

        if options.verbose:
            print('Checking image existence and/or image sizes...')

        if options.nThreads is not None and options.nThreads > 1:

            if options.parallelizeWithThreads:
                worker_string = 'threads'
            else:
                worker_string = 'processes'

            if options.verbose:
                print('Starting a pool of {} {}'.format(options.nThreads,worker_string))
            if options.parallelizeWithThreads:
                pool = ThreadPool(options.nThreads)
            else:
                pool = Pool(options.nThreads)
            try:
                # imap preserves input order, so results[i] corresponds to images[i]
                results = list(tqdm(pool.imap(
                    partial(_check_image_existence_and_size,options=options), images),
                    total=len(images)))
            finally:
                pool.close()
                pool.join()
                print('Pool closed and joined for image size checks')
        else:
            # Serial fallback when parallelization is disabled
            results = []
            for im in tqdm(images):
                results.append(_check_image_existence_and_size(im,options))

        # Each non-None result is an error string for the corresponding image
        for i_image,result in enumerate(results):
            if result is not None:
                validation_errors.append((images[i_image]['file_name'],result))

    # ...if we're checking image sizes

    if options.verbose:
        print('{} validation errors (of {})'.format(len(validation_errors),len(images)))
        print('Checking annotations...')

    n_boxes = 0

    for ann in tqdm(annotations):

        # Confirm that required fields are present
        assert 'image_id' in ann
        assert 'id' in ann
        assert 'category_id' in ann

        if options.allowIntIDs:
            assert isinstance(ann['id'],str) or isinstance(ann['id'],int), \
                'Illegal annotation ID type'
            assert isinstance(ann['image_id'],str) or isinstance(ann['image_id'],int), \
                'Illegal annotation image ID type'
        else:
            assert isinstance(ann['id'],str), 'Illegal annotation ID type'
            assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'

        assert isinstance(ann['category_id'],int), 'Illegal annotation category ID type'

        if 'bbox' in ann:
            n_boxes += 1

        ann_id = ann['id']
        image_id = ann['image_id']

        if ('bbox' in ann) and (options.validateBoxes is not None):

            assert options.validateBoxes in ('error','warning'), \
                'Illegal value {} for validateBoxes'.format(options.validateBoxes)

            annotation_string = str(ann['bbox'])

            # We'll allow arbitrary metadata to be tacked on to the end of boxes,
            # so we only check that at least [x,y,w,h] are present
            s = ''
            if len(ann['bbox']) < 4:
                s += 'Annotation error: illegal bounding box in annotation {} for image {}: {}\n'.format(
                    ann_id,image_id,annotation_string)
            if ann['bbox'][2] < 0:
                s += 'Annotation error: negative width in annotation {} for image {}: {}\n'.format(
                    ann_id,image_id,annotation_string)
            if ann['bbox'][3] < 0:
                s += 'Annotation error: negative height in annotation {} for image {}: {}\n'.format(
                    ann_id,image_id,annotation_string)
            if len(s) > 0:
                if options.validateBoxes == 'error':
                    raise ValueError(s)
                else:
                    print('Warning: {}'.format(s))
                # In 'warning' mode, record the issue instead of raising
                im = image_id_to_image[image_id]
                validation_errors.append((im['file_name'],s))

        # ...if we're supposed to validate boxes

        # Confirm ID uniqueness
        assert ann_id not in ann_id_to_ann, \
            'Duplicate annotation ID {}'.format(ann_id)

        ann_id_to_ann[ann_id] = ann

        # Confirm validity
        assert ann['category_id'] in category_id_to_category, \
            'Category {} not found in category list'.format(ann['category_id'])
        assert ann['image_id'] in image_id_to_image, \
            'Image ID {} referred to by annotation {}, not available'.format(
                ann['image_id'],ann['id'])

        # Accumulate per-image and per-category annotation counts
        image_id_to_image[ann['image_id']]['_count'] += 1
        category_id_to_category[ann['category_id']]['_count'] +=1

    # ...for each annotation

    sorted_categories = sorted(categories, key=itemgetter('_count'), reverse=True)


    ##%% Print statistics

    if options.verbose:

        # Find un-annotated images and multi-annotation images
        n_unannotated = 0
        n_multi_annotated = 0

        for image in images:
            if image['_count'] == 0:
                n_unannotated += 1
            elif image['_count'] > 1:
                n_multi_annotated += 1

        print('\nFound {} unannotated images, {} images with multiple annotations'.format(
            n_unannotated,n_multi_annotated))

        if (len(base_dir) > 0) and options.bFindUnusedImages:
            print('Found {} unused image files'.format(len(unused_files)))

        n_unused_categories = 0

        # Find unused categories
        for cat in categories:
            if cat['_count'] == 0:
                print('Unused category: {}'.format(cat['name']))
                n_unused_categories += 1

        print('Found {} unused categories'.format(n_unused_categories))

        sequence_string = 'no sequence info'
        if len(sequences) > 0:
            sequence_string = '{} sequences'.format(len(sequences))

        print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
            len(images),len(annotations),n_boxes,len(categories),sequence_string))

        if len(image_location_set) > 0:
            print('DB contains images from {} locations\n'.format(len(image_location_set)))

        print('Categories and annotation (not image) counts:\n')

        for cat in sorted_categories:
            print('{:6} {}'.format(cat['_count'],cat['name']))

        print('')

    # ...if we're printing statistics

    error_info = {}
    error_info['unused_files'] = unused_files
    error_info['validation_errors'] = validation_errors

    return sorted_categories, data, error_info

# ...def integrity_check_json_db()
499
+
500
+
501
+ #%% Command-line driver
502
+
503
def main(): # noqa
    """
    Command-line driver for integrity_check_json_db().
    """

    parser = argparse.ArgumentParser()

    parser.add_argument(
        'json_file', type=str,
        help='COCO-formatted .json file to validate')
    parser.add_argument(
        '--bCheckImageSizes', action='store_true',
        help='Validate image size, requires baseDir to be specified. Implies existence checking.')
    parser.add_argument(
        '--bCheckImageExistence', action='store_true',
        help='Validate image existence, requires baseDir to be specified')
    parser.add_argument(
        '--bFindUnusedImages', action='store_true',
        help="Check for images in baseDir that aren't in the database, requires baseDir to be specified")
    parser.add_argument(
        '--baseDir', action='store', type=str, default='',
        help='Base directory for images')
    parser.add_argument(
        '--bAllowNoLocation', action='store_true',
        help='Disable errors when no location is specified for an image')
    parser.add_argument(
        '--iMaxNumImages', action='store', type=int, default=-1,
        help='Cap on total number of images to check')
    parser.add_argument(
        '--nThreads', action='store', type=int, default=10,
        help='Number of threads (only relevant when verifying image sizes and/or existence)')

    # With no arguments at all, show usage rather than erroring on the
    # missing positional argument
    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # The CLI exposes the negated flag; the options object wants the positive form
    args.bRequireLocation = (not args.bAllowNoLocation)

    options = IntegrityCheckOptions()
    ct_utils.args_to_object(args, options)

    integrity_check_json_db(args.json_file, options)

if __name__ == '__main__':
    main()
538
+
539
+
540
#%% Interactive driver(s)

# Scratch cells for running this module interactively (e.g. in an IDE's cell
# mode); guarded by "if False" so nothing here executes when the module is
# imported or run as a script.
if False:

    #%%

    # Equivalent command-line invocation, for reference
    """
    python integrity_check_json_db.py ~/data/ena24.json --baseDir ~/data/ENA24 --bAllowNoLocation
    """

    # Integrity-check .json files for LILA
    json_files = [os.path.expanduser('~/data/ena24.json')]

    options = IntegrityCheckOptions()
    options.baseDir = os.path.expanduser('~/data/ENA24')
    options.bCheckImageSizes = False
    options.bFindUnusedImages = True
    options.bRequireLocation = False

    # options.iMaxNumImages = 10

    for json_file in json_files:

        sorted_categories,data,_ = integrity_check_json_db(json_file, options)