megadetector-10.0.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (147)
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +702 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +528 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +187 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +663 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +876 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2159 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1494 -0
  81. megadetector/detection/run_tiled_inference.py +1038 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1752 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2077 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +213 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +224 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2832 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1759 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1940 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +479 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.13.dist-info/METADATA +134 -0
  144. megadetector-10.0.13.dist-info/RECORD +147 -0
  145. megadetector-10.0.13.dist-info/WHEEL +5 -0
  146. megadetector-10.0.13.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.13.dist-info/top_level.txt +1 -0
megadetector/data_management/databases/integrity_check_json_db.py
@@ -0,0 +1,528 @@
+ """
+
+ integrity_check_json_db.py
+
+ Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file, specifically:
+
+ * Verifies that required fields are present and have the right types
+ * Verifies that annotations refer to valid images
+ * Verifies that annotations refer to valid categories
+ * Verifies that image, category, and annotation IDs are unique
+ * Optionally checks file existence
+ * Finds un-annotated images
+ * Finds unused categories
+ * Prints a list of categories sorted by count
+
+ """
+
+ #%% Constants and environment
+
+ import argparse
+ import json
+ import os
+ import sys
+
+ from functools import partial
+ from multiprocessing.pool import Pool, ThreadPool
+ from operator import itemgetter
+ from tqdm import tqdm
+
+ from megadetector.visualization.visualization_utils import open_image
+ from megadetector.utils import ct_utils
+ from megadetector.utils.path_utils import find_images
+
+
+ #%% Classes and environment
+
+ class IntegrityCheckOptions:
+     """
+     Options for integrity_check_json_db()
+     """
+
+     def __init__(self):
+
+         #: Image path; the filenames in the .json file should be relative to this folder
+         self.baseDir = ''
+
+         #: Should we validate the image sizes?
+         self.bCheckImageSizes = False
+
+         #: Should we check that all the images in the .json file exist on disk?
+         self.bCheckImageExistence = False
+
+         #: Should we search [baseDir] for images that are not used in the .json file?
+         self.bFindUnusedImages = False
+
+         #: Should we require that all images in the .json file have a 'location' field?
+         self.bRequireLocation = True
+
+         #: For debugging, limit the number of images we'll process
+         self.iMaxNumImages = -1
+
+         #: Number of threads to use for parallelization, set to <= 1 to disable parallelization
+         self.nThreads = 10
+
+         #: Whether to use threads (rather than processes) for parallelization
+         self.parallelizeWithThreads = True
+
+         #: Enable additional debug output
+         self.verbose = True
+
+         #: Allow integer-valued image and annotation IDs (COCO uses this, CCT files use strings)
+         self.allowIntIDs = False
+
+         #: If True, error if the 'info' field is not present
+         self.requireInfo = False
+
+
+ #%% Functions
+
+ def _check_image_existence_and_size(image,options=None):
+     """
+     Validate the image represented in the CCT image dict [image], which should have fields:
+
+     * file_name
+     * width
+     * height
+
+     Args:
+         image (dict): image to validate
+         options (IntegrityCheckOptions): parameters impacting validation
+
+     Returns:
+         str: None if this image passes validation, otherwise an error string
+     """
+
+     if options is None:
+         options = IntegrityCheckOptions()
+
+     assert options.bCheckImageExistence
+
+     file_path = os.path.join(options.baseDir,image['file_name'])
+     if not os.path.isfile(file_path):
+         s = 'Image path {} does not exist'.format(file_path)
+         return s
+
+     if options.bCheckImageSizes:
+         if not ('height' in image and 'width' in image):
+             s = 'Missing image size in {}'.format(file_path)
+             return s
+
+         # width, height = Image.open(file_path).size
+         try:
+             pil_im = open_image(file_path)
+             width,height = pil_im.size
+             pil_im.close()
+         except Exception as e:
+             s = 'Error opening {}: {}'.format(file_path,str(e))
+             return s
+         if (not (width == image['width'] and height == image['height'])):
+             s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
+                 image['id'], file_path, image['width'], image['height'], width, height)
+             return s
+
+     return None
+
+
+ def integrity_check_json_db(json_file, options=None):
+     """
+     Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file; see
+     the module header comment for a list of the validation steps.
+
+     Args:
+         json_file (str): filename to validate, or an already-loaded dict
+         options (IntegrityCheckOptions, optional): see IntegrityCheckOptions
+
+     Returns:
+         tuple: tuple containing:
+             - sorted_categories (list): list of categories used in [json_file], sorted by frequency
+             - data (dict): the data loaded from [json_file]
+             - error_info (dict): specific validation errors
+     """
+
+     if options is None:
+         options = IntegrityCheckOptions()
+
+     if options.bCheckImageSizes:
+         options.bCheckImageExistence = True
+
+     if options.verbose:
+         print(options.__dict__)
+
+     if options.baseDir is None:
+         options.baseDir = ''
+
+     base_dir = options.baseDir
+
+
+     ##%% Read .json file if necessary, integrity-check fields
+
+     if isinstance(json_file,dict):
+
+         data = json_file
+
+     elif isinstance(json_file,str):
+
+         assert os.path.isfile(json_file), '.json file {} does not exist'.format(json_file)
+
+         if options.verbose:
+             print('Reading .json {} with base dir [{}]...'.format(
+                 json_file,base_dir))
+
+         with open(json_file,'r') as f:
+             data = json.load(f)
+
+     else:
+
+         raise ValueError('Illegal value for json_file')
+
+     images = data['images']
+     annotations = data['annotations']
+     categories = data['categories']
+
+     if options.requireInfo:
+         assert 'info' in data, 'No info struct in database'
+
+     if len(base_dir) > 0:
+         assert os.path.isdir(base_dir), \
+             'Base directory {} does not exist'.format(base_dir)
+
+
+     ##%% Build dictionaries, checking ID uniqueness and internal validity as we go
+
+     image_id_to_image = {}
+     ann_id_to_ann = {}
+     category_id_to_category = {}
+     category_name_to_category = {}
+     image_location_set = set()
+
+     if options.verbose:
+         print('Checking categories...')
+
+     for cat in tqdm(categories):
+
+         # Confirm that required fields are present
+         assert 'name' in cat
+         assert 'id' in cat
+
+         assert isinstance(cat['id'],int), \
+             'Illegal category ID type: [{}]'.format(str(cat['id']))
+         assert isinstance(cat['name'],str), \
+             'Illegal category name type [{}]'.format(str(cat['name']))
+
+         category_id = cat['id']
+         category_name = cat['name']
+
+         # Confirm ID uniqueness
+         assert category_id not in category_id_to_category, \
+             'Category ID {} is used more than once'.format(category_id)
+         category_id_to_category[category_id] = cat
+         cat['_count'] = 0
+
+         assert category_name not in category_name_to_category, \
+             'Category name {} is used more than once'.format(category_name)
+         category_name_to_category[category_name] = cat
+
+     # ...for each category
+
+     if options.verbose:
+         print('\nChecking image records...')
+
+     if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:
+
+         if options.verbose:
+             print('Trimming image list to {}'.format(options.iMaxNumImages))
+         images = images[0:options.iMaxNumImages]
+
+     image_paths_in_json = set()
+
+     sequences = set()
+
+     # image = images[0]
+     for image in tqdm(images):
+
+         image['_count'] = 0
+
+         # Confirm that required fields are present
+         assert 'file_name' in image
+         assert 'id' in image
+
+         image['file_name'] = image['file_name'].replace('\\','/')
+
+         image_paths_in_json.add(image['file_name'])
+
+         assert isinstance(image['file_name'],str), 'Illegal image filename type'
+
+         if options.allowIntIDs:
+             assert isinstance(image['id'],str) or isinstance(image['id'],int), \
+                 'Illegal image ID type'
+         else:
+             assert isinstance(image['id'],str), 'Illegal image ID type'
+
+         image_id = image['id']
+
+         # Confirm ID uniqueness
+         assert image_id not in image_id_to_image, 'Duplicate image ID {}'.format(image_id)
+
+         image_id_to_image[image_id] = image
+
+         if 'height' in image:
+             assert 'width' in image, 'Image with height but no width: {}'.format(image['id'])
+
+         if 'width' in image:
+             assert 'height' in image, 'Image with width but no height: {}'.format(image['id'])
+
+         if options.bRequireLocation:
+             assert 'location' in image, 'No location available for: {}'.format(image['id'])
+
+         if 'location' in image:
+             # We previously supported ints here; this should be strings now
+             # assert isinstance(image['location'], str) or isinstance(image['location'], int), \
+             #     'Illegal image location type'
+             assert isinstance(image['location'], str)
+             image_location_set.add(image['location'])
+
+         if 'seq_id' in image:
+             sequences.add(image['seq_id'])
+
+         assert not ('sequence_id' in image or 'sequence' in image), 'Illegal sequence identifier'
+
+     unused_files = []
+
+     image_paths_relative = None
+
+     # Are we checking for unused images?
+     if (len(base_dir) > 0) and options.bFindUnusedImages:
+
+         if options.verbose:
+             print('\nEnumerating images...')
+
+         image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)
+
+         for fn_relative in image_paths_relative:
+             if fn_relative not in image_paths_in_json:
+                 unused_files.append(fn_relative)
+
+     # List of (filename,error_string) tuples
+     validation_errors = []
+
+     # If we're checking image existence but not image size, we don't need to read the images
+     if options.bCheckImageExistence and not options.bCheckImageSizes:
+
+         if image_paths_relative is None:
+             image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)
+
+         image_paths_relative_set = set(image_paths_relative)
+
+         for im in images:
+             if im['file_name'] not in image_paths_relative_set:
+                 validation_errors.append((im['file_name'],'not found in relative path list'))
+
+     # If we're checking image size, we need to read the images
+     if options.bCheckImageSizes:
+
+         if len(base_dir) == 0:
+             print('Warning: checking image sizes without a base directory, assuming "."')
+
+         if options.verbose:
+             print('Checking image existence and/or image sizes...')
+
+         if options.nThreads is not None and options.nThreads > 1:
+
+             if options.parallelizeWithThreads:
+                 worker_string = 'threads'
+             else:
+                 worker_string = 'processes'
+
+             if options.verbose:
+                 print('Starting a pool of {} {}'.format(options.nThreads,worker_string))
+             if options.parallelizeWithThreads:
+                 pool = ThreadPool(options.nThreads)
+             else:
+                 pool = Pool(options.nThreads)
+             try:
+                 results = list(tqdm(pool.imap(
+                     partial(_check_image_existence_and_size,options=options), images),
+                     total=len(images)))
+             finally:
+                 pool.close()
+                 pool.join()
+                 print('Pool closed and joined for image size checks')
+         else:
+             results = []
+             for im in tqdm(images):
+                 results.append(_check_image_existence_and_size(im,options))
+
+         for i_image,result in enumerate(results):
+             if result is not None:
+                 validation_errors.append((images[i_image]['file_name'],result))
+
+     # ...for each image
+
+     if options.verbose:
+         print('{} validation errors (of {})'.format(len(validation_errors),len(images)))
+         print('Checking annotations...')
+
+     n_boxes = 0
+
+     for ann in tqdm(annotations):
+
+         # Confirm that required fields are present
+         assert 'image_id' in ann
+         assert 'id' in ann
+         assert 'category_id' in ann
+
+         if options.allowIntIDs:
+             assert isinstance(ann['id'],str) or isinstance(ann['id'],int), \
+                 'Illegal annotation ID type'
+             assert isinstance(ann['image_id'],str) or isinstance(ann['image_id'],int), \
+                 'Illegal annotation image ID type'
+         else:
+             assert isinstance(ann['id'],str), 'Illegal annotation ID type'
+             assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'
+
+         assert isinstance(ann['category_id'],int), 'Illegal annotation category ID type'
+
+         if 'bbox' in ann:
+             n_boxes += 1
+
+         ann_id = ann['id']
+
+         # Confirm ID uniqueness
+         assert ann_id not in ann_id_to_ann
+         ann_id_to_ann[ann_id] = ann
+
+         # Confirm validity
+         assert ann['category_id'] in category_id_to_category, \
+             'Category {} not found in category list'.format(ann['category_id'])
+         assert ann['image_id'] in image_id_to_image, \
+             'Image ID {} referred to by annotation {}, not available'.format(
+                 ann['image_id'],ann['id'])
+
+         image_id_to_image[ann['image_id']]['_count'] += 1
+         category_id_to_category[ann['category_id']]['_count'] += 1
+
+     # ...for each annotation
+
+     sorted_categories = sorted(categories, key=itemgetter('_count'), reverse=True)
+
+
+     ##%% Print statistics
+
+     if options.verbose:
+
+         # Find un-annotated images and multi-annotation images
+         n_unannotated = 0
+         n_multi_annotated = 0
+
+         for image in images:
+             if image['_count'] == 0:
+                 n_unannotated += 1
+             elif image['_count'] > 1:
+                 n_multi_annotated += 1
+
+         print('\nFound {} unannotated images, {} images with multiple annotations'.format(
+             n_unannotated,n_multi_annotated))
+
+         if (len(base_dir) > 0) and options.bFindUnusedImages:
+             print('Found {} unused image files'.format(len(unused_files)))
+
+         n_unused_categories = 0
+
+         # Find unused categories
+         for cat in categories:
+             if cat['_count'] == 0:
+                 print('Unused category: {}'.format(cat['name']))
+                 n_unused_categories += 1
+
+         print('Found {} unused categories'.format(n_unused_categories))
+
+         sequence_string = 'no sequence info'
+         if len(sequences) > 0:
+             sequence_string = '{} sequences'.format(len(sequences))
+
+         print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
+             len(images),len(annotations),n_boxes,len(categories),sequence_string))
+
+         if len(image_location_set) > 0:
+             print('DB contains images from {} locations\n'.format(len(image_location_set)))
+
+         print('Categories and annotation (not image) counts:\n')
+
+         for cat in sorted_categories:
+             print('{:6} {}'.format(cat['_count'],cat['name']))
+
+         print('')
+
+     error_info = {}
+     error_info['unused_files'] = unused_files
+     error_info['validation_errors'] = validation_errors
+
+     return sorted_categories, data, error_info
+
+ # ...def integrity_check_json_db()
+
+
+ #%% Command-line driver
+
+ def main(): # noqa
+
+     parser = argparse.ArgumentParser()
+     parser.add_argument('json_file',type=str,
+                         help='COCO-formatted .json file to validate')
+     parser.add_argument('--bCheckImageSizes', action='store_true',
+                         help='Validate image size, requires baseDir to be specified. ' + \
+                              'Implies existence checking.')
+     parser.add_argument('--bCheckImageExistence', action='store_true',
+                         help='Validate image existence, requires baseDir to be specified')
+     parser.add_argument('--bFindUnusedImages', action='store_true',
+                         help='Check for images in baseDir that aren\'t in the database, ' + \
+                              'requires baseDir to be specified')
+     parser.add_argument('--baseDir', action='store', type=str, default='',
+                         help='Base directory for images')
+     parser.add_argument('--bAllowNoLocation', action='store_true',
+                         help='Disable errors when no location is specified for an image')
+     parser.add_argument('--iMaxNumImages', action='store', type=int, default=-1,
+                         help='Cap on total number of images to check')
+     parser.add_argument('--nThreads', action='store', type=int, default=10,
+                         help='Number of threads (only relevant when verifying image ' + \
+                              'sizes and/or existence)')
+
+     if len(sys.argv[1:])==0:
+         parser.print_help()
+         parser.exit()
+
+     args = parser.parse_args()
+     args.bRequireLocation = (not args.bAllowNoLocation)
+     options = IntegrityCheckOptions()
+     ct_utils.args_to_object(args, options)
+     integrity_check_json_db(args.json_file,options)
+
+ if __name__ == '__main__':
+     main()
+
+
+ #%% Interactive driver(s)
+
+ if False:
+
+     #%%
+
+     """
+     python integrity_check_json_db.py ~/data/ena24.json --baseDir ~/data/ENA24 --bAllowNoLocation
+     """
+
+     # Integrity-check .json files for LILA
+     json_files = [os.path.expanduser('~/data/ena24.json')]
+
+     options = IntegrityCheckOptions()
+     options.baseDir = os.path.expanduser('~/data/ENA24')
+     options.bCheckImageSizes = False
+     options.bFindUnusedImages = True
+     options.bRequireLocation = False
+
+     # options.iMaxNumImages = 10
+
+     for json_file in json_files:
+
+         sorted_categories,data,_ = integrity_check_json_db(json_file, options)
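
For readers evaluating this release, here is a minimal sketch (not part of the package) of how the checker in this file would typically be driven from Python. The module path, class, function, option fields, and error_info keys are taken from the code above; the dataset and image-folder paths are hypothetical.

from megadetector.data_management.databases.integrity_check_json_db import (
    IntegrityCheckOptions, integrity_check_json_db)

# Hypothetical paths; point these at a real COCO Camera Traps .json file and image folder
options = IntegrityCheckOptions()
options.baseDir = '/data/my-camera-trap-images'
options.bCheckImageExistence = True  # verify that each image listed in the .json exists on disk
options.bFindUnusedImages = True     # also report files under baseDir that the .json never references
options.bRequireLocation = False     # don't error on images that lack a 'location' field

sorted_categories, data, error_info = integrity_check_json_db(
    '/data/my-camera-trap-dataset.json', options)

print('{} validation errors, {} unused files'.format(
    len(error_info['validation_errors']), len(error_info['unused_files'])))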