megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
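
Of the 176 files listed above, one is shown in detail below: megadetector/data_management/databases/integrity_check_json_db.py (item 37, +202 -188). Three kinds of changes account for nearly all of it: camelCase names converted to snake_case (jsonFile → json_file, nBoxes → n_boxes, and so on), whitespace-only cleanup on otherwise-unchanged lines (these appear as paired -/+ lines with identical text), and two behavioral changes, a new requireInfo option and explicit worker-pool cleanup, each illustrated with a short sketch after the relevant hunk.
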
```diff
@@ -7,7 +7,7 @@ Does some integrity-checking and computes basic statistics on a COCO Camera Trap
 * Verifies that required fields are present and have the right types
 * Verifies that annotations refer to valid images
 * Verifies that annotations refer to valid categories
-* Verifies that image, category, and annotation IDs are unique
+* Verifies that image, category, and annotation IDs are unique
 * Optionally checks file existence
 * Finds un-annotated images
 * Finds unused categories
```

```diff
@@ -37,36 +37,39 @@ class IntegrityCheckOptions:
     """
     Options for integrity_check_json_db()
     """
-
+
     def __init__(self):
-
+
         #: Image path; the filenames in the .json file should be relative to this folder
         self.baseDir = ''
-
+
         #: Should we validate the image sizes?
         self.bCheckImageSizes = False
-
+
         #: Should we check that all the images in the .json file exist on disk?
         self.bCheckImageExistence = False
-
+
         #: Should we search [baseDir] for images that are not used in the .json file?
         self.bFindUnusedImages = False
-
+
         #: Should we require that all images in the .json file have a 'location' field?
         self.bRequireLocation = True
-
+
         #: For debugging, limit the number of images we'll process
         self.iMaxNumImages = -1
-
+
         #: Number of threads to use for parallelization, set to <= 1 to disable parallelization
         self.nThreads = 10
-
+
         #: Enable additional debug output
         self.verbose = True
-
+
         #: Allow integer-valued image and annotation IDs (COCO uses this, CCT files use strings)
         self.allowIntIDs = False
-
+
+        #: If True, error if the 'info' field is not present
+        self.requireInfo = False
+
 # This is used in a medium-hacky way to share modified options across threads
 default_options = IntegrityCheckOptions()
 
```

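This hunk's one substantive change is the new requireInfo option; as a later hunk shows, the 'info' check that 5.0.27 performed unconditionally becomes opt-in. A minimal usage sketch; the import path follows the file's location in this package, and 'my_dataset.json' is a placeholder:

```python
from megadetector.data_management.databases.integrity_check_json_db import (
    IntegrityCheckOptions, integrity_check_json_db)

options = IntegrityCheckOptions()
options.requireInfo = True        # new in 5.0.29: fail if there is no 'info' struct
options.bRequireLocation = False  # don't require a 'location' field on every image

# 'my_dataset.json' is a placeholder path, not a file from this package
sorted_categories, data, error_info = integrity_check_json_db('my_dataset.json', options)
```
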
```diff
@@ -76,7 +79,7 @@ default_options = IntegrityCheckOptions()
 def _check_image_existence_and_size(image,options=None):
     """
     Validate the image represented in the CCT image dict [image], which should have fields:
-
+
     * file_name
     * width
     * height
```

```diff
@@ -84,233 +87,239 @@ def _check_image_existence_and_size(image,options=None):
     Args:
         image (dict): image to validate
         options (IntegrityCheckOptions): parameters impacting validation
-
+
     Returns:
         str: None if this image passes validation, otherwise an error string
     """
 
-    if options is None:
+    if options is None:
         options = default_options
-
+
     assert options.bCheckImageExistence
-
-    filePath = os.path.join(options.baseDir,image['file_name'])
-    if not os.path.isfile(filePath):
-        s = 'Image path {} does not exist'.format(filePath)
+
+    file_path = os.path.join(options.baseDir,image['file_name'])
+    if not os.path.isfile(file_path):
+        s = 'Image path {} does not exist'.format(file_path)
         return s
-
+
     if options.bCheckImageSizes:
         if not ('height' in image and 'width' in image):
-            s = 'Missing image size in {}'.format(filePath)
+            s = 'Missing image size in {}'.format(file_path)
             return s
 
-        # width, height = Image.open(filePath).size
-        pil_im = open_image(filePath)
+        # width, height = Image.open(file_path).size
+        pil_im = open_image(file_path)
         width,height = pil_im.size
         if (not (width == image['width'] and height == image['height'])):
             s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
-                image['id'], filePath, image['width'], image['height'], width, height)
+                image['id'], file_path, image['width'], image['height'], width, height)
             return s
-
+
     return None
 
-
-def integrity_check_json_db(jsonFile, options=None):
+
+def integrity_check_json_db(json_file, options=None):
     """
     Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file; see
     module header comment for a list of the validation steps.
-
+
     Args:
-        jsonFile (str): filename to validate, or an already-loaded dict
-
+        json_file (str): filename to validate, or an already-loaded dict
+
     Returns:
         tuple: tuple containing:
-            - sorted_categories (dict): list of categories used in [jsonFile], sorted by frequency
-            - data (dict): the data loaded from [jsonFile]
+            - sorted_categories (dict): list of categories used in [json_file], sorted by frequency
+            - data (dict): the data loaded from [json_file]
             - error_info (dict): specific validation errors
     """
-
-    if options is None:
+
+    if options is None:
         options = IntegrityCheckOptions()
-
-    if options.bCheckImageSizes:
+
+    if options.bCheckImageSizes:
         options.bCheckImageExistence = True
-
+
     if options.verbose:
         print(options.__dict__)
-
+
     if options.baseDir is None:
         options.baseDir = ''
-
+
     base_dir = options.baseDir
-
-
+
+
     ##%% Read .json file if necessary, integrity-check fields
-
-    if isinstance(jsonFile,dict):
-
-        data = jsonFile
-
-    elif isinstance(jsonFile,str):
-
-        assert os.path.isfile(jsonFile), '.json file {} does not exist'.format(jsonFile)
-
+
+    if isinstance(json_file,dict):
+
+        data = json_file
+
+    elif isinstance(json_file,str):
+
+        assert os.path.isfile(json_file), '.json file {} does not exist'.format(json_file)
+
         if options.verbose:
             print('Reading .json {} with base dir [{}]...'.format(
-                jsonFile,base_dir))
-
-        with open(jsonFile,'r') as f:
-            data = json.load(f)
-
+                json_file,base_dir))
+
+        with open(json_file,'r') as f:
+            data = json.load(f)
+
     else:
-
-        raise ValueError('Illegal value for jsonFile')
-
+
+        raise ValueError('Illegal value for json_file')
+
     images = data['images']
     annotations = data['annotations']
     categories = data['categories']
-    # info = data['info']
-    assert 'info' in data, 'No info struct in database'
 
-    if len(base_dir) > 0:
-        assert os.path.isdir(base_dir), 'Base directory {} does not exist'.format(base_dir)
-
-
+    if options.requireInfo:
+        assert 'info' in data, 'No info struct in database'
+
+    if len(base_dir) > 0:
+        assert os.path.isdir(base_dir), \
+            'Base directory {} does not exist'.format(base_dir)
+
+
     ##%% Build dictionaries, checking ID uniqueness and internal validity as we go
-
+
     image_id_to_image = {}
     ann_id_to_ann = {}
     category_id_to_category = {}
     category_name_to_category = {}
     image_location_set = set()
-
+
     if options.verbose:
         print('Checking categories...')
-
+
     for cat in tqdm(categories):
-
+
         # Confirm that required fields are present
         assert 'name' in cat
         assert 'id' in cat
-
-        assert isinstance(cat['id'],int), 'Illegal category ID type: [{}]'.format(str(cat['id']))
-        assert isinstance(cat['name'],str), 'Illegal category name type [{}]'.format(str(cat['name']))
-
+
+        assert isinstance(cat['id'],int), \
+            'Illegal category ID type: [{}]'.format(str(cat['id']))
+        assert isinstance(cat['name'],str), \
+            'Illegal category name type [{}]'.format(str(cat['name']))
+
         category_id = cat['id']
         category_name = cat['name']
-
+
         # Confirm ID uniqueness
-        assert category_id not in category_id_to_category, 'Category ID {} is used more than once'.format(category_id)
+        assert category_id not in category_id_to_category, \
+            'Category ID {} is used more than once'.format(category_id)
         category_id_to_category[category_id] = cat
         cat['_count'] = 0
-
-        assert category_name not in category_name_to_category, 'Category name {} is used more than once'.format(category_name)
-        category_name_to_category[category_name] = cat
-
+
+        assert category_name not in category_name_to_category, \
+            'Category name {} is used more than once'.format(category_name)
+        category_name_to_category[category_name] = cat
+
     # ...for each category
-
+
     if options.verbose:
         print('\nChecking images...')
-
+
     if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:
-
+
         if options.verbose:
             print('Trimming image list to {}'.format(options.iMaxNumImages))
         images = images[0:options.iMaxNumImages]
-
+
     image_paths_in_json = set()
-
+
     sequences = set()
-
+
     # image = images[0]
     for image in tqdm(images):
-
+
         image['_count'] = 0
-
+
         # Confirm that required fields are present
         assert 'file_name' in image
         assert 'id' in image
 
         image['file_name'] = image['file_name'].replace('\\','/')
-
+
         image_paths_in_json.add(image['file_name'])
-
+
         assert isinstance(image['file_name'],str), 'Illegal image filename type'
-
+
         if options.allowIntIDs:
             assert isinstance(image['id'],str) or isinstance(image['id'],int), \
                 'Illegal image ID type'
         else:
             assert isinstance(image['id'],str), 'Illegal image ID type'
-
-        image_id = image['id']
-
+
+        image_id = image['id']
+
         # Confirm ID uniqueness
         assert image_id not in image_id_to_image, 'Duplicate image ID {}'.format(image_id)
-
+
         image_id_to_image[image_id] = image
-
+
         if 'height' in image:
             assert 'width' in image, 'Image with height but no width: {}'.format(image['id'])
-
+
         if 'width' in image:
             assert 'height' in image, 'Image with width but no height: {}'.format(image['id'])
 
         if options.bRequireLocation:
             assert 'location' in image, 'No location available for: {}'.format(image['id'])
-
+
         if 'location' in image:
             # We previously supported ints here; this should be strings now
             # assert isinstance(image['location'], str) or isinstance(image['location'], int), \
             #     'Illegal image location type'
             assert isinstance(image['location'], str)
             image_location_set.add(image['location'])
-
+
         if 'seq_id' in image:
             sequences.add(image['seq_id'])
-
+
         assert not ('sequence_id' in image or 'sequence' in image), 'Illegal sequence identifier'
-
+
     unused_files = []
-
+
     image_paths_relative = None
-
+
     # Are we checking for unused images?
-    if (len(base_dir) > 0) and options.bFindUnusedImages:
-
+    if (len(base_dir) > 0) and options.bFindUnusedImages:
+
         if options.verbose:
             print('\nEnumerating images...')
-
+
         image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)
-
+
         for fn_relative in image_paths_relative:
             if fn_relative not in image_paths_in_json:
                 unused_files.append(fn_relative)
-
+
     # List of (filename,error_string) tuples
     validation_errors = []
-
+
     # If we're checking image existence but not image size, we don't need to read the images
     if options.bCheckImageExistence and not options.bCheckImageSizes:
-
+
         if image_paths_relative is None:
             image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)
-
+
         image_paths_relative_set = set(image_paths_relative)
-
+
         for im in images:
-            if im['file_name'] not in image_paths_relative_set:
+            if im['file_name'] not in image_paths_relative_set:
                 validation_errors.append((im['file_name'],'not found in relative path list'))
-
+
     # If we're checking image size, we need to read the images
     if options.bCheckImageSizes:
-
+
         if len(base_dir) == 0:
             print('Warning: checking image sizes without a base directory, assuming "."')
-
+
         if options.verbose:
             print('Checking image existence and/or image sizes...')
-
+
         if options.nThreads is not None and options.nThreads > 1:
             if options.verbose:
                 print('Starting a pool of {} workers'.format(options.nThreads))
```

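Per the docstring above, json_file accepts either a filename or an already-loaded dict, so a database that is already in memory can be validated without another round-trip through the file system. A sketch reusing the options object from the previous example ('my_dataset.json' is still a placeholder):

```python
import json

# Load once, validate, then keep using 'data' for downstream processing
with open('my_dataset.json', 'r') as f:
    data = json.load(f)

sorted_categories, data, error_info = integrity_check_json_db(data, options)
print('{} validation errors, {} unused files'.format(
    len(error_info['validation_errors']), len(error_info['unused_files'])))
```
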
```diff
@@ -319,31 +328,36 @@ def integrity_check_json_db(jsonFile, options=None):
             default_options.baseDir = options.baseDir
             default_options.bCheckImageSizes = options.bCheckImageSizes
             default_options.bCheckImageExistence = options.bCheckImageExistence
-            results = tqdm(pool.imap(_check_image_existence_and_size, images), total=len(images))
+            try:
+                results = tqdm(pool.imap(_check_image_existence_and_size, images), total=len(images))
+            finally:
+                pool.close()
+                pool.join()
+                print("Pool closed and joined for image size checks")
         else:
             results = []
-            for im in tqdm(images):
+            for im in tqdm(images):
                 results.append(_check_image_existence_and_size(im,options))
-
+
         for i_image,result in enumerate(results):
             if result is not None:
                 validation_errors.append((images[i_image]['file_name'],result))
-
+
     # ...for each image
-
+
     if options.verbose:
         print('{} validation errors (of {})'.format(len(validation_errors),len(images)))
         print('Checking annotations...')
-
-    nBoxes = 0
-
+
+    n_boxes = 0
+
     for ann in tqdm(annotations):
-
+
         # Confirm that required fields are present
         assert 'image_id' in ann
         assert 'id' in ann
         assert 'category_id' in ann
-
+
         if options.allowIntIDs:
             assert isinstance(ann['id'],str) or isinstance(ann['id'],int), \
                 'Illegal annotation ID type'
```

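The only non-cosmetic change in this hunk wraps the pool.imap() call in try/finally so the worker pool is closed and joined even if iteration fails. Here is the general pattern as a standalone sketch; the pool type and the worker function are illustrative assumptions, not code from this file:

```python
from multiprocessing.pool import ThreadPool

def check_item(item):
    # Placeholder worker, standing in for _check_image_existence_and_size();
    # returns None on success, an error string on failure
    return None

items = list(range(100))
pool = ThreadPool(10)  # assumed pool type; nThreads defaults to 10 in this file
try:
    # Consume the lazy imap() iterator while the pool is still alive
    results = list(pool.imap(check_item, items))
finally:
    # Runs whether or not iteration raised: close() stops new task submission,
    # join() waits for outstanding workers to finish
    pool.close()
    pool.join()
```
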
```diff
@@ -352,149 +366,149 @@ def integrity_check_json_db(jsonFile, options=None):
         else:
             assert isinstance(ann['id'],str), 'Illegal annotation ID type'
         assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'
-
+
         assert isinstance(ann['category_id'],int), 'Illegal annotation category ID type'
-
+
         if 'bbox' in ann:
-            nBoxes += 1
-
-        annId = ann['id']
-
+            n_boxes += 1
+
+        ann_id = ann['id']
+
         # Confirm ID uniqueness
-        assert annId not in ann_id_to_ann
-        ann_id_to_ann[annId] = ann
-
+        assert ann_id not in ann_id_to_ann
+        ann_id_to_ann[ann_id] = ann
+
         # Confirm validity
         assert ann['category_id'] in category_id_to_category, \
             'Category {} not found in category list'.format(ann['category_id'])
         assert ann['image_id'] in image_id_to_image, \
             'Image ID {} referred to by annotation {}, not available'.format(
                 ann['image_id'],ann['id'])
-
+
         image_id_to_image[ann['image_id']]['_count'] += 1
-        category_id_to_category[ann['category_id']]['_count'] +=1
-
+        category_id_to_category[ann['category_id']]['_count'] +=1
+
     # ...for each annotation
-
+
     sorted_categories = sorted(categories, key=itemgetter('_count'), reverse=True)
-
-
+
+
     ##%% Print statistics
-
+
     if options.verbose:
-
+
         # Find un-annotated images and multi-annotation images
-        nUnannotated = 0
-        nMultiAnnotated = 0
-
+        n_unannotated = 0
+        n_multi_annotated = 0
+
         for image in images:
             if image['_count'] == 0:
-                nUnannotated += 1
+                n_unannotated += 1
             elif image['_count'] > 1:
-                nMultiAnnotated += 1
-
+                n_multi_annotated += 1
+
         print('\nFound {} unannotated images, {} images with multiple annotations'.format(
-            nUnannotated,nMultiAnnotated))
-
+            n_unannotated,n_multi_annotated))
+
         if (len(base_dir) > 0) and options.bFindUnusedImages:
             print('Found {} unused image files'.format(len(unused_files)))
-
+
         n_unused_categories = 0
-
+
         # Find unused categories
         for cat in categories:
             if cat['_count'] == 0:
                 print('Unused category: {}'.format(cat['name']))
                 n_unused_categories += 1
-
+
         print('Found {} unused categories'.format(n_unused_categories))
-
-        sequenceString = 'no sequence info'
+
+        sequence_string = 'no sequence info'
         if len(sequences) > 0:
-            sequenceString = '{} sequences'.format(len(sequences))
-
+            sequence_string = '{} sequences'.format(len(sequences))
+
         print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
-            len(images),len(annotations),nBoxes,len(categories),sequenceString))
-
+            len(images),len(annotations),n_boxes,len(categories),sequence_string))
+
         if len(image_location_set) > 0:
             print('DB contains images from {} locations\n'.format(len(image_location_set)))
-
+
         print('Categories and annotation (not image) counts:\n')
-
+
         for cat in sorted_categories:
             print('{:6} {}'.format(cat['_count'],cat['name']))
-
+
         print('')
-
+
     error_info = {}
     error_info['unused_files'] = unused_files
     error_info['validation_errors'] = validation_errors
-
+
     return sorted_categories, data, error_info
 
 # ...def integrity_check_json_db()
-
+
 
 #%% Command-line driver
-
-def main():
-
+
+def main(): # noqa
+
     parser = argparse.ArgumentParser()
-    parser.add_argument('jsonFile',type=str,
+    parser.add_argument('json_file',type=str,
                         help='COCO-formatted .json file to validate')
-    parser.add_argument('--bCheckImageSizes', action='store_true',
+    parser.add_argument('--bCheckImageSizes', action='store_true',
                         help='Validate image size, requires baseDir to be specified. ' + \
                              'Implies existence checking.')
-    parser.add_argument('--bCheckImageExistence', action='store_true',
+    parser.add_argument('--bCheckImageExistence', action='store_true',
                         help='Validate image existence, requires baseDir to be specified')
-    parser.add_argument('--bFindUnusedImages', action='store_true',
+    parser.add_argument('--bFindUnusedImages', action='store_true',
                         help='Check for images in baseDir that aren\'t in the database, ' + \
                              'requires baseDir to be specified')
-    parser.add_argument('--baseDir', action='store', type=str, default='',
+    parser.add_argument('--baseDir', action='store', type=str, default='',
                         help='Base directory for images')
     parser.add_argument('--bAllowNoLocation', action='store_true',
                         help='Disable errors when no location is specified for an image')
-    parser.add_argument('--iMaxNumImages', action='store', type=int, default=-1,
+    parser.add_argument('--iMaxNumImages', action='store', type=int, default=-1,
                         help='Cap on total number of images to check')
-    parser.add_argument('--nThreads', action='store', type=int, default=10,
+    parser.add_argument('--nThreads', action='store', type=int, default=10,
                         help='Number of threads (only relevant when verifying image ' + \
                              'sizes and/or existence)')
-
+
     if len(sys.argv[1:])==0:
         parser.print_help()
         parser.exit()
-
+
     args = parser.parse_args()
     args.bRequireLocation = (not args.bAllowNoLocation)
     options = IntegrityCheckOptions()
     ct_utils.args_to_object(args, options)
-    integrity_check_json_db(args.jsonFile,options)
+    integrity_check_json_db(args.json_file,options)
 
-if __name__ == '__main__':
+if __name__ == '__main__':
     main()
 
 
 #%% Interactive driver(s)
 
 if False:
-
+
     #%%
 
-    """
+    """
     python integrity_check_json_db.py ~/data/ena24.json --baseDir ~/data/ENA24 --bAllowNoLocation
     """
-
+
     # Integrity-check .json files for LILA
     json_files = [os.path.expanduser('~/data/ena24.json')]
-
+
     options = IntegrityCheckOptions()
     options.baseDir = os.path.expanduser('~/data/ENA24')
     options.bCheckImageSizes = False
     options.bFindUnusedImages = True
     options.bRequireLocation = False
-
-    # options.iMaxNumImages = 10
-
+
+    # options.iMaxNumImages = 10
+
     for json_file in json_files:
-
+
         sorted_categories,data,_ = integrity_check_json_db(json_file, options)
```

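One practical consequence of the jsonFile → json_file rename: callers that passed the first argument by keyword need updating, while positional calls and the command line are unaffected. A sketch, again with a placeholder path:

```python
# 5.0.27 and earlier:
#   integrity_check_json_db(jsonFile='my_dataset.json', options=options)

# 5.0.29:
sorted_categories, data, error_info = integrity_check_json_db(
    json_file='my_dataset.json', options=options)
```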