megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (197) hide show
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,7 @@ Does some integrity-checking and computes basic statistics on a COCO Camera Trap
7
7
  * Verifies that required fields are present and have the right types
8
8
  * Verifies that annotations refer to valid images
9
9
  * Verifies that annotations refer to valid categories
10
- * Verifies that image, category, and annotation IDs are unique
10
+ * Verifies that image, category, and annotation IDs are unique
11
11
  * Optionally checks file existence
12
12
  * Finds un-annotated images
13
13
  * Finds unused categories
@@ -22,7 +22,8 @@ import json
22
22
  import os
23
23
  import sys
24
24
 
25
- from multiprocessing.pool import ThreadPool
25
+ from functools import partial
26
+ from multiprocessing.pool import Pool, ThreadPool
26
27
  from operator import itemgetter
27
28
  from tqdm import tqdm
28
29
 
@@ -37,38 +38,41 @@ class IntegrityCheckOptions:
37
38
  """
38
39
  Options for integrity_check_json_db()
39
40
  """
40
-
41
+
41
42
  def __init__(self):
42
-
43
+
43
44
  #: Image path; the filenames in the .json file should be relative to this folder
44
45
  self.baseDir = ''
45
-
46
+
46
47
  #: Should we validate the image sizes?
47
48
  self.bCheckImageSizes = False
48
-
49
+
49
50
  #: Should we check that all the images in the .json file exist on disk?
50
51
  self.bCheckImageExistence = False
51
-
52
+
52
53
  #: Should we search [baseDir] for images that are not used in the .json file?
53
54
  self.bFindUnusedImages = False
54
-
55
+
55
56
  #: Should we require that all images in the .json file have a 'location' field?
56
57
  self.bRequireLocation = True
57
-
58
+
58
59
  #: For debugging, limit the number of images we'll process
59
60
  self.iMaxNumImages = -1
60
-
61
+
61
62
  #: Number of threads to use for parallelization, set to <= 1 to disable parallelization
62
63
  self.nThreads = 10
63
-
64
+
65
+ #: Whether to use threads (rather than processes for parallelization)
66
+ self.parallelizeWithThreads = True
67
+
64
68
  #: Enable additional debug output
65
69
  self.verbose = True
66
-
70
+
67
71
  #: Allow integer-valued image and annotation IDs (COCO uses this, CCT files use strings)
68
72
  self.allowIntIDs = False
69
-
70
- # This is used in a medium-hacky way to share modified options across threads
71
- default_options = IntegrityCheckOptions()
73
+
74
+ #: If True, error if the 'info' field is not present
75
+ self.requireInfo = False
72
76
 
73
77
 
74
78
  #%% Functions
@@ -76,7 +80,7 @@ default_options = IntegrityCheckOptions()
76
80
  def _check_image_existence_and_size(image,options=None):
77
81
  """
78
82
  Validate the image represented in the CCT image dict [image], which should have fields:
79
-
83
+
80
84
  * file_name
81
85
  * width
82
86
  * height
@@ -84,266 +88,290 @@ def _check_image_existence_and_size(image,options=None):
84
88
  Args:
85
89
  image (dict): image to validate
86
90
  options (IntegrityCheckOptions): parameters impacting validation
87
-
91
+
88
92
  Returns:
89
93
  str: None if this image passes validation, otherwise an error string
90
94
  """
91
95
 
92
- if options is None:
93
- options = default_options
94
-
96
+ if options is None:
97
+ options = IntegrityCheckOptions()
98
+
95
99
  assert options.bCheckImageExistence
96
-
97
- filePath = os.path.join(options.baseDir,image['file_name'])
98
- if not os.path.isfile(filePath):
99
- s = 'Image path {} does not exist'.format(filePath)
100
+
101
+ file_path = os.path.join(options.baseDir,image['file_name'])
102
+ if not os.path.isfile(file_path):
103
+ s = 'Image path {} does not exist'.format(file_path)
100
104
  return s
101
-
105
+
102
106
  if options.bCheckImageSizes:
103
107
  if not ('height' in image and 'width' in image):
104
- s = 'Missing image size in {}'.format(filePath)
108
+ s = 'Missing image size in {}'.format(file_path)
109
+ return s
110
+
111
+ # width, height = Image.open(file_path).size
112
+ try:
113
+ pil_im = open_image(file_path)
114
+ except Exception as e:
115
+ s = 'Error opening {}: {}'.format(file_path,str(e))
105
116
  return s
106
117
 
107
- # width, height = Image.open(filePath).size
108
- pil_im = open_image(filePath)
109
118
  width,height = pil_im.size
110
119
  if (not (width == image['width'] and height == image['height'])):
111
120
  s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
112
- image['id'], filePath, image['width'], image['height'], width, height)
121
+ image['id'], file_path, image['width'], image['height'], width, height)
113
122
  return s
114
-
123
+
115
124
  return None
116
125
 
117
-
118
- def integrity_check_json_db(jsonFile, options=None):
126
+
127
+ def integrity_check_json_db(json_file, options=None):
119
128
  """
120
129
  Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file; see
121
130
  module header comment for a list of the validation steps.
122
-
131
+
123
132
  Args:
124
- jsonFile (str): filename to validate, or an already-loaded dict
125
-
133
+ json_file (str): filename to validate, or an already-loaded dict
134
+ options (IntegrityCheckOptions, optional): see IntegrityCheckOptions
135
+
126
136
  Returns:
127
137
  tuple: tuple containing:
128
- - sorted_categories (dict): list of categories used in [jsonFile], sorted by frequency
129
- - data (dict): the data loaded from [jsonFile]
138
+ - sorted_categories (dict): list of categories used in [json_file], sorted by frequency
139
+ - data (dict): the data loaded from [json_file]
130
140
  - error_info (dict): specific validation errors
131
141
  """
132
-
133
- if options is None:
142
+
143
+ if options is None:
134
144
  options = IntegrityCheckOptions()
135
-
136
- if options.bCheckImageSizes:
145
+
146
+ if options.bCheckImageSizes:
137
147
  options.bCheckImageExistence = True
138
-
148
+
139
149
  if options.verbose:
140
150
  print(options.__dict__)
141
-
151
+
142
152
  if options.baseDir is None:
143
153
  options.baseDir = ''
144
-
154
+
145
155
  base_dir = options.baseDir
146
-
147
-
156
+
157
+
148
158
  ##%% Read .json file if necessary, integrity-check fields
149
-
150
- if isinstance(jsonFile,dict):
151
-
152
- data = jsonFile
153
-
154
- elif isinstance(jsonFile,str):
155
-
156
- assert os.path.isfile(jsonFile), '.json file {} does not exist'.format(jsonFile)
157
-
159
+
160
+ if isinstance(json_file,dict):
161
+
162
+ data = json_file
163
+
164
+ elif isinstance(json_file,str):
165
+
166
+ assert os.path.isfile(json_file), '.json file {} does not exist'.format(json_file)
167
+
158
168
  if options.verbose:
159
169
  print('Reading .json {} with base dir [{}]...'.format(
160
- jsonFile,base_dir))
161
-
162
- with open(jsonFile,'r') as f:
163
- data = json.load(f)
164
-
170
+ json_file,base_dir))
171
+
172
+ with open(json_file,'r') as f:
173
+ data = json.load(f)
174
+
165
175
  else:
166
-
167
- raise ValueError('Illegal value for jsonFile')
168
-
176
+
177
+ raise ValueError('Illegal value for json_file')
178
+
169
179
  images = data['images']
170
180
  annotations = data['annotations']
171
181
  categories = data['categories']
172
- # info = data['info']
173
- assert 'info' in data, 'No info struct in database'
174
-
175
- if len(base_dir) > 0:
176
- assert os.path.isdir(base_dir), 'Base directory {} does not exist'.format(base_dir)
177
-
178
-
182
+
183
+ if options.requireInfo:
184
+ assert 'info' in data, 'No info struct in database'
185
+
186
+ if len(base_dir) > 0:
187
+ assert os.path.isdir(base_dir), \
188
+ 'Base directory {} does not exist'.format(base_dir)
189
+
190
+
179
191
  ##%% Build dictionaries, checking ID uniqueness and internal validity as we go
180
-
192
+
181
193
  image_id_to_image = {}
182
194
  ann_id_to_ann = {}
183
195
  category_id_to_category = {}
184
196
  category_name_to_category = {}
185
197
  image_location_set = set()
186
-
198
+
187
199
  if options.verbose:
188
200
  print('Checking categories...')
189
-
201
+
190
202
  for cat in tqdm(categories):
191
-
203
+
192
204
  # Confirm that required fields are present
193
205
  assert 'name' in cat
194
206
  assert 'id' in cat
195
-
196
- assert isinstance(cat['id'],int), 'Illegal category ID type: [{}]'.format(str(cat['id']))
197
- assert isinstance(cat['name'],str), 'Illegal category name type [{}]'.format(str(cat['name']))
198
-
207
+
208
+ assert isinstance(cat['id'],int), \
209
+ 'Illegal category ID type: [{}]'.format(str(cat['id']))
210
+ assert isinstance(cat['name'],str), \
211
+ 'Illegal category name type [{}]'.format(str(cat['name']))
212
+
199
213
  category_id = cat['id']
200
214
  category_name = cat['name']
201
-
215
+
202
216
  # Confirm ID uniqueness
203
- assert category_id not in category_id_to_category, 'Category ID {} is used more than once'.format(category_id)
217
+ assert category_id not in category_id_to_category, \
218
+ 'Category ID {} is used more than once'.format(category_id)
204
219
  category_id_to_category[category_id] = cat
205
220
  cat['_count'] = 0
206
-
207
- assert category_name not in category_name_to_category, 'Category name {} is used more than once'.format(category_name)
208
- category_name_to_category[category_name] = cat
209
-
221
+
222
+ assert category_name not in category_name_to_category, \
223
+ 'Category name {} is used more than once'.format(category_name)
224
+ category_name_to_category[category_name] = cat
225
+
210
226
  # ...for each category
211
-
227
+
212
228
  if options.verbose:
213
- print('\nChecking images...')
214
-
229
+ print('\nChecking image records...')
230
+
215
231
  if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:
216
-
232
+
217
233
  if options.verbose:
218
234
  print('Trimming image list to {}'.format(options.iMaxNumImages))
219
235
  images = images[0:options.iMaxNumImages]
220
-
236
+
221
237
  image_paths_in_json = set()
222
-
238
+
223
239
  sequences = set()
224
-
240
+
225
241
  # image = images[0]
226
242
  for image in tqdm(images):
227
-
243
+
228
244
  image['_count'] = 0
229
-
245
+
230
246
  # Confirm that required fields are present
231
247
  assert 'file_name' in image
232
248
  assert 'id' in image
233
249
 
234
250
  image['file_name'] = image['file_name'].replace('\\','/')
235
-
251
+
236
252
  image_paths_in_json.add(image['file_name'])
237
-
253
+
238
254
  assert isinstance(image['file_name'],str), 'Illegal image filename type'
239
-
255
+
240
256
  if options.allowIntIDs:
241
257
  assert isinstance(image['id'],str) or isinstance(image['id'],int), \
242
258
  'Illegal image ID type'
243
259
  else:
244
260
  assert isinstance(image['id'],str), 'Illegal image ID type'
245
-
246
- image_id = image['id']
247
-
261
+
262
+ image_id = image['id']
263
+
248
264
  # Confirm ID uniqueness
249
265
  assert image_id not in image_id_to_image, 'Duplicate image ID {}'.format(image_id)
250
-
266
+
251
267
  image_id_to_image[image_id] = image
252
-
268
+
253
269
  if 'height' in image:
254
270
  assert 'width' in image, 'Image with height but no width: {}'.format(image['id'])
255
-
271
+
256
272
  if 'width' in image:
257
273
  assert 'height' in image, 'Image with width but no height: {}'.format(image['id'])
258
274
 
259
275
  if options.bRequireLocation:
260
276
  assert 'location' in image, 'No location available for: {}'.format(image['id'])
261
-
277
+
262
278
  if 'location' in image:
263
279
  # We previously supported ints here; this should be strings now
264
280
  # assert isinstance(image['location'], str) or isinstance(image['location'], int), \
265
281
  # 'Illegal image location type'
266
282
  assert isinstance(image['location'], str)
267
283
  image_location_set.add(image['location'])
268
-
284
+
269
285
  if 'seq_id' in image:
270
286
  sequences.add(image['seq_id'])
271
-
287
+
272
288
  assert not ('sequence_id' in image or 'sequence' in image), 'Illegal sequence identifier'
273
-
289
+
274
290
  unused_files = []
275
-
291
+
276
292
  image_paths_relative = None
277
-
293
+
278
294
  # Are we checking for unused images?
279
- if (len(base_dir) > 0) and options.bFindUnusedImages:
280
-
295
+ if (len(base_dir) > 0) and options.bFindUnusedImages:
296
+
281
297
  if options.verbose:
282
298
  print('\nEnumerating images...')
283
-
299
+
284
300
  image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)
285
-
301
+
286
302
  for fn_relative in image_paths_relative:
287
303
  if fn_relative not in image_paths_in_json:
288
304
  unused_files.append(fn_relative)
289
-
305
+
290
306
  # List of (filename,error_string) tuples
291
307
  validation_errors = []
292
-
308
+
293
309
  # If we're checking image existence but not image size, we don't need to read the images
294
310
  if options.bCheckImageExistence and not options.bCheckImageSizes:
295
-
311
+
296
312
  if image_paths_relative is None:
297
313
  image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)
298
-
314
+
299
315
  image_paths_relative_set = set(image_paths_relative)
300
-
316
+
301
317
  for im in images:
302
- if im['file_name'] not in image_paths_relative_set:
318
+ if im['file_name'] not in image_paths_relative_set:
303
319
  validation_errors.append((im['file_name'],'not found in relative path list'))
304
-
320
+
305
321
  # If we're checking image size, we need to read the images
306
322
  if options.bCheckImageSizes:
307
-
323
+
308
324
  if len(base_dir) == 0:
309
325
  print('Warning: checking image sizes without a base directory, assuming "."')
310
-
326
+
311
327
  if options.verbose:
312
328
  print('Checking image existence and/or image sizes...')
313
-
329
+
314
330
  if options.nThreads is not None and options.nThreads > 1:
331
+
332
+ if options.parallelizeWithThreads:
333
+ worker_string = 'threads'
334
+ else:
335
+ worker_string = 'processes'
336
+
315
337
  if options.verbose:
316
- print('Starting a pool of {} workers'.format(options.nThreads))
317
- pool = ThreadPool(options.nThreads)
318
- # results = pool.imap_unordered(lambda x: fetch_url(x,nImages), indexedUrlList)
319
- default_options.baseDir = options.baseDir
320
- default_options.bCheckImageSizes = options.bCheckImageSizes
321
- default_options.bCheckImageExistence = options.bCheckImageExistence
322
- results = tqdm(pool.imap(_check_image_existence_and_size, images), total=len(images))
338
+ print('Starting a pool of {} {}'.format(options.nThreads,worker_string))
339
+ if options.parallelizeWithThreads:
340
+ pool = ThreadPool(options.nThreads)
341
+ else:
342
+ pool = Pool(options.nThreads)
343
+ try:
344
+ results = list(tqdm(pool.imap(
345
+ partial(_check_image_existence_and_size,options=options), images),
346
+ total=len(images)))
347
+ finally:
348
+ pool.close()
349
+ pool.join()
350
+ print("Pool closed and joined for image size checks")
323
351
  else:
324
352
  results = []
325
- for im in tqdm(images):
353
+ for im in tqdm(images):
326
354
  results.append(_check_image_existence_and_size(im,options))
327
-
355
+
328
356
  for i_image,result in enumerate(results):
329
357
  if result is not None:
330
358
  validation_errors.append((images[i_image]['file_name'],result))
331
-
359
+
332
360
  # ...for each image
333
-
361
+
334
362
  if options.verbose:
335
363
  print('{} validation errors (of {})'.format(len(validation_errors),len(images)))
336
364
  print('Checking annotations...')
337
-
338
- nBoxes = 0
339
-
365
+
366
+ n_boxes = 0
367
+
340
368
  for ann in tqdm(annotations):
341
-
369
+
342
370
  # Confirm that required fields are present
343
371
  assert 'image_id' in ann
344
372
  assert 'id' in ann
345
373
  assert 'category_id' in ann
346
-
374
+
347
375
  if options.allowIntIDs:
348
376
  assert isinstance(ann['id'],str) or isinstance(ann['id'],int), \
349
377
  'Illegal annotation ID type'
@@ -352,149 +380,149 @@ def integrity_check_json_db(jsonFile, options=None):
352
380
  else:
353
381
  assert isinstance(ann['id'],str), 'Illegal annotation ID type'
354
382
  assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'
355
-
383
+
356
384
  assert isinstance(ann['category_id'],int), 'Illegal annotation category ID type'
357
-
385
+
358
386
  if 'bbox' in ann:
359
- nBoxes += 1
360
-
361
- annId = ann['id']
362
-
387
+ n_boxes += 1
388
+
389
+ ann_id = ann['id']
390
+
363
391
  # Confirm ID uniqueness
364
- assert annId not in ann_id_to_ann
365
- ann_id_to_ann[annId] = ann
366
-
392
+ assert ann_id not in ann_id_to_ann
393
+ ann_id_to_ann[ann_id] = ann
394
+
367
395
  # Confirm validity
368
396
  assert ann['category_id'] in category_id_to_category, \
369
397
  'Category {} not found in category list'.format(ann['category_id'])
370
398
  assert ann['image_id'] in image_id_to_image, \
371
399
  'Image ID {} referred to by annotation {}, not available'.format(
372
400
  ann['image_id'],ann['id'])
373
-
401
+
374
402
  image_id_to_image[ann['image_id']]['_count'] += 1
375
- category_id_to_category[ann['category_id']]['_count'] +=1
376
-
403
+ category_id_to_category[ann['category_id']]['_count'] +=1
404
+
377
405
  # ...for each annotation
378
-
406
+
379
407
  sorted_categories = sorted(categories, key=itemgetter('_count'), reverse=True)
380
-
381
-
408
+
409
+
382
410
  ##%% Print statistics
383
-
411
+
384
412
  if options.verbose:
385
-
413
+
386
414
  # Find un-annotated images and multi-annotation images
387
- nUnannotated = 0
388
- nMultiAnnotated = 0
389
-
415
+ n_unannotated = 0
416
+ n_multi_annotated = 0
417
+
390
418
  for image in images:
391
419
  if image['_count'] == 0:
392
- nUnannotated += 1
420
+ n_unannotated += 1
393
421
  elif image['_count'] > 1:
394
- nMultiAnnotated += 1
395
-
422
+ n_multi_annotated += 1
423
+
396
424
  print('\nFound {} unannotated images, {} images with multiple annotations'.format(
397
- nUnannotated,nMultiAnnotated))
398
-
425
+ n_unannotated,n_multi_annotated))
426
+
399
427
  if (len(base_dir) > 0) and options.bFindUnusedImages:
400
428
  print('Found {} unused image files'.format(len(unused_files)))
401
-
429
+
402
430
  n_unused_categories = 0
403
-
431
+
404
432
  # Find unused categories
405
433
  for cat in categories:
406
434
  if cat['_count'] == 0:
407
435
  print('Unused category: {}'.format(cat['name']))
408
436
  n_unused_categories += 1
409
-
437
+
410
438
  print('Found {} unused categories'.format(n_unused_categories))
411
-
412
- sequenceString = 'no sequence info'
439
+
440
+ sequence_string = 'no sequence info'
413
441
  if len(sequences) > 0:
414
- sequenceString = '{} sequences'.format(len(sequences))
415
-
442
+ sequence_string = '{} sequences'.format(len(sequences))
443
+
416
444
  print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
417
- len(images),len(annotations),nBoxes,len(categories),sequenceString))
418
-
445
+ len(images),len(annotations),n_boxes,len(categories),sequence_string))
446
+
419
447
  if len(image_location_set) > 0:
420
448
  print('DB contains images from {} locations\n'.format(len(image_location_set)))
421
-
449
+
422
450
  print('Categories and annotation (not image) counts:\n')
423
-
451
+
424
452
  for cat in sorted_categories:
425
453
  print('{:6} {}'.format(cat['_count'],cat['name']))
426
-
454
+
427
455
  print('')
428
-
456
+
429
457
  error_info = {}
430
458
  error_info['unused_files'] = unused_files
431
459
  error_info['validation_errors'] = validation_errors
432
-
460
+
433
461
  return sorted_categories, data, error_info
434
462
 
435
463
  # ...def integrity_check_json_db()
436
-
464
+
437
465
 
438
466
  #%% Command-line driver
def main():
    """
    Command-line driver for integrity_check_json_db(): parses arguments into
    an IntegrityCheckOptions object and runs the integrity check on the
    specified COCO-formatted .json file.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument('json_file',type=str,
                        help='COCO-formatted .json file to validate')
    parser.add_argument('--bCheckImageSizes', action='store_true',
                        help='Validate image size, requires baseDir to be specified. ' + \
                        'Implies existence checking.')
    parser.add_argument('--bCheckImageExistence', action='store_true',
                        help='Validate image existence, requires baseDir to be specified')
    parser.add_argument('--bFindUnusedImages', action='store_true',
                        help='Check for images in baseDir that aren\'t in the database, ' + \
                        'requires baseDir to be specified')
    parser.add_argument('--baseDir', action='store', type=str, default='',
                        help='Base directory for images')
    parser.add_argument('--bAllowNoLocation', action='store_true',
                        help='Disable errors when no location is specified for an image')
    parser.add_argument('--iMaxNumImages', action='store', type=int, default=-1,
                        help='Cap on total number of images to check')
    parser.add_argument('--nThreads', action='store', type=int, default=10,
                        help='Number of threads (only relevant when verifying image ' + \
                        'sizes and/or existence)')

    # With no arguments, print help rather than an argparse error
    if len(sys.argv[1:])==0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # The library option is "require location"; the CLI exposes the inverse
    # ("allow no location"), so flip it here before copying args over.
    args.bRequireLocation = (not args.bAllowNoLocation)
    options = IntegrityCheckOptions()
    ct_utils.args_to_object(args, options)
    integrity_check_json_db(args.json_file,options)
 
# Run the command-line driver only when executed as a script (not on import)
if __name__ == '__main__':
    main()
475
503
 
476
504
 
#%% Interactive driver(s)

# This block never executes at import or script time; it holds snippets
# intended to be run interactively (e.g. cell-by-cell in an IDE/REPL).
if False:

    #%%

    """
    python integrity_check_json_db.py ~/data/ena24.json --baseDir ~/data/ENA24 --bAllowNoLocation
    """

    # Integrity-check .json files for LILA; extend this list to check more files
    json_files = [os.path.expanduser('~/data/ena24.json')]

    # Check for unused images but skip size validation; these files don't
    # carry location info, so don't require it.
    options = IntegrityCheckOptions()
    options.baseDir = os.path.expanduser('~/data/ENA24')
    options.bCheckImageSizes = False
    options.bFindUnusedImages = True
    options.bRequireLocation = False

    # options.iMaxNumImages = 10

    for json_file in json_files:

        sorted_categories,data,_ = integrity_check_json_db(json_file, options)