megadetector-5.0.7-py3-none-any.whl → megadetector-5.0.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector may be problematic.

Files changed (191)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0

data_management/databases/integrity_check_json_db.py

@@ -1,19 +1,19 @@
- ########
- #
- # integrity_check_json_db.py
- #
- # Does some integrity-checking and computes basic statistics on a db, specifically:
- #
- # * Verifies that required fields are present and have the right types
- # * Verifies that annotations refer to valid images
- # * Verifies that annotations refer to valid categories
- # * Verifies that image, category, and annotation IDs are unique
- # * Optionally checks file existence
- # * Finds un-annotated images
- # * Finds unused categories
- # * Prints a list of categories sorted by count
- #
- ########
+ """
+
+ integrity_check_json_db.py
+
+ Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file, specifically:
+
+ * Verifies that required fields are present and have the right types
+ * Verifies that annotations refer to valid images
+ * Verifies that annotations refer to valid categories
+ * Verifies that image, category, and annotation IDs are unique
+ * Optionally checks file existence
+ * Finds un-annotated images
+ * Finds unused categories
+ * Prints a list of categories sorted by count
+
+ """

  #%% Constants and environment

@@ -24,31 +24,65 @@ import sys

  from multiprocessing.pool import ThreadPool
  from operator import itemgetter
- from PIL import Image
  from tqdm import tqdm

+ from md_visualization.visualization_utils import open_image
  from md_utils import ct_utils


  #%% Classes and environment

  class IntegrityCheckOptions:
+ """
+ Options for integrity_check_json_db()
+ """

+ #: Image path; the filenames in the .json file should be relative to this folder
  baseDir = ''
+
+ #: Should we validate the image sizes?
  bCheckImageSizes = False
+
+ #: Should we check that all the images in the .json file exist on disk?
  bCheckImageExistence = False
+
+ #: Should we search [baseDir] for images that are not used in the .json file?
  bFindUnusedImages = False
+
+ #: Should we require that all images in the .json file have a 'location' field?
  bRequireLocation = True
+
+ #: For debugging, limit the number of images we'll process
  iMaxNumImages = -1
+
+ #: Number of threads to use for parallelization, set to <= 1 to disable parallelization
  nThreads = 10

+ #: Enable additional debug output
+ verbose = True
+
+
  # This is used in a medium-hacky way to share modified options across threads
  defaultOptions = IntegrityCheckOptions()


  #%% Functions

- def check_image_existence_and_size(image,options=None):
+ def _check_image_existence_and_size(image,options=None):
+ """
+ Validate the image represented in the CCT image dict [image], which should have fields:
+
+ * file_name
+ * width
+ * height
+
+ Args:
+ image (dict): image to validate
+ options (IntegrityCheckOptions): parameters impacting validation
+
+ Returns:
+ bool: whether this image passes validation
+ """

  if options is None:
  options = defaultOptions
@@ -65,7 +99,9 @@ def check_image_existence_and_size(image,options=None):
  print('Missing image size in {}'.format(filePath))
  return False

- width, height = Image.open(filePath).size
+ # width, height = Image.open(filePath).size
+ pil_im = open_image(filePath)
+ width,height = pil_im.size
  if (not (width == image['width'] and height == image['height'])):
  print('Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
  image['id'], filePath, image['width'], image['height'], width, height))
@@ -76,9 +112,17 @@ def check_image_existence_and_size(image,options=None):

  def integrity_check_json_db(jsonFile, options=None):
  """
- jsonFile can be a filename or an already-loaded json database
+ Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file; see
+ module header comment for a list of the validation steps.

- return sortedCategories, data, errorInfo
+ Args:
+ jsonFile (str): filename to validate, or an already-loaded dict
+
+ Returns:
+ tuple: tuple containing:
+ - sortedCategories (dict): list of categories used in [jsonFile], sorted by frequency
+ - data (dict): the data loaded from [jsonFile]
+ - errorInfo (dict): specific validation errors
  """

  if options is None:
@@ -86,8 +130,9 @@ def integrity_check_json_db(jsonFile, options=None):

  if options.bCheckImageSizes:
  options.bCheckImageExistence = True
-
- print(options.__dict__)
+
+ if options.verbose:
+ print(options.__dict__)

  if options.baseDir is None:
  options.baseDir = ''
@@ -105,8 +150,9 @@ def integrity_check_json_db(jsonFile, options=None):

  assert os.path.isfile(jsonFile), '.json file {} does not exist'.format(jsonFile)

- print('Reading .json {} with base dir [{}]...'.format(
- jsonFile,baseDir))
+ if options.verbose:
+ print('Reading .json {} with base dir [{}]...'.format(
+ jsonFile,baseDir))

  with open(jsonFile,'r') as f:
  data = json.load(f)
@@ -133,7 +179,8 @@ def integrity_check_json_db(jsonFile, options=None):
  catNameToCat = {}
  imageLocationSet = set()

- print('Checking categories...')
+ if options.verbose:
+ print('Checking categories...')

  for cat in tqdm(categories):

@@ -157,11 +204,13 @@ def integrity_check_json_db(jsonFile, options=None):

  # ...for each category

- print('\nChecking images...')
+ if options.verbose:
+ print('\nChecking images...')

  if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:

- print('Trimming image list to {}'.format(options.iMaxNumImages))
+ if options.verbose:
+ print('Trimming image list to {}'.format(options.iMaxNumImages))
  images = images[0:options.iMaxNumImages]

  imagePathsInJson = set()
@@ -217,7 +266,8 @@
  # Are we checking for unused images?
  if (len(baseDir) > 0) and options.bFindUnusedImages:

- print('\nEnumerating images...')
+ if options.verbose:
+ print('\nEnumerating images...')

  # Recursively enumerate images
  imagePaths = []
@@ -244,8 +294,9 @@

  if len(baseDir) == 0:
  print('Warning: checking image sizes without a base directory, assuming "."')
-
- print('Checking image existence and/or image sizes...')
+
+ if options.verbose:
+ print('Checking image existence and/or image sizes...')

  if options.nThreads is not None and options.nThreads > 1:
  pool = ThreadPool(options.nThreads)
@@ -253,11 +304,11 @@
  defaultOptions.baseDir = options.baseDir
  defaultOptions.bCheckImageSizes = options.bCheckImageSizes
  defaultOptions.bCheckImageExistence = options.bCheckImageExistence
- results = tqdm(pool.imap(check_image_existence_and_size, images), total=len(images))
+ results = tqdm(pool.imap(_check_image_existence_and_size, images), total=len(images))
  else:
  results = []
  for im in tqdm(images):
- results.append(check_image_existence_and_size(im,options))
+ results.append(_check_image_existence_and_size(im,options))

  for iImage,r in enumerate(results):
  if not r:
@@ -265,9 +316,9 @@

  # ...for each image

- print('{} validation errors (of {})'.format(len(validationErrors),len(images)))
-
- print('Checking annotations...')
+ if options.verbose:
+ print('{} validation errors (of {})'.format(len(validationErrors),len(images)))
+ print('Checking annotations...')

  nBoxes = 0

@@ -302,58 +353,56 @@
  catIdToCat[ann['category_id']]['_count'] +=1

  # ...for each annotation
-
-
- ##%% Print statistics

- # Find un-annotated images and multi-annotation images
- nUnannotated = 0
- nMultiAnnotated = 0
+ sortedCategories = sorted(categories, key=itemgetter('_count'), reverse=True)

- for image in images:
- if image['_count'] == 0:
- nUnannotated += 1
- elif image['_count'] > 1:
- nMultiAnnotated += 1
-
- print('Found {} unannotated images, {} images with multiple annotations'.format(
- nUnannotated,nMultiAnnotated))

- if (len(baseDir) > 0) and options.bFindUnusedImages:
- print('Found {} unused image files'.format(len(unusedFiles)))
-
- nUnusedCategories = 0
+ ##%% Print statistics

- # Find unused categories
- for cat in categories:
- if cat['_count'] == 0:
- print('Unused category: {}'.format(cat['name']))
- nUnusedCategories += 1
+ if options.verbose:

- print('Found {} unused categories'.format(nUnusedCategories))
+ # Find un-annotated images and multi-annotation images
+ nUnannotated = 0
+ nMultiAnnotated = 0
+
+ for image in images:
+ if image['_count'] == 0:
+ nUnannotated += 1
+ elif image['_count'] > 1:
+ nMultiAnnotated += 1
+
+ print('Found {} unannotated images, {} images with multiple annotations'.format(
+ nUnannotated,nMultiAnnotated))
+
+ if (len(baseDir) > 0) and options.bFindUnusedImages:
+ print('Found {} unused image files'.format(len(unusedFiles)))

- sequenceString = 'no sequence info'
- if len(sequences) > 0:
- sequenceString = '{} sequences'.format(len(sequences))
+ nUnusedCategories = 0

- print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
- len(images),len(annotations),nBoxes,len(categories),sequenceString))
-
- if len(imageLocationSet) > 0:
- print('DB contains images from {} locations\n'.format(len(imageLocationSet)))
-
- # Prints a list of categories sorted by count
- #
- # https://stackoverflow.com/questions/72899/how-do-i-sort-a-list-of-dictionaries-by-a-value-of-the-dictionary
-
- sortedCategories = sorted(categories, key=itemgetter('_count'), reverse=True)
-
- print('Categories and annotation (not image) counts:\n')
-
- for cat in sortedCategories:
- print('{:6} {}'.format(cat['_count'],cat['name']))
+ # Find unused categories
+ for cat in categories:
+ if cat['_count'] == 0:
+ print('Unused category: {}'.format(cat['name']))
+ nUnusedCategories += 1
+
+ print('Found {} unused categories'.format(nUnusedCategories))
+
+ sequenceString = 'no sequence info'
+ if len(sequences) > 0:
+ sequenceString = '{} sequences'.format(len(sequences))
+
+ print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
+ len(images),len(annotations),nBoxes,len(categories),sequenceString))

- print('')
+ if len(imageLocationSet) > 0:
+ print('DB contains images from {} locations\n'.format(len(imageLocationSet)))
+
+ print('Categories and annotation (not image) counts:\n')
+
+ for cat in sortedCategories:
+ print('{:6} {}'.format(cat['_count'],cat['name']))
+
+ print('')

  errorInfo = {}
  errorInfo['unusedFiles'] = unusedFiles
@@ -398,9 +447,7 @@ def main():
  ct_utils.args_to_object(args, options)
  integrity_check_json_db(args.jsonFile,options)

-
- if __name__ == '__main__':
-
+ if __name__ == '__main__':
  main()

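For orientation, here is a minimal usage sketch of the revised integrity_check_json_db API shown in the hunks above, including the new verbose option. The paths are hypothetical, and the flat import path assumes the package layout shown in the files-changed list.

    # Minimal usage sketch of the 5.0.9 API; paths below are hypothetical examples.
    from data_management.databases.integrity_check_json_db import (
        IntegrityCheckOptions, integrity_check_json_db)

    options = IntegrityCheckOptions()
    options.baseDir = '/data/camera-traps'    # filenames in the .json are relative to this folder
    options.bCheckImageSizes = True           # implies bCheckImageExistence = True
    options.bFindUnusedImages = True
    options.nThreads = 10                     # set to <= 1 to disable parallelization
    options.verbose = False                   # new in 5.0.9: suppress the per-step console output

    sorted_categories, data, error_info = integrity_check_json_db(
        '/data/camera-traps/dataset.json', options)

    # error_info['unusedFiles'] is populated when bFindUnusedImages is enabled
    print('{} unused image files'.format(len(error_info['unusedFiles'])))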

data_management/databases/subset_json_db.py

@@ -1,14 +1,14 @@
- ########
- #
- # subset_json_db.py
- #
- # Select a subset of images (and associated annotations) from a .json file
- # in COCO Camera Traps format.
- #
- # To subset the .json files in the MegaDetector output format, see
- # subset_json_detector_output.py
- #
- ########
+ """
+
+ subset_json_db.py
+
+ Select a subset of images (and associated annotations) from a .json file in COCO
+ Camera Traps format based on a string query.
+
+ To subset .json files in the MegaDetector output format, see
+ subset_json_detector_output.py.
+
+ """

  #%% Constants and imports

@@ -26,6 +26,16 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False):
  Given a json file (or dictionary already loaded from a json file), produce a new
  database containing only the images whose filenames contain the string 'query',
  optionally writing that DB output to a new json file.
+
+ Args:
+ input_json (str): COCO Camera Traps .json file to load, or an already-loaded dict
+ query (str): string to query for, only include images in the output whose filenames
+ contain this string.
+ output_json (str, optional): file to write the resulting .json file to
+ ignore_case (bool, optional): whether to perform a case-insensitive search for [query]
+
+ Returns:
+ dict: possibly-modified CCT dictionary
  """

  if ignore_case:
@@ -65,7 +75,7 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False):
  # Write the output file if requested
  if output_json is not None:
  print('Writing output .json...')
- json.dump(output_data,open(output_json,'w'),indent=4)
+ json.dump(output_data,open(output_json,'w'),indent=1)

  return output_data

@@ -76,8 +86,8 @@ if False:

  #%%

- input_json = r"E:\Statewide_wolf_container\idfg_20190409.json"
- output_json = r"E:\Statewide_wolf_container\idfg_20190409_clearcreek.json"
+ input_json = r"e:\Statewide_wolf_container\idfg_20190409.json"
+ output_json = r"e:\Statewide_wolf_container\idfg_20190409_clearcreek.json"
  query = 'clearcreek'
  ignore_case = True
  db = subset_json_db(input_json, query, output_json, ignore_case)
@@ -101,6 +111,5 @@ def main():

  subset_json_db(args.input_json,args.query,args.output_json,args.ignore_case)

- if __name__ == '__main__':
-
+ if __name__ == '__main__':
  main()
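A minimal usage sketch of subset_json_db() as documented in the new docstring above; the file paths are hypothetical examples, and the import path assumes the same flat package layout as the files-changed list.

    # Minimal usage sketch; file paths are hypothetical examples.
    from data_management.databases.subset_json_db import subset_json_db

    # Keep only images whose filenames contain 'clearcreek' (case-insensitive) and
    # write the subset to a new CCT .json file (now serialized with indent=1).
    subset_db = subset_json_db(input_json='/data/idfg_20190409.json',
                               query='clearcreek',
                               output_json='/data/idfg_20190409_clearcreek.json',
                               ignore_case=True)

    print('Subset contains {} images'.format(len(subset_db['images'])))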

data_management/generate_crops_from_cct.py

@@ -1,11 +1,11 @@
- ########
- #
- # generate_crops_from_cct.py
- #
- # Given a .json file in COCO Camera Traps format, create a cropped image for
- # each bounding box.
- #
- ########
+ """
+
+ generate_crops_from_cct.py
+
+ Given a .json file in COCO Camera Traps format, creates a cropped image for
+ each bounding box.
+
+ """

  #%% Imports and constants

@@ -19,6 +19,23 @@ from PIL import Image

  #%% Functions

  def generate_crops_from_cct(cct_file,image_dir,output_dir,padding=0,flat_output=True):
+ """
+ Given a .json file in COCO Camera Traps format, creates a cropped image for
+ each bounding box.
+
+ Args:
+ cct_file (str): the COCO .json file from which we should load data
+ image_dir (str): the folder where the images live; filenames in the .json
+ file should be relative to this folder
+ output_dir (str): the folder where we should write cropped images
+ padding (float, optional): number of pixels we should expand each box before
+ cropping
+ flat_output (bool, optional): if False, folder structure will be preserved
+ in the output, e.g. the image a/b/c/d.jpg will result in image files
+ in the output folder called, e.g., a/b/c/d_crop_000_id_12345.jpg. If
+ [flat_output] is True, the corresponding output image will be
+ a_b_c_d_crop_000_id_12345.jpg.
+ """

  ## Read and validate input

@@ -123,45 +140,10 @@ if False:
  flat_output = True
  output_dir = '/home/user/tmp/noaa-fish-crops'

- #%%
-
  generate_crops_from_cct(cct_file,image_dir,output_dir,padding,flat_output=True)
  files = os.listdir(output_dir)

- #%%
-
- import random
- fn = os.path.join(output_dir,random.choice(files))
-
- from md_utils.path_utils import open_file
- open_file(fn)
-
-
- #%% Scrap
-
- if False:

- pass
+ #%% Command-line driver

- #%%
-
- from md_visualization.visualize_db import DbVizOptions,visualize_db
-
- db_path = cct_file
- output_dir = os.path.expanduser('~/tmp/noaa-fish-preview')
- image_base_dir = image_dir
-
- options = DbVizOptions()
- options.num_to_visualize = None
-
- options.parallelize_rendering_n_cores = 5
- options.parallelize_rendering = True
-
- options.viz_size = (-1, -1)
- options.trim_to_images_with_bboxes = True
-
- options.box_thickness = 4
- options.box_expansion = 25
-
- htmlOutputFile,db = visualize_db(db_path,output_dir,image_base_dir,options)
-
+ # TODO
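And a minimal usage sketch of generate_crops_from_cct() per the docstring added above; the paths and padding value are hypothetical examples.

    # Minimal usage sketch; paths are hypothetical examples.
    from data_management.generate_crops_from_cct import generate_crops_from_cct

    generate_crops_from_cct(cct_file='/data/camera-traps/dataset.json',
                            image_dir='/data/camera-traps/images',
                            output_dir='/data/camera-traps/crops',
                            padding=10,         # expand each box by 10 pixels before cropping
                            flat_output=True)   # e.g. a/b/c/d.jpg -> a_b_c_d_crop_000_id_12345.jpg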