megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to the public registry. It is provided for informational purposes only.

Potentially problematic release.


This version of megadetector might be problematic.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
@@ -4,7 +4,7 @@ create_lila_blank_set.py

  Create a folder of blank images sampled from LILA. We'll aim for diversity, so less-common
  locations will be oversampled relative to more common locations. We'll also run MegaDetector
- (with manual review) to remove some incorrectly-labeled, not-actually-empty images from our
+ (with manual review) to remove some incorrectly-labeled, not-actually-empty images from our
  blank set.

  We'll store location information for each image in a .json file, so we can split locations
@@ -27,8 +27,15 @@ from collections import defaultdict

  from megadetector.data_management.lila.lila_common import read_lila_all_images_file
  from megadetector.utils.url_utils import download_url
+ from megadetector.utils.ct_utils import sort_dictionary_by_value
+ from megadetector.utils.path_utils import is_image_file
+ from megadetector.utils.path_utils import find_images
  from megadetector.visualization import visualization_utils as vis_utils
  from megadetector.utils.path_utils import recursive_file_list
+ from megadetector.utils import ct_utils
+
+
+ #%% Environment

  # We'll write images, metadata downloads, and temporary files here
  lila_local_base = os.path.expanduser('~/lila')
@@ -48,7 +55,7 @@ md_possible_non_blanks_folder = os.path.join(project_base,'candidate_non_blanks'
  os.makedirs(md_possible_non_blanks_folder,exist_ok=True)

  location_to_blank_image_urls_cache_file = os.path.join(project_base,
- 'location_to_blank_image_urls.json')
+ 'location_to_blank_image_urls.json')

  md_results_file = os.path.join(project_base,'lila_blanks_md_results.json')

@@ -90,10 +97,10 @@ other_labels_without_common_names = (
  'car', 'motorcycle', 'vehicle'
  )

- common_names = sorted(list(df['common_name'].unique()),
- key=lambda x:str(x) if isinstance(x,float) else x)
+ common_names = sorted(list(df['common_name'].unique()),
+ key=lambda x:str(x) if isinstance(x,float) else x)
  original_labels = sorted(list(df['original_label'].unique()),
- key=lambda x:str(x) if isinstance(x,float) else x)
+ key=lambda x:str(x) if isinstance(x,float) else x)

  # Blanks are represented as NaN in the "common_name" column (though not all NaN's are blanks)
  assert '' not in common_names
@@ -118,16 +125,16 @@ original_label_to_count = defaultdict(int)

  # This loop takes ~10 mins
  for i_row,row in tqdm(df.iterrows(),total=len(df)):
-
+
  common_name = row['common_name']
  original_label = row['original_label']
-
+
  if isinstance(common_name,float):
  assert np.isnan(common_name)
  original_labels_with_nan_common_names.add(original_label)
-
+
  common_name = str(common_name)
-
+
  assert isinstance(original_label,str)
  if original_label in blank_original_labels:
  common_names_with_empty_original_labels.add(common_name)
@@ -137,7 +144,6 @@ for i_row,row in tqdm(df.iterrows(),total=len(df)):

  #%% Look at the most common labels and common names

- from megadetector.utils.ct_utils import sort_dictionary_by_value
  common_name_to_count = sort_dictionary_by_value(common_name_to_count,reverse=True)
  original_label_to_count = sort_dictionary_by_value(original_label_to_count,reverse=True)

@@ -185,32 +191,31 @@ force_map_locations = False

  # Load from .json if available
  if (not force_map_locations) and (os.path.isfile(location_to_blank_image_urls_cache_file)):
-
+
  with open(location_to_blank_image_urls_cache_file,'r') as f:
  location_to_blank_image_urls = json.load(f)

  else:
-
+
  location_to_blank_image_urls = defaultdict(list)
-
+
  # i_row = 0; row = df.iloc[i_row]
  for i_row,row in tqdm(df.iterrows(),total=len(df)):
-
+
  location_id = row['location_id']
  url = row['url']
-
+
  original_label = row['original_label']
  if original_label in blank_original_labels:
  assert np.isnan(row['common_name'])
  location_to_blank_image_urls[location_id].append(url)

- with open(location_to_blank_image_urls_cache_file,'w') as f:
- json.dump(location_to_blank_image_urls,f,indent=1)
+ ct_utils.write_json(location_to_blank_image_urls_cache_file, location_to_blank_image_urls)

  n_locations_with_blanks = len(location_to_blank_image_urls)
  print('Found {} locations with blank images'.format(n_locations_with_blanks))

-
+
  #%% Sample blanks

  random.seed(0)
@@ -223,7 +228,7 @@ for location in location_to_blank_image_urls:
  blank_image_urls_this_location = location_to_blank_image_urls[location]
  unsampled_blank_image_urls_this_location = blank_image_urls_this_location.copy()
  location_to_unsampled_blank_image_urls[location] = unsampled_blank_image_urls_this_location
-
+
  # Put locations in a random order
  location_ids = list(location_to_unsampled_blank_image_urls.keys())
  random.shuffle(location_ids)
@@ -234,32 +239,32 @@ fully_sampled_locations = set()

  # Pick from each location until we hit our limit or have no blanks left
  while(True):
-
+
  found_sample = False
-
+
  # location = location_ids[0]
  for location in location_ids:
-
+
  unsampled_images_this_location = location_to_unsampled_blank_image_urls[location]
  if len(unsampled_images_this_location) == 0:
  fully_sampled_locations.add(location)
  continue
-
+
  url = random.choice(unsampled_images_this_location)
- blank_urls.append(url)
+ blank_urls.append(url)
  location_to_unsampled_blank_image_urls[location].remove(url)
  location_to_sampled_blanks[location].append(url)
  found_sample = True
-
+
  if len(blank_urls) == n_blanks:
  break
-
+
  # ...for each location
-
+
  if not found_sample:
  print('Terminating after {} blanks, we ran out before hitting {}'.format(
  len(blank_urls),n_blanks))
-
+
  if len(blank_urls) == n_blanks:
  break

@@ -278,39 +283,39 @@ for location in location_to_sampled_blanks:
  print('Choose {} blanks from {} locations'.format(n_blanks,len(location_ids)))
  print('Fully sampled {} locations'.format(len(fully_sampled_locations)))
  print('Max samples per location: {}'.format(max_blanks_per_location))
-
+

  #%% Download those image files (prep)

  container_to_url_base = {
- 'lilawildlife.blob.core.windows.net':'/lila-wildlide/',
- 'storage.googleapis.com':'/public-datasets-lila/'
- }
+ 'lilawildlife.blob.core.windows.net':'/lila-wildlide/',
+ 'storage.googleapis.com':'/public-datasets-lila/'
+ }

  def download_relative_filename(url, output_base, verbose=False, url_base=None, overwrite=False):
  """
  Download a URL to output_base, preserving relative path
  """
-
+
  result = {'status':'unknown','url':url,'destination_filename':None}
-
+
  if url_base is None:
  assert url.startswith('https://')
  container = url.split('/')[2]
  assert container in container_to_url_base
  url_base = container_to_url_base[container]
-
+
  assert url_base.startswith('/') and url_base.endswith('/')
-
+
  p = urlparse(url)
  relative_filename = str(p.path)
  # remove the leading '/'
  assert relative_filename.startswith(url_base)
- relative_filename = relative_filename.replace(url_base,'',1)
-
+ relative_filename = relative_filename.replace(url_base,'',1)
+
  destination_filename = os.path.join(output_base,relative_filename)
  result['destination_filename'] = destination_filename
-
+
  if ((os.path.isfile(destination_filename)) and (not overwrite)):
  result['status'] = 'skipped'
  return result
@@ -318,10 +323,10 @@ def download_relative_filename(url, output_base, verbose=False, url_base=None, o
  download_url(url, destination_filename, verbose=verbose)
  except Exception as e:
  print('Warning: error downloading URL {}: {}'.format(
- url,str(e)))
+ url,str(e)))
  result['status'] = 'error: {}'.format(str(e))
  return result
-
+
  result['status'] = 'success'
  return result

@@ -331,11 +336,11 @@ def azure_url_to_gcp_http_url(url,error_if_not_azure_url=True):
  Most URLs point to Azure by default, but most files are available on both Azure and GCP.
  This function converts an Azure URL to the corresponding GCP http:// url.
  """
-
+
  lila_azure_storage_account = 'https://lilawildlife.blob.core.windows.net'
  gcp_bucket_api_url = 'https://storage.googleapis.com/public-datasets-lila'
  error_if_not_azure_url = False
-
+
  if error_if_not_azure_url:
  assert url.startswith(lila_azure_storage_account)
  gcp_url = url.replace(lila_azure_storage_account,gcp_bucket_api_url,1)
@@ -344,7 +349,7 @@ def azure_url_to_gcp_http_url(url,error_if_not_azure_url=True):
  # Convert Azure URLs to GCP URLs if necessary
  if preferred_image_download_source != 'azure':
  assert preferred_image_download_source == 'gcp'
- blank_urls = [azure_url_to_gcp_http_url(url) for url in blank_urls]
+ blank_urls = [azure_url_to_gcp_http_url(url) for url in blank_urls]


  #%% Download those image files (execution)
@@ -354,16 +359,16 @@ print('Downloading {} images on {} workers'.format(len(blank_urls),n_download_th
  if n_download_threads <= 1:

  results = []
-
+
  # url = all_urls[0]
- for url in tqdm(blank_urls):
+ for url in tqdm(blank_urls):
  results.append(download_relative_filename(url,candidate_blanks_base,url_base=None))
-
+
  else:

- pool = ThreadPool(n_download_threads)
+ pool = ThreadPool(n_download_threads)
  results = list(tqdm(pool.imap(lambda s: download_relative_filename(
- s,candidate_blanks_base,url_base=None),
+ s,candidate_blanks_base,url_base=None),
  blank_urls), total=len(blank_urls)))

  # pool.terminate()
@@ -385,7 +390,7 @@ cmd = 'python run_detector_batch.py MDV5A "{}" "{}"'.format(
  candidate_blanks_base,md_results_file)
  cmd += ' --recursive --output_relative_filenames'

- import clipboard; clipboard.copy(cmd); print(cmd)
+ # import clipboard; clipboard.copy(cmd); print(cmd)


  #%% Review MD results that suggests images are non-empty
@@ -406,11 +411,11 @@ for category_id in md_results['detection_categories']:

  # im = md_results['images'][0]
  for im in md_results['images']:
-
+
  if 'detections' not in im:
  continue
-
- found_object = False
+
+ found_object = False
  for det in im['detections']:
  threshold = category_id_to_threshold[det['category']]
  if det['conf'] >= threshold:
@@ -425,8 +430,8 @@ output_file_to_source_file = {}

  # i_fn = 0; source_file_relative = images_to_review[i_fn]
  for i_fn,source_file_relative in tqdm(enumerate(images_to_review_to_detections),
- total=len(images_to_review_to_detections)):
-
+ total=len(images_to_review_to_detections)):
+
  source_file_abs = os.path.join(candidate_blanks_base,source_file_relative)
  assert os.path.isfile(source_file_abs)
  ext = os.path.splitext(source_file_abs)[1]
@@ -435,16 +440,15 @@ for i_fn,source_file_relative in tqdm(enumerate(images_to_review_to_detections),
  output_file_to_source_file[target_file_relative] = source_file_relative
  # shutil.copyfile(source_file_abs,target_file_abs)
  vis_utils.draw_bounding_boxes_on_file(input_file=source_file_abs,
- output_file=target_file_abs,
- detections=images_to_review_to_detections[source_file_relative],
- confidence_threshold=min_threshold,
- target_size=(1280,-1))
+ output_file=target_file_abs,
+ detections=images_to_review_to_detections[source_file_relative],
+ confidence_threshold=min_threshold,
+ target_size=(1280,-1))

  # This is a temporary file I just used during debugging
- with open(os.path.join(project_base,'output_file_to_source_file.json'),'w') as f:
- json.dump(output_file_to_source_file,f,indent=1)
-
-
+ ct_utils.write_json(os.path.join(project_base,'output_file_to_source_file.json'), output_file_to_source_file)
+
+
  #%% Manual review

  # Delete images that are *not* empty
@@ -463,15 +467,13 @@ for output_file in tqdm(output_file_to_source_file.keys()):
  if output_file not in remaining_images:
  source_file_relative = output_file_to_source_file[output_file]
  removed_blank_images_relative.append(source_file_relative)
-
+
  removed_blank_images_relative_set = set(removed_blank_images_relative)
  assert len(removed_blank_images_relative) + len(remaining_images) == len(output_file_to_source_file)


  #%% Copy only the confirmed blanks to the confirmed folder

- from megadetector.utils.path_utils import is_image_file
-
  all_candidate_blanks = recursive_file_list(candidate_blanks_base,return_relative_paths=True)
  print('Found {} candidate blanks'.format(len(all_candidate_blanks)))

@@ -479,19 +481,19 @@ skipped_images_relative = []
  skipped_non_images = []

  for source_fn_relative in tqdm(all_candidate_blanks):
-
+
  # Skip anything we removed from the "candidate non-blanks" folder; these weren't really
  # blank.
  if source_fn_relative in removed_blank_images_relative_set:
  skipped_images_relative.append(source_fn_relative)
  continue
-
+
  if not is_image_file(source_fn_relative):
  # Not a typo; "skipped images" really means "skipped files"
  skipped_images_relative.append(source_fn_relative)
  skipped_non_images.append(source_fn_relative)
-
-
+
+
  source_fn_abs = os.path.join(candidate_blanks_base,source_fn_relative)
  assert os.path.isfile(source_fn_abs)
  target_fn_abs = os.path.join(confirmed_blanks_base,source_fn_relative)
@@ -499,12 +501,11 @@ for source_fn_relative in tqdm(all_candidate_blanks):
  # shutil.copyfile(source_fn_abs,target_fn_abs)

  print('Skipped {} files ({} non-image files)'.format(len(skipped_images_relative),
- len(skipped_non_images)))
+ len(skipped_non_images)))


  #%% Validate the folder of confirmed blanks

- from megadetector.utils.path_utils import find_images
  # all_confirmed_blanks = recursive_file_list(confirmed_blanks_base,return_relative_paths=True)
  all_confirmed_blanks = find_images(confirmed_blanks_base,return_relative_paths=True,recursive=True)
  assert len(all_confirmed_blanks) < len(all_candidate_blanks)
@@ -518,8 +519,8 @@ i_image = random.randint(0, len(skipped_images_relative))
  fn_relative = skipped_images_relative[i_image]
  fn_abs = os.path.join(candidate_blanks_base,fn_relative)
  assert os.path.isfile(fn_abs)
- import clipboard
- clipboard.copy('feh --scale-down "{}"'.format(fn_abs))
+
+ # import clipboard; clipboard.copy('feh --scale-down "{}"'.format(fn_abs))


  #%% Record location information for each confirmed file
@@ -532,27 +533,24 @@ all_fn_relative_to_location = {}
  # location = next(iter(location_to_blank_image_urls.keys()))
  for location in tqdm(location_to_blank_image_urls):
  urls_this_location = location_to_blank_image_urls[location]
-
+
  # url = urls_this_location[0]
  for url in urls_this_location:
  # Turn:
- #
+ #
  # https://lilablobssc.blob.core.windows.net/caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg'
  #
  # ...into:
  #
- # caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg'
+ # caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg'
  p = urlparse(url)
  fn_relative = str(p.path)[1:]
  all_fn_relative_to_location[fn_relative] = location

  # Build a much smaller mapping of just the confirmed blanks
- confirmed_fn_relative_to_location = {}
+ confirmed_fn_relative_to_location = {}
  for i_fn,fn_relative in tqdm(enumerate(all_confirmed_blanks),total=len(all_confirmed_blanks)):
  confirmed_fn_relative_to_location[fn_relative] = all_fn_relative_to_location[fn_relative]

- with open(all_fn_relative_to_location_file,'w') as f:
- json.dump(all_fn_relative_to_location,f,indent=1)
-
- with open(confirmed_fn_relative_to_location_file,'w') as f:
- json.dump(confirmed_fn_relative_to_location,f,indent=1)
+ ct_utils.write_json(all_fn_relative_to_location_file, all_fn_relative_to_location)
+ ct_utils.write_json(confirmed_fn_relative_to_location_file, confirmed_fn_relative_to_location)
@@ -2,7 +2,7 @@

  create_lila_test_set.py

- Create a test set of camera trap images, containing N empty and N non-empty
+ Create a test set of camera trap images, containing N empty and N non-empty
  images from each LILA data set.

  """
@@ -15,6 +15,7 @@ import random

  from megadetector.data_management.lila.lila_common import \
  read_lila_metadata, read_metadata_file_for_dataset
+ from megadetector.utils.url_utils import parallel_download_urls

  n_empty_images_per_dataset = 1
  n_non_empty_images_per_dataset = 1
@@ -39,9 +40,10 @@ metadata_table = read_lila_metadata(metadata_dir)
  #%% Download and extract metadata for every dataset

  for ds_name in metadata_table.keys():
- metadata_table[ds_name]['metadata_filename'] = read_metadata_file_for_dataset(ds_name=ds_name,
- metadata_dir=metadata_dir,
- metadata_table=metadata_table)
+ metadata_table[ds_name]['metadata_filename'] = \
+ read_metadata_file_for_dataset(ds_name=ds_name,
+ metadata_dir=metadata_dir,
+ metadata_table=metadata_table)


  #%% Choose images from each dataset
@@ -52,49 +54,49 @@ for ds_name in metadata_table.keys():


  print('Choosing images for {}'.format(ds_name))
-
+
  json_filename = metadata_table[ds_name]['metadata_filename']
-
+
  with open(json_filename,'r') as f:
  d = json.load(f)
-
+
  category_id_to_name = {c['id']:c['name'] for c in d['categories']}
  category_name_to_id = {c['name']:c['id'] for c in d['categories']}
-
+
  ## Find empty images
-
+
  if 'empty' not in category_name_to_id:
  empty_annotations_to_download = []
  else:
- empty_category_id = category_name_to_id['empty']
+ empty_category_id = category_name_to_id['empty']
  empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] == empty_category_id]
  try:
- empty_annotations_to_download = random.sample(empty_annotations,n_empty_images_per_dataset)
+ empty_annotations_to_download = random.sample(empty_annotations,n_empty_images_per_dataset)
  except ValueError:
  print('No empty images available for dataset {}'.format(ds_name))
  empty_annotations_to_download = []
-
+
  ## Find non-empty images
-
- non_empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] != empty_category_id]
+
+ non_empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] != empty_category_id]
  try:
  non_empty_annotations_to_download = random.sample(non_empty_annotations,n_non_empty_images_per_dataset)
  except ValueError:
  print('No non-empty images available for dataset {}'.format(ds_name))
  non_empty_annotations_to_download = []

-
+
  annotations_to_download = empty_annotations_to_download + non_empty_annotations_to_download
-
+
  image_ids_to_download = set([ann['image_id'] for ann in annotations_to_download])
  assert len(image_ids_to_download) == len(set(image_ids_to_download))
-
+
  images_to_download = []
  for im in d['images']:
  if im['id'] in image_ids_to_download:
  images_to_download.append(im)
  assert len(images_to_download) == len(image_ids_to_download)
-
+
  metadata_table[ds_name]['images_to_download'] = images_to_download

  # ...for each dataset
@@ -109,19 +111,19 @@ for ds_name in metadata_table.keys():

  base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
  assert not base_url.endswith('/')
-
+
  # Retrieve image file names
  filenames = [im['file_name'] for im in metadata_table[ds_name]['images_to_download']]
-
+
  urls_to_download = []
-
+
  # Convert to URLs
- for fn in filenames:
+ for fn in filenames:
  url = base_url + '/' + fn
  urls_to_download.append(url)

  metadata_table[ds_name]['urls_to_download'] = urls_to_download
-
+
  # ...for each dataset


@@ -135,26 +137,25 @@ for ds_name in metadata_table.keys():
  base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
  assert not base_url.endswith('/')
  base_url += '/'
-
+
  urls_to_download = metadata_table[ds_name]['urls_to_download']
-
+
  # url = urls_to_download[0]
  for url in urls_to_download:
-
+
  assert base_url in url
- output_file_relative = ds_name.lower().replace(' ','_') + '_' + url.replace(base_url,'').replace('/','_').replace('\\','_')
+ output_file_relative = ds_name.lower().replace(' ','_') + \
+ '_' + url.replace(base_url,'').replace('/','_').replace('\\','_')
  output_file_absolute = os.path.join(output_dir,output_file_relative)
  url_to_target_file[url] = output_file_absolute
-
+
  # ...for each url
-
+
  # ...for each dataset


  #%% Download image files (execution)

- from megadetector.utils.url_utils import parallel_download_urls
-
  download_results = parallel_download_urls(url_to_target_file,
  verbose=False,
  overwrite=False,
@@ -19,7 +19,7 @@ md_results_local_folder = r'g:\temp\lila-md-results'
  md_base_url = 'https://lila.science/public/lila-md-results/'
  assert md_base_url.endswith('/')

- # No RDE files for datasets with no location information
+ # No RDE files for datasets with no location information
  datasets_without_location_info = ('ena24','missouri-camera-traps')

  md_results_column_names = ['mdv4_results_raw','mdv5a_results_raw','mdv5b_results_raw','md_results_with_rde']
@@ -32,8 +32,8 @@ validate_urls = False
  df = pd.read_csv(input_csv_file)
  for s in md_results_column_names:
  df[s] = ''
-
-
+
+
  #%% Find matching files locally, and create URLs

  local_files = os.listdir(md_results_local_folder)
@@ -41,14 +41,14 @@ local_files = [fn for fn in local_files if fn.endswith('.zip')]

  # i_row = 0; row = df.iloc[i_row]
  for i_row,row in df.iterrows():
-
+
  if not isinstance(row['name'],str):
  continue
-
+
  dataset_shortname = row['short_name']
  matching_files = [fn for fn in local_files if dataset_shortname in fn]
-
- # No RDE files for datasets with no location information
+
+ # No RDE files for datasets with no location information
  if dataset_shortname in datasets_without_location_info:
  assert len(matching_files) == 2
  mdv5a_files = [fn for fn in matching_files if 'mdv5a' in fn]
@@ -57,10 +57,10 @@ for i_row,row in df.iterrows():
  df.loc[i_row,'mdv5a_results_raw'] = md_base_url + mdv5a_files[0]
  df.loc[i_row,'mdv5b_results_raw'] = md_base_url + mdv5b_files[0]
  else:
- # Exclude single-season files for snapshot-serengeti
+ # Exclude single-season files for snapshot-serengeti
  if dataset_shortname == 'snapshot-serengeti':
  matching_files = [fn for fn in matching_files if '_S' not in fn]
- assert len(matching_files) == 2
+ assert len(matching_files) == 2
  assert all(['mdv4' in fn for fn in matching_files])
  rde_files = [fn for fn in matching_files if 'rde' in fn]
  raw_files = [fn for fn in matching_files if 'rde' not in fn]
@@ -76,28 +76,28 @@ for i_row,row in df.iterrows():
  df.loc[i_row,'mdv5a_results_raw'] = md_base_url + mdv5a_files[0]
  df.loc[i_row,'mdv5b_results_raw'] = md_base_url + mdv5b_files[0]
  df.loc[i_row,'md_results_with_rde'] = md_base_url + rde_files[0]
-
+
  print('Found {} matching files for {}'.format(len(matching_files),dataset_shortname))

- # ...for each row
+ # ...for each row


  #%% Validate URLs

  if validate_urls:
-
+
  from megadetector.utils.url_utils import test_urls
-
+
  urls = set()
-
+
  for i_row,row in df.iterrows():
  for column_name in md_results_column_names:
  if len(row[column_name]) > 0:
- assert row[column_name] not in urls
+ assert row[column_name] not in urls
  urls.add(row[column_name])
-
- test_urls(urls,error_on_failure=True)
-
+
+ test_urls(urls,error_on_failure=True)
+
  print('Validated {} URLs'.format(len(urls)))
