megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
megadetector/data_management/lila/get_lila_annotation_counts.py

@@ -3,7 +3,7 @@
 get_lila_annotation_counts.py

 Generates a .json-formatted dictionary mapping each LILA dataset to all categories
-that exist for that dataset, with counts for the number of occurrences of each category
+that exist for that dataset, with counts for the number of occurrences of each category
 (the number of *annotations* for each category, not the number of *images*).

 Also loads the taxonomy mapping file, to include scientific names for each category.
@@ -17,8 +17,11 @@ get_lila_image_counts.py counts the number of *images* for each category in each
 import json
 import os

+from collections import defaultdict
+
 from megadetector.data_management.lila.lila_common import \
     read_lila_metadata, read_metadata_file_for_dataset, read_lila_taxonomy_mapping
+from megadetector.utils import ct_utils

 # cloud provider to use for downloading images; options are 'gcp', 'azure', or 'aws'
 preferred_cloud = 'gcp'
@@ -53,21 +56,21 @@ datasets_with_taxonomy_mapping = set()

 # i_row = 1; row = taxonomy_df.iloc[i_row]; row
 for i_row,row in taxonomy_df.iterrows():
-
+
     datasets_with_taxonomy_mapping.add(row['dataset_name'])
-
+
     ds_query = row['dataset_name'] + ':' + row['query']
     ds_query = ds_query.lower()
-
+
     if not isinstance(row['scientific_name'],str):
         unmapped_queries.add(ds_query)
         ds_query_to_scientific_name[ds_query] = 'unmapped'
         continue
-
+
     ds_query_to_scientific_name[ds_query] = row['scientific_name']
-
+
 print('Loaded taxonomy mappings for {} datasets'.format(len(datasets_with_taxonomy_mapping)))
-
+


 #%% Download and parse the metadata file

@@ -78,7 +81,7 @@ print('Loaded metadata URLs for {} datasets'.format(len(metadata_table)))

 #%% Download and extract metadata for each dataset

-for ds_name in metadata_table.keys():
+for ds_name in metadata_table.keys():
     metadata_table[ds_name]['json_filename'] = read_metadata_file_for_dataset(ds_name=ds_name,
                                                                               metadata_dir=metadata_dir,
                                                                               metadata_table=metadata_table)
@@ -88,45 +91,43 @@ for ds_name in metadata_table.keys():

 # Takes ~5 minutes

-from collections import defaultdict
-
 dataset_to_categories = {}

 # ds_name = 'NACTI'
 for ds_name in metadata_table.keys():
-
+
     taxonomy_mapping_available = (ds_name in datasets_with_taxonomy_mapping)
-
+
     if not taxonomy_mapping_available:
         print('Warning: taxonomy mapping not available for {}'.format(ds_name))
-
+
     print('Finding categories in {}'.format(ds_name))

     json_filename = metadata_table[ds_name]['json_filename']
     base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
     assert not base_url.endswith('/')
-
-    # Open the metadata file
+
+    # Open the metadata file
     with open(json_filename, 'r') as f:
         data = json.load(f)
-
+
     # Collect list of categories and mappings to category name
     categories = data['categories']
-
+
     category_id_to_count = defaultdict(int)
-    annotations = data['annotations']
-
+    annotations = data['annotations']
+
     # ann = annotations[0]
     for ann in annotations:
         category_id_to_count[ann['category_id']] = category_id_to_count[ann['category_id']] + 1
-
+
     # c = categories[0]
     for c in categories:
-        count = category_id_to_count[c['id']]
+        count = category_id_to_count[c['id']]
         if 'count' in c:
-            assert 'bbox' in ds_name or c['count'] == count
+            assert 'bbox' in ds_name or c['count'] == count
         c['count'] = count
-
+
     # Don't do taxonomy mapping for bbox data sets, which are sometimes just binary and are
     # always redundant with the class-level data sets.
     if 'bbox' in ds_name:
@@ -144,7 +145,7 @@ for ds_name in metadata_table.keys():
         sn = ds_query_to_scientific_name[taxonomy_query_string]
         assert sn is not None and len(sn) > 0
         c['scientific_name_from_taxonomy_mapping'] = sn
-
+
     dataset_to_categories[ds_name] = categories

 # ...for each dataset
@@ -154,19 +155,18 @@ for ds_name in metadata_table.keys():

 # ds_name = list(dataset_to_categories.keys())[0]
 for ds_name in dataset_to_categories:
-
+
     print('\n** Category counts for {} **\n'.format(ds_name))
-
+
     categories = dataset_to_categories[ds_name]
     categories = sorted(categories, key=lambda x: x['count'], reverse=True)
-
+
     for c in categories:
         print('{} ({}): {}'.format(c['name'],c['scientific_name_from_taxonomy_mapping'],c['count']))
-
+
 # ...for each dataset


 #%% Save the results

-with open(output_file, 'w') as f:
-    json.dump(dataset_to_categories,f,indent=1)
+ct_utils.write_json(output_file, dataset_to_categories)
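
The one functional change in this script, beyond whitespace cleanup and the import moves, is the final write step. A minimal before/after sketch of that change, assuming ct_utils.write_json is a thin wrapper that opens the output file and serializes the dict (its exact defaults, such as indentation, are not visible in this diff):

# 5.0.27: write the per-dataset category list with the standard library
with open(output_file, 'w') as f:
    json.dump(dataset_to_categories, f, indent=1)

# 5.0.29: delegate to the package helper imported at the top of the script
from megadetector.utils import ct_utils
ct_utils.write_json(output_file, dataset_to_categories)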
megadetector/data_management/lila/get_lila_image_counts.py

@@ -5,7 +5,7 @@ get_lila_image_counts.py
 Count the number of images and bounding boxes with each label in one or more LILA datasets.

 This script doesn't write these counts out anywhere other than the console, it's just intended
-as a template for doing operations like this on LILA data. get_lila_annotation_counts.py writes
+as a template for doing operations like this on LILA data. get_lila_annotation_counts.py writes
 information out to a .json file, but it counts *annotations*, not *images*, for each category.

 """
@@ -40,53 +40,53 @@ metadata_table = read_lila_metadata(metadata_dir)
 if datasets_of_interest is None:
     datasets_of_interest = list(metadata_table.keys())

-for ds_name in datasets_of_interest:
+for ds_name in datasets_of_interest:
     metadata_table[ds_name]['json_filename'] = read_metadata_file_for_dataset(ds_name=ds_name,
                                                                               metadata_dir=metadata_dir,
                                                                               metadata_table=metadata_table)
-
-
+
+
 #%% Count categories

 ds_name_to_category_counts = {}

 # ds_name = datasets_of_interest[0]
 for ds_name in datasets_of_interest:
-
+
     category_to_image_count = {}
     category_to_bbox_count = {}
-
+
     print('Counting categories in: ' + ds_name)
-
+
     json_filename = metadata_table[ds_name]['json_filename']
     with open(json_filename, 'r') as f:
         data = json.load(f)
-
+
     categories = data['categories']
     category_ids = [c['id'] for c in categories]
     for c in categories:
         category_id_to_name = {c['id']:c['name'] for c in categories}
     annotations = data['annotations']
     images = data['images']
-
-    for category_id in category_ids:
-        category_name = category_id_to_name[category_id]
+
+    for category_id in category_ids:
+        category_name = category_id_to_name[category_id]
         category_to_image_count[category_name] = 0
         category_to_bbox_count[category_name] = 0
-
+
     image_id_to_category_names = defaultdict(set)
-
+
     # Go through annotations, marking each image with the categories that are present
     #
     # ann = annotations[0]
     for ann in annotations:
-
+
         category_name = category_id_to_name[ann['category_id']]
         image_id_to_category_names[ann['image_id']].add(category_name)

     # Now go through images and count categories
     category_to_count = defaultdict(int)
-
+
     # im = images[0]
     for im in images:
         categories_this_image = image_id_to_category_names[im['id']]
@@ -94,19 +94,19 @@ for ds_name in datasets_of_interest:
             category_to_count[category_name] += 1

     ds_name_to_category_counts[ds_name] = category_to_count
-
+
 # ...for each dataset
-
+

 #%% Print the results

 for ds_name in ds_name_to_category_counts:
-
+
     print('\n** Category counts for {} **\n'.format(ds_name))
-
+
     category_to_count = ds_name_to_category_counts[ds_name]
-    category_to_count = {k: v for k, v in sorted(category_to_count.items(), reverse=True,
+    category_to_count = {k: v for k, v in sorted(category_to_count.items(), reverse=True,
                                                  key=lambda item: item[1])}
-
-    for category_name in category_to_count.keys():
+
+    for category_name in category_to_count.keys():
         print('{}: {}'.format(category_name,category_to_count[category_name]))
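
For readers skimming the diff, the counting logic in this script reduces to the pattern below. This is a condensed sketch rather than the script itself, and it assumes data is a COCO Camera Traps dict with 'categories', 'annotations', and 'images' lists, as shown in the hunks above:

from collections import defaultdict

category_id_to_name = {c['id']: c['name'] for c in data['categories']}

# Mark each image with the set of category names annotated on it
image_id_to_category_names = defaultdict(set)
for ann in data['annotations']:
    image_id_to_category_names[ann['image_id']].add(
        category_id_to_name[ann['category_id']])

# Count *images* (not annotations) per category
category_to_count = defaultdict(int)
for im in data['images']:
    for category_name in image_id_to_category_names[im['id']]:
        category_to_count[category_name] += 1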
megadetector/data_management/lila/lila_common.py

@@ -53,30 +53,30 @@ for url in lila_base_urls.values():
 def read_wildlife_insights_taxonomy_mapping(metadata_dir, force_download=False):
     """
     Reads the WI taxonomy mapping file, downloading the .json data (and writing to .csv) if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the taxonomy mapping file
+        force_download (bool, optional): download the taxonomy mapping file
            even if the local file exists.
-
+
     Returns:
         pd.dataframe: A DataFrame with taxonomy information
     """
-
+
     wi_taxonomy_csv_path = os.path.join(metadata_dir,wildlife_insights_taxonomy_local_csv_filename)
-
+
     if os.path.exists(wi_taxonomy_csv_path):
         df = pd.read_csv(wi_taxonomy_csv_path)
     else:
         wi_taxonomy_json_path = os.path.join(metadata_dir,wildlife_insights_taxonomy_local_json_filename)
-        download_url(wildlife_insights_taxonomy_url, wi_taxonomy_json_path,
+        download_url(wildlife_insights_taxonomy_url, wi_taxonomy_json_path,
                      force_download=force_download)
         with open(wi_taxonomy_json_path,'r') as f:
             d = json.load(f)
-
+
         # We haven't implemented paging, make sure that's not an issue
         assert d['meta']['totalItems'] < wildlife_insights_page_size
-
+
         # d['data'] is a list of items that look like:
         """
         {'id': 2000003,
@@ -92,46 +92,46 @@ def read_wildlife_insights_taxonomy_mapping(metadata_dir, force_download=False):
         """
         df = pd.DataFrame(d['data'])
         df.to_csv(wi_taxonomy_csv_path,index=False)
-
+
     return df

-
+
 def read_lila_taxonomy_mapping(metadata_dir, force_download=False):
     """
     Reads the LILA taxonomy mapping file, downloading the .csv file if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the taxonomy mapping file
-           even if the local file exists.
-
+        force_download (bool, optional): download the taxonomy mapping file
+           even if the local file exists.
+
     Returns:
         pd.DataFrame: a DataFrame with one row per identification
     """
-
+
     p = urlparse(lila_taxonomy_mapping_url)
     taxonomy_filename = os.path.join(metadata_dir,os.path.basename(p.path))
-    download_url(lila_taxonomy_mapping_url, taxonomy_filename,
+    download_url(lila_taxonomy_mapping_url, taxonomy_filename,
                  force_download=force_download)
-
+
     df = pd.read_csv(lila_taxonomy_mapping_url)
-
+
     return df

-
+
 def read_lila_metadata(metadata_dir, force_download=False):
     """
     Reads LILA metadata (URLs to each dataset), downloading the .csv file if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
            the local file exists.
-
+
     Returns:
         dict: a dict mapping dataset names (e.g. "Caltech Camera Traps") to dicts
         with keys corresponding to the headers in the .csv file, currently:
-
+
         - name
         - short_name
         - continent
@@ -153,65 +153,65 @@ def read_lila_metadata(metadata_dir, force_download=False):
         - md_results_with_rde
         - json_filename
     """
-
+
     # Put the master metadata file in the same folder where we're putting images
     p = urlparse(lila_metadata_url)
     metadata_filename = os.path.join(metadata_dir,os.path.basename(p.path))
     download_url(lila_metadata_url, metadata_filename, force_download=force_download)
-
+
     df = pd.read_csv(metadata_filename)
-
+
     records = df.to_dict('records')
-
+
     # Parse into a table keyed by dataset name
     metadata_table = {}
-
+
     # r = records[0]
     for r in records:
         if is_empty(r['name']):
             continue
-
+
         # Convert NaN's to None
         for k in r.keys():
             if is_empty(r[k]):
                 r[k] = None
-
+
         metadata_table[r['name']] = r
-
-    return metadata_table
-
+
+    return metadata_table
+

 def read_lila_all_images_file(metadata_dir, force_download=False):
     """
     Downloads if necessary - then unzips if necessary - the .csv file with label mappings for
     all LILA files, and opens the resulting .csv file as a Pandas DataFrame.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
            the local file exists.
-
+
     Returns:
         pd.DataFrame: a DataFrame containing one row per identification in a LILA camera trap image
     """
-
+
     p = urlparse(lila_all_images_url)
     lila_all_images_zip_filename = os.path.join(metadata_dir,os.path.basename(p.path))
     download_url(lila_all_images_url, lila_all_images_zip_filename,
                  force_download=force_download)
-
+
     with zipfile.ZipFile(lila_all_images_zip_filename,'r') as z:
         files = z.namelist()
         assert len(files) == 1
-
+
     unzipped_csv_filename = os.path.join(metadata_dir,files[0])
     if not os.path.isfile(unzipped_csv_filename):
         unzip_file(lila_all_images_zip_filename,metadata_dir)
     else:
-        print('{} already unzipped'.format(unzipped_csv_filename))
-
+        print('{} already unzipped'.format(unzipped_csv_filename))
+
     df = pd.read_csv(unzipped_csv_filename)
-
+
     return df


@@ -223,94 +223,94 @@ def read_metadata_file_for_dataset(ds_name,
                                    force_download=False):
     """
     Downloads if necessary - then unzips if necessary - the .json file for a specific dataset.
-
+
     Args:
         ds_name (str): the name of the dataset for which you want to retrieve metadata (e.g.
-            "Caltech Camera Traps")
+            "Caltech Camera Traps")
         metadata_dir (str): folder to use for temporary LILA metadata files
         metadata_table (dict, optional): an optional dictionary already loaded via
             read_lila_metadata()
         json_url (str, optional): the URL of the metadata file, if None will be retrieved
             via read_lila_metadata()
         preferred_cloud (str, optional): 'gcp' (default), 'azure', or 'aws'
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
            the local file exists.
-
+
     Returns:
         str: the .json filename on the local disk
-
+
     """
-
+
     assert preferred_cloud in lila_base_urls.keys()
-
+
     if json_url is None:
-
+
         if metadata_table is None:
             metadata_table = read_lila_metadata(metadata_dir)
-
+
         json_url = metadata_table[ds_name]['metadata_url_' + preferred_cloud]
-
+
     p = urlparse(json_url)
     json_filename = os.path.join(metadata_dir,os.path.basename(p.path))
     download_url(json_url, json_filename, force_download=force_download)
-
+
     # Unzip if necessary
     if json_filename.endswith('.zip'):
-
+
         with zipfile.ZipFile(json_filename,'r') as z:
             files = z.namelist()
             assert len(files) == 1
         unzipped_json_filename = os.path.join(metadata_dir,files[0])
         if not os.path.isfile(unzipped_json_filename):
-            unzip_file(json_filename,metadata_dir)
+            unzip_file(json_filename,metadata_dir)
         else:
             print('{} already unzipped'.format(unzipped_json_filename))
         json_filename = unzipped_json_filename
-
+
     return json_filename


 #%% Interactive test driver

 if False:
-
+
     pass

     #%% Verify that all base URLs exist
-
+
     # LILA camera trap primary metadata file
     urls = (lila_metadata_url,
             lila_taxonomy_mapping_url,
             lila_all_images_url,
             wildlife_insights_taxonomy_url)
-
+
     from megadetector.utils import url_utils
-
+
     status_codes = url_utils.test_urls(urls,timeout=2.0)
     assert all([code == 200 for code in status_codes])
-
-
+
+
     #%% Verify that the metadata URLs exist for individual datasets
-
+
     metadata_dir = os.path.expanduser('~/lila/metadata')
-
+
     dataset_metadata = read_lila_metadata(metadata_dir)
-
+
     urls_to_test = []
-
+
     # ds_name = next(iter(dataset_metadata.keys()))
     for ds_name in dataset_metadata.keys():
-
+
         ds_info = dataset_metadata[ds_name]
         for cloud_name in lila_base_urls.keys():
             urls_to_test.append(ds_info['metadata_url_' + cloud_name])
-            if ds_info['bbox_url_relative'] != None:
+            if ds_info['bbox_url_relative'] is not None:
                 urls_to_test.append(ds_info['bbox_url_' + cloud_name])
-
+
     status_codes = url_utils.test_urls(urls_to_test,
                                        error_on_failure=True,
                                        n_workers=10,
                                        pool_type='process',
                                        timeout=2.0)
     assert all([code == 200 for code in status_codes])
-
+
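
The docstrings above describe how these helpers are meant to be chained. A short usage sketch based only on the signatures and docstrings shown in this diff; the dataset name is the example from the read_lila_metadata docstring, and the metadata directory mirrors the interactive test driver:

import os
from megadetector.data_management.lila.lila_common import \
    read_lila_metadata, read_metadata_file_for_dataset

metadata_dir = os.path.expanduser('~/lila/metadata')

# dict mapping dataset names to dicts of metadata/image/MD-results URLs
metadata_table = read_lila_metadata(metadata_dir)

# download (and unzip if necessary) the per-dataset .json, returning its local path
json_filename = read_metadata_file_for_dataset(ds_name='Caltech Camera Traps',
                                               metadata_dir=metadata_dir,
                                               metadata_table=metadata_table)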
megadetector/data_management/lila/test_lila_metadata_urls.py

@@ -2,7 +2,7 @@

 test_lila_metadata_urls.py

-Test that all the metadata URLs for LILA camera trap datasets are valid, including MegaDetector
+Test that all the metadata URLs for LILA camera trap datasets are valid, including MegaDetector
 results files.

 Also pick an arbitrary image from each dataset and make sure that URL is valid.
@@ -18,6 +18,7 @@ import os

 from megadetector.data_management.lila.lila_common import read_lila_metadata,\
     read_metadata_file_for_dataset, read_lila_taxonomy_mapping
+from megadetector.utils.url_utils import test_urls

 # We'll write images, metadata downloads, and temporary files here
 lila_local_base = os.path.expanduser('~/lila')
@@ -55,7 +56,7 @@ print('Loaded metadata URLs for {} datasets'.format(len(metadata_table)))

 # Takes ~60 seconds if everything needs to be downloaded and unzipped

-for ds_name in metadata_table.keys():
+for ds_name in metadata_table.keys():

     # Download the main metadata file for this dataset
     metadata_table[ds_name]['json_filename'] = \
@@ -63,7 +64,7 @@ for ds_name in metadata_table.keys():
                                        metadata_dir=metadata_dir,
                                        metadata_table=metadata_table,
                                        force_download=force_download)
-
+
     # Download MD results for this dataset
     for k in md_results_keys:
         md_results_url = metadata_table[ds_name][k]
@@ -93,13 +94,13 @@ image_index = 2000
 #
 # ds_name = list(metadata_table.keys())[0]
 for ds_name in metadata_table.keys():
-
+
     if 'bbox' in ds_name:
         print('Skipping bbox dataset {}'.format(ds_name))
         continue

     print('Processing dataset {}'.format(ds_name))
-
+
     json_filename = metadata_table[ds_name]['json_filename']
     with open(json_filename, 'r') as f:
         data = json.load(f)
@@ -108,20 +109,20 @@ for ds_name in metadata_table.keys():
         clouds = [preferred_cloud]
     else:
         clouds = ['gcp','aws','azure']
-
+
     for cloud in clouds:
-
+
         image_base_url = metadata_table[ds_name]['image_base_url_' + cloud]
         assert not image_base_url.endswith('/')
-
+
         # Download a test image
         test_image_relative_path = data['images'][image_index]['file_name']
         test_image_url = image_base_url + '/' + test_image_relative_path
-
+
         url_to_source[test_image_url] = ds_name + ' metadata ({})'.format(cloud)
-
+
     # Grab an image from the MegaDetector results
-
+
     # k = md_results_keys[2]
     for k in md_results_keys:
         k_fn = k + '_filename'
@@ -133,14 +134,12 @@ for ds_name in metadata_table.keys():
             url_to_source[md_image_url] = ds_name + ' ' + k
     del md_results
     del data
-
+
 # ...for each dataset


 #%% Test URLs

-from megadetector.utils.url_utils import test_urls
-
 urls_to_test = sorted(url_to_source.keys())
 urls_to_test = [fn.replace('\\','/') for fn in urls_to_test]
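
Aside from whitespace cleanup, the change here hoists the test_urls import to module scope. The call pattern it feeds, taken from the interactive test driver in lila_common.py above, looks like this (sketch only; urls_to_test is the list built by the script):

from megadetector.utils.url_utils import test_urls

status_codes = test_urls(urls_to_test,
                         error_on_failure=True,
                         n_workers=10,
                         pool_type='process',
                         timeout=2.0)
assert all([code == 200 for code in status_codes])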