megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: the registry flags this version of megadetector as possibly problematic.

Files changed (197)
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
Rendered hunks follow for three of the LILA data-management files. Most of the paired -/+ lines appear to differ only in trailing whitespace.

--- a/megadetector/data_management/lila/get_lila_annotation_counts.py
+++ b/megadetector/data_management/lila/get_lila_annotation_counts.py
@@ -3,7 +3,7 @@
 get_lila_annotation_counts.py
 
 Generates a .json-formatted dictionary mapping each LILA dataset to all categories
-that exist for that dataset, with counts for the number of occurrences of each category
+that exist for that dataset, with counts for the number of occurrences of each category
 (the number of *annotations* for each category, not the number of *images*).
 
 Also loads the taxonomy mapping file, to include scientific names for each category.
@@ -17,8 +17,11 @@ get_lila_image_counts.py counts the number of *images* for each category in each
 import json
 import os
 
+from collections import defaultdict
+
 from megadetector.data_management.lila.lila_common import \
     read_lila_metadata, read_metadata_file_for_dataset, read_lila_taxonomy_mapping
+from megadetector.utils import ct_utils
 
 # cloud provider to use for downloading images; options are 'gcp', 'azure', or 'aws'
 preferred_cloud = 'gcp'
@@ -53,21 +56,21 @@ datasets_with_taxonomy_mapping = set()
 
 # i_row = 1; row = taxonomy_df.iloc[i_row]; row
 for i_row,row in taxonomy_df.iterrows():
-
+
     datasets_with_taxonomy_mapping.add(row['dataset_name'])
-
+
     ds_query = row['dataset_name'] + ':' + row['query']
     ds_query = ds_query.lower()
-
+
     if not isinstance(row['scientific_name'],str):
         unmapped_queries.add(ds_query)
         ds_query_to_scientific_name[ds_query] = 'unmapped'
         continue
-
+
     ds_query_to_scientific_name[ds_query] = row['scientific_name']
-
+
 print('Loaded taxonomy mappings for {} datasets'.format(len(datasets_with_taxonomy_mapping)))
-
+
 
 #%% Download and parse the metadata file
 
@@ -78,55 +81,55 @@ print('Loaded metadata URLs for {} datasets'.format(len(metadata_table)))
 
 #%% Download and extract metadata for each dataset
 
-for ds_name in metadata_table.keys():
-    metadata_table[ds_name]['json_filename'] = read_metadata_file_for_dataset(ds_name=ds_name,
-                                                                              metadata_dir=metadata_dir,
-                                                                              metadata_table=metadata_table)
+for ds_name in metadata_table.keys():
+    metadata_table[ds_name]['json_filename'] = \
+        read_metadata_file_for_dataset(ds_name=ds_name,
+                                       metadata_dir=metadata_dir,
+                                       metadata_table=metadata_table,
+                                       preferred_cloud=preferred_cloud)
 
 
 #%% Get category names and counts for each dataset
 
 # Takes ~5 minutes
 
-from collections import defaultdict
-
 dataset_to_categories = {}
 
 # ds_name = 'NACTI'
 for ds_name in metadata_table.keys():
-
+
     taxonomy_mapping_available = (ds_name in datasets_with_taxonomy_mapping)
-
+
     if not taxonomy_mapping_available:
         print('Warning: taxonomy mapping not available for {}'.format(ds_name))
-
+
     print('Finding categories in {}'.format(ds_name))
 
     json_filename = metadata_table[ds_name]['json_filename']
     base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
     assert not base_url.endswith('/')
-
-    # Open the metadata file
+
+    # Open the metadata file
     with open(json_filename, 'r') as f:
         data = json.load(f)
-
+
     # Collect list of categories and mappings to category name
    categories = data['categories']
-
+
     category_id_to_count = defaultdict(int)
-    annotations = data['annotations']
-
+    annotations = data['annotations']
+
     # ann = annotations[0]
     for ann in annotations:
         category_id_to_count[ann['category_id']] = category_id_to_count[ann['category_id']] + 1
-
+
     # c = categories[0]
     for c in categories:
-        count = category_id_to_count[c['id']]
+        count = category_id_to_count[c['id']]
         if 'count' in c:
-            assert 'bbox' in ds_name or c['count'] == count
+            assert 'bbox' in ds_name or c['count'] == count
         c['count'] = count
-
+
     # Don't do taxonomy mapping for bbox data sets, which are sometimes just binary and are
     # always redundant with the class-level data sets.
     if 'bbox' in ds_name:
@@ -144,7 +147,7 @@ for ds_name in metadata_table.keys():
         sn = ds_query_to_scientific_name[taxonomy_query_string]
         assert sn is not None and len(sn) > 0
         c['scientific_name_from_taxonomy_mapping'] = sn
-
+
     dataset_to_categories[ds_name] = categories
 
 # ...for each dataset
@@ -154,19 +157,18 @@ for ds_name in metadata_table.keys():
 
 # ds_name = list(dataset_to_categories.keys())[0]
 for ds_name in dataset_to_categories:
-
+
     print('\n** Category counts for {} **\n'.format(ds_name))
-
+
     categories = dataset_to_categories[ds_name]
     categories = sorted(categories, key=lambda x: x['count'], reverse=True)
-
+
     for c in categories:
         print('{} ({}): {}'.format(c['name'],c['scientific_name_from_taxonomy_mapping'],c['count']))
-
+
 # ...for each dataset
 
 
 #%% Save the results
 
-with open(output_file, 'w') as f:
-    json.dump(dataset_to_categories,f,indent=1)
+ct_utils.write_json(output_file, dataset_to_categories)
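The last hunk above swaps a hand-rolled json.dump for ct_utils.write_json, and the same file hoists the defaultdict import from a mid-script cell up to the top-level imports. The diff only confirms the call's argument order (output path first, object second); the sketch below shows a plausible equivalent helper, where the indent default and the directory creation are illustrative assumptions rather than the confirmed ct_utils API.

import json
import os

def write_json(path, content, indent=1):
    # Illustrative stand-in for megadetector.utils.ct_utils.write_json;
    # the argument order matches the diff, everything else is an assumption.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, 'w') as f:
        json.dump(content, f, indent=indent)

# Equivalent to the two removed lines in the hunk above:
#     with open(output_file, 'w') as f:
#         json.dump(dataset_to_categories, f, indent=1)
write_json('dataset_to_categories.json', {'example-dataset': []})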
--- a/megadetector/data_management/lila/get_lila_image_counts.py
+++ b/megadetector/data_management/lila/get_lila_image_counts.py
@@ -5,7 +5,7 @@ get_lila_image_counts.py
 Count the number of images and bounding boxes with each label in one or more LILA datasets.
 
 This script doesn't write these counts out anywhere other than the console, it's just intended
-as a template for doing operations like this on LILA data. get_lila_annotation_counts.py writes
+as a template for doing operations like this on LILA data. get_lila_annotation_counts.py writes
 information out to a .json file, but it counts *annotations*, not *images*, for each category.
 
 """
@@ -40,53 +40,53 @@ metadata_table = read_lila_metadata(metadata_dir)
 if datasets_of_interest is None:
     datasets_of_interest = list(metadata_table.keys())
 
-for ds_name in datasets_of_interest:
+for ds_name in datasets_of_interest:
     metadata_table[ds_name]['json_filename'] = read_metadata_file_for_dataset(ds_name=ds_name,
                                                                               metadata_dir=metadata_dir,
                                                                               metadata_table=metadata_table)
-
-
+
+
 #%% Count categories
 
 ds_name_to_category_counts = {}
 
 # ds_name = datasets_of_interest[0]
 for ds_name in datasets_of_interest:
-
+
     category_to_image_count = {}
     category_to_bbox_count = {}
-
+
     print('Counting categories in: ' + ds_name)
-
+
     json_filename = metadata_table[ds_name]['json_filename']
     with open(json_filename, 'r') as f:
         data = json.load(f)
-
+
     categories = data['categories']
     category_ids = [c['id'] for c in categories]
     for c in categories:
         category_id_to_name = {c['id']:c['name'] for c in categories}
     annotations = data['annotations']
     images = data['images']
-
-    for category_id in category_ids:
-        category_name = category_id_to_name[category_id]
+
+    for category_id in category_ids:
+        category_name = category_id_to_name[category_id]
         category_to_image_count[category_name] = 0
         category_to_bbox_count[category_name] = 0
-
+
     image_id_to_category_names = defaultdict(set)
-
+
     # Go through annotations, marking each image with the categories that are present
     #
     # ann = annotations[0]
     for ann in annotations:
-
+
         category_name = category_id_to_name[ann['category_id']]
         image_id_to_category_names[ann['image_id']].add(category_name)
 
     # Now go through images and count categories
     category_to_count = defaultdict(int)
-
+
     # im = images[0]
     for im in images:
         categories_this_image = image_id_to_category_names[im['id']]
@@ -94,19 +94,19 @@ for ds_name in datasets_of_interest:
             category_to_count[category_name] += 1
 
     ds_name_to_category_counts[ds_name] = category_to_count
-
+
 # ...for each dataset
-
+
 
 #%% Print the results
 
 for ds_name in ds_name_to_category_counts:
-
+
     print('\n** Category counts for {} **\n'.format(ds_name))
-
+
     category_to_count = ds_name_to_category_counts[ds_name]
-    category_to_count = {k: v for k, v in sorted(category_to_count.items(), reverse=True,
+    category_to_count = {k: v for k, v in sorted(category_to_count.items(), reverse=True,
                                                  key=lambda item: item[1])}
-
-    for category_name in category_to_count.keys():
+
+    for category_name in category_to_count.keys():
         print('{}: {}'.format(category_name,category_to_count[category_name]))
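The hunks above preserve the script's core distinction between image counts and annotation counts: annotations are first folded into a per-image set of categories, so several annotations of the same category on one image count that image only once. A self-contained sketch of the pattern on made-up COCO-style records (the toy data is illustrative only):

from collections import defaultdict

# Toy COCO-style annotations; not real LILA data
annotations = [
    {'image_id': 'img1', 'category_id': 1},
    {'image_id': 'img1', 'category_id': 1},  # duplicate category on one image
    {'image_id': 'img2', 'category_id': 1},
    {'image_id': 'img2', 'category_id': 2},
]
category_id_to_name = {1: 'deer', 2: 'fox'}

# Annotation counts: one increment per annotation
annotation_counts = defaultdict(int)
for ann in annotations:
    annotation_counts[category_id_to_name[ann['category_id']]] += 1

# Image counts: mark each image with the set of categories present,
# then count images per category (duplicates within an image collapse)
image_id_to_category_names = defaultdict(set)
for ann in annotations:
    image_id_to_category_names[ann['image_id']].add(
        category_id_to_name[ann['category_id']])

image_counts = defaultdict(int)
for category_names in image_id_to_category_names.values():
    for category_name in category_names:
        image_counts[category_name] += 1

print(dict(annotation_counts))  # {'deer': 3, 'fox': 1}
print(dict(image_counts))       # {'deer': 2, 'fox': 1}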
--- a/megadetector/data_management/lila/lila_common.py
+++ b/megadetector/data_management/lila/lila_common.py
@@ -53,30 +53,30 @@ for url in lila_base_urls.values():
 def read_wildlife_insights_taxonomy_mapping(metadata_dir, force_download=False):
     """
     Reads the WI taxonomy mapping file, downloading the .json data (and writing to .csv) if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the taxonomy mapping file
+        force_download (bool, optional): download the taxonomy mapping file
             even if the local file exists.
-
+
     Returns:
         pd.dataframe: A DataFrame with taxonomy information
     """
-
+
     wi_taxonomy_csv_path = os.path.join(metadata_dir,wildlife_insights_taxonomy_local_csv_filename)
-
+
     if os.path.exists(wi_taxonomy_csv_path):
         df = pd.read_csv(wi_taxonomy_csv_path)
     else:
         wi_taxonomy_json_path = os.path.join(metadata_dir,wildlife_insights_taxonomy_local_json_filename)
-        download_url(wildlife_insights_taxonomy_url, wi_taxonomy_json_path,
+        download_url(wildlife_insights_taxonomy_url, wi_taxonomy_json_path,
                      force_download=force_download)
         with open(wi_taxonomy_json_path,'r') as f:
             d = json.load(f)
-
+
         # We haven't implemented paging, make sure that's not an issue
         assert d['meta']['totalItems'] < wildlife_insights_page_size
-
+
         # d['data'] is a list of items that look like:
         """
         {'id': 2000003,
@@ -92,46 +92,46 @@ def read_wildlife_insights_taxonomy_mapping(metadata_dir, force_download=False):
         """
         df = pd.DataFrame(d['data'])
         df.to_csv(wi_taxonomy_csv_path,index=False)
-
+
     return df
 
-
+
 def read_lila_taxonomy_mapping(metadata_dir, force_download=False):
     """
     Reads the LILA taxonomy mapping file, downloading the .csv file if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the taxonomy mapping file
-            even if the local file exists.
-
+        force_download (bool, optional): download the taxonomy mapping file
+            even if the local file exists.
+
     Returns:
         pd.DataFrame: a DataFrame with one row per identification
     """
-
+
     p = urlparse(lila_taxonomy_mapping_url)
     taxonomy_filename = os.path.join(metadata_dir,os.path.basename(p.path))
-    download_url(lila_taxonomy_mapping_url, taxonomy_filename,
+    download_url(lila_taxonomy_mapping_url, taxonomy_filename,
                  force_download=force_download)
-
+
     df = pd.read_csv(lila_taxonomy_mapping_url)
-
+
     return df
 
-
+
 def read_lila_metadata(metadata_dir, force_download=False):
     """
     Reads LILA metadata (URLs to each dataset), downloading the .csv file if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
             the local file exists.
-
+
     Returns:
         dict: a dict mapping dataset names (e.g. "Caltech Camera Traps") to dicts
             with keys corresponding to the headers in the .csv file, currently:
-
+
             - name
            - short_name
            - continent
@@ -153,65 +153,65 @@ def read_lila_metadata(metadata_dir, force_download=False):
             - md_results_with_rde
             - json_filename
     """
-
+
     # Put the master metadata file in the same folder where we're putting images
     p = urlparse(lila_metadata_url)
     metadata_filename = os.path.join(metadata_dir,os.path.basename(p.path))
     download_url(lila_metadata_url, metadata_filename, force_download=force_download)
-
+
     df = pd.read_csv(metadata_filename)
-
+
     records = df.to_dict('records')
-
+
     # Parse into a table keyed by dataset name
     metadata_table = {}
-
+
     # r = records[0]
     for r in records:
         if is_empty(r['name']):
             continue
-
+
         # Convert NaN's to None
         for k in r.keys():
             if is_empty(r[k]):
                 r[k] = None
-
+
         metadata_table[r['name']] = r
-
-    return metadata_table
-
+
+    return metadata_table
+
 
 def read_lila_all_images_file(metadata_dir, force_download=False):
     """
     Downloads if necessary - then unzips if necessary - the .csv file with label mappings for
     all LILA files, and opens the resulting .csv file as a Pandas DataFrame.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
            the local file exists.
-
+
     Returns:
         pd.DataFrame: a DataFrame containing one row per identification in a LILA camera trap image
     """
-
+
     p = urlparse(lila_all_images_url)
     lila_all_images_zip_filename = os.path.join(metadata_dir,os.path.basename(p.path))
     download_url(lila_all_images_url, lila_all_images_zip_filename,
                  force_download=force_download)
-
+
     with zipfile.ZipFile(lila_all_images_zip_filename,'r') as z:
         files = z.namelist()
         assert len(files) == 1
-
+
     unzipped_csv_filename = os.path.join(metadata_dir,files[0])
     if not os.path.isfile(unzipped_csv_filename):
         unzip_file(lila_all_images_zip_filename,metadata_dir)
     else:
-        print('{} already unzipped'.format(unzipped_csv_filename))
-
+        print('{} already unzipped'.format(unzipped_csv_filename))
+
     df = pd.read_csv(unzipped_csv_filename)
-
+
     return df
 
 
@@ -223,94 +223,97 @@ def read_metadata_file_for_dataset(ds_name,
                                    force_download=False):
     """
     Downloads if necessary - then unzips if necessary - the .json file for a specific dataset.
-
+
     Args:
         ds_name (str): the name of the dataset for which you want to retrieve metadata (e.g.
-            "Caltech Camera Traps")
+            "Caltech Camera Traps")
         metadata_dir (str): folder to use for temporary LILA metadata files
         metadata_table (dict, optional): an optional dictionary already loaded via
             read_lila_metadata()
         json_url (str, optional): the URL of the metadata file, if None will be retrieved
            via read_lila_metadata()
         preferred_cloud (str, optional): 'gcp' (default), 'azure', or 'aws'
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
            the local file exists.
-
+
     Returns:
         str: the .json filename on the local disk
-
+
     """
-
+
+    if preferred_cloud is None:
+        preferred_cloud = 'gcp'
+
     assert preferred_cloud in lila_base_urls.keys()
-
+
     if json_url is None:
-
+
         if metadata_table is None:
             metadata_table = read_lila_metadata(metadata_dir)
-
+
         json_url = metadata_table[ds_name]['metadata_url_' + preferred_cloud]
-
+
     p = urlparse(json_url)
     json_filename = os.path.join(metadata_dir,os.path.basename(p.path))
     download_url(json_url, json_filename, force_download=force_download)
-
+
     # Unzip if necessary
     if json_filename.endswith('.zip'):
-
+
         with zipfile.ZipFile(json_filename,'r') as z:
             files = z.namelist()
             assert len(files) == 1
         unzipped_json_filename = os.path.join(metadata_dir,files[0])
         if not os.path.isfile(unzipped_json_filename):
-            unzip_file(json_filename,metadata_dir)
+            unzip_file(json_filename,metadata_dir)
         else:
            print('{} already unzipped'.format(unzipped_json_filename))
         json_filename = unzipped_json_filename
-
+
     return json_filename
 
 
 #%% Interactive test driver
 
 if False:
-
+
     pass
 
     #%% Verify that all base URLs exist
-
+
     # LILA camera trap primary metadata file
     urls = (lila_metadata_url,
            lila_taxonomy_mapping_url,
            lila_all_images_url,
            wildlife_insights_taxonomy_url)
-
+
     from megadetector.utils import url_utils
-
+
     status_codes = url_utils.test_urls(urls,timeout=2.0)
     assert all([code == 200 for code in status_codes])
-
-
+
+
    #%% Verify that the metadata URLs exist for individual datasets
-
+
    metadata_dir = os.path.expanduser('~/lila/metadata')
-
+
    dataset_metadata = read_lila_metadata(metadata_dir)
-
+
    urls_to_test = []
-
+
    # ds_name = next(iter(dataset_metadata.keys()))
    for ds_name in dataset_metadata.keys():
-
+
        ds_info = dataset_metadata[ds_name]
        for cloud_name in lila_base_urls.keys():
            urls_to_test.append(ds_info['metadata_url_' + cloud_name])
-            if ds_info['bbox_url_relative'] != None:
+            if ds_info['bbox_url_relative'] is not None:
                urls_to_test.append(ds_info['bbox_url_' + cloud_name])
-
+
    status_codes = url_utils.test_urls(urls_to_test,
                                       error_on_failure=True,
                                       n_workers=10,
                                       pool_type='process',
                                       timeout=2.0)
    assert all([code == 200 for code in status_codes])
-
+
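The behavioral change in read_metadata_file_for_dataset is the new preferred_cloud defaulting: None now falls back to 'gcp' before the assert against lila_base_urls.keys(), and the updated get_lila_annotation_counts.py threads its preferred_cloud setting through this parameter. A hedged usage sketch; the import path and keyword arguments come from the hunks above, while the metadata directory is only an example location:

import os

from megadetector.data_management.lila.lila_common import \
    read_lila_metadata, read_metadata_file_for_dataset

metadata_dir = os.path.expanduser('~/lila/metadata')  # example location
metadata_table = read_lila_metadata(metadata_dir)

# preferred_cloud=None defaults to 'gcp' inside the function as of 10.0.0;
# 'azure' and 'aws' are the other accepted values per the docstring.
for ds_name in metadata_table.keys():
    metadata_table[ds_name]['json_filename'] = \
        read_metadata_file_for_dataset(ds_name=ds_name,
                                       metadata_dir=metadata_dir,
                                       metadata_table=metadata_table,
                                       preferred_cloud=None)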