megadetector 5.0.28-py3-none-any.whl → 5.0.29-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +231 -224
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +304 -262
- megadetector/detection/run_detector.py +177 -164
- megadetector/detection/run_detector_batch.py +364 -363
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +256 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +290 -282
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +415 -415
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +219 -146
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +23 -20
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +313 -298
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1018 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1457 -398
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +61 -61
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +401 -397
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +79 -73
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.28.dist-info/RECORD +0 -209
megadetector/data_management/lila/get_lila_annotation_counts.py

@@ -3,7 +3,7 @@
 get_lila_annotation_counts.py
 
 Generates a .json-formatted dictionary mapping each LILA dataset to all categories
-that exist for that dataset, with counts for the number of occurrences of each category
+that exist for that dataset, with counts for the number of occurrences of each category
 (the number of *annotations* for each category, not the number of *images*).
 
 Also loads the taxonomy mapping file, to include scientific names for each category.
@@ -17,8 +17,11 @@ get_lila_image_counts.py counts the number of *images* for each category in each
 import json
 import os
 
+from collections import defaultdict
+
 from megadetector.data_management.lila.lila_common import \
     read_lila_metadata, read_metadata_file_for_dataset, read_lila_taxonomy_mapping
+from megadetector.utils import ct_utils
 
 # cloud provider to use for downloading images; options are 'gcp', 'azure', or 'aws'
 preferred_cloud = 'gcp'
@@ -53,21 +56,21 @@ datasets_with_taxonomy_mapping = set()
 
 # i_row = 1; row = taxonomy_df.iloc[i_row]; row
 for i_row,row in taxonomy_df.iterrows():
-
+
     datasets_with_taxonomy_mapping.add(row['dataset_name'])
-
+
     ds_query = row['dataset_name'] + ':' + row['query']
     ds_query = ds_query.lower()
-
+
     if not isinstance(row['scientific_name'],str):
         unmapped_queries.add(ds_query)
         ds_query_to_scientific_name[ds_query] = 'unmapped'
         continue
-
+
     ds_query_to_scientific_name[ds_query] = row['scientific_name']
-
+
 print('Loaded taxonomy mappings for {} datasets'.format(len(datasets_with_taxonomy_mapping)))
-
+
 
 #%% Download and parse the metadata file
 
@@ -78,7 +81,7 @@ print('Loaded metadata URLs for {} datasets'.format(len(metadata_table)))
 
 #%% Download and extract metadata for each dataset
 
-for ds_name in metadata_table.keys():
+for ds_name in metadata_table.keys():
     metadata_table[ds_name]['json_filename'] = read_metadata_file_for_dataset(ds_name=ds_name,
                                                                               metadata_dir=metadata_dir,
                                                                               metadata_table=metadata_table)
@@ -88,45 +91,43 @@ for ds_name in metadata_table.keys():
 
 # Takes ~5 minutes
 
-from collections import defaultdict
-
 dataset_to_categories = {}
 
 # ds_name = 'NACTI'
 for ds_name in metadata_table.keys():
-
+
     taxonomy_mapping_available = (ds_name in datasets_with_taxonomy_mapping)
-
+
     if not taxonomy_mapping_available:
         print('Warning: taxonomy mapping not available for {}'.format(ds_name))
-
+
     print('Finding categories in {}'.format(ds_name))
 
     json_filename = metadata_table[ds_name]['json_filename']
     base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
     assert not base_url.endswith('/')
-
-    # Open the metadata file
+
+    # Open the metadata file
     with open(json_filename, 'r') as f:
         data = json.load(f)
-
+
     # Collect list of categories and mappings to category name
     categories = data['categories']
-
+
     category_id_to_count = defaultdict(int)
-    annotations = data['annotations']
-
+    annotations = data['annotations']
+
     # ann = annotations[0]
     for ann in annotations:
         category_id_to_count[ann['category_id']] = category_id_to_count[ann['category_id']] + 1
-
+
     # c = categories[0]
     for c in categories:
-        count = category_id_to_count[c['id']]
+        count = category_id_to_count[c['id']]
         if 'count' in c:
-            assert 'bbox' in ds_name or c['count'] == count
+            assert 'bbox' in ds_name or c['count'] == count
         c['count'] = count
-
+
     # Don't do taxonomy mapping for bbox data sets, which are sometimes just binary and are
     # always redundant with the class-level data sets.
     if 'bbox' in ds_name:
@@ -144,7 +145,7 @@ for ds_name in metadata_table.keys():
         sn = ds_query_to_scientific_name[taxonomy_query_string]
         assert sn is not None and len(sn) > 0
         c['scientific_name_from_taxonomy_mapping'] = sn
-
+
     dataset_to_categories[ds_name] = categories
 
 # ...for each dataset
@@ -154,19 +155,18 @@ for ds_name in metadata_table.keys():
 
 # ds_name = list(dataset_to_categories.keys())[0]
 for ds_name in dataset_to_categories:
-
+
     print('\n** Category counts for {} **\n'.format(ds_name))
-
+
     categories = dataset_to_categories[ds_name]
     categories = sorted(categories, key=lambda x: x['count'], reverse=True)
-
+
     for c in categories:
         print('{} ({}): {}'.format(c['name'],c['scientific_name_from_taxonomy_mapping'],c['count']))
-
+
 # ...for each dataset
 
 
 #%% Save the results
 
-
-    json.dump(dataset_to_categories,f,indent=1)
+ct_utils.write_json(output_file, dataset_to_categories)
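The final hunk above replaces an inline json.dump call with ct_utils.write_json(output_file, dataset_to_categories), using the newly imported megadetector.utils.ct_utils. Only the call site is visible in this diff, so the exact helper signature is an assumption; a minimal sketch of a helper with that shape:

import json

def write_json(path, content, indent=1):
    # Hypothetical sketch matching the call write_json(output_file, dataset_to_categories);
    # the real ct_utils implementation may accept additional options.
    with open(path, 'w') as f:
        json.dump(content, f, indent=indent)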
megadetector/data_management/lila/get_lila_image_counts.py

@@ -5,7 +5,7 @@ get_lila_image_counts.py
 Count the number of images and bounding boxes with each label in one or more LILA datasets.
 
 This script doesn't write these counts out anywhere other than the console, it's just intended
-as a template for doing operations like this on LILA data. get_lila_annotation_counts.py writes
+as a template for doing operations like this on LILA data. get_lila_annotation_counts.py writes
 information out to a .json file, but it counts *annotations*, not *images*, for each category.
 
 """
@@ -40,53 +40,53 @@ metadata_table = read_lila_metadata(metadata_dir)
 if datasets_of_interest is None:
     datasets_of_interest = list(metadata_table.keys())
 
-for ds_name in datasets_of_interest:
+for ds_name in datasets_of_interest:
     metadata_table[ds_name]['json_filename'] = read_metadata_file_for_dataset(ds_name=ds_name,
                                                                               metadata_dir=metadata_dir,
                                                                               metadata_table=metadata_table)
-
-
+
+
 #%% Count categories
 
 ds_name_to_category_counts = {}
 
 # ds_name = datasets_of_interest[0]
 for ds_name in datasets_of_interest:
-
+
     category_to_image_count = {}
     category_to_bbox_count = {}
-
+
     print('Counting categories in: ' + ds_name)
-
+
     json_filename = metadata_table[ds_name]['json_filename']
     with open(json_filename, 'r') as f:
         data = json.load(f)
-
+
     categories = data['categories']
     category_ids = [c['id'] for c in categories]
     for c in categories:
     category_id_to_name = {c['id']:c['name'] for c in categories}
     annotations = data['annotations']
     images = data['images']
-
-    for category_id in category_ids:
-        category_name = category_id_to_name[category_id]
+
+    for category_id in category_ids:
+        category_name = category_id_to_name[category_id]
         category_to_image_count[category_name] = 0
         category_to_bbox_count[category_name] = 0
-
+
     image_id_to_category_names = defaultdict(set)
-
+
     # Go through annotations, marking each image with the categories that are present
     #
     # ann = annotations[0]
     for ann in annotations:
-
+
         category_name = category_id_to_name[ann['category_id']]
         image_id_to_category_names[ann['image_id']].add(category_name)
 
     # Now go through images and count categories
     category_to_count = defaultdict(int)
-
+
     # im = images[0]
     for im in images:
         categories_this_image = image_id_to_category_names[im['id']]
@@ -94,19 +94,19 @@ for ds_name in datasets_of_interest:
             category_to_count[category_name] += 1
 
     ds_name_to_category_counts[ds_name] = category_to_count
-
+
 # ...for each dataset
-
+
 
 #%% Print the results
 
 for ds_name in ds_name_to_category_counts:
-
+
     print('\n** Category counts for {} **\n'.format(ds_name))
-
+
     category_to_count = ds_name_to_category_counts[ds_name]
-    category_to_count = {k: v for k, v in sorted(category_to_count.items(), reverse=True,
+    category_to_count = {k: v for k, v in sorted(category_to_count.items(), reverse=True,
                                                  key=lambda item: item[1])}
-
-    for category_name in category_to_count.keys():
+
+    for category_name in category_to_count.keys():
         print('{}: {}'.format(category_name,category_to_count[category_name]))
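The loops above count *images* per category rather than *annotations*: annotations are first grouped by image via a defaultdict(set), so each category is counted at most once per image. A condensed, self-contained sketch of that pattern using toy in-memory data (not a real LILA metadata file):

from collections import defaultdict

# Toy stand-ins for data['annotations'] and data['images']
annotations = [{'image_id': 'im0', 'category_id': 1},
               {'image_id': 'im0', 'category_id': 1},
               {'image_id': 'im1', 'category_id': 2}]
images = [{'id': 'im0'}, {'id': 'im1'}]
category_id_to_name = {1: 'deer', 2: 'empty'}

# Mark each image with the set of categories present in it
image_id_to_category_names = defaultdict(set)
for ann in annotations:
    category_name = category_id_to_name[ann['category_id']]
    image_id_to_category_names[ann['image_id']].add(category_name)

# Count each category at most once per image
category_to_count = defaultdict(int)
for im in images:
    for category_name in image_id_to_category_names[im['id']]:
        category_to_count[category_name] += 1

print(dict(category_to_count))  # {'deer': 1, 'empty': 1}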
megadetector/data_management/lila/lila_common.py

@@ -53,30 +53,30 @@ for url in lila_base_urls.values():
 def read_wildlife_insights_taxonomy_mapping(metadata_dir, force_download=False):
     """
     Reads the WI taxonomy mapping file, downloading the .json data (and writing to .csv) if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the taxonomy mapping file
+        force_download (bool, optional): download the taxonomy mapping file
            even if the local file exists.
-
+
     Returns:
         pd.dataframe: A DataFrame with taxonomy information
     """
-
+
     wi_taxonomy_csv_path = os.path.join(metadata_dir,wildlife_insights_taxonomy_local_csv_filename)
-
+
     if os.path.exists(wi_taxonomy_csv_path):
         df = pd.read_csv(wi_taxonomy_csv_path)
     else:
         wi_taxonomy_json_path = os.path.join(metadata_dir,wildlife_insights_taxonomy_local_json_filename)
-        download_url(wildlife_insights_taxonomy_url, wi_taxonomy_json_path,
+        download_url(wildlife_insights_taxonomy_url, wi_taxonomy_json_path,
                      force_download=force_download)
         with open(wi_taxonomy_json_path,'r') as f:
             d = json.load(f)
-
+
         # We haven't implemented paging, make sure that's not an issue
         assert d['meta']['totalItems'] < wildlife_insights_page_size
-
+
         # d['data'] is a list of items that look like:
         """
         {'id': 2000003,
@@ -92,46 +92,46 @@ def read_wildlife_insights_taxonomy_mapping(metadata_dir, force_download=False):
         """
         df = pd.DataFrame(d['data'])
         df.to_csv(wi_taxonomy_csv_path,index=False)
-
+
     return df
 
-
+
 def read_lila_taxonomy_mapping(metadata_dir, force_download=False):
     """
     Reads the LILA taxonomy mapping file, downloading the .csv file if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the taxonomy mapping file
-            even if the local file exists.
-
+        force_download (bool, optional): download the taxonomy mapping file
+            even if the local file exists.
+
     Returns:
         pd.DataFrame: a DataFrame with one row per identification
     """
-
+
     p = urlparse(lila_taxonomy_mapping_url)
     taxonomy_filename = os.path.join(metadata_dir,os.path.basename(p.path))
-    download_url(lila_taxonomy_mapping_url, taxonomy_filename,
+    download_url(lila_taxonomy_mapping_url, taxonomy_filename,
                  force_download=force_download)
-
+
     df = pd.read_csv(lila_taxonomy_mapping_url)
-
+
     return df
 
-
+
 def read_lila_metadata(metadata_dir, force_download=False):
     """
     Reads LILA metadata (URLs to each dataset), downloading the .csv file if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
             the local file exists.
-
+
     Returns:
         dict: a dict mapping dataset names (e.g. "Caltech Camera Traps") to dicts
         with keys corresponding to the headers in the .csv file, currently:
-
+
         - name
         - short_name
         - continent
@@ -153,65 +153,65 @@ def read_lila_metadata(metadata_dir, force_download=False):
        - md_results_with_rde
        - json_filename
    """
-
+
    # Put the master metadata file in the same folder where we're putting images
    p = urlparse(lila_metadata_url)
    metadata_filename = os.path.join(metadata_dir,os.path.basename(p.path))
    download_url(lila_metadata_url, metadata_filename, force_download=force_download)
-
+
    df = pd.read_csv(metadata_filename)
-
+
    records = df.to_dict('records')
-
+
    # Parse into a table keyed by dataset name
    metadata_table = {}
-
+
    # r = records[0]
    for r in records:
        if is_empty(r['name']):
            continue
-
+
        # Convert NaN's to None
        for k in r.keys():
            if is_empty(r[k]):
                r[k] = None
-
+
        metadata_table[r['name']] = r
-
-    return metadata_table
-
+
+    return metadata_table
+
 
 def read_lila_all_images_file(metadata_dir, force_download=False):
    """
    Downloads if necessary - then unzips if necessary - the .csv file with label mappings for
    all LILA files, and opens the resulting .csv file as a Pandas DataFrame.
-
+
    Args:
        metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
            the local file exists.
-
+
    Returns:
        pd.DataFrame: a DataFrame containing one row per identification in a LILA camera trap image
    """
-
+
    p = urlparse(lila_all_images_url)
    lila_all_images_zip_filename = os.path.join(metadata_dir,os.path.basename(p.path))
    download_url(lila_all_images_url, lila_all_images_zip_filename,
                 force_download=force_download)
-
+
    with zipfile.ZipFile(lila_all_images_zip_filename,'r') as z:
        files = z.namelist()
    assert len(files) == 1
-
+
    unzipped_csv_filename = os.path.join(metadata_dir,files[0])
    if not os.path.isfile(unzipped_csv_filename):
        unzip_file(lila_all_images_zip_filename,metadata_dir)
    else:
-        print('{} already unzipped'.format(unzipped_csv_filename))
-
+        print('{} already unzipped'.format(unzipped_csv_filename))
+
    df = pd.read_csv(unzipped_csv_filename)
-
+
    return df
 
 
@@ -223,94 +223,94 @@ def read_metadata_file_for_dataset(ds_name,
                                    force_download=False):
    """
    Downloads if necessary - then unzips if necessary - the .json file for a specific dataset.
-
+
    Args:
        ds_name (str): the name of the dataset for which you want to retrieve metadata (e.g.
-            "Caltech Camera Traps")
+            "Caltech Camera Traps")
        metadata_dir (str): folder to use for temporary LILA metadata files
        metadata_table (dict, optional): an optional dictionary already loaded via
            read_lila_metadata()
        json_url (str, optional): the URL of the metadata file, if None will be retrieved
            via read_lila_metadata()
        preferred_cloud (str, optional): 'gcp' (default), 'azure', or 'aws'
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
            the local file exists.
-
+
    Returns:
        str: the .json filename on the local disk
-
+
    """
-
+
    assert preferred_cloud in lila_base_urls.keys()
-
+
    if json_url is None:
-
+
        if metadata_table is None:
            metadata_table = read_lila_metadata(metadata_dir)
-
+
        json_url = metadata_table[ds_name]['metadata_url_' + preferred_cloud]
-
+
    p = urlparse(json_url)
    json_filename = os.path.join(metadata_dir,os.path.basename(p.path))
    download_url(json_url, json_filename, force_download=force_download)
-
+
    # Unzip if necessary
    if json_filename.endswith('.zip'):
-
+
        with zipfile.ZipFile(json_filename,'r') as z:
            files = z.namelist()
        assert len(files) == 1
        unzipped_json_filename = os.path.join(metadata_dir,files[0])
        if not os.path.isfile(unzipped_json_filename):
-            unzip_file(json_filename,metadata_dir)
+            unzip_file(json_filename,metadata_dir)
        else:
            print('{} already unzipped'.format(unzipped_json_filename))
        json_filename = unzipped_json_filename
-
+
    return json_filename
 
 
 #%% Interactive test driver
 
 if False:
-
+
    pass
 
    #%% Verify that all base URLs exist
-
+
    # LILA camera trap primary metadata file
    urls = (lila_metadata_url,
            lila_taxonomy_mapping_url,
            lila_all_images_url,
            wildlife_insights_taxonomy_url)
-
+
    from megadetector.utils import url_utils
-
+
    status_codes = url_utils.test_urls(urls,timeout=2.0)
    assert all([code == 200 for code in status_codes])
-
-
+
+
    #%% Verify that the metadata URLs exist for individual datasets
-
+
    metadata_dir = os.path.expanduser('~/lila/metadata')
-
+
    dataset_metadata = read_lila_metadata(metadata_dir)
-
+
    urls_to_test = []
-
+
    # ds_name = next(iter(dataset_metadata.keys()))
    for ds_name in dataset_metadata.keys():
-
+
        ds_info = dataset_metadata[ds_name]
        for cloud_name in lila_base_urls.keys():
            urls_to_test.append(ds_info['metadata_url_' + cloud_name])
-            if ds_info['bbox_url_relative']
+            if ds_info['bbox_url_relative'] is not None:
                urls_to_test.append(ds_info['bbox_url_' + cloud_name])
-
+
    status_codes = url_utils.test_urls(urls_to_test,
                                       error_on_failure=True,
                                       n_workers=10,
                                       pool_type='process',
                                       timeout=2.0)
    assert all([code == 200 for code in status_codes])
-
+
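Aside from whitespace cleanup, the main behavioral change in lila_common.py is in the interactive test driver, where "if ds_info['bbox_url_relative']" becomes "if ds_info['bbox_url_relative'] is not None:". For context, a minimal usage sketch of the helpers touched here, following the call pattern visible in the accompanying scripts (the metadata directory path is illustrative):

import os
from megadetector.data_management.lila.lila_common import \
    read_lila_metadata, read_metadata_file_for_dataset

# Illustrative scratch folder for downloaded LILA metadata
metadata_dir = os.path.expanduser('~/lila/metadata')

# Dict mapping dataset names to per-dataset metadata (URLs, etc.)
metadata_table = read_lila_metadata(metadata_dir)

# Download (and unzip if necessary) the COCO-style .json for one dataset
ds_name = next(iter(metadata_table.keys()))
json_filename = read_metadata_file_for_dataset(ds_name=ds_name,
                                               metadata_dir=metadata_dir,
                                               metadata_table=metadata_table)
print(json_filename)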
megadetector/data_management/lila/test_lila_metadata_urls.py

@@ -2,7 +2,7 @@
 
 test_lila_metadata_urls.py
 
-Test that all the metadata URLs for LILA camera trap datasets are valid, including MegaDetector
+Test that all the metadata URLs for LILA camera trap datasets are valid, including MegaDetector
 results files.
 
 Also pick an arbitrary image from each dataset and make sure that URL is valid.
@@ -18,6 +18,7 @@ import os
 
 from megadetector.data_management.lila.lila_common import read_lila_metadata,\
     read_metadata_file_for_dataset, read_lila_taxonomy_mapping
+from megadetector.utils.url_utils import test_urls
 
 # We'll write images, metadata downloads, and temporary files here
 lila_local_base = os.path.expanduser('~/lila')
@@ -55,7 +56,7 @@ print('Loaded metadata URLs for {} datasets'.format(len(metadata_table)))
 
 # Takes ~60 seconds if everything needs to be downloaded and unzipped
 
-for ds_name in metadata_table.keys():
+for ds_name in metadata_table.keys():
 
     # Download the main metadata file for this dataset
     metadata_table[ds_name]['json_filename'] = \
@@ -63,7 +64,7 @@ for ds_name in metadata_table.keys():
                                        metadata_dir=metadata_dir,
                                        metadata_table=metadata_table,
                                        force_download=force_download)
-
+
     # Download MD results for this dataset
     for k in md_results_keys:
         md_results_url = metadata_table[ds_name][k]
@@ -93,13 +94,13 @@ image_index = 2000
 #
 # ds_name = list(metadata_table.keys())[0]
 for ds_name in metadata_table.keys():
-
+
     if 'bbox' in ds_name:
         print('Skipping bbox dataset {}'.format(ds_name))
         continue
 
     print('Processing dataset {}'.format(ds_name))
-
+
     json_filename = metadata_table[ds_name]['json_filename']
     with open(json_filename, 'r') as f:
         data = json.load(f)
@@ -108,20 +109,20 @@ for ds_name in metadata_table.keys():
         clouds = [preferred_cloud]
     else:
         clouds = ['gcp','aws','azure']
-
+
     for cloud in clouds:
-
+
         image_base_url = metadata_table[ds_name]['image_base_url_' + cloud]
         assert not image_base_url.endswith('/')
-
+
         # Download a test image
         test_image_relative_path = data['images'][image_index]['file_name']
         test_image_url = image_base_url + '/' + test_image_relative_path
-
+
         url_to_source[test_image_url] = ds_name + ' metadata ({})'.format(cloud)
-
+
         # Grab an image from the MegaDetector results
-
+
         # k = md_results_keys[2]
         for k in md_results_keys:
             k_fn = k + '_filename'
@@ -133,14 +134,12 @@ for ds_name in metadata_table.keys():
             url_to_source[md_image_url] = ds_name + ' ' + k
         del md_results
     del data
-
+
 # ...for each dataset
 
 
 #%% Test URLs
 
-from megadetector.utils.url_utils import test_urls
-
 urls_to_test = sorted(url_to_source.keys())
 urls_to_test = [fn.replace('\\','/') for fn in urls_to_test]
 
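test_lila_metadata_urls.py now imports test_urls at the top of the module instead of inside the "Test URLs" cell. Based on the calls visible in these diffs, a hedged sketch of how the collected URLs are then checked (the URLs here are placeholders, not real LILA paths):

from megadetector.utils.url_utils import test_urls

# Placeholder URLs; the real script builds this list from LILA metadata and MD results files
urls_to_test = ['https://example.com/a.json', 'https://example.com/b.jpg']

# Parameters mirror the test_urls call visible in lila_common.py's interactive test driver
status_codes = test_urls(urls_to_test,
                         error_on_failure=True,
                         n_workers=10,
                         pool_type='process',
                         timeout=2.0)
assert all([code == 200 for code in status_codes])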