megadetector-5.0.28-py3-none-any.whl → megadetector-10.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +26 -26
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -2
- megadetector/data_management/camtrap_dp_to_coco.py +79 -46
- megadetector/data_management/cct_json_utils.py +103 -103
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +210 -193
- megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
- megadetector/data_management/databases/integrity_check_json_db.py +228 -200
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +88 -39
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +133 -125
- megadetector/data_management/labelme_to_yolo.py +159 -73
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
- megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +73 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
- megadetector/data_management/mewc_to_md.py +344 -340
- megadetector/data_management/ocr_tools.py +262 -255
- megadetector/data_management/read_exif.py +249 -227
- megadetector/data_management/remap_coco_categories.py +90 -28
- megadetector/data_management/remove_exif.py +81 -21
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +588 -120
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +248 -122
- megadetector/data_management/yolo_to_coco.py +333 -191
- megadetector/detection/change_detection.py +832 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +358 -278
- megadetector/detection/run_detector.py +399 -186
- megadetector/detection/run_detector_batch.py +404 -377
- megadetector/detection/run_inference_with_yolov5_val.py +340 -327
- megadetector/detection/run_tiled_inference.py +257 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +332 -295
- megadetector/postprocessing/add_max_conf.py +19 -11
- megadetector/postprocessing/categorize_detections_by_size.py +45 -45
- megadetector/postprocessing/classification_postprocessing.py +468 -433
- megadetector/postprocessing/combine_batch_outputs.py +23 -23
- megadetector/postprocessing/compare_batch_results.py +590 -525
- megadetector/postprocessing/convert_output_format.py +106 -102
- megadetector/postprocessing/create_crop_folder.py +347 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +48 -27
- megadetector/postprocessing/md_to_coco.py +133 -102
- megadetector/postprocessing/md_to_labelme.py +107 -90
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +92 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -301
- megadetector/postprocessing/remap_detection_categories.py +91 -38
- megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +156 -74
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/ct_utils.py +1049 -211
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +632 -529
- megadetector/utils/path_utils.py +1520 -431
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/split_locations_into_train_val.py +62 -62
- megadetector/utils/string_utils.py +148 -27
- megadetector/utils/url_utils.py +489 -176
- megadetector/utils/wi_utils.py +2658 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +34 -30
- megadetector/visualization/render_images_with_thumbnails.py +39 -74
- megadetector/visualization/visualization_utils.py +487 -435
- megadetector/visualization/visualize_db.py +232 -198
- megadetector/visualization/visualize_detector_output.py +82 -76
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
- megadetector-10.0.0.dist-info/RECORD +139 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -509
- megadetector-5.0.28.dist-info/RECORD +0 -209
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
megadetector/data_management/lila/get_lila_annotation_counts.py

@@ -3,7 +3,7 @@
 get_lila_annotation_counts.py

 Generates a .json-formatted dictionary mapping each LILA dataset to all categories
-that exist for that dataset, with counts for the number of occurrences of each category
+that exist for that dataset, with counts for the number of occurrences of each category
 (the number of *annotations* for each category, not the number of *images*).

 Also loads the taxonomy mapping file, to include scientific names for each category.

@@ -17,8 +17,11 @@ get_lila_image_counts.py counts the number of *images* for each category in each
 import json
 import os

+from collections import defaultdict
+
 from megadetector.data_management.lila.lila_common import \
     read_lila_metadata, read_metadata_file_for_dataset, read_lila_taxonomy_mapping
+from megadetector.utils import ct_utils

 # cloud provider to use for downloading images; options are 'gcp', 'azure', or 'aws'
 preferred_cloud = 'gcp'

@@ -53,21 +56,21 @@ datasets_with_taxonomy_mapping = set()

 # i_row = 1; row = taxonomy_df.iloc[i_row]; row
 for i_row,row in taxonomy_df.iterrows():
-
+
     datasets_with_taxonomy_mapping.add(row['dataset_name'])
-
+
     ds_query = row['dataset_name'] + ':' + row['query']
     ds_query = ds_query.lower()
-
+
     if not isinstance(row['scientific_name'],str):
         unmapped_queries.add(ds_query)
         ds_query_to_scientific_name[ds_query] = 'unmapped'
         continue
-
+
     ds_query_to_scientific_name[ds_query] = row['scientific_name']
-
+
 print('Loaded taxonomy mappings for {} datasets'.format(len(datasets_with_taxonomy_mapping)))
-
+


 #%% Download and parse the metadata file


@@ -78,55 +81,55 @@ print('Loaded metadata URLs for {} datasets'.format(len(metadata_table)))

 #%% Download and extract metadata for each dataset

-for ds_name in metadata_table.keys():
-    metadata_table[ds_name]['json_filename'] =
-
-
+for ds_name in metadata_table.keys():
+    metadata_table[ds_name]['json_filename'] = \
+        read_metadata_file_for_dataset(ds_name=ds_name,
+                                       metadata_dir=metadata_dir,
+                                       metadata_table=metadata_table,
+                                       preferred_cloud=preferred_cloud)


 #%% Get category names and counts for each dataset

 # Takes ~5 minutes

-from collections import defaultdict
-
 dataset_to_categories = {}

 # ds_name = 'NACTI'
 for ds_name in metadata_table.keys():
-
+
     taxonomy_mapping_available = (ds_name in datasets_with_taxonomy_mapping)
-
+
     if not taxonomy_mapping_available:
         print('Warning: taxonomy mapping not available for {}'.format(ds_name))
-
+
     print('Finding categories in {}'.format(ds_name))

     json_filename = metadata_table[ds_name]['json_filename']
     base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
     assert not base_url.endswith('/')
-
-    # Open the metadata file
+
+    # Open the metadata file
     with open(json_filename, 'r') as f:
         data = json.load(f)
-
+
     # Collect list of categories and mappings to category name
     categories = data['categories']
-
+
     category_id_to_count = defaultdict(int)
-    annotations = data['annotations']
-
+    annotations = data['annotations']
+
     # ann = annotations[0]
     for ann in annotations:
         category_id_to_count[ann['category_id']] = category_id_to_count[ann['category_id']] + 1
-
+
     # c = categories[0]
     for c in categories:
-        count = category_id_to_count[c['id']]
+        count = category_id_to_count[c['id']]
         if 'count' in c:
-            assert 'bbox' in ds_name or c['count'] == count
+            assert 'bbox' in ds_name or c['count'] == count
         c['count'] = count
-
+
     # Don't do taxonomy mapping for bbox data sets, which are sometimes just binary and are
     # always redundant with the class-level data sets.
     if 'bbox' in ds_name:

@@ -144,7 +147,7 @@ for ds_name in metadata_table.keys():
             sn = ds_query_to_scientific_name[taxonomy_query_string]
             assert sn is not None and len(sn) > 0
             c['scientific_name_from_taxonomy_mapping'] = sn
-
+
     dataset_to_categories[ds_name] = categories

 # ...for each dataset

@@ -154,19 +157,18 @@ for ds_name in metadata_table.keys():

 # ds_name = list(dataset_to_categories.keys())[0]
 for ds_name in dataset_to_categories:
-
+
     print('\n** Category counts for {} **\n'.format(ds_name))
-
+
     categories = dataset_to_categories[ds_name]
     categories = sorted(categories, key=lambda x: x['count'], reverse=True)
-
+
     for c in categories:
         print('{} ({}): {}'.format(c['name'],c['scientific_name_from_taxonomy_mapping'],c['count']))
-
+
 # ...for each dataset


 #%% Save the results

-
-    json.dump(dataset_to_categories,f,indent=1)
+ct_utils.write_json(output_file, dataset_to_categories)
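The last hunk above swaps the script's manual json.dump(...) call for the ct_utils.write_json helper that 10.0.0 now imports at the top of the file. A minimal sketch of the new call as it appears in the diff; the dictionary literal and the output path value below are illustrative only (the real script builds dataset_to_categories from LILA metadata and defines output_file elsewhere):

    from megadetector.utils import ct_utils

    # Illustrative values; the real script populates these from LILA metadata
    output_file = 'lila_dataset_to_categories.json'
    dataset_to_categories = {'Example Dataset': [{'name': 'deer', 'count': 10}]}

    # 10.0.0 writes the results via the ct_utils helper instead of json.dump(...)
    ct_utils.write_json(output_file, dataset_to_categories)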
megadetector/data_management/lila/get_lila_image_counts.py

@@ -5,7 +5,7 @@ get_lila_image_counts.py
 Count the number of images and bounding boxes with each label in one or more LILA datasets.

 This script doesn't write these counts out anywhere other than the console, it's just intended
-as a template for doing operations like this on LILA data. get_lila_annotation_counts.py writes
+as a template for doing operations like this on LILA data. get_lila_annotation_counts.py writes
 information out to a .json file, but it counts *annotations*, not *images*, for each category.

 """

@@ -40,53 +40,53 @@ metadata_table = read_lila_metadata(metadata_dir)
 if datasets_of_interest is None:
     datasets_of_interest = list(metadata_table.keys())

-for ds_name in datasets_of_interest:
+for ds_name in datasets_of_interest:
     metadata_table[ds_name]['json_filename'] = read_metadata_file_for_dataset(ds_name=ds_name,
                                                                               metadata_dir=metadata_dir,
                                                                               metadata_table=metadata_table)
-
-
+
+
 #%% Count categories

 ds_name_to_category_counts = {}

 # ds_name = datasets_of_interest[0]
 for ds_name in datasets_of_interest:
-
+
     category_to_image_count = {}
     category_to_bbox_count = {}
-
+
     print('Counting categories in: ' + ds_name)
-
+
     json_filename = metadata_table[ds_name]['json_filename']
     with open(json_filename, 'r') as f:
         data = json.load(f)
-
+
     categories = data['categories']
     category_ids = [c['id'] for c in categories]
     for c in categories:
         category_id_to_name = {c['id']:c['name'] for c in categories}
     annotations = data['annotations']
     images = data['images']
-
-    for category_id in category_ids:
-        category_name = category_id_to_name[category_id]
+
+    for category_id in category_ids:
+        category_name = category_id_to_name[category_id]
         category_to_image_count[category_name] = 0
         category_to_bbox_count[category_name] = 0
-
+
     image_id_to_category_names = defaultdict(set)
-
+
     # Go through annotations, marking each image with the categories that are present
     #
     # ann = annotations[0]
     for ann in annotations:
-
+
         category_name = category_id_to_name[ann['category_id']]
         image_id_to_category_names[ann['image_id']].add(category_name)

     # Now go through images and count categories
     category_to_count = defaultdict(int)
-
+
     # im = images[0]
     for im in images:
         categories_this_image = image_id_to_category_names[im['id']]

@@ -94,19 +94,19 @@ for ds_name in datasets_of_interest:
             category_to_count[category_name] += 1

     ds_name_to_category_counts[ds_name] = category_to_count
-
+
 # ...for each dataset
-
+

 #%% Print the results

 for ds_name in ds_name_to_category_counts:
-
+
     print('\n** Category counts for {} **\n'.format(ds_name))
-
+
     category_to_count = ds_name_to_category_counts[ds_name]
-    category_to_count = {k: v for k, v in sorted(category_to_count.items(), reverse=True,
+    category_to_count = {k: v for k, v in sorted(category_to_count.items(), reverse=True,
                                                  key=lambda item: item[1])}
-
-    for category_name in category_to_count.keys():
+
+    for category_name in category_to_count.keys():
         print('{}: {}'.format(category_name,category_to_count[category_name]))
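The counting idiom in the hunks above (unchanged here apart from whitespace) first marks each image with the set of categories present in its annotations, then counts images rather than annotations per category. A self-contained sketch of that idiom, with toy stand-ins for data['annotations'], data['images'], and the category mapping:

    from collections import defaultdict

    # Toy stand-ins for the fields read from a COCO Camera Traps metadata file
    annotations = [{'image_id': 'img1', 'category_id': 1},
                   {'image_id': 'img1', 'category_id': 1},
                   {'image_id': 'img2', 'category_id': 2}]
    images = [{'id': 'img1'}, {'id': 'img2'}]
    category_id_to_name = {1: 'deer', 2: 'empty'}

    # Mark each image with the categories that are present in its annotations
    image_id_to_category_names = defaultdict(set)
    for ann in annotations:
        image_id_to_category_names[ann['image_id']].add(category_id_to_name[ann['category_id']])

    # Count *images* (not annotations) per category
    category_to_count = defaultdict(int)
    for im in images:
        for category_name in image_id_to_category_names[im['id']]:
            category_to_count[category_name] += 1

    print(dict(category_to_count))  # {'deer': 1, 'empty': 1}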
megadetector/data_management/lila/lila_common.py

@@ -53,30 +53,30 @@ for url in lila_base_urls.values():
 def read_wildlife_insights_taxonomy_mapping(metadata_dir, force_download=False):
     """
     Reads the WI taxonomy mapping file, downloading the .json data (and writing to .csv) if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the taxonomy mapping file
+        force_download (bool, optional): download the taxonomy mapping file
             even if the local file exists.
-
+
     Returns:
         pd.dataframe: A DataFrame with taxonomy information
     """
-
+
     wi_taxonomy_csv_path = os.path.join(metadata_dir,wildlife_insights_taxonomy_local_csv_filename)
-
+
     if os.path.exists(wi_taxonomy_csv_path):
         df = pd.read_csv(wi_taxonomy_csv_path)
     else:
         wi_taxonomy_json_path = os.path.join(metadata_dir,wildlife_insights_taxonomy_local_json_filename)
-        download_url(wildlife_insights_taxonomy_url, wi_taxonomy_json_path,
+        download_url(wildlife_insights_taxonomy_url, wi_taxonomy_json_path,
                      force_download=force_download)
         with open(wi_taxonomy_json_path,'r') as f:
             d = json.load(f)
-
+
         # We haven't implemented paging, make sure that's not an issue
         assert d['meta']['totalItems'] < wildlife_insights_page_size
-
+
         # d['data'] is a list of items that look like:
         """
         {'id': 2000003,

@@ -92,46 +92,46 @@ def read_wildlife_insights_taxonomy_mapping(metadata_dir, force_download=False):
         """
         df = pd.DataFrame(d['data'])
         df.to_csv(wi_taxonomy_csv_path,index=False)
-
+
     return df

-
+
 def read_lila_taxonomy_mapping(metadata_dir, force_download=False):
     """
     Reads the LILA taxonomy mapping file, downloading the .csv file if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the taxonomy mapping file
-            even if the local file exists.
-
+        force_download (bool, optional): download the taxonomy mapping file
+            even if the local file exists.
+
     Returns:
         pd.DataFrame: a DataFrame with one row per identification
     """
-
+
     p = urlparse(lila_taxonomy_mapping_url)
     taxonomy_filename = os.path.join(metadata_dir,os.path.basename(p.path))
-    download_url(lila_taxonomy_mapping_url, taxonomy_filename,
+    download_url(lila_taxonomy_mapping_url, taxonomy_filename,
                  force_download=force_download)
-
+
     df = pd.read_csv(lila_taxonomy_mapping_url)
-
+
     return df

-
+
 def read_lila_metadata(metadata_dir, force_download=False):
     """
     Reads LILA metadata (URLs to each dataset), downloading the .csv file if necessary.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
             the local file exists.
-
+
     Returns:
         dict: a dict mapping dataset names (e.g. "Caltech Camera Traps") to dicts
             with keys corresponding to the headers in the .csv file, currently:
-
+
             - name
            - short_name
            - continent

@@ -153,65 +153,65 @@ def read_lila_metadata(metadata_dir, force_download=False):
            - md_results_with_rde
            - json_filename
     """
-
+
     # Put the master metadata file in the same folder where we're putting images
     p = urlparse(lila_metadata_url)
     metadata_filename = os.path.join(metadata_dir,os.path.basename(p.path))
     download_url(lila_metadata_url, metadata_filename, force_download=force_download)
-
+
     df = pd.read_csv(metadata_filename)
-
+
     records = df.to_dict('records')
-
+
     # Parse into a table keyed by dataset name
     metadata_table = {}
-
+
     # r = records[0]
     for r in records:
         if is_empty(r['name']):
             continue
-
+
         # Convert NaN's to None
         for k in r.keys():
             if is_empty(r[k]):
                 r[k] = None
-
+
         metadata_table[r['name']] = r
-
-    return metadata_table
-
+
+    return metadata_table
+

 def read_lila_all_images_file(metadata_dir, force_download=False):
     """
     Downloads if necessary - then unzips if necessary - the .csv file with label mappings for
     all LILA files, and opens the resulting .csv file as a Pandas DataFrame.
-
+
     Args:
         metadata_dir (str): folder to use for temporary LILA metadata files
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
            the local file exists.
-
+
     Returns:
         pd.DataFrame: a DataFrame containing one row per identification in a LILA camera trap image
     """
-
+
     p = urlparse(lila_all_images_url)
     lila_all_images_zip_filename = os.path.join(metadata_dir,os.path.basename(p.path))
     download_url(lila_all_images_url, lila_all_images_zip_filename,
                  force_download=force_download)
-
+
     with zipfile.ZipFile(lila_all_images_zip_filename,'r') as z:
         files = z.namelist()
         assert len(files) == 1
-
+
     unzipped_csv_filename = os.path.join(metadata_dir,files[0])
     if not os.path.isfile(unzipped_csv_filename):
         unzip_file(lila_all_images_zip_filename,metadata_dir)
     else:
-        print('{} already unzipped'.format(unzipped_csv_filename))
-
+        print('{} already unzipped'.format(unzipped_csv_filename))
+
     df = pd.read_csv(unzipped_csv_filename)
-
+
     return df


@@ -223,94 +223,97 @@ def read_metadata_file_for_dataset(ds_name,
                                    force_download=False):
     """
     Downloads if necessary - then unzips if necessary - the .json file for a specific dataset.
-
+
     Args:
         ds_name (str): the name of the dataset for which you want to retrieve metadata (e.g.
-            "Caltech Camera Traps")
+            "Caltech Camera Traps")
         metadata_dir (str): folder to use for temporary LILA metadata files
         metadata_table (dict, optional): an optional dictionary already loaded via
             read_lila_metadata()
         json_url (str, optional): the URL of the metadata file, if None will be retrieved
            via read_lila_metadata()
         preferred_cloud (str, optional): 'gcp' (default), 'azure', or 'aws'
-        force_download (bool, optional): download the metadata file even if
+        force_download (bool, optional): download the metadata file even if
            the local file exists.
-
+
     Returns:
         str: the .json filename on the local disk
-
+
     """
-
+
+    if preferred_cloud is None:
+        preferred_cloud = 'gcp'
+
     assert preferred_cloud in lila_base_urls.keys()
-
+
     if json_url is None:
-
+
         if metadata_table is None:
             metadata_table = read_lila_metadata(metadata_dir)
-
+
         json_url = metadata_table[ds_name]['metadata_url_' + preferred_cloud]
-
+
     p = urlparse(json_url)
     json_filename = os.path.join(metadata_dir,os.path.basename(p.path))
     download_url(json_url, json_filename, force_download=force_download)
-
+
     # Unzip if necessary
     if json_filename.endswith('.zip'):
-
+
         with zipfile.ZipFile(json_filename,'r') as z:
             files = z.namelist()
             assert len(files) == 1
         unzipped_json_filename = os.path.join(metadata_dir,files[0])
         if not os.path.isfile(unzipped_json_filename):
-            unzip_file(json_filename,metadata_dir)
+            unzip_file(json_filename,metadata_dir)
         else:
             print('{} already unzipped'.format(unzipped_json_filename))
         json_filename = unzipped_json_filename
-
+
     return json_filename


 #%% Interactive test driver

 if False:
-
+
     pass

     #%% Verify that all base URLs exist
-
+
     # LILA camera trap primary metadata file
     urls = (lila_metadata_url,
             lila_taxonomy_mapping_url,
             lila_all_images_url,
             wildlife_insights_taxonomy_url)
-
+
     from megadetector.utils import url_utils
-
+
     status_codes = url_utils.test_urls(urls,timeout=2.0)
     assert all([code == 200 for code in status_codes])
-
-
+
+
     #%% Verify that the metadata URLs exist for individual datasets
-
+
     metadata_dir = os.path.expanduser('~/lila/metadata')
-
+
     dataset_metadata = read_lila_metadata(metadata_dir)
-
+
     urls_to_test = []
-
+
     # ds_name = next(iter(dataset_metadata.keys()))
     for ds_name in dataset_metadata.keys():
-
+
         ds_info = dataset_metadata[ds_name]
         for cloud_name in lila_base_urls.keys():
             urls_to_test.append(ds_info['metadata_url_' + cloud_name])
-            if ds_info['bbox_url_relative']
+            if ds_info['bbox_url_relative'] is not None:
                 urls_to_test.append(ds_info['bbox_url_' + cloud_name])
-
+
     status_codes = url_utils.test_urls(urls_to_test,
                                        error_on_failure=True,
                                        n_workers=10,
                                        pool_type='process',
                                        timeout=2.0)
     assert all([code == 200 for code in status_codes])
-
+
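The substantive change to read_metadata_file_for_dataset() above is that preferred_cloud=None now falls back to 'gcp' before the lila_base_urls assertion runs, matching the "'gcp' (default)" wording in its docstring. A minimal usage sketch based on the calls visible in these diffs; the metadata directory is just a scratch folder (the path below is taken from the test driver), and the dataset name comes from the docstring's example:

    import os
    from megadetector.data_management.lila.lila_common import (
        read_lila_metadata, read_metadata_file_for_dataset)

    metadata_dir = os.path.expanduser('~/lila/metadata')  # any writable scratch folder
    metadata_table = read_lila_metadata(metadata_dir)

    # Passing preferred_cloud=None now resolves to 'gcp' inside the function
    json_filename = read_metadata_file_for_dataset(ds_name='Caltech Camera Traps',
                                                   metadata_dir=metadata_dir,
                                                   metadata_table=metadata_table,
                                                   preferred_cloud=None)
    print(json_filename)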