megadetector-5.0.28-py3-none-any.whl → megadetector-5.0.29-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector has been flagged as potentially problematic.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +231 -224
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +304 -262
- megadetector/detection/run_detector.py +177 -164
- megadetector/detection/run_detector_batch.py +364 -363
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +256 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +290 -282
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +415 -415
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +219 -146
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +23 -20
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +313 -298
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1018 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1457 -398
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +61 -61
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +401 -397
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +79 -73
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.28.dist-info/RECORD +0 -209
megadetector/data_management/lila/download_lila_subset.py (+21 -24)

@@ -14,9 +14,12 @@ import random
 
 from tqdm import tqdm
 from collections import defaultdict
+from copy import deepcopy
 
 from megadetector.data_management.lila.lila_common import \
     read_lila_all_images_file, is_empty, lila_base_urls
+from megadetector.utils.url_utils import parallel_download_urls
+from megadetector.utils.path_utils import open_file
 
 for s in lila_base_urls.values():
     assert s.endswith('/')
@@ -58,13 +61,13 @@ common_name_to_count = defaultdict(int)
 
 ds_name_to_urls = defaultdict(list)
 
-def find_items(row):
-
+def find_items(row): # noqa
+
     if is_empty(row['common_name']):
         return
-
+
     match = False
-
+
     # This is the only bit of this file that's specific to a particular query. In this case
     # we're checking whether each row is on a list of species of interest, but you do you.
     for species_name in species_of_interest:
@@ -72,7 +75,7 @@ def find_items(row):
             match = True
             common_name_to_count[species_name] += 1
             break
-
+
     if match:
         ds_name_to_urls[row['dataset_name']].append(row['url_' + preferred_provider])
 
@@ -86,8 +89,7 @@ print('Found {} matching URLs across {} datasets'.format(len(all_urls),len(ds_na
 
 for common_name in common_name_to_count:
     print('{}: {}'.format(common_name,common_name_to_count[common_name]))
-
-from copy import deepcopy
+
 ds_name_to_urls_raw = deepcopy(ds_name_to_urls)
 
 
@@ -104,19 +106,17 @@ else:
 
 #%% Choose target files for each URL
 
-from megadetector.data_management.lila.lila_common import lila_base_urls
-
 # We have a list of URLs per dataset, flatten that into a single list of URLs
 urls_to_download = set()
 for ds_name in ds_name_to_urls:
     for url in ds_name_to_urls[ds_name]:
         urls_to_download.add(url)
-urls_to_download = sorted(list(urls_to_download))
+urls_to_download = sorted(list(urls_to_download))
 
 # A URL might look like this:
 #
 # https://storage.googleapis.com/public-datasets-lila/wcs-unzipped/animals/0667/0302.jpg
-#
+#
 # We'll write that to an output file that looks like this (relative to output_dir):
 #
 # wcs-unzipped/animals/0667/0302.jpg
@@ -128,7 +128,7 @@ assert base_url.endswith('/')
 url_to_target_file = {}
 
 for url in urls_to_download:
-    assert url.startswith(base_url)
+    assert url.startswith(base_url)
     target_fn_relative = url.replace(base_url,'')
     target_fn_abs = os.path.join(output_dir,target_fn_relative)
     url_to_target_file[url] = target_fn_abs
@@ -136,8 +136,6 @@ for url in urls_to_download:
 
 #%% Download image files
 
-from megadetector.utils.url_utils import parallel_download_urls
-
 download_results = parallel_download_urls(url_to_target_file=url_to_target_file,
                                           verbose=False,
                                           overwrite=False,
@@ -147,39 +145,38 @@ download_results = parallel_download_urls(url_to_target_file=url_to_target_file,
 
 #%% Open output folder
 
-from megadetector.utils.path_utils import open_file
 open_file(output_dir)
 
 
 #%% Scrap
 
 if False:
-
+
     pass
 
     #%% Find all the reptiles on LILA
 
     reptile_rows = df.loc[df['class'] == 'reptilia']
-
+
     # i_row = 0; row = reptile_rows.iloc[i_row]
-
+
     common_name_to_count = defaultdict(int)
     dataset_to_count = defaultdict(int)
    for i_row,row in reptile_rows.iterrows():
        common_name_to_count[row['common_name']] += 1
        dataset_to_count[row['dataset_name']] += 1
-
+
    from megadetector.utils.ct_utils import sort_dictionary_by_value
-
+
    print('Found {} reptiles\n'.format(len(reptile_rows)))
-
+
    common_name_to_count = sort_dictionary_by_value(common_name_to_count,reverse=True)
    dataset_to_count = sort_dictionary_by_value(dataset_to_count,reverse=True)
-
+
    print('Common names by count:\n')
    for k in common_name_to_count:
        print('{} ({})'.format(k,common_name_to_count[k]))
-
-    print('\nDatasets by count:\n')
+
+    print('\nDatasets by count:\n')
    for k in dataset_to_count:
        print('{} ({})'.format(k,dataset_to_count[k]))

megadetector/data_management/lila/generate_lila_per_image_labels.py (+91 -91)

@@ -35,6 +35,7 @@ from megadetector.data_management.lila.lila_common import \
 from megadetector.utils import write_html_image_list
 from megadetector.utils.path_utils import zip_file
 from megadetector.utils.path_utils import open_file
+from megadetector.utils.url_utils import parallel_download_urls
 
 # We'll write images, metadata downloads, and temporary files here
 lila_local_base = os.path.expanduser('~/lila')
@@ -47,7 +48,7 @@ os.makedirs(metadata_dir,exist_ok=True)
 
 output_file = os.path.join(lila_local_base,'lila_image_urls_and_labels.csv')
 
-# Some datasets don't have "sequence_level_annotation" fields populated, but we know their
+# Some datasets don't have "sequence_level_annotation" fields populated, but we know their
 # annotation level
 ds_name_to_annotation_level = {}
 ds_name_to_annotation_level['Caltech Camera Traps'] = 'image'
@@ -79,11 +80,11 @@ if False:
 
 #%% Download and extract metadata for each dataset
 
-for ds_name in metadata_table.keys():
+for ds_name in metadata_table.keys():
     metadata_table[ds_name]['metadata_filename'] = read_metadata_file_for_dataset(ds_name=ds_name,
                                                                                   metadata_dir=metadata_dir,
                                                                                   metadata_table=metadata_table)
-
+
 #%% Load taxonomy data
 
 taxonomy_df = read_lila_taxonomy_mapping(metadata_dir)
@@ -95,12 +96,12 @@ ds_label_to_taxonomy = {}
 
 # i_row = 0; row = taxonomy_df.iloc[i_row]
 for i_row,row in taxonomy_df.iterrows():
-
+
     ds_label = row['dataset_name'] + ':' + row['query']
     assert ds_label.strip() == ds_label
     assert ds_label not in ds_label_to_taxonomy
     ds_label_to_taxonomy[ds_label] = row.to_dict()
-
+
 
 #%% Process annotations for each dataset
 
@@ -116,12 +117,12 @@ taxonomy_levels_to_include = \
     ['kingdom','phylum','subphylum','superclass','class','subclass','infraclass','superorder','order',
      'suborder','infraorder','superfamily','family','subfamily','tribe','genus','species','subspecies',
      'variety']
-
+
 header.extend(taxonomy_levels_to_include)
 
 missing_annotations = set()
 
-def clearnan(v):
+def _clearnan(v):
     if isinstance(v,float):
         assert np.isnan(v)
         v = ''
@@ -129,57 +130,57 @@ def clearnan(v):
     return v
 
 with open(output_file,'w',encoding='utf-8',newline='') as f:
-
+
     csv_writer = csv.writer(f)
     csv_writer.writerow(header)
-
+
     # ds_name = list(metadata_table.keys())[0]
     for ds_name in metadata_table.keys():
-
+
         if 'bbox' in ds_name:
             print('Skipping bbox dataset {}'.format(ds_name))
             continue
-
+
         print('Processing dataset {}'.format(ds_name))
-
+
        json_filename = metadata_table[ds_name]['metadata_filename']
        with open(json_filename, 'r') as f:
            data = json.load(f)
-
+
        categories = data['categories']
        category_ids = [c['id'] for c in categories]
        for c in categories:
            category_id_to_name = {c['id']:c['name'] for c in categories}
-
+
        annotations = data['annotations']
        images = data['images']
-
+
        image_id_to_annotations = defaultdict(list)
-
+
        # Go through annotations, marking each image with the categories that are present
        #
        # ann = annotations[0]
-        for ann in annotations:
+        for ann in annotations:
            image_id_to_annotations[ann['image_id']].append(ann)
-
+
        unannotated_images = []
-
+
        found_date = False
        found_location = False
        found_annotation_level = False
-
+
        if ds_name in ds_name_to_annotation_level:
            expected_annotation_level = ds_name_to_annotation_level[ds_name]
        else:
            expected_annotation_level = None
-
+
        # im = images[10]
        for i_image,im in tqdm(enumerate(images),total=len(images)):
-
+
            if (debug_max_images_per_dataset is not None) and (debug_max_images_per_dataset > 0) \
                and (i_image >= debug_max_images_per_dataset):
                break
-
+
            file_name = im['file_name'].replace('\\','/')
            base_url_gcp = metadata_table[ds_name]['image_base_url_gcp']
            base_url_aws = metadata_table[ds_name]['image_base_url_aws']
@@ -187,21 +188,21 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
            assert not base_url_gcp.endswith('/')
            assert not base_url_aws.endswith('/')
            assert not base_url_azure.endswith('/')
-
+
            url_gcp = base_url_gcp + '/' + file_name
            url_aws = base_url_aws + '/' + file_name
            url_azure = base_url_azure + '/' + file_name
-
+
            for k in im.keys():
                if ('date' in k or 'time' in k) and (k not in ['datetime','date_captured']):
                    raise ValueError('Unrecognized datetime field')
-
+
            # This field name was only used for Caltech Camera Traps
            if 'date_captured' in im:
                assert ds_name == 'Caltech Camera Traps'
                im['datetime'] = im['date_captured']
-
-            def has_valid_datetime(im):
+
+            def _has_valid_datetime(im):
                if 'datetime' not in im:
                    return False
                v = im['datetime']
@@ -212,29 +213,29 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
                else:
                    assert isinstance(v,float) and np.isnan(v)
                    return False
-
-            dt_string = ''
-            if (has_valid_datetime(im)):
-
+
+            dt_string = ''
+            if (_has_valid_datetime(im)):
+
                dt = dateparser.parse(im['datetime'])
-
+
                if dt is None or dt.year < 1990 or dt.year > 2025:
-
+
                    # raise ValueError('Suspicious date parsing result')
-
-                    # Special case we don't want to print a warning about... this is
+
+                    # Special case we don't want to print a warning about... this is
                    # in invalid date that very likely originates on the camera, not at
                    # some intermediate processing step.
                    #
                    # print('Suspicious date for image {}: {} ({})'.format(
                    #    im['id'], im['datetime'], ds_name))
-                    pass
-
+                    pass
+
                else:
-
+
                    found_date = True
                    dt_string = dt.strftime("%m-%d-%Y %H:%M:%S")
-
+
            # Location, sequence, and image IDs are only guaranteed to be unique within
            # a dataset, so for the output .csv file, include both
            if 'location' in im:
@@ -242,25 +243,25 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
                location_id = ds_name + ' : ' + str(im['location'])
            else:
                location_id = ds_name
-
+
            image_id = ds_name + ' : ' + str(im['id'])
-
+
            if 'seq_id' in im:
                sequence_id = ds_name + ' : ' + str(im['seq_id'])
            else:
                sequence_id = ds_name + ' : ' + 'unknown'
-
+
            if 'frame_num' in im:
                frame_num = im['frame_num']
            else:
                frame_num = -1
-
+
            annotations_this_image = image_id_to_annotations[im['id']]
-
+
            categories_this_image = set()
-
+
            annotation_level = 'unknown'
-
+
            for ann in annotations_this_image:
                assert ann['image_id'] == im['id']
                categories_this_image.add(category_id_to_name[ann['category_id']])
@@ -275,35 +276,35 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
                    'Unexpected annotation level'
            elif expected_annotation_level is not None:
                annotation_level = expected_annotation_level
-
+
            if len(categories_this_image) == 0:
                unannotated_images.append(im)
                continue
-
+
            # category_name = list(categories_this_image)[0]
            for category_name in categories_this_image:
-
+
                ds_label = ds_name + ':' + category_name.lower()
-
+
                if ds_label not in ds_label_to_taxonomy:
-
+
                    assert ds_label in known_unmapped_labels
-
+
                    # Only print a warning the first time we see an unmapped label
                    if ds_label not in missing_annotations:
                        print('Warning: {} not in taxonomy file'.format(ds_label))
                        missing_annotations.add(ds_label)
                    continue
-
+
                taxonomy_labels = ds_label_to_taxonomy[ds_label]
-
+
                """
-                header =
+                header =
                ['dataset_name','url','image_id','sequence_id','location_id',
                 'frame_num','original_label','scientific_name','common_name',
                 'datetime','annotation_level']
                """
-
+
                row = []
                row.append(ds_name)
                row.append(url_gcp)
@@ -314,37 +315,37 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
                row.append(location_id)
                row.append(frame_num)
                row.append(taxonomy_labels['query'])
-                row.append(clearnan(taxonomy_labels['scientific_name']))
-                row.append(clearnan(taxonomy_labels['common_name']))
+                row.append(_clearnan(taxonomy_labels['scientific_name']))
+                row.append(_clearnan(taxonomy_labels['common_name']))
                row.append(dt_string)
                row.append(annotation_level)
-
+
                for s in taxonomy_levels_to_include:
-                    row.append(clearnan(taxonomy_labels[s]))
-
+                    row.append(_clearnan(taxonomy_labels[s]))
+
                assert len(row) == len(header)
-
+
                csv_writer.writerow(row)
-
+
            # ...for each category that was applied at least once to this image
-
+
        # ...for each image in this dataset
-
+
        if not found_date:
            pass
            # print('Warning: no date information available for this dataset')
-
+
        if not found_location:
            pass
            # print('Warning: no location information available for this dataset')
-
+
        if not found_annotation_level and (ds_name not in ds_name_to_annotation_level):
            print('Warning: no annotation level information available for this dataset')
-
+
        if len(unannotated_images) > 0:
            print('Warning: {} of {} images are un-annotated\n'.\
                  format(len(unannotated_images),len(images)))
-
+
    # ...for each dataset
 
 # ...with open()
@@ -364,7 +365,7 @@ print('Read {} rows from {}'.format(len(df),output_file))
 
 tqdm.pandas()
 
-def isint(v):
+def _isint(v):
     return isinstance(v,int) or isinstance(v,np.int64)
 
 valid_annotation_levels = set(['sequence','image','unknown'])
@@ -373,8 +374,8 @@ valid_annotation_levels = set(['sequence','image','unknown'])
 # in the next cell to look for datasets that only have a single location
 dataset_name_to_locations = defaultdict(set)
 
-def check_row(row):
-
+def _check_row(row):
+
     assert row['dataset_name'] in metadata_table.keys()
     for url_column in ['url_gcp','url_aws','url_azure']:
         assert row[url_column].startswith('https://') or row[url_column].startswith('http://')
@@ -387,21 +388,21 @@ def check_row(row):
         assert np.isnan(row['frame_num'])
     else:
         # -1 is sometimes used for sequences of unknown length
-        assert isint(row['frame_num']) and row['frame_num'] >= -1
+        assert _isint(row['frame_num']) and row['frame_num'] >= -1
 
     ds_name = row['dataset_name']
     dataset_name_to_locations[ds_name].add(row['location_id'])
-
+
 # Faster, but more annoying to debug
 if True:
-
-    df.progress_apply(check_row, axis=1)
+
+    df.progress_apply(_check_row, axis=1)
 
 else:
-
+
     # i_row = 0; row = df.iloc[i_row]
     for i_row,row in tqdm(df.iterrows(),total=len(df)):
-        check_row(row)
+        _check_row(row)
 
 
 #%% Check for datasets that have only one location string (typically "unknown")
@@ -428,19 +429,19 @@ images_to_download = []
 
 # ds_name = list(metadata_table.keys())[2]
 for ds_name in metadata_table.keys():
-
+
     if 'bbox' in ds_name:
         continue
-
+
     # Find all rows for this dataset
     ds_rows = df.loc[df['dataset_name'] == ds_name]
-
+
     print('{} rows available for {}'.format(len(ds_rows),ds_name))
     assert len(ds_rows) > 0
-
+
     empty_rows = ds_rows[ds_rows['scientific_name'].isnull()]
     non_empty_rows = ds_rows[~ds_rows['scientific_name'].isnull()]
-
+
     if len(empty_rows) == 0:
         print('No empty images available for {}'.format(ds_name))
     elif len(empty_rows) > n_empty_images_per_dataset:
@@ -452,7 +453,7 @@ for ds_name in metadata_table.keys():
     elif len(non_empty_rows) > n_non_empty_images_per_dataset:
         non_empty_rows = non_empty_rows.sample(n=n_non_empty_images_per_dataset)
     images_to_download.extend(non_empty_rows.to_dict('records'))
-
+
 # ...for each dataset
 
 print('Selected {} total images'.format(len(images_to_download)))
@@ -468,7 +469,7 @@ url_to_target_file = {}
 
 # i_image = 10; image = images_to_download[i_image]
 for i_image,image in tqdm(enumerate(images_to_download),total=len(images_to_download)):
-
+
     url = image['url_' + preferred_cloud]
     ext = os.path.splitext(url)[1]
     fn_relative = 'image_{}'.format(str(i_image).zfill(4)) + ext
@@ -476,11 +477,10 @@ for i_image,image in tqdm(enumerate(images_to_download),total=len(images_to_down
     image['relative_file'] = fn_relative
     image['url'] = url
     url_to_target_file[url] = fn_abs
-
+
 
 #%% Download images (execution)
 
-from megadetector.utils.url_utils import parallel_download_urls
 download_results = parallel_download_urls(url_to_target_file,verbose=False,overwrite=True,
                                           n_workers=20,pool_type='thread')
 
@@ -493,10 +493,10 @@ html_images = []
 
 # im = images_to_download[0]
 for im in images_to_download:
-
+
     if im['relative_file'] is None:
         continue
-
+
     output_im = {}
     output_im['filename'] = im['relative_file']
     output_im['linkTarget'] = im['url']
@@ -504,7 +504,7 @@ for im in images_to_download:
     output_im['imageStyle'] = 'width:600px;'
     output_im['textStyle'] = 'font-weight:normal;font-size:100%;'
     html_images.append(output_im)
-
+
 write_html_image_list.write_html_image_list(html_filename,html_images)
 
 open_file(html_filename)
|