megadetector 5.0.28-py3-none-any.whl → 10.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +26 -26
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -2
- megadetector/data_management/camtrap_dp_to_coco.py +79 -46
- megadetector/data_management/cct_json_utils.py +103 -103
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +210 -193
- megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
- megadetector/data_management/databases/integrity_check_json_db.py +228 -200
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +88 -39
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +133 -125
- megadetector/data_management/labelme_to_yolo.py +159 -73
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
- megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +73 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
- megadetector/data_management/mewc_to_md.py +344 -340
- megadetector/data_management/ocr_tools.py +262 -255
- megadetector/data_management/read_exif.py +249 -227
- megadetector/data_management/remap_coco_categories.py +90 -28
- megadetector/data_management/remove_exif.py +81 -21
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +588 -120
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +248 -122
- megadetector/data_management/yolo_to_coco.py +333 -191
- megadetector/detection/change_detection.py +832 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +358 -278
- megadetector/detection/run_detector.py +399 -186
- megadetector/detection/run_detector_batch.py +404 -377
- megadetector/detection/run_inference_with_yolov5_val.py +340 -327
- megadetector/detection/run_tiled_inference.py +257 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +332 -295
- megadetector/postprocessing/add_max_conf.py +19 -11
- megadetector/postprocessing/categorize_detections_by_size.py +45 -45
- megadetector/postprocessing/classification_postprocessing.py +468 -433
- megadetector/postprocessing/combine_batch_outputs.py +23 -23
- megadetector/postprocessing/compare_batch_results.py +590 -525
- megadetector/postprocessing/convert_output_format.py +106 -102
- megadetector/postprocessing/create_crop_folder.py +347 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +48 -27
- megadetector/postprocessing/md_to_coco.py +133 -102
- megadetector/postprocessing/md_to_labelme.py +107 -90
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +92 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -301
- megadetector/postprocessing/remap_detection_categories.py +91 -38
- megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +156 -74
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/ct_utils.py +1049 -211
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +632 -529
- megadetector/utils/path_utils.py +1520 -431
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/split_locations_into_train_val.py +62 -62
- megadetector/utils/string_utils.py +148 -27
- megadetector/utils/url_utils.py +489 -176
- megadetector/utils/wi_utils.py +2658 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +34 -30
- megadetector/visualization/render_images_with_thumbnails.py +39 -74
- megadetector/visualization/visualization_utils.py +487 -435
- megadetector/visualization/visualize_db.py +232 -198
- megadetector/visualization/visualize_detector_output.py +82 -76
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
- megadetector-10.0.0.dist-info/RECORD +139 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -509
- megadetector-5.0.28.dist-info/RECORD +0 -209
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
megadetector/data_management/lila/create_lila_blank_set.py

@@ -4,7 +4,7 @@ create_lila_blank_set.py
 
 Create a folder of blank images sampled from LILA. We'll aim for diversity, so less-common
 locations will be oversampled relative to more common locations. We'll also run MegaDetector
-(with manual review) to remove some incorrectly-labeled, not-actually-empty images from our
+(with manual review) to remove some incorrectly-labeled, not-actually-empty images from our
 blank set.
 
 We'll store location information for each image in a .json file, so we can split locations
@@ -27,8 +27,15 @@ from collections import defaultdict
 
 from megadetector.data_management.lila.lila_common import read_lila_all_images_file
 from megadetector.utils.url_utils import download_url
+from megadetector.utils.ct_utils import sort_dictionary_by_value
+from megadetector.utils.path_utils import is_image_file
+from megadetector.utils.path_utils import find_images
 from megadetector.visualization import visualization_utils as vis_utils
 from megadetector.utils.path_utils import recursive_file_list
+from megadetector.utils import ct_utils
+
+
+#%% Environment
 
 # We'll write images, metadata downloads, and temporary files here
 lila_local_base = os.path.expanduser('~/lila')
@@ -48,7 +55,7 @@ md_possible_non_blanks_folder = os.path.join(project_base,'candidate_non_blanks')
 os.makedirs(md_possible_non_blanks_folder,exist_ok=True)
 
 location_to_blank_image_urls_cache_file = os.path.join(project_base,
-    'location_to_blank_image_urls.json')
+    'location_to_blank_image_urls.json')
 
 md_results_file = os.path.join(project_base,'lila_blanks_md_results.json')
 
@@ -90,10 +97,10 @@ other_labels_without_common_names = (
     'car', 'motorcycle', 'vehicle'
 )
 
-common_names = sorted(list(df['common_name'].unique()),
-    key=lambda x:str(x) if isinstance(x,float) else x)
+common_names = sorted(list(df['common_name'].unique()),
+    key=lambda x:str(x) if isinstance(x,float) else x)
 original_labels = sorted(list(df['original_label'].unique()),
-    key=lambda x:str(x) if isinstance(x,float) else x)
+    key=lambda x:str(x) if isinstance(x,float) else x)
 
 # Blanks are represented as NaN in the "common_name" column (though not all NaN's are blanks)
 assert '' not in common_names
@@ -118,16 +125,16 @@ original_label_to_count = defaultdict(int)
 
 # This loop takes ~10 mins
 for i_row,row in tqdm(df.iterrows(),total=len(df)):
-
+
     common_name = row['common_name']
     original_label = row['original_label']
-
+
     if isinstance(common_name,float):
         assert np.isnan(common_name)
         original_labels_with_nan_common_names.add(original_label)
-
+
     common_name = str(common_name)
-
+
     assert isinstance(original_label,str)
     if original_label in blank_original_labels:
        common_names_with_empty_original_labels.add(common_name)
@@ -137,7 +144,6 @@ for i_row,row in tqdm(df.iterrows(),total=len(df)):
 
 #%% Look at the most common labels and common names
 
-from megadetector.utils.ct_utils import sort_dictionary_by_value
 common_name_to_count = sort_dictionary_by_value(common_name_to_count,reverse=True)
 original_label_to_count = sort_dictionary_by_value(original_label_to_count,reverse=True)
 
@@ -185,32 +191,31 @@ force_map_locations = False
 
 # Load from .json if available
 if (not force_map_locations) and (os.path.isfile(location_to_blank_image_urls_cache_file)):
-
+
     with open(location_to_blank_image_urls_cache_file,'r') as f:
         location_to_blank_image_urls = json.load(f)
 
 else:
-
+
     location_to_blank_image_urls = defaultdict(list)
-
+
     # i_row = 0; row = df.iloc[i_row]
    for i_row,row in tqdm(df.iterrows(),total=len(df)):
-
+
         location_id = row['location_id']
         url = row['url']
-
+
         original_label = row['original_label']
         if original_label in blank_original_labels:
             assert np.isnan(row['common_name'])
             location_to_blank_image_urls[location_id].append(url)
 
-    with open(location_to_blank_image_urls_cache_file,'w') as f:
-        json.dump(location_to_blank_image_urls,f,indent=1)
+    ct_utils.write_json(location_to_blank_image_urls_cache_file, location_to_blank_image_urls)
 
 n_locations_with_blanks = len(location_to_blank_image_urls)
 print('Found {} locations with blank images'.format(n_locations_with_blanks))
 
-
+
 #%% Sample blanks
 
 random.seed(0)
@@ -223,7 +228,7 @@ for location in location_to_blank_image_urls:
     blank_image_urls_this_location = location_to_blank_image_urls[location]
     unsampled_blank_image_urls_this_location = blank_image_urls_this_location.copy()
     location_to_unsampled_blank_image_urls[location] = unsampled_blank_image_urls_this_location
-
+
 # Put locations in a random order
 location_ids = list(location_to_unsampled_blank_image_urls.keys())
 random.shuffle(location_ids)
@@ -234,32 +239,32 @@ fully_sampled_locations = set()
 
 # Pick from each location until we hit our limit or have no blanks left
 while(True):
-
+
     found_sample = False
-
+
     # location = location_ids[0]
     for location in location_ids:
-
+
         unsampled_images_this_location = location_to_unsampled_blank_image_urls[location]
         if len(unsampled_images_this_location) == 0:
             fully_sampled_locations.add(location)
             continue
-
+
         url = random.choice(unsampled_images_this_location)
-        blank_urls.append(url)
+        blank_urls.append(url)
         location_to_unsampled_blank_image_urls[location].remove(url)
         location_to_sampled_blanks[location].append(url)
         found_sample = True
-
+
         if len(blank_urls) == n_blanks:
             break
-
+
     # ...for each location
-
+
     if not found_sample:
         print('Terminating after {} blanks, we ran out before hitting {}'.format(
             len(blank_urls),n_blanks))
-
+
     if len(blank_urls) == n_blanks:
         break
 
@@ -278,39 +283,39 @@ for location in location_to_sampled_blanks:
 print('Choose {} blanks from {} locations'.format(n_blanks,len(location_ids)))
 print('Fully sampled {} locations'.format(len(fully_sampled_locations)))
 print('Max samples per location: {}'.format(max_blanks_per_location))
-
+
 
 #%% Download those image files (prep)
 
 container_to_url_base = {
-
-
-
+    'lilawildlife.blob.core.windows.net':'/lila-wildlide/',
+    'storage.googleapis.com':'/public-datasets-lila/'
+}
 
 def download_relative_filename(url, output_base, verbose=False, url_base=None, overwrite=False):
     """
     Download a URL to output_base, preserving relative path
     """
-
+
     result = {'status':'unknown','url':url,'destination_filename':None}
-
+
     if url_base is None:
         assert url.startswith('https://')
         container = url.split('/')[2]
         assert container in container_to_url_base
         url_base = container_to_url_base[container]
-
+
     assert url_base.startswith('/') and url_base.endswith('/')
-
+
     p = urlparse(url)
     relative_filename = str(p.path)
     # remove the leading '/'
     assert relative_filename.startswith(url_base)
-    relative_filename = relative_filename.replace(url_base,'',1)
-
+    relative_filename = relative_filename.replace(url_base,'',1)
+
     destination_filename = os.path.join(output_base,relative_filename)
     result['destination_filename'] = destination_filename
-
+
     if ((os.path.isfile(destination_filename)) and (not overwrite)):
         result['status'] = 'skipped'
         return result
@@ -318,10 +323,10 @@ def download_relative_filename(url, output_base, verbose=False, url_base=None, overwrite=False):
         download_url(url, destination_filename, verbose=verbose)
     except Exception as e:
         print('Warning: error downloading URL {}: {}'.format(
-            url,str(e)))
+            url,str(e)))
         result['status'] = 'error: {}'.format(str(e))
         return result
-
+
     result['status'] = 'success'
     return result
 
@@ -331,11 +336,11 @@ def azure_url_to_gcp_http_url(url,error_if_not_azure_url=True):
     Most URLs point to Azure by default, but most files are available on both Azure and GCP.
     This function converts an Azure URL to the corresponding GCP http:// url.
     """
-
+
     lila_azure_storage_account = 'https://lilawildlife.blob.core.windows.net'
     gcp_bucket_api_url = 'https://storage.googleapis.com/public-datasets-lila'
     error_if_not_azure_url = False
-
+
     if error_if_not_azure_url:
         assert url.startswith(lila_azure_storage_account)
     gcp_url = url.replace(lila_azure_storage_account,gcp_bucket_api_url,1)
@@ -344,7 +349,7 @@ def azure_url_to_gcp_http_url(url,error_if_not_azure_url=True):
 # Convert Azure URLs to GCP URLs if necessary
 if preferred_image_download_source != 'azure':
     assert preferred_image_download_source == 'gcp'
-    blank_urls = [azure_url_to_gcp_http_url(url) for url in blank_urls]
+    blank_urls = [azure_url_to_gcp_http_url(url) for url in blank_urls]
 
 
 #%% Download those image files (execution)
@@ -354,16 +359,16 @@ print('Downloading {} images on {} workers'.format(len(blank_urls),n_download_threads))
 if n_download_threads <= 1:
 
     results = []
-
+
     # url = all_urls[0]
-    for url in tqdm(blank_urls):
+    for url in tqdm(blank_urls):
         results.append(download_relative_filename(url,candidate_blanks_base,url_base=None))
-
+
 else:
 
-    pool = ThreadPool(n_download_threads)
+    pool = ThreadPool(n_download_threads)
     results = list(tqdm(pool.imap(lambda s: download_relative_filename(
-        s,candidate_blanks_base,url_base=None),
+        s,candidate_blanks_base,url_base=None),
         blank_urls), total=len(blank_urls)))
 
 # pool.terminate()
@@ -385,7 +390,7 @@ cmd = 'python run_detector_batch.py MDV5A "{}" "{}"'.format(
     candidate_blanks_base,md_results_file)
 cmd += ' --recursive --output_relative_filenames'
 
-import clipboard; clipboard.copy(cmd); print(cmd)
+# import clipboard; clipboard.copy(cmd); print(cmd)
 
 
 #%% Review MD results that suggests images are non-empty
@@ -406,11 +411,11 @@ for category_id in md_results['detection_categories']:
 
 # im = md_results['images'][0]
 for im in md_results['images']:
-
+
     if 'detections' not in im:
         continue
-
-    found_object = False
+
+    found_object = False
     for det in im['detections']:
         threshold = category_id_to_threshold[det['category']]
         if det['conf'] >= threshold:
@@ -425,8 +430,8 @@ output_file_to_source_file = {}
 
 # i_fn = 0; source_file_relative = images_to_review[i_fn]
 for i_fn,source_file_relative in tqdm(enumerate(images_to_review_to_detections),
-    total=len(images_to_review_to_detections)):
-
+    total=len(images_to_review_to_detections)):
+
     source_file_abs = os.path.join(candidate_blanks_base,source_file_relative)
     assert os.path.isfile(source_file_abs)
     ext = os.path.splitext(source_file_abs)[1]
@@ -435,16 +440,15 @@ for i_fn,source_file_relative in tqdm(enumerate(images_to_review_to_detections),
     output_file_to_source_file[target_file_relative] = source_file_relative
     # shutil.copyfile(source_file_abs,target_file_abs)
     vis_utils.draw_bounding_boxes_on_file(input_file=source_file_abs,
-
-
-
-
+        output_file=target_file_abs,
+        detections=images_to_review_to_detections[source_file_relative],
+        confidence_threshold=min_threshold,
+        target_size=(1280,-1))
 
 # This is a temporary file I just used during debugging
-
-
-
-
+ct_utils.write_json(os.path.join(project_base,'output_file_to_source_file.json'), output_file_to_source_file)
+
+
 #%% Manual review
 
 # Delete images that are *not* empty
@@ -463,15 +467,13 @@ for output_file in tqdm(output_file_to_source_file.keys()):
     if output_file not in remaining_images:
         source_file_relative = output_file_to_source_file[output_file]
         removed_blank_images_relative.append(source_file_relative)
-
+
 removed_blank_images_relative_set = set(removed_blank_images_relative)
 assert len(removed_blank_images_relative) + len(remaining_images) == len(output_file_to_source_file)
 
 
 #%% Copy only the confirmed blanks to the confirmed folder
 
-from megadetector.utils.path_utils import is_image_file
-
 all_candidate_blanks = recursive_file_list(candidate_blanks_base,return_relative_paths=True)
 print('Found {} candidate blanks'.format(len(all_candidate_blanks)))
 
@@ -479,19 +481,19 @@ skipped_images_relative = []
 skipped_non_images = []
 
 for source_fn_relative in tqdm(all_candidate_blanks):
-
+
     # Skip anything we removed from the "candidate non-blanks" folder; these weren't really
     # blank.
     if source_fn_relative in removed_blank_images_relative_set:
         skipped_images_relative.append(source_fn_relative)
         continue
-
+
     if not is_image_file(source_fn_relative):
         # Not a typo; "skipped images" really means "skipped files"
         skipped_images_relative.append(source_fn_relative)
         skipped_non_images.append(source_fn_relative)
-
-
+
+
     source_fn_abs = os.path.join(candidate_blanks_base,source_fn_relative)
     assert os.path.isfile(source_fn_abs)
     target_fn_abs = os.path.join(confirmed_blanks_base,source_fn_relative)
@@ -499,12 +501,11 @@ for source_fn_relative in tqdm(all_candidate_blanks):
     # shutil.copyfile(source_fn_abs,target_fn_abs)
 
 print('Skipped {} files ({} non-image files)'.format(len(skipped_images_relative),
-    len(skipped_non_images)))
+    len(skipped_non_images)))
 
 
 #%% Validate the folder of confirmed blanks
 
-from megadetector.utils.path_utils import find_images
 # all_confirmed_blanks = recursive_file_list(confirmed_blanks_base,return_relative_paths=True)
 all_confirmed_blanks = find_images(confirmed_blanks_base,return_relative_paths=True,recursive=True)
 assert len(all_confirmed_blanks) < len(all_candidate_blanks)
@@ -518,8 +519,8 @@ i_image = random.randint(0, len(skipped_images_relative))
 fn_relative = skipped_images_relative[i_image]
 fn_abs = os.path.join(candidate_blanks_base,fn_relative)
 assert os.path.isfile(fn_abs)
-
-clipboard.copy('feh --scale-down "{}"'.format(fn_abs))
+
+# import clipboard; clipboard.copy('feh --scale-down "{}"'.format(fn_abs))
 
 
 #%% Record location information for each confirmed file
@@ -532,27 +533,24 @@ all_fn_relative_to_location = {}
 # location = next(iter(location_to_blank_image_urls.keys()))
 for location in tqdm(location_to_blank_image_urls):
     urls_this_location = location_to_blank_image_urls[location]
-
+
     # url = urls_this_location[0]
     for url in urls_this_location:
         # Turn:
-        #
+        #
         # https://lilablobssc.blob.core.windows.net/caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg'
         #
         # ...into:
         #
-        # caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg'
+        # caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg'
         p = urlparse(url)
         fn_relative = str(p.path)[1:]
         all_fn_relative_to_location[fn_relative] = location
 
 # Build a much smaller mapping of just the confirmed blanks
-confirmed_fn_relative_to_location = {}
+confirmed_fn_relative_to_location = {}
 for i_fn,fn_relative in tqdm(enumerate(all_confirmed_blanks),total=len(all_confirmed_blanks)):
     confirmed_fn_relative_to_location[fn_relative] = all_fn_relative_to_location[fn_relative]
 
-
-
-
-with open(confirmed_fn_relative_to_location_file,'w') as f:
-    json.dump(confirmed_fn_relative_to_location,f,indent=1)
+ct_utils.write_json(all_fn_relative_to_location_file, all_fn_relative_to_location)
+ct_utils.write_json(confirmed_fn_relative_to_location_file, confirmed_fn_relative_to_location)
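A change that recurs throughout the create_lila_blank_set.py hunks above: inline JSON-writing blocks (open a file, call json.dump) are replaced by the new ct_utils.write_json helper, and ad-hoc mid-file imports are hoisted to the top of the module. A minimal sketch of the substitution, using hypothetical data and a hypothetical path, and assuming only the positional (output_path, object) usage visible in the diff; write_json's defaults (e.g. indentation) are not specified here:

    import json
    from megadetector.utils import ct_utils

    location_to_urls = {'loc_01': ['https://example.org/img_0001.jpg']}  # hypothetical data
    cache_file = 'location_to_blank_image_urls.json'                     # hypothetical path

    # 5.0.28-era pattern, as removed in the hunks above
    with open(cache_file, 'w') as f:
        json.dump(location_to_urls, f, indent=1)

    # 10.0.0 pattern, as added in the hunks above
    ct_utils.write_json(cache_file, location_to_urls)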
megadetector/data_management/lila/create_lila_test_set.py

@@ -2,7 +2,7 @@
 
 create_lila_test_set.py
 
-Create a test set of camera trap images, containing N empty and N non-empty
+Create a test set of camera trap images, containing N empty and N non-empty
 images from each LILA data set.
 
 """
@@ -15,6 +15,7 @@ import random
 
 from megadetector.data_management.lila.lila_common import \
     read_lila_metadata, read_metadata_file_for_dataset
+from megadetector.utils.url_utils import parallel_download_urls
 
 n_empty_images_per_dataset = 1
 n_non_empty_images_per_dataset = 1
@@ -39,9 +40,10 @@ metadata_table = read_lila_metadata(metadata_dir)
 #%% Download and extract metadata for every dataset
 
 for ds_name in metadata_table.keys():
-    metadata_table[ds_name]['metadata_filename'] =
-
-
+    metadata_table[ds_name]['metadata_filename'] = \
+        read_metadata_file_for_dataset(ds_name=ds_name,
+            metadata_dir=metadata_dir,
+            metadata_table=metadata_table)
 
 
 #%% Choose images from each dataset
@@ -52,49 +54,49 @@ for ds_name in metadata_table.keys():
 for ds_name in metadata_table.keys():
 
     print('Choosing images for {}'.format(ds_name))
-
+
     json_filename = metadata_table[ds_name]['metadata_filename']
-
+
     with open(json_filename,'r') as f:
         d = json.load(f)
-
+
     category_id_to_name = {c['id']:c['name'] for c in d['categories']}
     category_name_to_id = {c['name']:c['id'] for c in d['categories']}
-
+
     ## Find empty images
-
+
     if 'empty' not in category_name_to_id:
         empty_annotations_to_download = []
     else:
-        empty_category_id = category_name_to_id['empty']
+        empty_category_id = category_name_to_id['empty']
         empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] == empty_category_id]
         try:
-            empty_annotations_to_download = random.sample(empty_annotations,n_empty_images_per_dataset)
+            empty_annotations_to_download = random.sample(empty_annotations,n_empty_images_per_dataset)
         except ValueError:
             print('No empty images available for dataset {}'.format(ds_name))
             empty_annotations_to_download = []
-
+
     ## Find non-empty images
-
-    non_empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] != empty_category_id]
+
+    non_empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] != empty_category_id]
     try:
         non_empty_annotations_to_download = random.sample(non_empty_annotations,n_non_empty_images_per_dataset)
     except ValueError:
         print('No non-empty images available for dataset {}'.format(ds_name))
         non_empty_annotations_to_download = []
 
-
+
     annotations_to_download = empty_annotations_to_download + non_empty_annotations_to_download
-
+
     image_ids_to_download = set([ann['image_id'] for ann in annotations_to_download])
     assert len(image_ids_to_download) == len(set(image_ids_to_download))
-
+
     images_to_download = []
     for im in d['images']:
         if im['id'] in image_ids_to_download:
             images_to_download.append(im)
     assert len(images_to_download) == len(image_ids_to_download)
-
+
     metadata_table[ds_name]['images_to_download'] = images_to_download
 
 # ...for each dataset
@@ -109,19 +111,19 @@ for ds_name in metadata_table.keys():
 
     base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
     assert not base_url.endswith('/')
-
+
     # Retrieve image file names
     filenames = [im['file_name'] for im in metadata_table[ds_name]['images_to_download']]
-
+
     urls_to_download = []
-
+
     # Convert to URLs
-    for fn in filenames:
+    for fn in filenames:
         url = base_url + '/' + fn
         urls_to_download.append(url)
 
     metadata_table[ds_name]['urls_to_download'] = urls_to_download
-
+
 # ...for each dataset
 
 
@@ -135,26 +137,25 @@ for ds_name in metadata_table.keys():
     base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
     assert not base_url.endswith('/')
     base_url += '/'
-
+
     urls_to_download = metadata_table[ds_name]['urls_to_download']
-
+
     # url = urls_to_download[0]
     for url in urls_to_download:
-
+
         assert base_url in url
-        output_file_relative = ds_name.lower().replace(' ','_') +
+        output_file_relative = ds_name.lower().replace(' ','_') + \
+            '_' + url.replace(base_url,'').replace('/','_').replace('\\','_')
         output_file_absolute = os.path.join(output_dir,output_file_relative)
         url_to_target_file[url] = output_file_absolute
-
+
     # ...for each url
-
+
 # ...for each dataset
 
 
 #%% Download image files (execution)
 
-from megadetector.utils.url_utils import parallel_download_urls
-
 download_results = parallel_download_urls(url_to_target_file,
     verbose=False,
     overwrite=False,
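In the create_lila_test_set.py hunks above, the parallel_download_urls import moves to the top of the module, and the script builds a URL-to-target-file mapping before downloading. A minimal usage sketch with a hypothetical URL and output path, using only the arguments visible in the (truncated) call shown in the diff and assuming the remaining parameters have defaults:

    import os
    from megadetector.utils.url_utils import parallel_download_urls

    output_dir = os.path.expanduser('~/lila/lila_test_set')  # hypothetical output folder
    os.makedirs(output_dir, exist_ok=True)

    url_to_target_file = {
        # Hypothetical URL, in the flattened-filename style the script constructs;
        # substitute real LILA image URLs before running.
        'https://storage.googleapis.com/public-datasets-lila/some-dataset/example.jpg':
            os.path.join(output_dir, 'some_dataset_example.jpg')
    }

    download_results = parallel_download_urls(url_to_target_file,
                                               verbose=False,
                                               overwrite=False)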
megadetector/data_management/lila/create_links_to_md_results_files.py

@@ -19,7 +19,7 @@ md_results_local_folder = r'g:\temp\lila-md-results'
 md_base_url = 'https://lila.science/public/lila-md-results/'
 assert md_base_url.endswith('/')
 
-# No RDE files for datasets with no location information
+# No RDE files for datasets with no location information
 datasets_without_location_info = ('ena24','missouri-camera-traps')
 
 md_results_column_names = ['mdv4_results_raw','mdv5a_results_raw','mdv5b_results_raw','md_results_with_rde']
@@ -32,8 +32,8 @@ validate_urls = False
 df = pd.read_csv(input_csv_file)
 for s in md_results_column_names:
     df[s] = ''
-
-
+
+
 #%% Find matching files locally, and create URLs
 
 local_files = os.listdir(md_results_local_folder)
@@ -41,14 +41,14 @@ local_files = [fn for fn in local_files if fn.endswith('.zip')]
 
 # i_row = 0; row = df.iloc[i_row]
 for i_row,row in df.iterrows():
-
+
     if not isinstance(row['name'],str):
         continue
-
+
     dataset_shortname = row['short_name']
     matching_files = [fn for fn in local_files if dataset_shortname in fn]
-
-    # No RDE files for datasets with no location information
+
+    # No RDE files for datasets with no location information
     if dataset_shortname in datasets_without_location_info:
         assert len(matching_files) == 2
         mdv5a_files = [fn for fn in matching_files if 'mdv5a' in fn]
@@ -57,10 +57,10 @@ for i_row,row in df.iterrows():
         df.loc[i_row,'mdv5a_results_raw'] = md_base_url + mdv5a_files[0]
         df.loc[i_row,'mdv5b_results_raw'] = md_base_url + mdv5b_files[0]
     else:
-        # Exclude single-season files for snapshot-serengeti
+        # Exclude single-season files for snapshot-serengeti
         if dataset_shortname == 'snapshot-serengeti':
             matching_files = [fn for fn in matching_files if '_S' not in fn]
-        assert len(matching_files) == 2
+        assert len(matching_files) == 2
         assert all(['mdv4' in fn for fn in matching_files])
         rde_files = [fn for fn in matching_files if 'rde' in fn]
         raw_files = [fn for fn in matching_files if 'rde' not in fn]
@@ -76,28 +76,28 @@ for i_row,row in df.iterrows():
         df.loc[i_row,'mdv5a_results_raw'] = md_base_url + mdv5a_files[0]
         df.loc[i_row,'mdv5b_results_raw'] = md_base_url + mdv5b_files[0]
         df.loc[i_row,'md_results_with_rde'] = md_base_url + rde_files[0]
-
+
     print('Found {} matching files for {}'.format(len(matching_files),dataset_shortname))
 
-# ...for each row
+# ...for each row
 
 
 #%% Validate URLs
 
 if validate_urls:
-
+
     from megadetector.utils.url_utils import test_urls
-
+
     urls = set()
-
+
     for i_row,row in df.iterrows():
         for column_name in md_results_column_names:
             if len(row[column_name]) > 0:
-                assert row[column_name] not in urls
+                assert row[column_name] not in urls
                 urls.add(row[column_name])
-
-    test_urls(urls,error_on_failure=True)
-
+
+    test_urls(urls,error_on_failure=True)
+
     print('Validated {} URLs'.format(len(urls)))
 
 
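The create_links_to_md_results_files.py hunks above keep the optional URL-validation block intact aside from whitespace. A minimal sketch of that check with hypothetical URLs; test_urls and its error_on_failure flag are taken from the hunk, everything else here is illustrative:

    from megadetector.utils.url_utils import test_urls

    # Hypothetical links standing in for the MD-results URLs the script assembles;
    # with error_on_failure=True the call raises unless these are real, reachable URLs.
    urls = {
        'https://lila.science/public/lila-md-results/example-dataset_mdv5a.json.zip',
        'https://lila.science/public/lila-md-results/example-dataset_mdv5b.json.zip'
    }

    test_urls(urls, error_on_failure=True)
    print('Validated {} URLs'.format(len(urls)))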