megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector was flagged on its registry page; see that page for details.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +93 -79
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
- api/batch_processing/postprocessing/compare_batch_results.py +114 -44
- api/batch_processing/postprocessing/convert_output_format.py +62 -19
- api/batch_processing/postprocessing/load_api_results.py +17 -20
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +165 -68
- api/batch_processing/postprocessing/merge_detections.py +40 -15
- api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
- api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +107 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -0
- data_management/coco_to_yolo.py +86 -62
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +130 -83
- data_management/databases/subset_json_db.py +25 -16
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -144
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -160
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +8 -8
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +309 -159
- data_management/labelme_to_yolo.py +103 -60
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +114 -31
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +92 -90
- data_management/lila/generate_lila_per_image_labels.py +56 -43
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +103 -70
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +161 -99
- data_management/remap_coco_categories.py +84 -0
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +32 -44
- data_management/wi_download_csv_to_coco.py +246 -0
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +535 -95
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +189 -114
- detection/run_inference_with_yolov5_val.py +118 -51
- detection/run_tiled_inference.py +113 -42
- detection/tf_detector.py +51 -28
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +249 -70
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -862
- md_utils/path_utils.py +655 -155
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +208 -27
- md_utils/write_html_image_list.py +51 -35
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +908 -311
- md_visualization/visualize_db.py +109 -58
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- md_visualization/visualize_megadb.py +0 -183
- megadetector-5.0.7.dist-info/RECORD +0 -202
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
--- a/data_management/lila/create_lila_blank_set.py
+++ b/data_management/lila/create_lila_blank_set.py
@@ -1,12 +1,16 @@
-
-
-
-
-
-
-
-
-
+"""
+
+create_lila_blank_set.py
+
+Create a folder of blank images sampled from LILA. We'll aim for diversity, so less-common
+locations will be oversampled relative to more common locations. We'll also run MegaDetector
+(with manual review) to remove some incorrectly-labeled, not-actually-empty images from our
+blank set.
+
+We'll store location information for each image in a .json file, so we can split locations
+into train/val in downstream tasks.
+
+"""
 
 #%% Constants and imports
 
@@ -14,7 +18,6 @@ import os
 import random
 import math
 import json
-import shutil
 
 import numpy as np
 from tqdm import tqdm
@@ -22,8 +25,7 @@ from multiprocessing.pool import ThreadPool
 from urllib.parse import urlparse
 from collections import defaultdict
 
-from data_management.lila.lila_common import \
-    read_lila_all_images_file, azure_url_to_gcp_http_url
+from data_management.lila.lila_common import read_lila_all_images_file
 from md_utils.url_utils import download_url
 from md_visualization import visualization_utils as vis_utils
 from md_utils.path_utils import recursive_file_list
@@ -45,6 +47,14 @@ os.makedirs(confirmed_blanks_base,exist_ok=True)
 md_possible_non_blanks_folder = os.path.join(project_base,'candidate_non_blanks')
 os.makedirs(md_possible_non_blanks_folder,exist_ok=True)
 
+location_to_blank_image_urls_cache_file = os.path.join(project_base,
+    'location_to_blank_image_urls.json')
+
+md_results_file = os.path.join(project_base,'lila_blanks_md_results.json')
+
+all_fn_relative_to_location_file = os.path.join(project_base,'all_fn_relative_to_location.json')
+confirmed_fn_relative_to_location_file = os.path.join(project_base,'confirmed_fn_relative_to_location.json')
+
 preferred_image_download_source = 'gcp'
 
 # Number of concurrent download threads
@@ -171,9 +181,6 @@ for s in original_labels_with_nan_common_names:
 
 #%% Map locations to blank images
 
-location_to_blank_image_urls_cache_file = os.path.join(project_base,
-    'location_to_blank_image_urls.json')
-
 force_map_locations = False
 
 # Load from .json if available
@@ -275,7 +282,7 @@ print('Max samples per location: {}'.format(max_blanks_per_location))
 
 #%% Download those image files (prep)
 
-container_to_url_base = {
+container_to_url_base = {
     'lilablobssc.blob.core.windows.net':'/',
     'storage.googleapis.com':'/public-datasets-lila/'
 }
@@ -318,6 +325,21 @@ def download_relative_filename(url, output_base, verbose=False, url_base=None, o
         result['status'] = 'success'
     return result
 
+def azure_url_to_gcp_http_url(url,error_if_not_azure_url=True):
+    """
+    Most URLs point to Azure by default, but most files are available on both Azure and GCP.
+    This function converts an Azure URL to the corresponding GCP http:// url.
+    """
+
+    lila_azure_storage_account = 'https://lilablobssc.blob.core.windows.net'
+    gcp_bucket_api_url = 'https://storage.googleapis.com/public-datasets-lila'
+    error_if_not_azure_url = False
+
+    if error_if_not_azure_url:
+        assert url.startswith(lila_azure_storage_account)
+    gcp_url = url.replace(lila_azure_storage_account,gcp_bucket_api_url,1)
+    return gcp_url
+
 # Convert Azure URLs to GCP URLs if necessary
 if preferred_image_download_source != 'azure':
     assert preferred_image_download_source == 'gcp'
@@ -358,8 +380,6 @@ print('Errors on {} of {} downloads'.format(len(error_urls),len(results)))
 
 #%% Run MegaDetector on the folder
 
-md_results_file = os.path.join(project_base,'lila_blanks_md_results.json')
-
 cmd = 'python run_detector_batch.py MDV5A "{}" "{}"'.format(
     candidate_blanks_base,md_results_file)
 cmd += ' --recursive --output_relative_filenames'
@@ -419,6 +439,7 @@ for i_fn,source_file_relative in tqdm(enumerate(images_to_review_to_detections),
         confidence_threshold=min_threshold,
         target_size=(1280,-1))
 
+# This is a temporary file I just used during debugging
 with open(os.path.join(project_base,'output_file_to_source_file.json'),'w') as f:
     json.dump(output_file_to_source_file,f,indent=1)
 
@@ -442,33 +463,95 @@ for output_file in tqdm(output_file_to_source_file.keys()):
     source_file_relative = output_file_to_source_file[output_file]
     removed_blank_images_relative.append(source_file_relative)
 
+removed_blank_images_relative_set = set(removed_blank_images_relative)
 assert len(removed_blank_images_relative) + len(remaining_images) == len(output_file_to_source_file)
 
 
-#%% Copy
+#%% Copy only the confirmed blanks to the confirmed folder
+
+from md_utils.path_utils import is_image_file
 
 all_candidate_blanks = recursive_file_list(candidate_blanks_base,return_relative_paths=True)
 print('Found {} candidate blanks'.format(len(all_candidate_blanks)))
 
+skipped_images_relative = []
+skipped_non_images = []
+
 for source_fn_relative in tqdm(all_candidate_blanks):
+
+    # Skip anything we removed from the "candidate non-blanks" folder; these weren't really
+    # blank.
+    if source_fn_relative in removed_blank_images_relative_set:
+        skipped_images_relative.append(source_fn_relative)
+        continue
+
+    if not is_image_file(source_fn_relative):
+        # Not a typo; "skipped images" really means "skipped files"
+        skipped_images_relative.append(source_fn_relative)
+        skipped_non_images.append(source_fn_relative)
+
+
     source_fn_abs = os.path.join(candidate_blanks_base,source_fn_relative)
     assert os.path.isfile(source_fn_abs)
    target_fn_abs = os.path.join(confirmed_blanks_base,source_fn_relative)
     os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
-    shutil.copyfile(source_fn_abs,target_fn_abs)
+    # shutil.copyfile(source_fn_abs,target_fn_abs)
 
+print('Skipped {} files ({} non-image files)'.format(len(skipped_images_relative),
+    len(skipped_non_images)))
 
-#%% Record location information for each file
 
-
-
-
-
-
-
-
-all_confirmed_blanks = recursive_file_list(confirmed_blanks_base,return_relative_paths=True)
+#%% Validate the folder of confirmed blanks
+
+from md_utils.path_utils import find_images
+# all_confirmed_blanks = recursive_file_list(confirmed_blanks_base,return_relative_paths=True)
+all_confirmed_blanks = find_images(confirmed_blanks_base,return_relative_paths=True,recursive=True)
+assert len(all_confirmed_blanks) < len(all_candidate_blanks)
 print('Found {} confirmed blanks'.format(len(all_confirmed_blanks)))
 
-
-
+
+#%% Manually review a few of the images we skipped
+
+# ...to make sure they're non-blank
+i_image = random.randint(0, len(skipped_images_relative))
+fn_relative = skipped_images_relative[i_image]
+fn_abs = os.path.join(candidate_blanks_base,fn_relative)
+assert os.path.isfile(fn_abs)
+import clipboard
+clipboard.copy('feh --scale-down "{}"'.format(fn_abs))
+
+
+#%% Record location information for each confirmed file
+
+# Map every URL's path to the corresponding location
+#
+# This is *all empty URLs*, not just the ones we downloaded
+all_fn_relative_to_location = {}
+
+# location = next(iter(location_to_blank_image_urls.keys()))
+for location in tqdm(location_to_blank_image_urls):
+    urls_this_location = location_to_blank_image_urls[location]
+
+    # url = urls_this_location[0]
+    for url in urls_this_location:
+        # Turn:
+        #
+        # https://lilablobssc.blob.core.windows.net/caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg'
+        #
+        # ...into:
+        #
+        # caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg'
+        p = urlparse(url)
+        fn_relative = str(p.path)[1:]
+        all_fn_relative_to_location[fn_relative] = location
+
+# Build a much smaller mapping of just the confirmed blanks
+confirmed_fn_relative_to_location = {}
+for i_fn,fn_relative in tqdm(enumerate(all_confirmed_blanks),total=len(all_confirmed_blanks)):
+    confirmed_fn_relative_to_location[fn_relative] = all_fn_relative_to_location[fn_relative]

+with open(all_fn_relative_to_location_file,'w') as f:
+    json.dump(all_fn_relative_to_location,f,indent=1)
+
+with open(confirmed_fn_relative_to_location_file,'w') as f:
+    json.dump(confirmed_fn_relative_to_location,f,indent=1)
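
For context, the azure_url_to_gcp_http_url helper added above (replacing the version previously imported from lila_common) is a single prefix substitution; note that as merged, the local assignment error_if_not_azure_url = False shadows the parameter, so the assert path never runs. A minimal standalone sketch of the same behavior, using the constants shown in the diff (illustrative only, not an import from the package):

# Sketch: the Azure -> GCP URL rewrite performed by the helper above
azure_prefix = 'https://lilablobssc.blob.core.windows.net'
gcp_prefix = 'https://storage.googleapis.com/public-datasets-lila'

def azure_to_gcp(url):
    # Replace only the first occurrence of the Azure storage prefix
    return url.replace(azure_prefix, gcp_prefix, 1)

url = 'https://lilablobssc.blob.core.windows.net/caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg'
print(azure_to_gcp(url))
# https://storage.googleapis.com/public-datasets-lila/caltech-unzipped/cct_images/5968c0f9-23d2-11e8-a6a3-ec086b02610b.jpg
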
--- a/data_management/lila/create_lila_test_set.py
+++ b/data_management/lila/create_lila_test_set.py
@@ -1,11 +1,11 @@
-
-
-
-
-
-
-
-
+"""
+
+create_lila_test_set.py
+
+Create a test set of camera trap images, containing N empty and N non-empty
+images from each LILA data set.
+
+"""
 
 #%% Constants and imports
 
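
The header rewrites in this release (here and in create_lila_blank_set.py above) convert leading comment blocks into module docstrings, which lines up with the new docs/source/conf.py in the file list, presumably a Sphinx configuration. One practical consequence is that each script's summary becomes machine-readable; a small sketch of reading it without executing the script (the local path here is hypothetical):

import ast

# Parsing does not run the script's top-level code, which matters for
# one-off scripts like these that do work at import time.
with open('data_management/lila/create_lila_test_set.py') as f:
    tree = ast.parse(f.read())
print(ast.get_docstring(tree))
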
--- a/data_management/lila/create_links_to_md_results_files.py
+++ b/data_management/lila/create_links_to_md_results_files.py
@@ -1,106 +1,106 @@
-
-
-
-
-
-
-
-
-#%% Imports and constants
-
-import os
-
-import pandas as pd
-
-input_csv_file = r'g:\temp\lila_camera_trap_datasets_no_md_results.csv'
-output_csv_file = r'g:\temp\lila_camera_trap_datasets.csv'
-
-md_results_local_folder = r'g:\temp\lila-md-results'
-md_base_url = 'https://lila.science/public/lila-md-results/'
-assert md_base_url.endswith('/')
-
-# No RDE files for datasets with no location information
-datasets_without_location_info = ('ena24','missouri-camera-traps')
-
-md_results_column_names = ['mdv4_results_raw','mdv5a_results_raw','mdv5b_results_raw','md_results_with_rde']
-
-validate_urls = False
-
-
-#%% Read input data
-
-df = pd.read_csv(input_csv_file)
-for s in md_results_column_names:
-    df[s] = ''
-
-
-#%% Find matching files locally, and create URLs
-
-local_files = os.listdir(md_results_local_folder)
-local_files = [fn for fn in local_files if fn.endswith('.zip')]
-
-# i_row = 0; row = df.iloc[i_row]
-for i_row,row in df.iterrows():
-
-    if not isinstance(row['name'],str):
-        continue
-
-    dataset_shortname = row['short_name']
-    matching_files = [fn for fn in local_files if dataset_shortname in fn]
-
-    # No RDE files for datasets with no location information
-    if dataset_shortname in datasets_without_location_info:
-        assert len(matching_files) == 2
-        mdv5a_files = [fn for fn in matching_files if 'mdv5a' in fn]
-        mdv5b_files = [fn for fn in matching_files if 'mdv5b' in fn]
-        assert len(mdv5a_files) == 1 and len(mdv5b_files) == 1
-        df.loc[i_row,'mdv5a_results_raw'] = md_base_url + mdv5a_files[0]
-        df.loc[i_row,'mdv5b_results_raw'] = md_base_url + mdv5b_files[0]
-    else:
-        # Exclude single-season files for snapshot-serengeti
-        if dataset_shortname == 'snapshot-serengeti':
-            matching_files = [fn for fn in matching_files if '_S' not in fn]
-            assert len(matching_files) == 2
-            assert all(['mdv4' in fn for fn in matching_files])
-            rde_files = [fn for fn in matching_files if 'rde' in fn]
-            raw_files = [fn for fn in matching_files if 'rde' not in fn]
-            assert len(rde_files) == 1 and len(raw_files) == 1
-            df.loc[i_row,'mdv4_results_raw'] = md_base_url + raw_files[0]
-            df.loc[i_row,'md_results_with_rde'] = md_base_url + rde_files[0]
-        else:
-            assert len(matching_files) == 3
-            mdv5a_files = [fn for fn in matching_files if 'mdv5a' in fn and 'rde' not in fn]
-            mdv5b_files = [fn for fn in matching_files if 'mdv5b' in fn and 'rde' not in fn]
-            rde_files = [fn for fn in matching_files if 'rde' in fn]
-            assert len(mdv5a_files) == 1 and len(mdv5b_files) == 1 and len(rde_files) == 1
-            df.loc[i_row,'mdv5a_results_raw'] = md_base_url + mdv5a_files[0]
-            df.loc[i_row,'mdv5b_results_raw'] = md_base_url + mdv5b_files[0]
-            df.loc[i_row,'md_results_with_rde'] = md_base_url + rde_files[0]
-
-    print('Found {} matching files for {}'.format(len(matching_files),dataset_shortname))
-
-# ...for each row
-
-
-#%% Validate URLs
-
-if validate_urls:
-
-    from md_utils.url_utils import test_urls
-
-    urls = set()
-
-    for i_row,row in df.iterrows():
-        for column_name in md_results_column_names:
-            if len(row[column_name]) > 0:
-                assert row[column_name] not in urls
-                urls.add(row[column_name])
-
-    test_urls(urls,error_on_failure=True)
-
-    print('Validated {} URLs'.format(len(urls)))
-
-
-#%% Write new .csv file
-
-df.to_csv(output_csv_file,header=True,index=False)
+"""
+
+create_links_to_md_results_files.py
+
+One-off script to populate the columns in the camera trap data .csv file that point to MD results.
+
+"""
+
+#%% Imports and constants
+
+import os
+
+import pandas as pd
+
+input_csv_file = r'g:\temp\lila_camera_trap_datasets_no_md_results.csv'
+output_csv_file = r'g:\temp\lila_camera_trap_datasets.csv'
+
+md_results_local_folder = r'g:\temp\lila-md-results'
+md_base_url = 'https://lila.science/public/lila-md-results/'
+assert md_base_url.endswith('/')
+
+# No RDE files for datasets with no location information
+datasets_without_location_info = ('ena24','missouri-camera-traps')
+
+md_results_column_names = ['mdv4_results_raw','mdv5a_results_raw','mdv5b_results_raw','md_results_with_rde']
+
+validate_urls = False
+
+
+#%% Read input data
+
+df = pd.read_csv(input_csv_file)
+for s in md_results_column_names:
+    df[s] = ''
+
+
+#%% Find matching files locally, and create URLs
+
+local_files = os.listdir(md_results_local_folder)
+local_files = [fn for fn in local_files if fn.endswith('.zip')]
+
+# i_row = 0; row = df.iloc[i_row]
+for i_row,row in df.iterrows():
+
+    if not isinstance(row['name'],str):
+        continue
+
+    dataset_shortname = row['short_name']
+    matching_files = [fn for fn in local_files if dataset_shortname in fn]
+
+    # No RDE files for datasets with no location information
+    if dataset_shortname in datasets_without_location_info:
+        assert len(matching_files) == 2
+        mdv5a_files = [fn for fn in matching_files if 'mdv5a' in fn]
+        mdv5b_files = [fn for fn in matching_files if 'mdv5b' in fn]
+        assert len(mdv5a_files) == 1 and len(mdv5b_files) == 1
+        df.loc[i_row,'mdv5a_results_raw'] = md_base_url + mdv5a_files[0]
+        df.loc[i_row,'mdv5b_results_raw'] = md_base_url + mdv5b_files[0]
+    else:
+        # Exclude single-season files for snapshot-serengeti
+        if dataset_shortname == 'snapshot-serengeti':
+            matching_files = [fn for fn in matching_files if '_S' not in fn]
+            assert len(matching_files) == 2
+            assert all(['mdv4' in fn for fn in matching_files])
+            rde_files = [fn for fn in matching_files if 'rde' in fn]
+            raw_files = [fn for fn in matching_files if 'rde' not in fn]
+            assert len(rde_files) == 1 and len(raw_files) == 1
+            df.loc[i_row,'mdv4_results_raw'] = md_base_url + raw_files[0]
+            df.loc[i_row,'md_results_with_rde'] = md_base_url + rde_files[0]
+        else:
+            assert len(matching_files) == 3
+            mdv5a_files = [fn for fn in matching_files if 'mdv5a' in fn and 'rde' not in fn]
+            mdv5b_files = [fn for fn in matching_files if 'mdv5b' in fn and 'rde' not in fn]
+            rde_files = [fn for fn in matching_files if 'rde' in fn]
+            assert len(mdv5a_files) == 1 and len(mdv5b_files) == 1 and len(rde_files) == 1
+            df.loc[i_row,'mdv5a_results_raw'] = md_base_url + mdv5a_files[0]
+            df.loc[i_row,'mdv5b_results_raw'] = md_base_url + mdv5b_files[0]
+            df.loc[i_row,'md_results_with_rde'] = md_base_url + rde_files[0]
+
+    print('Found {} matching files for {}'.format(len(matching_files),dataset_shortname))
+
+# ...for each row
+
+
+#%% Validate URLs
+
+if validate_urls:
+
+    from md_utils.url_utils import test_urls
+
+    urls = set()
+
+    for i_row,row in df.iterrows():
+        for column_name in md_results_column_names:
+            if len(row[column_name]) > 0:
+                assert row[column_name] not in urls
+                urls.add(row[column_name])
+
+    test_urls(urls,error_on_failure=True)
+
+    print('Validated {} URLs'.format(len(urls)))
+
+
+#%% Write new .csv file
+
+df.to_csv(output_csv_file,header=True,index=False)
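
The optional URL-validation step above calls md_utils.url_utils.test_urls, whose implementation is not part of this diff. A rough standalone equivalent, assuming it performs simple per-URL reachability checks (the package's actual implementation may differ):

import urllib.request

def check_urls(urls, error_on_failure=True):
    # Hypothetical stand-in for md_utils.url_utils.test_urls: try to open
    # each URL with a HEAD request; raise on the first failure if requested,
    # otherwise collect and return the failures.
    failures = []
    for url in urls:
        request = urllib.request.Request(url, method='HEAD')
        try:
            urllib.request.urlopen(request, timeout=30)
        except Exception as e:
            if error_on_failure:
                raise
            failures.append((url, str(e)))
    return failures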