megadetector 5.0.28-py3-none-any.whl → 10.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +26 -26
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -2
- megadetector/data_management/camtrap_dp_to_coco.py +79 -46
- megadetector/data_management/cct_json_utils.py +103 -103
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +210 -193
- megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
- megadetector/data_management/databases/integrity_check_json_db.py +228 -200
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +88 -39
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +133 -125
- megadetector/data_management/labelme_to_yolo.py +159 -73
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
- megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +73 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
- megadetector/data_management/mewc_to_md.py +344 -340
- megadetector/data_management/ocr_tools.py +262 -255
- megadetector/data_management/read_exif.py +249 -227
- megadetector/data_management/remap_coco_categories.py +90 -28
- megadetector/data_management/remove_exif.py +81 -21
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +588 -120
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +248 -122
- megadetector/data_management/yolo_to_coco.py +333 -191
- megadetector/detection/change_detection.py +832 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +358 -278
- megadetector/detection/run_detector.py +399 -186
- megadetector/detection/run_detector_batch.py +404 -377
- megadetector/detection/run_inference_with_yolov5_val.py +340 -327
- megadetector/detection/run_tiled_inference.py +257 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +332 -295
- megadetector/postprocessing/add_max_conf.py +19 -11
- megadetector/postprocessing/categorize_detections_by_size.py +45 -45
- megadetector/postprocessing/classification_postprocessing.py +468 -433
- megadetector/postprocessing/combine_batch_outputs.py +23 -23
- megadetector/postprocessing/compare_batch_results.py +590 -525
- megadetector/postprocessing/convert_output_format.py +106 -102
- megadetector/postprocessing/create_crop_folder.py +347 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +48 -27
- megadetector/postprocessing/md_to_coco.py +133 -102
- megadetector/postprocessing/md_to_labelme.py +107 -90
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +92 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -301
- megadetector/postprocessing/remap_detection_categories.py +91 -38
- megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +156 -74
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/ct_utils.py +1049 -211
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +632 -529
- megadetector/utils/path_utils.py +1520 -431
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/split_locations_into_train_val.py +62 -62
- megadetector/utils/string_utils.py +148 -27
- megadetector/utils/url_utils.py +489 -176
- megadetector/utils/wi_utils.py +2658 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +34 -30
- megadetector/visualization/render_images_with_thumbnails.py +39 -74
- megadetector/visualization/visualization_utils.py +487 -435
- megadetector/visualization/visualize_db.py +232 -198
- megadetector/visualization/visualize_detector_output.py +82 -76
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
- megadetector-10.0.0.dist-info/RECORD +139 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -509
- megadetector-5.0.28.dist-info/RECORD +0 -209
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
--- megadetector/data_management/importers/zamba_results_to_md_results.py
+++ /dev/null
@@ -1,180 +0,0 @@
-"""
-
-zamba_results_to_md_results.py
-
-Convert a labels.csv file produced by Zamba Cloud to a MD results file suitable
-for import into Timelapse.
-
-Columns are expected to be:
-
-video_uuid (not used)
-original_filename (assumed to be a relative path name)
-top_k_label,top_k_probability, for k = 1..N
-[category name 1],[category name 2],...
-corrected_label
-
-Because the MD results file fundamentally stores detections, what we'll
-actually do is create bogus detections that fill the entire image.
-
-There is no special handling of empty/blank categories; because these results are
-based on a classifier, rather than a detector (where "blank" would be the absence of
-all other categories), "blank" can be queried in Timelapse just like any other class.
-
-"""
-
-#%% Imports and constants
-
-import pandas as pd
-import json
-
-
-#%% Main function
-
-def zamba_results_to_md_results(input_file,output_file=None):
-    """
-    Converts the .csv file [input_file] to the MD-formatted .json file [output_file].
-
-    If [output_file] is None, '.json' will be appended to the input file.
-    """
-
-    if output_file is None:
-        output_file = input_file + '.json'
-
-    df = pd.read_csv(input_file)
-
-    expected_columns = ('video_uuid','corrected_label','original_filename')
-    for s in expected_columns:
-        assert s in df.columns,\
-            'Expected column {} not found, are you sure this is a Zamba results .csv file?'.format(
-                s)
-
-    # How many results are included per file?
-    assert 'top_1_probability' in df.columns and 'top_1_label' in df.columns
-    top_k = 2
-    while(True):
-        p_string = 'top_' + str(top_k) + '_probability'
-        label_string = 'top_' + str(top_k) + '_label'
-
-        if p_string in df.columns:
-            assert label_string in df.columns,\
-                'Oops, {} is a column but {} is not'.format(
-                    p_string,label_string)
-            top_k += 1
-            continue
-        else:
-            assert label_string not in df.columns,\
-                'Oops, {} is a column but {} is not'.format(
-                    label_string,p_string)
-            top_k -= 1
-            break
-
-    print('Found {} probability column pairs'.format(top_k))
-
-    # Category names start after the fixed columns and the probability columns
-    category_names = []
-    column_names = list(df.columns)
-    first_category_name_index = 0
-    while('top_' in column_names[first_category_name_index] or \
-          column_names[first_category_name_index] in expected_columns):
-        first_category_name_index += 1
-
-    i_column = first_category_name_index
-    while( (i_column < len(column_names)) and (column_names[i_column] != 'corrected_label') ):
-        category_names.append(column_names[i_column])
-        i_column += 1
-
-    print('Found {} categories:\n'.format(len(category_names)))
-
-    for s in category_names:
-        print(s)
-
-    info = {}
-    info['format_version'] = '1.3'
-    info['detector'] = 'Zamba Cloud'
-    info['classifier'] = 'Zamba Cloud'
-
-    detection_category_id_to_name = {}
-    for category_id,category_name in enumerate(category_names):
-        detection_category_id_to_name[str(category_id)] = category_name
-    detection_category_name_to_id = {v: k for k, v in detection_category_id_to_name.items()}
-
-    images = []
-
-    # i_row = 0; row = df.iloc[i_row]
-    for i_row,row in df.iterrows():
-
-        im = {}
-        images.append(im)
-        im['file'] = row['original_filename']
-
-        detections = []
-
-        # k = 1
-        for k in range(1,top_k+1):
-            label = row['top_{}_label'.format(k)]
-            confidence = row['top_{}_probability'.format(k)]
-            det = {}
-            det['category'] = detection_category_name_to_id[label]
-            det['conf'] = confidence
-            det['bbox'] = [0,0,1.0,1.0]
-            detections.append(det)
-
-        im['detections'] = detections
-
-    # ...for each row
-
-    results = {}
-    results['info'] = info
-    results['detection_categories'] = detection_category_id_to_name
-    results['images'] = images
-
-    with open(output_file,'w') as f:
-        json.dump(results,f,indent=1)
-
-# ...zamba_results_to_md_results(...)
-
-
-#%% Interactive driver
-
-if False:
-
-    pass
-
-    #%%
-
-    input_file = r"G:\temp\labels-job-b95a4b76-e332-4e17-ab40-03469392d36a-2023-11-04_16-28-50.060130.csv"
-    output_file = None
-    zamba_results_to_md_results(input_file,output_file)
-
-
-#%% Command-line driver
-
-import sys,argparse
-
-def main():
-
-    parser = argparse.ArgumentParser(
-        description='Convert a Zamba-formatted .csv results file to a MD-formatted .json results file')
-
-    parser.add_argument(
-        'input_file',
-        type=str,
-        help='input .csv file')
-
-    parser.add_argument(
-        '--output_file',
-        type=str,
-        default=None,
-        help='output .json file (defaults to input file appended with ".json")')
-
-    if len(sys.argv[1:]) == 0:
-        parser.print_help()
-        parser.exit()
-
-    args = parser.parse_args()
-
-    zamba_results_to_md_results(args.input_file,args.output_file)
-
-if __name__ == '__main__':
-    main()
-
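Note on the removed converter above: each top-k classifier prediction becomes a whole-image pseudo-detection with bbox [0, 0, 1.0, 1.0], so "blank" can be filtered in Timelapse like any other class. A minimal usage sketch as it worked in 5.0.28, before removal (the input path is a hypothetical placeholder):

# Worked in megadetector 5.0.28; this module is removed in 10.0.0
from megadetector.data_management.importers.zamba_results_to_md_results import \
    zamba_results_to_md_results

# Hypothetical Zamba Cloud labels export; with output_file=None the
# results are written to '<input_file>.json'
zamba_results_to_md_results('/data/zamba/labels.csv', output_file=None)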
--- megadetector/data_management/lila/add_locations_to_island_camera_traps.py
+++ /dev/null
@@ -1,101 +0,0 @@
-"""
-
-add_locations_to_island_camera_traps.py
-
-The Island Conservation Camera Traps dataset had unique camera identifiers embedded
-in filenames, but not in the proper metadata fields. This script copies that information
-to metadata.
-
-"""
-
-#%% Imports and constants
-
-import os
-import json
-from tqdm import tqdm
-
-input_fn = os.path.expanduser('~/lila/metadata/island_conservation.json')
-output_fn = os.path.expanduser('~/tmp/island_conservation.json')
-preview_folder = os.path.expanduser('~/tmp/island_conservation_preview')
-image_directory = os.path.expanduser('~/data/icct/public/')
-
-
-#%% Prevent imports during testing
-
-if False:
-
-    #%% Read input file
-
-    with open(input_fn,'r') as f:
-        d = json.load(f)
-
-    d['info']
-    d['info']['version'] = '1.01'
-
-
-    #%% Find locations
-
-    images = d['images']
-
-    locations = set()
-
-    for i_image,im in tqdm(enumerate(images),total=len(images)):
-        tokens_fn = im['file_name'].split('/')
-        tokens_id = im['id'].split('_')
-        assert tokens_fn[0] == tokens_id[0]
-        assert tokens_fn[1] == tokens_id[1]
-        location = tokens_fn[0] + '_' + tokens_fn[1]
-        im['location'] = location
-        locations.add(location)
-
-    locations = sorted(list(locations))
-
-    for s in locations:
-        print(s)
-
-
-    #%% Write output file
-
-    with open(output_fn,'w') as f:
-        json.dump(d,f,indent=1)
-
-
-    #%% Validate .json files
-
-    from megadetector.data_management.databases import integrity_check_json_db
-
-    options = integrity_check_json_db.IntegrityCheckOptions()
-    options.baseDir = image_directory
-    options.bCheckImageSizes = False
-    options.bCheckImageExistence = True
-    options.bFindUnusedImages = True
-
-    sorted_categories, data, error_info = integrity_check_json_db.integrity_check_json_db(output_fn, options)
-
-
-    #%% Preview labels
-
-    from megadetector.visualization import visualize_db
-
-    viz_options = visualize_db.DbVizOptions()
-    viz_options.num_to_visualize = 2000
-    viz_options.trim_to_images_with_bboxes = False
-    viz_options.add_search_links = False
-    viz_options.sort_by_filename = False
-    viz_options.parallelize_rendering = True
-    viz_options.classes_to_exclude = ['test']
-    html_output_file, image_db = visualize_db.visualize_db(db_path=output_fn,
-                                                           output_dir=preview_folder,
-                                                           image_base_dir=image_directory,
-                                                           options=viz_options)
-
-    from megadetector.utils import path_utils
-    path_utils.open_file(html_output_file)
-
-
-    #%% Zip output file
-
-    from megadetector.utils.path_utils import zip_file
-
-    zip_file(output_fn, verbose=True)
-    assert os.path.isfile(output_fn + '.zip')
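Note on the removed script above: the location it assigns is simply the first two path components of each file name, cross-checked against the image id. A minimal sketch of that derivation, using hypothetical file_name and id values that follow the pattern the script asserts:

# Hypothetical CCT image record following the island_conservation.json naming pattern
im = {'file_name': 'island01/camera02/IMG_0001.JPG', 'id': 'island01_camera02_0001'}

tokens_fn = im['file_name'].split('/')  # ['island01', 'camera02', 'IMG_0001.JPG']
tokens_id = im['id'].split('_')         # ['island01', 'camera02', '0001']
assert tokens_fn[0] == tokens_id[0] and tokens_fn[1] == tokens_id[1]

im['location'] = tokens_fn[0] + '_' + tokens_fn[1]  # 'island01_camera02'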
--- megadetector/data_management/lila/add_locations_to_nacti.py
+++ /dev/null
@@ -1,151 +0,0 @@
-"""
-
-add_locations_to_nacti.py
-
-As of 10.2023, NACTI metadata only has very coarse location information (e.g. "Florida"),
-but camera IDs are embedded in filenames. This script pulls that information from filenames
-and adds it to metadata.
-
-"""
-
-#%% Imports and constants
-
-import os
-import json
-import shutil
-
-from tqdm import tqdm
-from collections import defaultdict
-
-input_file = r'd:\lila\nacti\nacti_metadata.json.1.13\nacti_metadata.json'
-output_file = r'g:\temp\nacti_metadata.1.14.json'
-
-
-#%% Prevent execution during testing
-
-if False:
-
-    #%% Read metadata
-
-    with open(input_file,'r') as f:
-        d = json.load(f)
-
-    assert d['info']['version'] == 1.13
-
-
-    #%% Map images to locations (according to the metadata)
-
-    file_name_to_original_location = {}
-
-    # im = dataset_labels['images'][0]
-    for im in tqdm(d['images']):
-        file_name_to_original_location[im['file_name']] = im['location']
-
-    original_locations = set(file_name_to_original_location.values())
-
-    print('Found {} locations in the original metadata:'.format(len(original_locations)))
-    for loc in original_locations:
-        print('[{}]'.format(loc))
-
-
-    #%% Map images to new locations
-
-    def path_to_location(relative_path):
-
-        relative_path = relative_path.replace('\\','/')
-        if relative_path in file_name_to_original_location:
-            location_name = file_name_to_original_location[relative_path]
-            if location_name == 'San Juan Mntns, Colorado':
-                # "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
-                tokens = relative_path.split('/')[-1].split('_')
-                assert tokens[1].startswith('Unit')
-                location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
-            elif location_name == 'Lebec, California':
-                # "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
-                tokens = relative_path.split('/')[-1].split('_')
-                assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
-                location_name = 'lebec_{}'.format(tokens[0])
-            elif location_name == 'Archbold, FL':
-                # "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
-                tokens = relative_path.split('/')[-1].split('_')
-                assert tokens[0].startswith('FL-')
-                location_name = 'archbold_{}'.format(tokens[0])
-            else:
-                assert location_name == ''
-                tokens = relative_path.split('/')[-1].split('_')
-                if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
-                    location_name = '{}'.format(tokens[0])
-
-        else:
-
-            location_name = 'unknown'
-
-        # print('Returning location {} for file {}'.format(location_name,relative_path))
-
-        return location_name
-
-    file_name_to_updated_location = {}
-    updated_location_to_count = defaultdict(int)
-    for im in tqdm(d['images']):
-
-        updated_location = path_to_location(im['file_name'])
-        file_name_to_updated_location[im['file_name']] = updated_location
-        updated_location_to_count[updated_location] += 1
-
-    updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
-                                                         key=lambda item: item[1],
-                                                         reverse=True)}
-
-    updated_locations = set(file_name_to_updated_location.values())
-
-    print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
-    for loc in updated_location_to_count:
-        print('{}: {}'.format(loc,updated_location_to_count[loc]))
-
-
-    #%% Re-write metadata
-
-    for im in d['images']:
-        im['location'] = file_name_to_updated_location[im['file_name']]
-    d['info']['version'] = 1.14
-
-    with open(output_file,'w') as f:
-        json.dump(d,f,indent=1)
-
-
-    #%% For each location, sample some random images to make sure they look consistent
-
-    input_base = r'd:\lila\nacti-unzipped'
-    assert os.path.isdir(input_base)
-
-    location_to_images = defaultdict(list)
-
-    for im in d['images']:
-        location_to_images[im['location']].append(im)
-
-    n_to_sample = 10
-    import random
-    random.seed(0)
-    sampling_folder_base = r'g:\temp\nacti_samples'
-
-    for location in tqdm(location_to_images):
-
-        images_this_location = location_to_images[location]
-        if len(images_this_location) > n_to_sample:
-            images_this_location = random.sample(images_this_location,n_to_sample)
-
-        for i_image,im in enumerate(images_this_location):
-
-            fn_relative = im['file_name']
-            source_fn_abs = os.path.join(input_base,fn_relative)
-            assert os.path.isfile(source_fn_abs)
-            ext = os.path.splitext(fn_relative)[1]
-            target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
-                location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
-            os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
-            shutil.copyfile(source_fn_abs,target_fn_abs)
-
-        # ...for each image
-
-    # ...for each location
-
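Note on the removed script above: path_to_location only rewrites the three coarse location strings it knows about, and otherwise falls back to the camera prefix or 'unknown'. Expected behavior on the example paths quoted in the script's own comments (assuming those files carried the corresponding coarse locations in the 1.13 metadata):

# 'Lebec, California' example from the comments
path_to_location('part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg')
# -> 'lebec_CA-03'

# 'San Juan Mntns, Colorado' example from the comments
path_to_location('part0/sub000/2010_Unit150_Ivan097_img0003.jpg')
# -> 'sanjuan_2010_Unit150_Ivan097'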
--- megadetector/utils/azure_utils.py
+++ /dev/null
@@ -1,178 +0,0 @@
-"""
-
-azure_utils.py
-
-Miscellaneous Azure Blob Storage utilities
-
-Requires azure-storage-blob>=12.4.0
-
-"""
-
-#%% Imports
-
-import json
-
-from typing import Any, Iterable, List, Optional, Tuple, Union
-from azure.storage.blob import BlobPrefix, ContainerClient
-
-from megadetector.utils import path_utils
-from megadetector.utils import sas_blob_utils
-
-
-#%% Functions
-
-def walk_container(container_client: ContainerClient,
-                   max_depth: int = -1,
-                   prefix: str = '',
-                   store_folders: bool = True,
-                   store_blobs: bool = True,
-                   debug_max_items: int = -1) -> Tuple[List[str], List[str]]:
-    """
-    Recursively walk folders a Azure Blob Storage container.
-
-    Based on:
-    https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/storage/azure-storage-blob/samples/blob_samples_walk_blob_hierarchy.py
-    """
-
-    depth = 1
-
-    def walk_blob_hierarchy(prefix: str,
-                            folders: Optional[List[str]] = None,
-                            blobs: Optional[List[str]] = None
-                            ) -> Tuple[List[str], List[str]]:
-        if folders is None:
-            folders = []
-        if blobs is None:
-            blobs = []
-
-        nonlocal depth
-
-        if 0 < max_depth < depth:
-            return folders, blobs
-
-        for item in container_client.walk_blobs(name_starts_with=prefix):
-            short_name = item.name[len(prefix):]
-            if isinstance(item, BlobPrefix):
-                # print('F: ' + prefix + short_name)
-                if store_folders:
-                    folders.append(prefix + short_name)
-                depth += 1
-                walk_blob_hierarchy(item.name, folders=folders, blobs=blobs)
-                if (debug_max_items > 0
-                        and len(folders) + len(blobs) > debug_max_items):
-                    return folders, blobs
-                depth -= 1
-            else:
-                if store_blobs:
-                    blobs.append(prefix + short_name)
-
-        return folders, blobs
-
-    folders, blobs = walk_blob_hierarchy(prefix=prefix)
-
-    assert all(s.endswith('/') for s in folders)
-    folders = [s.strip('/') for s in folders]
-
-    return folders, blobs
-
-
-def list_top_level_blob_folders(container_client: ContainerClient) -> List[str]:
-    """
-    List all top-level folders in a container.
-    """
-
-    top_level_folders, _ = walk_container(
-        container_client, max_depth=1, store_blobs=False)
-    return top_level_folders
-
-
-def concatenate_json_lists(input_files: Iterable[str],
-                           output_file: Optional[str] = None
-                           ) -> List[Any]:
-    """
-    Given a list of JSON files that contain lists (typically string
-    filenames), concatenates the lists into a single list and optionally
-    writes out this list to a new output JSON file.
-    """
-
-    output_list = []
-    for fn in input_files:
-        with open(fn, 'r') as f:
-            file_list = json.load(f)
-        output_list.extend(file_list)
-    if output_file is not None:
-        with open(output_file, 'w') as f:
-            json.dump(output_list, f, indent=1)
-    return output_list
-
-
-def upload_file_to_blob(account_name: str,
-                        container_name: str,
-                        local_path: str,
-                        blob_name: str,
-                        sas_token: str,
-                        overwrite: bool=False) -> str:
-    """
-    Uploads a local file to Azure Blob Storage and returns the uploaded
-    blob URI with SAS token.
-    """
-
-    container_uri = sas_blob_utils.build_azure_storage_uri(
-        account=account_name, container=container_name, sas_token=sas_token)
-    with open(local_path, 'rb') as data:
-        return sas_blob_utils.upload_blob(
-            container_uri=container_uri, blob_name=blob_name, data=data,
-            overwrite=overwrite)
-
-
-def enumerate_blobs_to_file(
-        output_file: str,
-        account_name: str,
-        container_name: str,
-        sas_token: Optional[str] = None,
-        blob_prefix: Optional[str] = None,
-        blob_suffix: Optional[Union[str, Tuple[str]]] = None,
-        rsearch: Optional[str] = None,
-        limit: Optional[int] = None,
-        verbose: Optional[bool] = True
-        ) -> List[str]:
-    """
-    Enumerates blobs in a container, and writes the blob names to an output
-    file.
-
-    Args:
-        output_file: str, path to save list of files in container
-            If ends in '.json', writes a JSON string. Otherwise, writes a
-            newline-delimited list. Can be None, in which case this is just a
-            convenient wrapper for blob enumeration.
-        account_name: str, Azure Storage account name
-        container_name: str, Azure Blob Storage container name
-        sas_token: optional str, container SAS token, leading ? will be removed if present.
-        blob_prefix: optional str, returned results will only contain blob names
-            to with this prefix
-        blob_suffix: optional str or tuple of str, returned results will only
-            contain blob names with this/these suffix(es). The blob names will
-            be lowercased first before comparing with the suffix(es).
-        rsearch: optional str, returned results will only contain blob names
-            that match this regex. Can also be a list of regexes, in which case
-            blobs matching *any* of the regex's will be returned.
-        limit: int, maximum # of blob names to list
-            if None, then returns all blob names
-
-    Returns: list of str, sorted blob names, of length limit or shorter.
-    """
-
-    if sas_token is not None and len(sas_token) > 9 and sas_token[0] == '?':
-        sas_token = sas_token[1:]
-
-    container_uri = sas_blob_utils.build_azure_storage_uri(
-        account=account_name, container=container_name, sas_token=sas_token)
-
-    matched_blobs = sas_blob_utils.list_blobs_in_container(
-        container_uri=container_uri, blob_prefix=blob_prefix,
-        blob_suffix=blob_suffix, rsearch=rsearch, limit=limit, verbose=verbose)
-
-    if output_file is not None:
-        path_utils.write_list_to_file(output_file, matched_blobs)
-
-    return matched_blobs
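Note on the removed azure_utils module above (dropped in 10.0.0 along with sas_blob_utils): it was a thin convenience layer over azure-storage-blob>=12.4.0. A minimal usage sketch as it worked in 5.0.28; the account, container, and SAS token values here are hypothetical placeholders:

# Worked in megadetector 5.0.28; removed in 10.0.0 together with sas_blob_utils
from megadetector.utils.azure_utils import enumerate_blobs_to_file

blob_names = enumerate_blobs_to_file(
    output_file='blobs.json',       # per the docstring: '.json' -> JSON list, otherwise newline-delimited
    account_name='myaccount',       # hypothetical storage account
    container_name='mycontainer',   # hypothetical container
    sas_token='sv=2021-08-06&sig=...',  # hypothetical container SAS token (a leading '?' is stripped)
    blob_prefix='images/',
    blob_suffix=('.jpg', '.png'),
    limit=1000)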