megadetector 5.0.28__py3-none-any.whl → 5.0.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +231 -224
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +304 -262
- megadetector/detection/run_detector.py +177 -164
- megadetector/detection/run_detector_batch.py +364 -363
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +256 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +290 -282
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +415 -415
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +219 -146
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +23 -20
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +313 -298
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1018 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1457 -398
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +61 -61
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +401 -397
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +79 -73
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.28.dist-info/RECORD +0 -209
|
@@ -18,33 +18,33 @@ import subprocess
|
|
|
18
18
|
def execute(cmd,encoding=None,errors=None,env=None,verbose=False):
|
|
19
19
|
"""
|
|
20
20
|
Run [cmd] (a single string) in a shell, yielding each line of output to the caller.
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
The "encoding", "errors", and "env" parameters are passed directly to subprocess.Popen().
|
|
23
|
-
|
|
23
|
+
|
|
24
24
|
"verbose" only impacts output about process management, it is not related to printing
|
|
25
25
|
output from the child process.
|
|
26
|
-
|
|
26
|
+
|
|
27
27
|
Args:
|
|
28
28
|
cmd (str): command to run
|
|
29
29
|
encoding (str, optional): stdout encoding, see Popen() documentation
|
|
30
30
|
errors (str, optional): error handling, see Popen() documentation
|
|
31
31
|
env (dict, optional): environment variables, see Popen() documentation
|
|
32
32
|
verbose (bool, optional): enable additional debug console output
|
|
33
|
-
|
|
33
|
+
|
|
34
34
|
Returns:
|
|
35
|
-
int: the command's return code, always zero, otherwise a CalledProcessError is raised
|
|
35
|
+
int: the command's return code, always zero, otherwise a CalledProcessError is raised
|
|
36
36
|
"""
|
|
37
|
-
|
|
37
|
+
|
|
38
38
|
os.environ["PYTHONUNBUFFERED"] = "1"
|
|
39
|
-
|
|
40
|
-
if verbose:
|
|
39
|
+
|
|
40
|
+
if verbose:
|
|
41
41
|
if encoding is not None:
|
|
42
42
|
print('Launching child process with non-default encoding {}'.format(encoding))
|
|
43
43
|
if errors is not None:
|
|
44
44
|
print('Launching child process with non-default text error handling {}'.format(errors))
|
|
45
45
|
if env is not None:
|
|
46
46
|
print('Launching child process with non-default environment {}'.format(str(env)))
|
|
47
|
-
|
|
47
|
+
|
|
48
48
|
# https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
|
|
49
49
|
popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
|
50
50
|
shell=True, universal_newlines=True, encoding=encoding,
|
|
@@ -55,7 +55,7 @@ def execute(cmd,encoding=None,errors=None,env=None,verbose=False):
|
|
|
55
55
|
return_code = popen.wait()
|
|
56
56
|
if return_code:
|
|
57
57
|
raise subprocess.CalledProcessError(return_code, cmd)
|
|
58
|
-
|
|
58
|
+
|
|
59
59
|
return return_code
|
|
60
60
|
|
|
61
61
|
|
|
@@ -70,15 +70,15 @@ def execute_and_print(cmd,
|
|
|
70
70
|
"""
|
|
71
71
|
Run [cmd] (a single string) in a shell, capturing and printing output. Returns
|
|
72
72
|
a dictionary with fields "status" and "output".
|
|
73
|
-
|
|
73
|
+
|
|
74
74
|
The "encoding", "errors", and "env" parameters are passed directly to subprocess.Popen().
|
|
75
|
-
|
|
75
|
+
|
|
76
76
|
"verbose" only impacts output about process management, it is not related to printing
|
|
77
77
|
output from the child process.
|
|
78
|
-
|
|
78
|
+
|
|
79
79
|
Args:
|
|
80
80
|
cmd (str): command to run
|
|
81
|
-
print_output (bool, optional): whether to print output from [cmd] (stdout is
|
|
81
|
+
print_output (bool, optional): whether to print output from [cmd] (stdout is
|
|
82
82
|
captured regardless of the value of print_output)
|
|
83
83
|
encoding (str, optional): stdout encoding, see Popen() documentation
|
|
84
84
|
errors (str, optional): error handling, see Popen() documentation
|
|
@@ -86,15 +86,15 @@ def execute_and_print(cmd,
|
|
|
86
86
|
verbose (bool, optional): enable additional debug console output
|
|
87
87
|
catch_exceptions (bool, optional): catch exceptions and include in the output, otherwise raise
|
|
88
88
|
echo_command (bool, optional): print the command before executing
|
|
89
|
-
|
|
89
|
+
|
|
90
90
|
Returns:
|
|
91
91
|
dict: a dictionary with fields "status" (the process return code) and "output"
|
|
92
|
-
(the content of stdout)
|
|
92
|
+
(the content of stdout)
|
|
93
93
|
"""
|
|
94
94
|
|
|
95
95
|
if echo_command:
|
|
96
96
|
print('Running command:\n{}\n'.format(cmd))
|
|
97
|
-
|
|
97
|
+
|
|
98
98
|
to_return = {'status':'unknown','output':''}
|
|
99
99
|
output = []
|
|
100
100
|
try:
|
|
@@ -109,64 +109,64 @@ def execute_and_print(cmd,
|
|
|
109
109
|
print('execute_and_print caught error: {} ({})'.format(cpe.output,str(cpe)))
|
|
110
110
|
to_return['status'] = cpe.returncode
|
|
111
111
|
to_return['output'] = output
|
|
112
|
-
|
|
112
|
+
|
|
113
113
|
return to_return
|
|
114
114
|
|
|
115
115
|
|
|
116
116
|
#%% Single-threaded test driver for execute_and_print
|
|
117
117
|
|
|
118
118
|
if False:
|
|
119
|
-
|
|
119
|
+
|
|
120
120
|
pass
|
|
121
121
|
|
|
122
122
|
#%%
|
|
123
|
-
|
|
123
|
+
|
|
124
124
|
if os.name == 'nt':
|
|
125
|
-
execute_and_print('echo hello && ping -n 5 127.0.0.1 && echo goodbye')
|
|
125
|
+
execute_and_print('echo hello && ping -n 5 127.0.0.1 && echo goodbye')
|
|
126
126
|
else:
|
|
127
|
-
execute_and_print('echo hello && sleep 1 && echo goodbye')
|
|
128
|
-
|
|
127
|
+
execute_and_print('echo hello && sleep 1 && echo goodbye')
|
|
128
|
+
|
|
129
129
|
|
|
130
130
|
#%% Parallel test driver for execute_and_print
|
|
131
131
|
|
|
132
132
|
if False:
|
|
133
|
-
|
|
133
|
+
|
|
134
134
|
pass
|
|
135
135
|
|
|
136
136
|
#%%
|
|
137
|
-
|
|
137
|
+
|
|
138
138
|
from functools import partial
|
|
139
139
|
from multiprocessing.pool import ThreadPool as ThreadPool
|
|
140
140
|
from multiprocessing.pool import Pool as Pool
|
|
141
|
-
|
|
141
|
+
|
|
142
142
|
n_workers = 10
|
|
143
|
-
|
|
143
|
+
|
|
144
144
|
# Should we use threads (vs. processes) for parallelization?
|
|
145
145
|
use_threads = True
|
|
146
|
-
|
|
146
|
+
|
|
147
147
|
test_data = ['a','b','c','d']
|
|
148
|
-
|
|
149
|
-
def
|
|
148
|
+
|
|
149
|
+
def _process_sample(s):
|
|
150
150
|
return execute_and_print('echo ' + s,True)
|
|
151
|
-
|
|
152
|
-
if n_workers == 1:
|
|
153
|
-
|
|
151
|
+
|
|
152
|
+
if n_workers == 1:
|
|
153
|
+
|
|
154
154
|
results = []
|
|
155
|
-
for i_sample,sample in enumerate(test_data):
|
|
156
|
-
results.append(
|
|
157
|
-
|
|
155
|
+
for i_sample,sample in enumerate(test_data):
|
|
156
|
+
results.append(_process_sample(sample))
|
|
157
|
+
|
|
158
158
|
else:
|
|
159
|
-
|
|
159
|
+
|
|
160
160
|
n_threads = min(n_workers,len(test_data))
|
|
161
|
-
|
|
161
|
+
|
|
162
162
|
if use_threads:
|
|
163
163
|
print('Starting parallel thread pool with {} workers'.format(n_threads))
|
|
164
164
|
pool = ThreadPool(n_threads)
|
|
165
165
|
else:
|
|
166
166
|
print('Starting parallel process pool with {} workers'.format(n_threads))
|
|
167
167
|
pool = Pool(n_threads)
|
|
168
|
-
|
|
169
|
-
results = list(pool.map(partial(
|
|
170
|
-
|
|
168
|
+
|
|
169
|
+
results = list(pool.map(partial(_process_sample),test_data))
|
|
170
|
+
|
|
171
171
|
for r in results:
|
|
172
172
|
print(r)
|
|
@@ -24,13 +24,13 @@ docs.microsoft.com/en-us/azure/storage/common/storage-sas-overview
|
|
|
24
24
|
from datetime import datetime, timedelta
|
|
25
25
|
import io
|
|
26
26
|
import re
|
|
27
|
-
from typing import (Any, AnyStr,
|
|
27
|
+
from typing import (Any, AnyStr, IO, Iterable, Optional, Union)
|
|
28
28
|
from urllib import parse
|
|
29
29
|
import uuid
|
|
30
30
|
|
|
31
31
|
from tqdm import tqdm
|
|
32
32
|
|
|
33
|
-
from azure.storage.blob import (
|
|
33
|
+
from azure.storage.blob import ( # type: ignore
|
|
34
34
|
BlobClient,
|
|
35
35
|
BlobProperties,
|
|
36
36
|
ContainerClient,
|
|
@@ -48,21 +48,24 @@ def build_azure_storage_uri(
|
|
|
48
48
|
account_url_template: str = 'https://{account}.blob.core.windows.net'
|
|
49
49
|
) -> str:
|
|
50
50
|
"""
|
|
51
|
+
Build an Azure SAS URL from path name components.
|
|
52
|
+
|
|
51
53
|
Args:
|
|
52
54
|
account: str, name of Azure Storage account
|
|
53
55
|
container: optional str, name of Azure Blob Storage container
|
|
54
56
|
blob: optional str, name of blob, not URL-escaped
|
|
55
57
|
if blob is given, must also specify container
|
|
56
|
-
sas_token: optional str, Shared Access Signature (SAS). Leading ?
|
|
58
|
+
sas_token: optional str, Shared Access Signature (SAS). Leading ?
|
|
57
59
|
is removed if present.
|
|
58
60
|
account_url_template: str, Python 3 string formatting template,
|
|
59
61
|
contains '{account}' placeholder, defaults to default Azure
|
|
60
62
|
Storage URL format. Set this value if using Azurite Azure Storage
|
|
61
63
|
emulator.
|
|
62
64
|
|
|
63
|
-
Returns:
|
|
65
|
+
Returns:
|
|
66
|
+
str, Azure storage URI
|
|
64
67
|
"""
|
|
65
|
-
|
|
68
|
+
|
|
66
69
|
uri = account_url_template.format(account=account)
|
|
67
70
|
if container is not None:
|
|
68
71
|
uri = f'{uri}/{container}'
|
|
@@ -85,7 +88,7 @@ def get_client_from_uri(container_uri: str) -> ContainerClient:
|
|
|
85
88
|
"""
|
|
86
89
|
Gets a ContainerClient for the given container URI.
|
|
87
90
|
"""
|
|
88
|
-
|
|
91
|
+
|
|
89
92
|
return ContainerClient.from_container_url(container_uri)
|
|
90
93
|
|
|
91
94
|
|
|
@@ -95,7 +98,7 @@ def get_account_from_uri(sas_uri: str) -> str:
|
|
|
95
98
|
a default Azure URI. Does not work for locally-emulated Azure Storage
|
|
96
99
|
or Azure Storage hosted at custom endpoints.
|
|
97
100
|
"""
|
|
98
|
-
|
|
101
|
+
|
|
99
102
|
url_parts = parse.urlsplit(sas_uri)
|
|
100
103
|
loc = url_parts.netloc # "<account>.blob.core.windows.net"
|
|
101
104
|
return loc.split('.')[0]
|
|
@@ -106,7 +109,7 @@ def is_container_uri(sas_uri: str) -> bool:
|
|
|
106
109
|
Returns True if the signed resource field in the URI "sr" is a container "c"
|
|
107
110
|
or a directory "d"
|
|
108
111
|
"""
|
|
109
|
-
|
|
112
|
+
|
|
110
113
|
data = get_all_query_parts(sas_uri)
|
|
111
114
|
if 'sr' not in data:
|
|
112
115
|
return False
|
|
@@ -120,7 +123,7 @@ def is_blob_uri(sas_uri: str) -> bool:
|
|
|
120
123
|
"""
|
|
121
124
|
Returns True if the signed resource field in the URI "sr" is a blob "b".
|
|
122
125
|
"""
|
|
123
|
-
|
|
126
|
+
|
|
124
127
|
data = get_all_query_parts(sas_uri)
|
|
125
128
|
if 'sr' not in data:
|
|
126
129
|
return False
|
|
@@ -144,11 +147,10 @@ def get_container_from_uri(sas_uri: str, unquote: bool = True) -> str:
|
|
|
144
147
|
unquote: bool, whether to replace any %xx escapes by their
|
|
145
148
|
single-character equivalent, default True
|
|
146
149
|
|
|
147
|
-
Returns:
|
|
148
|
-
|
|
149
|
-
Raises: ValueError, if sas_uri does not include a container
|
|
150
|
+
Returns:
|
|
151
|
+
str, container name
|
|
150
152
|
"""
|
|
151
|
-
|
|
153
|
+
|
|
152
154
|
url_parts = parse.urlsplit(sas_uri)
|
|
153
155
|
raw_path = url_parts.path.lstrip('/') # remove leading "/" from path
|
|
154
156
|
container = raw_path.split('/')[0]
|
|
@@ -173,7 +175,7 @@ def get_blob_from_uri(sas_uri: str, unquote: bool = True) -> str:
|
|
|
173
175
|
|
|
174
176
|
Raises: ValueError, if sas_uri does not include a blob name
|
|
175
177
|
"""
|
|
176
|
-
|
|
178
|
+
|
|
177
179
|
# Get the entire path with all slashes after the container
|
|
178
180
|
url_parts = parse.urlsplit(sas_uri)
|
|
179
181
|
raw_path = url_parts.path.lstrip('/') # remove leading "/" from path
|
|
@@ -198,7 +200,7 @@ def get_sas_token_from_uri(sas_uri: str) -> Optional[str]:
|
|
|
198
200
|
Returns: str, query part of the SAS token (without leading '?'),
|
|
199
201
|
or None if URI has no token.
|
|
200
202
|
"""
|
|
201
|
-
|
|
203
|
+
|
|
202
204
|
url_parts = parse.urlsplit(sas_uri)
|
|
203
205
|
sas_token = url_parts.query or None # None if query is empty string
|
|
204
206
|
return sas_token
|
|
@@ -213,7 +215,7 @@ def get_resource_type_from_uri(sas_uri: str) -> Optional[str]:
|
|
|
213
215
|
|
|
214
216
|
Returns: A string (either 'blob' or 'container') or None.
|
|
215
217
|
"""
|
|
216
|
-
|
|
218
|
+
|
|
217
219
|
url_parts = parse.urlsplit(sas_uri)
|
|
218
220
|
data = parse.parse_qs(url_parts.query)
|
|
219
221
|
if 'sr' in data:
|
|
@@ -228,29 +230,32 @@ def get_resource_type_from_uri(sas_uri: str) -> Optional[str]:
|
|
|
228
230
|
def get_endpoint_suffix(sas_uri):
|
|
229
231
|
"""
|
|
230
232
|
Gets the endpoint at which the blob storage account is served.
|
|
233
|
+
|
|
231
234
|
Args:
|
|
232
235
|
sas_uri: str, Azure blob storage URI with SAS token
|
|
233
236
|
|
|
234
|
-
Returns:
|
|
235
|
-
|
|
237
|
+
Returns:
|
|
238
|
+
str: usually 'core.windows.net' or 'core.chinacloudapi.cn', to
|
|
239
|
+
use for the `endpoint` argument in various blob storage SDK functions.
|
|
236
240
|
"""
|
|
237
|
-
|
|
241
|
+
|
|
238
242
|
url_parts = parse.urlsplit(sas_uri)
|
|
239
243
|
suffix = url_parts.netloc.split('.blob.')[1].split('/')[0]
|
|
240
244
|
return suffix
|
|
241
245
|
|
|
242
246
|
|
|
243
|
-
def get_permissions_from_uri(sas_uri: str) -> Set[str]:
|
|
247
|
+
def get_permissions_from_uri(sas_uri: str) -> set[str]:
|
|
244
248
|
"""
|
|
245
249
|
Get the permissions given by this SAS token.
|
|
246
250
|
|
|
247
251
|
Args:
|
|
248
252
|
sas_uri: str, Azure blob storage URI with SAS token
|
|
249
253
|
|
|
250
|
-
Returns:
|
|
254
|
+
Returns:
|
|
255
|
+
set: a set containing some of 'read', 'write', 'delete' and 'list'.
|
|
251
256
|
Empty set returned if no permission specified in sas_uri.
|
|
252
257
|
"""
|
|
253
|
-
|
|
258
|
+
|
|
254
259
|
data = get_all_query_parts(sas_uri)
|
|
255
260
|
permissions_set = set()
|
|
256
261
|
if 'sp' in data:
|
|
@@ -266,16 +271,16 @@ def get_permissions_from_uri(sas_uri: str) -> Set[str]:
|
|
|
266
271
|
return permissions_set
|
|
267
272
|
|
|
268
273
|
|
|
269
|
-
def get_all_query_parts(sas_uri: str) ->
|
|
274
|
+
def get_all_query_parts(sas_uri: str) -> dict[str, Any]:
|
|
270
275
|
"""
|
|
271
276
|
Gets the SAS token parameters.
|
|
272
277
|
"""
|
|
273
|
-
|
|
278
|
+
|
|
274
279
|
url_parts = parse.urlsplit(sas_uri)
|
|
275
280
|
return parse.parse_qs(url_parts.query)
|
|
276
281
|
|
|
277
282
|
|
|
278
|
-
#%% Blob
|
|
283
|
+
#%% Blob
|
|
279
284
|
|
|
280
285
|
def check_blob_exists(sas_uri: str, blob_name: Optional[str] = None) -> bool:
|
|
281
286
|
"""
|
|
@@ -294,9 +299,10 @@ def check_blob_exists(sas_uri: str, blob_name: Optional[str] = None) -> bool:
|
|
|
294
299
|
blob_name: optional str, name of blob, not URL-escaped
|
|
295
300
|
must be given if sas_uri is a URI to a container
|
|
296
301
|
|
|
297
|
-
Returns:
|
|
302
|
+
Returns:
|
|
303
|
+
bool: whether the sas_uri given points to an existing blob
|
|
298
304
|
"""
|
|
299
|
-
|
|
305
|
+
|
|
300
306
|
if blob_name is not None:
|
|
301
307
|
sas_uri = build_blob_uri(
|
|
302
308
|
container_uri=sas_uri, blob_name=blob_name)
|
|
@@ -318,9 +324,10 @@ def upload_blob(container_uri: str, blob_name: str,
|
|
|
318
324
|
if str, assumes utf-8 encoding
|
|
319
325
|
overwrite: bool, whether to overwrite existing blob (if any)
|
|
320
326
|
|
|
321
|
-
Returns:
|
|
327
|
+
Returns:
|
|
328
|
+
str: URL to blob, includes SAS token if container_uri has SAS token
|
|
322
329
|
"""
|
|
323
|
-
|
|
330
|
+
|
|
324
331
|
account_url, container, sas_token = split_container_uri(container_uri)
|
|
325
332
|
with BlobClient(account_url=account_url, container_name=container,
|
|
326
333
|
blob_name=blob_name, credential=sas_token) as blob_client:
|
|
@@ -328,7 +335,7 @@ def upload_blob(container_uri: str, blob_name: str,
|
|
|
328
335
|
return blob_client.url
|
|
329
336
|
|
|
330
337
|
|
|
331
|
-
def download_blob_to_stream(sas_uri: str) -> Tuple[io.BytesIO, BlobProperties]:
|
|
338
|
+
def download_blob_to_stream(sas_uri: str) -> tuple[io.BytesIO, BlobProperties]:
|
|
332
339
|
"""
|
|
333
340
|
Downloads a blob to an IO stream.
|
|
334
341
|
|
|
@@ -338,11 +345,8 @@ def download_blob_to_stream(sas_uri: str) -> Tuple[io.BytesIO, BlobProperties]:
|
|
|
338
345
|
Returns:
|
|
339
346
|
output_stream: io.BytesIO, remember to close it when finished using
|
|
340
347
|
blob_properties: BlobProperties
|
|
341
|
-
|
|
342
|
-
Raises: azure.core.exceptions.ResourceNotFoundError, if sas_uri points
|
|
343
|
-
to a non-existent blob
|
|
344
348
|
"""
|
|
345
|
-
|
|
349
|
+
|
|
346
350
|
with BlobClient.from_blob_url(sas_uri) as blob_client:
|
|
347
351
|
output_stream = io.BytesIO()
|
|
348
352
|
blob_client.download_blob().readinto(output_stream)
|
|
@@ -358,10 +362,11 @@ def build_blob_uri(container_uri: str, blob_name: str) -> str:
|
|
|
358
362
|
<account_url>/<container>?<sas_token>
|
|
359
363
|
blob_name: str, name of blob, not URL-escaped
|
|
360
364
|
|
|
361
|
-
Returns:
|
|
365
|
+
Returns:
|
|
366
|
+
str: blob URI <account_url>/<container>/<blob_name>?<sas_token>,
|
|
362
367
|
<blob_name> is URL-escaped
|
|
363
368
|
"""
|
|
364
|
-
|
|
369
|
+
|
|
365
370
|
account_url, container, sas_token = split_container_uri(container_uri)
|
|
366
371
|
|
|
367
372
|
blob_name = parse.quote(blob_name)
|
|
@@ -376,11 +381,11 @@ def build_blob_uri(container_uri: str, blob_name: str) -> str:
|
|
|
376
381
|
def list_blobs_in_container(
|
|
377
382
|
container_uri: str,
|
|
378
383
|
blob_prefix: Optional[str] = None,
|
|
379
|
-
blob_suffix: Optional[Union[str,
|
|
384
|
+
blob_suffix: Optional[Union[str, tuple[str]]] = None,
|
|
380
385
|
rsearch: Optional[str] = None,
|
|
381
386
|
limit: Optional[int] = None,
|
|
382
387
|
verbose: Optional[bool] = True
|
|
383
|
-
) ->
|
|
388
|
+
) -> list[str]:
|
|
384
389
|
"""
|
|
385
390
|
Get a sorted list of blob names in this container.
|
|
386
391
|
|
|
@@ -398,12 +403,12 @@ def list_blobs_in_container(
|
|
|
398
403
|
if None, then returns all blob names
|
|
399
404
|
|
|
400
405
|
Returns:
|
|
401
|
-
sorted list of blob names, of length limit or shorter.
|
|
406
|
+
list: sorted list of blob names, of length limit or shorter.
|
|
402
407
|
"""
|
|
403
408
|
|
|
404
409
|
if verbose:
|
|
405
410
|
print('Listing blobs')
|
|
406
|
-
|
|
411
|
+
|
|
407
412
|
if (get_sas_token_from_uri(container_uri) is not None
|
|
408
413
|
and get_resource_type_from_uri(container_uri) != 'container'):
|
|
409
414
|
raise ValueError('The SAS token provided is not for a container.')
|
|
@@ -448,7 +453,7 @@ def list_blobs_in_container(
|
|
|
448
453
|
|
|
449
454
|
if verbose:
|
|
450
455
|
print(f'Enumerated {len(list_blobs)} matching blobs out of {i} total')
|
|
451
|
-
|
|
456
|
+
|
|
452
457
|
return sorted(list_blobs) # sort for determinism
|
|
453
458
|
|
|
454
459
|
|
|
@@ -470,12 +475,10 @@ def generate_writable_container_sas(account_name: str,
|
|
|
470
475
|
access_duration_hrs: float
|
|
471
476
|
account_url: str, optional, defaults to default Azure Storage URL
|
|
472
477
|
|
|
473
|
-
Returns:
|
|
474
|
-
|
|
475
|
-
Raises: azure.core.exceptions.ResourceExistsError, if container already
|
|
476
|
-
exists
|
|
478
|
+
Returns:
|
|
479
|
+
str: URL to newly created container
|
|
477
480
|
"""
|
|
478
|
-
|
|
481
|
+
|
|
479
482
|
if account_url is None:
|
|
480
483
|
account_url = build_azure_storage_uri(account=account_name)
|
|
481
484
|
with ContainerClient(account_url=account_url,
|
|
@@ -494,15 +497,16 @@ def generate_writable_container_sas(account_name: str,
|
|
|
494
497
|
return f'{account_url}/{container_name}?{container_sas_token}'
|
|
495
498
|
|
|
496
499
|
|
|
497
|
-
def split_container_uri(container_uri: str) ->
|
|
500
|
+
def split_container_uri(container_uri: str) -> tuple[str, str, Optional[str]]:
|
|
498
501
|
"""
|
|
499
502
|
Args:
|
|
500
503
|
container_uri: str, URI to blob storage container
|
|
501
504
|
<account_url>/<container>?<sas_token>
|
|
502
505
|
|
|
503
|
-
Returns:
|
|
506
|
+
Returns:
|
|
507
|
+
tuple: account_url, container_name, sas_token
|
|
504
508
|
"""
|
|
505
|
-
|
|
509
|
+
|
|
506
510
|
account_container = container_uri.split('?', maxsplit=1)[0]
|
|
507
511
|
account_url, container_name = account_container.rsplit('/', maxsplit=1)
|
|
508
512
|
sas_token = get_sas_token_from_uri(container_uri)
|