ONE-api 3.0b1-py3-none-any.whl → 3.0b4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ONE_api-3.0b1.dist-info → ONE_api-3.0b4.dist-info}/LICENSE +21 -21
- {ONE_api-3.0b1.dist-info → ONE_api-3.0b4.dist-info}/METADATA +115 -115
- ONE_api-3.0b4.dist-info/RECORD +37 -0
- one/__init__.py +2 -2
- one/alf/__init__.py +1 -1
- one/alf/cache.py +640 -653
- one/alf/exceptions.py +105 -105
- one/alf/io.py +876 -876
- one/alf/path.py +1450 -1450
- one/alf/spec.py +519 -504
- one/api.py +2949 -2973
- one/converters.py +850 -850
- one/params.py +414 -414
- one/registration.py +845 -845
- one/remote/__init__.py +1 -1
- one/remote/aws.py +313 -313
- one/remote/base.py +142 -142
- one/remote/globus.py +1254 -1254
- one/tests/fixtures/params/.caches +6 -6
- one/tests/fixtures/params/.test.alyx.internationalbrainlab.org +8 -8
- one/tests/fixtures/rest_responses/1f187d80fd59677b395fcdb18e68e4401bfa1cc9 +1 -1
- one/tests/fixtures/rest_responses/47893cf67c985e6361cdee009334963f49fb0746 +1 -1
- one/tests/fixtures/rest_responses/535d0e9a1e2c1efbdeba0d673b131e00361a2edb +1 -1
- one/tests/fixtures/rest_responses/6dc96f7e9bcc6ac2e7581489b9580a6cd3f28293 +1 -1
- one/tests/fixtures/rest_responses/db1731fb8df0208944ae85f76718430813a8bf50 +1 -1
- one/tests/fixtures/rest_responses/dcce48259bb929661f60a02a48563f70aa6185b3 +1 -1
- one/tests/fixtures/rest_responses/f530d6022f61cdc9e38cc66beb3cb71f3003c9a1 +1 -1
- one/tests/fixtures/test_dbs.json +14 -14
- one/util.py +524 -524
- one/webclient.py +1366 -1354
- ONE_api-3.0b1.dist-info/RECORD +0 -37
- {ONE_api-3.0b1.dist-info → ONE_api-3.0b4.dist-info}/WHEEL +0 -0
- {ONE_api-3.0b1.dist-info → ONE_api-3.0b4.dist-info}/top_level.txt +0 -0
one/remote/__init__.py
CHANGED
@@ -1 +1 @@
-"""A package for remote data access."""
+"""A package for remote data access."""
one/remote/aws.py
CHANGED
@@ -1,313 +1,313 @@
(every line of this file was removed and re-added; the old and new text are identical, so the content appears once below)

"""A backend to download IBL data from AWS Buckets.

Examples
--------
Without any credentials, to download a public file from the IBL public bucket:

>>> from one.remote import aws
... source = 'caches/unit_test/cache_info.json'
... destination = '/home/olivier/scratch/cache_info.json'
... aws.s3_download_file(source, destination)

For a folder, the following:

>>> source = 'caches/unit_test'
>>> destination = '/home/olivier/scratch/caches/unit_test'
>>> local_files = aws.s3_download_folder(source, destination)

"""
import re
from pathlib import Path, PurePosixPath
import logging
import urllib.parse

from tqdm import tqdm
import boto3

from botocore import UNSIGNED
from botocore.config import Config
from botocore.exceptions import NoCredentialsError, PartialCredentialsError, ClientError

_logger = logging.getLogger(__name__)

REPO_DEFAULT = 'aws_cortexlab'
S3_BUCKET_IBL = 'ibl-brain-wide-map-public'
REGION_NAME = 'us-east-1'


def _callback_hook(t):
    """A callback hook for boto3.download_file to update the progress bar.

    Parameters
    ----------
    t : tqdm.tqdm
        An tqdm instance used as the progress bar.

    See Also
    --------
    https://gist.github.com/wy193777/e7607d12fad13459e8992d4f69b53586
    For example that uses actual file size:
    https://boto3.amazonaws.com/v1/documentation/api/latest/_modules/boto3/s3/transfer.html

    """
    def inner(bytes_amount):
        t.update(bytes_amount)
    return inner


def get_s3_virtual_host(uri, region) -> str:
    """Convert a given bucket URI to a generic Amazon virtual host URL.

    URI may be the bucket (+ path) or a full URI starting with 's3://'

    .. _S3 documentation
       https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-bucket-intro.html#virtual-host-style-url-ex

    Parameters
    ----------
    uri : str
        The bucket name or full path URI.
    region : str
        The region, e.g. eu-west-1.

    Returns
    -------
    str
        The Web URL (virtual host name and https scheme).

    """
    assert region and re.match(r'\w{2}-\w+-[1-3]', region), 'Invalid region'
    parsed = urllib.parse.urlparse(uri)  # remove scheme if necessary
    key = parsed.path.strip('/').split('/')
    bucket = parsed.netloc or key.pop(0)
    hostname = f"{bucket}.{parsed.scheme or 's3'}.{region}.amazonaws.com"
    return 'https://' + '/'.join((hostname, *key))


def url2uri(data_path, return_location=False):
    """Convert a generic Amazon virtual host URL to an S3 URI.

    Parameters
    ----------
    data_path : str
        An Amazon virtual host URL to convert.
    return_location : bool
        If true, additionally returns the location string.

    Returns
    -------
    str
        An S3 URI with scheme 's3://'.
    str
        If return_location is true, returns the bucket location, e.g. 'eu-east-1'.

    """
    parsed = urllib.parse.urlparse(data_path)
    assert parsed.netloc and parsed.scheme and parsed.path
    bucket_name, _, loc, *_ = parsed.netloc.split('.')
    uri = f's3://{bucket_name}{parsed.path}'
    return (uri, loc) if return_location else uri


def is_folder(obj_summery) -> bool:
    """Given an S3 ObjectSummery instance, returns true if the associated object is a directory.

    Parameters
    ----------
    obj_summery : s3.ObjectSummery
        An S3 ObjectSummery instance to test.

    Returns
    -------
    bool
        True if object is a directory.

    """
    return obj_summery.key.endswith('/') and obj_summery.size == 0


def get_aws_access_keys(alyx, repo_name=REPO_DEFAULT):
    """Query Alyx database to get credentials in the json field of an aws repository.

    Parameters
    ----------
    alyx : one.webclient.AlyxInstance
        An instance of alyx.
    repo_name : str
        The data repository name in Alyx from which to fetch the S3 access keys.

    Returns
    -------
    dict
        The API access keys and region name to use with boto3.
    str
        The name of the S3 bucket associated with the Alyx data repository.

    """
    repo_json = alyx.rest('data-repository', 'read', id=repo_name)['json']
    bucket_name = repo_json['bucket_name']
    session_keys = {
        'aws_access_key_id': repo_json.get('Access key ID', None),
        'aws_secret_access_key': repo_json.get('Secret access key', None),
        'region_name': repo_json.get('region_name', None)
    }
    return session_keys, bucket_name


def get_s3_public():
    """Retrieve the IBL public S3 service resource.

    Returns
    -------
    s3.ServiceResource
        An S3 ServiceResource instance with the provided.
    str
        The name of the S3 bucket.

    """
    session = boto3.Session(region_name=REGION_NAME)
    s3 = session.resource('s3', config=Config(signature_version=UNSIGNED))
    return s3, S3_BUCKET_IBL


def get_s3_allen():
    """Retrieve the Allen public S3 service resource.

    Returns
    -------
    s3.ServiceResource
        An S3 ServiceResource instance with the provided.
    str
        The name of the S3 bucket.

    """
    S3_BUCKET_ALLEN = 'allen-brain-cell-atlas'
    session = boto3.Session(region_name='us-west-2')
    s3 = session.resource('s3', config=Config(signature_version=UNSIGNED))
    return s3, S3_BUCKET_ALLEN


def get_s3_from_alyx(alyx, repo_name=REPO_DEFAULT):
    """Create an S3 resource instance using credentials from an Alyx data repository.

    Parameters
    ----------
    alyx : one.webclient.AlyxInstance
        An instance of alyx.
    repo_name : str
        The data repository name in Alyx from which to fetch the S3 access keys.

    Returns
    -------
    s3.ServiceResource
        An S3 ServiceResource instance with the provided.
    str
        The name of the S3 bucket.

    Notes
    -----
    - If no credentials are present in the database, boto3 will use environment config or default
      AWS profile settings instead.
    - If there are no credentials for the bucket and the bucket has 'public' in the name, the
      returned resource will use an unsigned signature.

    """
    session_keys, bucket_name = get_aws_access_keys(alyx, repo_name)
    no_creds = not any(filter(None, (v for k, v in session_keys.items() if 'key' in k.casefold())))
    session = boto3.Session(**session_keys)
    if no_creds and 'public' in bucket_name.casefold():
        config = Config(signature_version=UNSIGNED)
    else:
        config = None
    s3 = session.resource('s3', config=config)
    return s3, bucket_name


def s3_download_file(source, destination, s3=None, bucket_name=None, overwrite=False):
    """Downloads a file from an S3 instance to a local folder.

    Parameters
    ----------
    source : str, pathlib.Path, pathlib.PurePosixPath
        Relative path (key) within the bucket, for example: 'atlas/dorsal_cortex_50.nrrd'.
    destination : str, pathlib.Path
        The full file path on local machine.
    s3 : s3.ServiceResource
        An S3 ServiceResource instance. Defaults to the IBL public instance.
    bucket_name : str
        The name of the bucket to access. Defaults to the public IBL repository.
    overwrite : bool
        If True, will re-download files even if the file sizes match.

    Returns
    -------
    pathlib.Path
        The local file path of the downloaded file.

    """
    destination = Path(destination)
    destination.parent.mkdir(parents=True, exist_ok=True)
    if s3 is None:
        s3, bucket_name = get_s3_public()
    try:
        file_object = s3.Object(bucket_name, Path(source).as_posix())
        filesize = file_object.content_length
        if not overwrite and destination.exists() and filesize == destination.stat().st_size:
            _logger.debug(f"{destination} exists and match size -- skipping")
            return destination
        with tqdm(total=filesize, unit='B',
                  unit_scale=True, desc=f'(S3) {destination}') as t:
            file_object.download_file(Filename=str(destination), Callback=_callback_hook(t))
    except (NoCredentialsError, PartialCredentialsError) as ex:
        raise ex  # Credentials need updating in Alyx  # pragma: no cover
    except ClientError as ex:
        if ex.response.get('Error', {}).get('Code', None) == '404':
            _logger.error(f'File {source} not found on {bucket_name}')
            return None
        else:
            raise ex
    return destination


def s3_download_folder(source, destination, s3=None, bucket_name=S3_BUCKET_IBL, overwrite=False):
    """Downloads S3 folder content to a local folder.

    Parameters
    ----------
    source : str
        Relative path (key) within the bucket, for example: 'spikesorting/benchmark'.
    destination : str, pathlib.Path
        Local folder path. Note: The contents of the source folder will be downloaded to
        `destination`, not the folder itself.
    s3 : s3.ServiceResource
        An S3 ServiceResource instance. Defaults to the IBL public instance.
    bucket_name : str
        The name of the bucket to access. Defaults to the public IBL repository.
    overwrite : bool
        If True, will re-download files even if the file sizes match.

    Returns
    -------
    list of pathlib.Path
        The local file paths.

    """
    destination = Path(destination)
    if destination.exists():
        assert destination.is_dir(), 'destination must be a folder'
    if s3 is None:
        s3, bucket_name = get_s3_public()
    local_files = []
    objects = s3.Bucket(name=bucket_name).objects.filter(Prefix=source)
    for obj_summary in filter(lambda x: not is_folder(x), objects):
        # we can only filter an object collection by prefix, so we need to make sure the file
        # is in the subpath of the source folder
        # for example, if source is '/toto/tata' and obj_summary.key is
        # '/toto/tata_alaternate/titi.txt', we need to exclude it
        if PurePosixPath(source) not in PurePosixPath(obj_summary.key).parents:
            continue
        local_file = Path(destination).joinpath(Path(obj_summary.key).relative_to(source))
        lf = s3_download_file(obj_summary.key, local_file, s3=s3, bucket_name=bucket_name,
                              overwrite=overwrite)
        local_files.append(lf)
    return local_files
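
As a quick check of the two URL helpers in this file, the following round trip is a minimal sketch; the bucket and region are the module's own defaults (S3_BUCKET_IBL, REGION_NAME) and the object key is borrowed from the s3_download_file docstring, purely for illustration:

>>> from one.remote import aws
>>> url = aws.get_s3_virtual_host('s3://ibl-brain-wide-map-public/atlas/dorsal_cortex_50.nrrd', 'us-east-1')
>>> url
'https://ibl-brain-wide-map-public.s3.us-east-1.amazonaws.com/atlas/dorsal_cortex_50.nrrd'
>>> uri, region = aws.url2uri(url, return_location=True)
>>> uri
's3://ibl-brain-wide-map-public/atlas/dorsal_cortex_50.nrrd'
>>> region
'us-east-1'

url2uri recovers the region from the hostname, which is why get_s3_virtual_host asserts a well-formed region string up front.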
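
The credential fallback in get_s3_from_alyx rests on the no_creds expression: only dictionary keys containing 'key' count as credentials, so a populated region_name alone still yields an unsigned session for public buckets. A small illustration of that check, with hypothetical values in place of a real Alyx response:

>>> keys = {'aws_access_key_id': None, 'aws_secret_access_key': None,
...         'region_name': 'us-east-1'}
>>> not any(filter(None, (v for k, v in keys.items() if 'key' in k.casefold())))
True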