ONE-api 3.0b1__py3-none-any.whl → 3.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ONE_api-3.0b1.dist-info → ONE_api-3.0b4.dist-info}/LICENSE +21 -21
- {ONE_api-3.0b1.dist-info → ONE_api-3.0b4.dist-info}/METADATA +115 -115
- ONE_api-3.0b4.dist-info/RECORD +37 -0
- one/__init__.py +2 -2
- one/alf/__init__.py +1 -1
- one/alf/cache.py +640 -653
- one/alf/exceptions.py +105 -105
- one/alf/io.py +876 -876
- one/alf/path.py +1450 -1450
- one/alf/spec.py +519 -504
- one/api.py +2949 -2973
- one/converters.py +850 -850
- one/params.py +414 -414
- one/registration.py +845 -845
- one/remote/__init__.py +1 -1
- one/remote/aws.py +313 -313
- one/remote/base.py +142 -142
- one/remote/globus.py +1254 -1254
- one/tests/fixtures/params/.caches +6 -6
- one/tests/fixtures/params/.test.alyx.internationalbrainlab.org +8 -8
- one/tests/fixtures/rest_responses/1f187d80fd59677b395fcdb18e68e4401bfa1cc9 +1 -1
- one/tests/fixtures/rest_responses/47893cf67c985e6361cdee009334963f49fb0746 +1 -1
- one/tests/fixtures/rest_responses/535d0e9a1e2c1efbdeba0d673b131e00361a2edb +1 -1
- one/tests/fixtures/rest_responses/6dc96f7e9bcc6ac2e7581489b9580a6cd3f28293 +1 -1
- one/tests/fixtures/rest_responses/db1731fb8df0208944ae85f76718430813a8bf50 +1 -1
- one/tests/fixtures/rest_responses/dcce48259bb929661f60a02a48563f70aa6185b3 +1 -1
- one/tests/fixtures/rest_responses/f530d6022f61cdc9e38cc66beb3cb71f3003c9a1 +1 -1
- one/tests/fixtures/test_dbs.json +14 -14
- one/util.py +524 -524
- one/webclient.py +1366 -1354
- ONE_api-3.0b1.dist-info/RECORD +0 -37
- {ONE_api-3.0b1.dist-info → ONE_api-3.0b4.dist-info}/WHEEL +0 -0
- {ONE_api-3.0b1.dist-info → ONE_api-3.0b4.dist-info}/top_level.txt +0 -0
one/remote/globus.py
CHANGED
|
@@ -1,1254 +1,1254 @@
|
|
|
1
|
-
"""A module for handling file operations through the Globus SDK.
|
|
2
|
-
|
|
3
|
-
Setup
|
|
4
|
-
-----
|
|
5
|
-
|
|
6
|
-
To set up Globus simply instantiate the `Globus` class for the first time and follow the prompts.
|
|
7
|
-
Providing a client name string to the constructor allows one to set up multiple Globus clients
|
|
8
|
-
(i.e. when switching between different Globus client IDs).
|
|
9
|
-
|
|
10
|
-
In order to use this function you need:
|
|
11
|
-
|
|
12
|
-
1. The client ID of an existing Globus Client (`see this tutorial`_).
|
|
13
|
-
2. Set up `Global Connect`_ on your local device.
|
|
14
|
-
3. Register your local device as an `endpoint`_ in your Globus Client.
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
To modify the settings for a pre-established client, call the `Globus.setup` method with the client
|
|
18
|
-
name:
|
|
19
|
-
|
|
20
|
-
>>> globus = Globus.setup('default')
|
|
21
|
-
|
|
22
|
-
You can update the list of endpoints using the `fetch_endpoints_from_alyx` method:
|
|
23
|
-
|
|
24
|
-
>>> globus = Globus('admin')
|
|
25
|
-
>>> remote_endpoints = globus.fetch_endpoints_from_alyx(alyx=AlyxClient())
|
|
26
|
-
|
|
27
|
-
The endpoints are stored in the `endpoints` property
|
|
28
|
-
|
|
29
|
-
>>> print(globus.endpoints.keys())
|
|
30
|
-
>>> print(globus.endpoints['local'])
|
|
31
|
-
|
|
32
|
-
.. _see this tutorial: https://globus-sdk-python.readthedocs.io/en/stable/tutorial.html
|
|
33
|
-
.. _Global Connect: https://www.globus.org/globus-connect-personal
|
|
34
|
-
.. _endpoint: https://app.globus.org/
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
Examples
|
|
38
|
-
--------
|
|
39
|
-
Get the full Globus file path
|
|
40
|
-
|
|
41
|
-
>>> relative_path = 'subject/2020-01-01/001/alf/_ibl_trials.table.pqt'
|
|
42
|
-
>>> full_path = globus.to_address(relative_path, 'flatiron_cortexlab')
|
|
43
|
-
|
|
44
|
-
Log in with a limited time token
|
|
45
|
-
|
|
46
|
-
>>> globus = Globus('admin')
|
|
47
|
-
>>> globus.login(stay_logged_in=False)
|
|
48
|
-
|
|
49
|
-
Log out of Globus, revoking and deleting all tokens
|
|
50
|
-
|
|
51
|
-
>>> globus.logout()
|
|
52
|
-
>>> assert not globus.is_logged_in
|
|
53
|
-
|
|
54
|
-
Asynchronously transfer data between Alyx repositories
|
|
55
|
-
|
|
56
|
-
>>> alyx = AlyxClient()
|
|
57
|
-
>>> glo = Globus('admin')
|
|
58
|
-
>>> glo.add_endpoint('flatiron_cortexlab', alyx=alyx)
|
|
59
|
-
>>> glo.add_endpoint('cortex_lab_SR', alyx=alyx)
|
|
60
|
-
>>> task_id = glo.transfer_data('path/to/file', 'flatiron_cortexlab', 'cortex_lab_SR')
|
|
61
|
-
|
|
62
|
-
Synchronously transfer data to an alternate local location
|
|
63
|
-
|
|
64
|
-
>>> from functools import partial
|
|
65
|
-
>>> root_path = '/path/to/new/location'
|
|
66
|
-
>>> glo.add_endpoint(get_local_endpoint_id(), label='alternate_local', root_path=root_path)
|
|
67
|
-
>>> folder = 'camera/ZFM-01867/2021-03-23/002' # An example folder to download
|
|
68
|
-
>>> task = partial(glo.transfer_data, folder, 'integration', 'integration_local',
|
|
69
|
-
... label='alternate data', recursive=True)
|
|
70
|
-
>>> task_id = glo.run_task(task) # Submit task to Globus and await completion
|
|
71
|
-
|
|
72
|
-
Temporarily change local data root path and synchronously download file
|
|
73
|
-
|
|
74
|
-
>>> glo.endpoints['local']['root_path'] = '/path/to/new/location'
|
|
75
|
-
>>> file = glo.download_file('path/to/file.ext', 'source_endpoint')
|
|
76
|
-
Path('/path/to/new/location/path/to/file.ext')
|
|
77
|
-
|
|
78
|
-
Await multiple tasks to complete by passing a list of Globus transfer IDs
|
|
79
|
-
|
|
80
|
-
>>> import asyncio
|
|
81
|
-
>>> tasks = [asyncio.create_task(globus.task_wait_async(task_id)) for task_id in task_ids]
|
|
82
|
-
>>> success = asyncio.run(asyncio.gather(*tasks))
|
|
83
|
-
|
|
84
|
-
"""
|
|
85
|
-
import os
|
|
86
|
-
import re
|
|
87
|
-
import sys
|
|
88
|
-
import asyncio
|
|
89
|
-
import logging
|
|
90
|
-
from uuid import UUID
|
|
91
|
-
from datetime import datetime
|
|
92
|
-
from pathlib import Path, PurePosixPath, PurePath, PureWindowsPath
|
|
93
|
-
import warnings
|
|
94
|
-
from functools import partial, wraps
|
|
95
|
-
|
|
96
|
-
import globus_sdk
|
|
97
|
-
from globus_sdk import TransferAPIError, GlobusAPIError, NetworkError, GlobusTimeoutError, \
|
|
98
|
-
GlobusConnectionError, GlobusConnectionTimeoutError, GlobusSDKUsageError, NullAuthorizer
|
|
99
|
-
from iblutil.io import params as iopar
|
|
100
|
-
from iblutil.util import ensure_list
|
|
101
|
-
|
|
102
|
-
from one.alf.spec import is_uuid
|
|
103
|
-
from one.alf.path import remove_uuid_string
|
|
104
|
-
import one.params
|
|
105
|
-
from one.webclient import AlyxClient
|
|
106
|
-
from .base import DownloadClient, load_client_params, save_client_params
|
|
107
|
-
|
|
108
|
-
__all__ = ['Globus', 'get_lab_from_endpoint_id', 'as_globus_path']
|
|
109
|
-
_logger = logging.getLogger(__name__)
|
|
110
|
-
CLIENT_KEY = 'globus'
"""str: The default key in the remote settings file"""

# Every default field starts out unset (None) until the user completes the setup prompts.
DEFAULT_PAR = dict.fromkeys(('GLOBUS_CLIENT_ID', 'local_endpoint', 'local_path'))
"""dict: The default Globus parameter fields"""

# Keys are the "nice" statuses; values are the raw Globus statuses grouped under them.
# Note that the 'INACTIVE' entry is a plain string rather than a tuple.
STATUS_MAP = dict(
    ACTIVE=('QUEUED', 'ACTIVE', 'GC_NOT_CONNECTED', 'UNKNOWN'),
    FAILED=('ENDPOINT_ERROR', 'PERMISSION_DENIED', 'CONNECT_FAILED'),
    INACTIVE='PAUSED_BY_ADMIN',
)
"""dict: A map of Globus status to "nice" status"""
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def ensure_logged_in(func):
    """Decorate a Globus method so that a login is attempted before it runs.

    The wrapper first calls the instance's `login` method and then delegates to the wrapped
    method with the arguments it was given, returning whatever that method returns. What
    happens during the login attempt (token refresh, user prompts, errors in headless mode)
    is determined entirely by `login`.

    Parameters
    ----------
    func : function
        Method to wrap (e.g. Globus.transfer_data).

    Returns
    -------
    function
        Handle to wrapped method.

    """
    @wraps(func)
    def _login_then_call(self, *args, **kwargs):
        # Authentication happens on every call; `login` decides whether any work is needed
        self.login()
        outcome = func(self, *args, **kwargs)
        return outcome

    return _login_then_call
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
def _setup(par_id=None, login=True, refresh_tokens=True):
    """Interactively set up Globus as a backend for ONE functions.

    Prompts the user for the Globus client ID, the local endpoint ID and the local endpoint
    path, optionally fetches an authentication token, then saves the parameters under the
    given profile name.

    Parameters
    ----------
    par_id : str
        Parameter profile name to set up e.g. 'default', 'admin'. If empty, the user is
        prompted for a name ('default' is used when nothing is entered).
    login : bool
        If true, a token is requested with `get_token` and stored with the parameters.
    refresh_tokens : bool
        Passed to `get_token`; if true, a refresh token is requested for repeat logins.

    Returns
    -------
    IBLParams
        A set of Globus parameters.

    Raises
    ------
    ValueError
        No client ID was provided, the client ID is not a UUID, or the local endpoint ID is
        not a version 1 or 2 UUID.

    """
    print('Setting up Globus parameter file. See docstring for help.')
    if not par_id:
        default_par_id = 'default'
        par_id = input(
            f'Enter name for this client or press Enter to keep value "{default_par_id}": '
        )
        par_id = par_id.strip() or default_par_id

    # Read existing globus params if present
    globus_pars = iopar.as_dict(load_client_params(CLIENT_KEY, assert_present=False) or {})
    pars = {**DEFAULT_PAR, **globus_pars.get(par_id, {})}

    # Set GLOBUS_CLIENT_ID
    current_id = pars['GLOBUS_CLIENT_ID']
    if current_id:
        prompt = (f'Found Globus client ID in parameter file ({current_id}). '
                  'Press Enter to keep it, or enter a new ID here: ')
        pars['GLOBUS_CLIENT_ID'] = input(prompt).strip() or current_id
    else:
        new_id = input('Please enter the Globus client ID: ').strip()
        if not new_id:
            raise ValueError('Globus client ID is a required field')
        pars['GLOBUS_CLIENT_ID'] = new_id
    client_id = pars['GLOBUS_CLIENT_ID']
    if not is_uuid(client_id):
        # Exceptions do not interpolate logging-style arguments, so format the message here
        raise ValueError(f'Invalid Globus client ID "{client_id}"')

    # Find and set local ID
    message = 'Please enter the local endpoint ID'
    try:
        default_endpoint = str(pars['local_endpoint'] or get_local_endpoint_id())
        message += f' (default: {default_endpoint})'
    except AssertionError:
        # No stored ID and none could be read from the Globus Connect settings
        default_endpoint = ''
        warnings.warn(
            'Cannot find local endpoint ID. Beware that this might mean that Globus Connect '
            'is not set up properly.')
    pars['local_endpoint'] = input(message + ':').strip() or default_endpoint
    if not is_uuid(pars['local_endpoint'], (1, 2)):
        raise ValueError('Globus local endpoint ID must be a UUID version 1 or 2')

    # Check for local path
    message = 'Please enter the local endpoint path'
    local_path = pars['local_path'] or one.params.get(silent=True).CACHE_DIR
    message += f' (default: {local_path})'
    pars['local_path'] = input(message + ':').strip() or local_path

    if login:
        # Log in manually and get refresh token to avoid having to login repeatedly
        token = get_token(pars['GLOBUS_CLIENT_ID'], refresh_tokens=refresh_tokens)
        pars.update(token)

    globus_pars[par_id] = pars
    save_client_params(globus_pars, client_key=CLIENT_KEY)
    print('Finished setup.')
    return iopar.from_dict(pars)
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
def get_token(client_id, refresh_tokens=True):
    """Obtain a Globus authentication token through the native app login flow.

    The user is shown an authorization URL and asked to paste back the code they receive
    after logging in via a browser. Entering nothing, or "c", cancels the request.

    Parameters
    ----------
    client_id : str
        A Globus client ID.
    refresh_tokens : bool
        If true, requests a refresh token for repeat logins.

    Returns
    -------
    dict
        A dict containing the keys {'refresh_token', 'access_token', 'expires_at_seconds'}.
        All values are None if the user cancelled.

    """
    token_keys = ('refresh_token', 'access_token', 'expires_at_seconds')
    auth_client = globus_sdk.NativeAppAuthClient(client_id)
    auth_client.oauth2_start_flow(refresh_tokens=bool(refresh_tokens))
    authorize_url = auth_client.oauth2_get_authorize_url()
    print('To get a new token, go to this URL and login: {0}'.format(authorize_url))
    auth_code = input('Enter the code you get after login here (press "c" to cancel): ').strip()

    # An empty response or "c" (any case) means the user backed out
    if not auth_code or auth_code.casefold() == 'c':
        return dict.fromkeys(token_keys)

    response = auth_client.oauth2_exchange_code_for_tokens(auth_code)
    transfer_tokens = response.by_resource_server['transfer.api.globus.org']
    return {key: transfer_tokens.get(key) for key in token_keys}
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
def _remove_token_fields(pars):
    """Return a copy of a parameters object with the token fields dropped.

    Parameters
    ----------
    pars : IBLParams, dict
        The Globus parameters containing token fields.

    Returns
    -------
    IBLParams
        A copy of the params without the 'refresh_token', 'access_token' and
        'expires_at_seconds' fields. If `pars` is None, None is returned.

    """
    if pars is None:
        return pars
    token_keys = ('refresh_token', 'access_token', 'expires_at_seconds')
    kept = {}
    for key, value in iopar.as_dict(pars).items():
        if key not in token_keys:
            kept[key] = value
    return iopar.from_dict(kept)
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
def _save_globus_params(pars, client_name):
    """Store the parameters of one Globus client in the Globus settings.

    The existing Globus settings are loaded (an empty mapping is used if none are present),
    the entry for `client_name` is replaced with `pars`, and the result is saved.

    Parameters
    ----------
    pars : IBLParams, dict
        The Globus client parameters to save.
    client_name : str
        The Globus client name, e.g. 'default'.

    """
    stored = load_client_params(CLIENT_KEY, assert_present=False) or {}
    all_clients = iopar.as_dict(stored)
    all_clients[client_name] = iopar.as_dict(pars)
    save_client_params(all_clients, CLIENT_KEY)
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
def get_local_endpoint_id():
    """Extract the ID of the local Globus Connect endpoint.

    The ID is read from the 'client-id.txt' file in the Globus Connect settings folder:
    '%LOCALAPPDATA%/Globus Connect' on Windows and Cygwin, '~/.globusonline/lta' otherwise.

    Returns
    -------
    uuid.UUID
        The local Globus endpoint ID.

    Raises
    ------
    AssertionError
        The ID file does not exist, is empty, or does not contain a valid UUID.

    """
    msg = ('Cannot find local endpoint ID, check if Globus Connect is set up correctly, '
           '{} exists and contains a UUID.')
    if sys.platform in ('win32', 'cygwin'):
        id_path = Path(os.environ['LOCALAPPDATA']).joinpath('Globus Connect')
    else:
        id_path = Path.home().joinpath('.globusonline', 'lta')

    id_file = id_path.joinpath('client-id.txt')
    # Raise explicitly rather than using `assert` so the checks survive `python -O`.
    # AssertionError is kept as the exception type because callers catch it.
    if not id_file.exists():
        raise AssertionError(msg.format(id_file))
    local_id = id_file.read_text().strip()
    if not local_id:
        raise AssertionError(msg.format(id_file))
    try:
        endpoint_id = UUID(local_id)
    except ValueError as ex:
        # File content is not a well-formed UUID
        raise AssertionError(msg.format(id_file)) from ex
    _logger.debug(f'Found local endpoint ID in Globus Connect settings {local_id}')
    return endpoint_id
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
def get_local_endpoint_paths():
    """Extract the local endpoint paths accessible by Globus Connect.

    NB: This is only supported on Linux. On Windows and Cygwin a message is printed and an
    empty list is returned. Otherwise the comma-separated entries of
    '~/.globusonline/lta/config-paths' are returned; if that file does not exist a warning
    is issued and an empty list is returned.

    Returns
    -------
    list of pathlib.Path
        Local endpoint paths set in Globus Connect.

    """
    if sys.platform in ('win32', 'cygwin'):
        print('On windows the local Globus path needs to be entered manually')
        return []

    path_file = Path.home().joinpath('.globusonline', 'lta', 'config-paths')
    if not path_file.exists():
        msg = ('Cannot find local endpoint path, check if Globus Connect is set up correctly, '
               '{} exists and contains a valid path.')
        warnings.warn(msg.format(path_file))
        return []

    entries = path_file.read_text().strip().split(',')
    _logger.debug('Found local endpoint paths in Globus Connect settings')
    # Empty entries (e.g. from an empty file or trailing comma) are skipped
    return [Path(entry) for entry in entries if entry]
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
def get_lab_from_endpoint_id(endpoint=None, alyx=None):
    """Extract the lab names associated with a given endpoint UUID.

    Queries the Alyx 'labs' endpoint for labs whose data repositories have the provided
    Globus endpoint UUID.

    Parameters
    ----------
    endpoint : uuid.UUID, str
        Endpoint UUID. Optional: if not given, the local endpoint UUID is looked up with
        `get_local_endpoint_id`.
    alyx : one.webclient.AlyxClient
        An instance of AlyxClient to use. If not given, a silent AlyxClient is instantiated.

    Returns
    -------
    list
        The lab names associated with the endpoint UUID. None is returned when the query
        yields no labs.

    """
    alyx = alyx or AlyxClient(silent=True)
    endpoint = endpoint or get_local_endpoint_id()
    labs = alyx.rest('labs', 'list', django=f'repositories__globus_endpoint_id,{endpoint}')
    if not len(labs):
        return None
    return [record['name'] for record in labs]
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
def as_globus_path(path):
    """Convert a path into one suitable for the Globus TransferClient.

    Parameters
    ----------
    path : pathlib.Path, pathlib.PurePath, str
        A path to convert to a Globus-compliant path string.

    Returns
    -------
    str
        A formatted path string.

    Notes
    -----
    - Only string inputs are resolved against the file system; path objects are used as given.
    - If using tilde in path, the home folder of your Globus Connect instance must be the same
      as the OS home dir.
    - If validating a path for another system ensure the input path is a PurePath, in
      particular, on a Linux computer a remote Windows path should first be made into a
      PureWindowsPath.

    Examples
    --------
    A Windows path (on Windows OS)

    >>> as_globus_path('E:\\FlatIron\\integration')
    '/E/FlatIron/integration'

    When explicitly a POSIX path, remains unchanged

    >>> as_globus_path(PurePosixPath('E:\\FlatIron\\integration'))
    'E:\\FlatIron\\integration'

    A relative POSIX path (on *nix OS)

    >>> as_globus_path('../data/integration')
    '/mnt/data/integration'

    A valid Globus path remains unchanged

    >>> as_globus_path('/E/FlatIron/integration')
    '/E/FlatIron/integration'

    """
    # Must be determined before a string input is converted to a Path below
    given_as_path = isinstance(path, PurePath)
    windows_style = isinstance(path, PureWindowsPath) or sys.platform in ('win32', 'cygwin')
    if isinstance(path, str):
        path = Path(path)

    posix = path.as_posix()
    if windows_style:
        # Already in Globus form when it starts with a single upper-case drive letter folder
        already_valid = re.match(r'/[A-Z]($|/)', posix)
    else:
        already_valid = path.is_absolute()
    if already_valid:
        return posix

    if not given_as_path:
        path = path.resolve()
    if not path.drive:
        return str(path)
    # Turn e.g. 'E:/foo' into '/E/foo' by dropping the first colon
    return '/' + path.as_posix().replace(':', '', 1)
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
class Globus(DownloadClient):
|
|
428
|
-
|
|
429
|
-
def __init__(self, client_name='default', connect=True, headless=False):
    """Wrapper for managing files on Globus endpoints.

    Loads the stored parameters for the named client, running the interactive setup if none
    exist (unless headless), optionally logs in, then registers the local endpoint from the
    parameters under the 'local' label.

    Parameters
    ----------
    client_name : str
        Parameter profile name to load e.g. 'default', 'admin'.
    connect : bool
        Whether to log in (and thereby create the Globus SDK client) on init.
    headless : bool
        If true, raises RuntimeError when the client has not been set up instead of
        prompting the user to enter information.

    Examples
    --------
    Instantiate without authentication

    >>> globus = Globus(connect=False)

    Instantiate without user prompts

    >>> globus = Globus('server', headless=True)

    """
    super().__init__()
    self.client = None
    self.client_name = client_name
    self.headless = headless
    params_key = f'{CLIENT_KEY}.{client_name}'
    self._pars = load_client_params(params_key, assert_present=False)

    # Without stored parameters the client must be set up, which requires user input
    if self._pars is None and self.headless:
        raise RuntimeError(f'Globus not set up for client "{self.client_name}"')
    if self._pars is None:
        self._pars = _setup(self.client_name, login=False)

    if connect:
        self.login()

    # Register the local endpoint from the stored parameters
    local_id = UUID(self._pars.local_endpoint)
    self.endpoints = {'local': {'id': local_id}}
    _logger.info('Adding local endpoint.')
    self.endpoints['local']['root_path'] = self._pars.local_path
|
|
473
|
-
|
|
474
|
-
@property
def is_logged_in(self):
    """bool: True if a client exists, provides an authorization header and its token is valid.

    Always returns a bool: False is returned when no client has been instantiated, rather
    than the falsy client value itself.
    """
    if not self.client:
        return False
    has_token = self.client.authorizer.get_authorization_header() is not None
    # The expiry check is only evaluated when there is a token to check
    return has_token and not self._token_expired
|
|
479
|
-
|
|
480
|
-
@property
def _token_expired(self):
    """bool: True if token absent or expired; False if valid.

    A token is considered expired when it has less than 60 seconds of validity left.

    If a refresh token is stored and the client uses a RefreshTokenAuthorizer, the authorizer
    is first asked to ensure its token is valid; any failure to do so is logged at debug
    level and otherwise ignored.

    Note the 'expires_at_seconds' may be greater than `Globus.client.authorizer.expires_at` if
    using refresh tokens. The `login` method will always refresh the token if still valid.
    """
    try:
        authorizer = getattr(self.client, 'authorizer', None)
        has_refresh_token = self._pars.as_dict().get('refresh_token') is not None
        if has_refresh_token and isinstance(authorizer, globus_sdk.RefreshTokenAuthorizer):
            self.client.authorizer.ensure_valid_token()  # Fetch new refresh token if needed
    except Exception as ex:
        # Best effort only: fall through to the stored expiry time
        _logger.debug('Failed to refresh token: %s', ex)
    # A missing or None expiry (e.g. after a cancelled login) counts as already expired
    expires_at_seconds = getattr(self._pars, 'expires_at_seconds', 0) or 0
    # `datetime.now().timestamp()` gives the current POSIX time. A naive `utcnow()` must not
    # be used here: `timestamp()` interprets naive datetimes as local time, which skews the
    # result by the local UTC offset.
    return expires_at_seconds - datetime.now().timestamp() < 60
|
|
496
|
-
|
|
497
|
-
def login(self, stay_logged_in=None):
    """Authenticate Globus client.

    Does nothing if already logged in. Otherwise, if the stored parameters lack any token
    field or the stored token is deemed expired, a new token is requested from the user
    (an error is raised instead when in headless mode) and saved. Finally the Globus SDK
    client is instantiated via `_authenticate`. If the user cancels the token request the
    method returns without authenticating.

    Parameters
    ----------
    stay_logged_in : bool, optional
        If True, use refresh token to remain logged in for longer. If False, use an auth
        token without the option of refreshing when expired. If not specified, it is
        treated as True.

    Raises
    ------
    RuntimeError
        A new token is required but the instance is in headless mode.

    """
    if self.is_logged_in:
        _logger.debug('Already logged in')
        return

    if stay_logged_in is None:
        stay_logged_in = True

    # With refresh tokens, only the absence of one forces re-authorization; otherwise the
    # stored access token's expiry decides
    if stay_logged_in:
        expired = self._pars.as_dict().get('refresh_token') is None
    else:
        expired = bool(self._token_expired)

    # If no tokens in parameters, Globus must be authenticated
    token_fields = {'refresh_token', 'access_token', 'expires_at_seconds'}
    has_all_fields = token_fields.issubset(iopar.as_dict(self._pars))
    if expired or not has_all_fields:
        if self.headless:
            raise RuntimeError(f'Globus not authenticated for client "{self.client_name}"')
        new_token = get_token(self._pars.GLOBUS_CLIENT_ID, refresh_tokens=stay_logged_in)
        if not any(new_token.values()):
            _logger.debug('Login cancelled by user')
            return
        merged = {**self._pars.as_dict(), **new_token}
        self._pars = iopar.from_dict(merged)
        _save_globus_params(self._pars, self.client_name)

    # Ready to authenticate
    self._authenticate(stay_logged_in)
|
|
532
|
-
|
|
533
|
-
def logout(self):
    """Revoke any tokens and delete them from the client and parameter file.

    If the SDK client holds a non-null authorizer, its token is revoked and the authorizer
    is replaced by a NullAuthorizer. The token fields are then removed from both the saved
    parameters (if any exist for this client) and the parameters held by this instance.
    """
    sdk_client = self.client
    has_authorizer = (
        sdk_client
        and sdk_client.authorizer
        and not isinstance(sdk_client.authorizer, NullAuthorizer)
    )
    if has_authorizer:
        # NOTE(review): revoke is called without a token argument and assumes the authorizer
        # exposes `auth_client` - confirm against the installed globus_sdk version
        sdk_client.authorizer.auth_client.oauth2_revoke_token()
        del sdk_client.authorizer
        sdk_client.authorizer = NullAuthorizer()

    saved = load_client_params(f'{CLIENT_KEY}.{self.client_name}', assert_present=False)
    if saved:
        _save_globus_params(_remove_token_fields(saved), self.client_name)
    self._pars = _remove_token_fields(self._pars)
|
|
543
|
-
|
|
544
|
-
def _authenticate(self, stay_logged_in=None):
    """Authenticate and instantiate Globus SDK client.

    A refresh token authorizer is used when a refresh token is stored and `stay_logged_in`
    is not False. Otherwise the stored access token is used, provided it has not expired.

    Parameters
    ----------
    stay_logged_in : bool, optional
        If False, the access token is used even when a refresh token is available. If True
        and no refresh token is stored, a warning is issued.

    Raises
    ------
    RuntimeError
        The access token must be used but is no longer valid.

    """
    refresh_token = self._pars.as_dict().get('refresh_token')
    if refresh_token and stay_logged_in is not False:
        auth_client = globus_sdk.NativeAppAuthClient(self._pars.GLOBUS_CLIENT_ID)
        auth_client.oauth2_start_flow(refresh_tokens=True)
        # Newly fetched tokens are persisted through the callback
        authorizer = globus_sdk.RefreshTokenAuthorizer(
            self._pars.refresh_token, auth_client, on_refresh=self._save_refresh_token_callback)
        self.client = globus_sdk.TransferClient(authorizer=authorizer)
        return

    if stay_logged_in is True:
        warnings.warn('No refresh token. Please log out and back in to remain logged in.')
    if self._token_expired is not False:
        raise RuntimeError(f'token no longer valid for client "{self.client_name}"')
    authorizer = globus_sdk.AccessTokenAuthorizer(self._pars.access_token)
    self.client = globus_sdk.TransferClient(authorizer=authorizer)
|
|
558
|
-
|
|
559
|
-
def _save_refresh_token_callback(self, res):
    """Save a token fetched by the refresh token authorizer.

    This is a callback for the globus_sdk.RefreshTokenAuthorizer to update the parameters.
    The token fields of the first resource server in the response are merged into the
    instance's parameters, which are then saved. Nothing happens if the response is empty.

    Parameters
    ----------
    res : globus_sdk.services.auth.OAuthTokenResponse
        An Open Authorization response object.

    """
    if not res:
        return
    token = next(iter(res.by_resource_server.values()), None)
    if not token:
        return
    token_fields = {'refresh_token', 'access_token', 'expires_at_seconds'}
    updates = {key: value for key, value in token.items() if key in token_fields}
    self._pars = iopar.from_dict({**self._pars.as_dict(), **updates})
    _save_globus_params(self._pars, self.client_name)
|
|
576
|
-
|
|
577
|
-
def fetch_endpoints_from_alyx(self, alyx=None, overwrite=False):
    """Update endpoints property with Alyx Globus data repositories.

    Every Alyx data repository that has a Globus endpoint ID is added as an endpoint,
    labelled with the repository name and rooted at the repository's Globus path.

    Parameters
    ----------
    alyx : one.webclient.AlyxClient
        An optional AlyxClient. If not given, a new AlyxClient is instantiated.
    overwrite : bool
        Whether existing endpoint with the same label should be replaced.

    Returns
    -------
    dict
        The endpoints whose labels match an Alyx data repository name.

    """
    alyx = alyx or AlyxClient()
    repositories = alyx.rest('data-repository', 'list')
    for repo in repositories:
        globus_id = repo['globus_endpoint_id']
        if globus_id:  # skip repositories that are not on Globus
            self.add_endpoint(
                UUID(globus_id),
                label=repo['name'], root_path=repo['globus_path'], overwrite=overwrite
            )
    repo_names = {repo['name'] for repo in repositories}
    return {label: ep for label, ep in self.endpoints.items() if label in repo_names}
|
|
604
|
-
|
|
605
|
-
def to_address(self, data_path, endpoint):
    """Get full path for a given endpoint.

    Looks up the root path of the endpoint and combines it with the data path.

    Parameters
    ----------
    data_path : Path, PurePath, str
        An absolute or relative POSIX path
    endpoint : str, uuid.UUID
        An endpoint label or UUID.

    Returns
    -------
    str
        A complete path string formatted for Globus.

    Examples
    --------
    >>> glo = Globus()
    >>> glo.add_endpoint('0ec47586-3a19-11eb-b173-0ee0d5d9299f',
    ...                  label='foobar', root_path='/foo')
    >>> glo.to_address('bar/baz.ext', 'foobar')
    '/foo/bar/baz.ext'

    """
    _endpoint_id, endpoint_root = self._endpoint_id_root(endpoint)
    full_path = self._endpoint_path(data_path, endpoint_root)
    return full_path
|
|
631
|
-
|
|
632
|
-
@ensure_logged_in
def download_file(self, file_address, source_endpoint, recursive=False, **kwargs):
    """Download one or more files via Globus.

    Submits a transfer from the source endpoint to the 'local' endpoint, waits for it to
    complete, then returns the local paths of the successfully transferred files.

    Parameters
    ----------
    file_address : str, list of str
        One or more relative POSIX paths to download.
    source_endpoint : str, uuid.UUID
        The source endpoint name or uuid.
    recursive : bool
        If true, transfer the contents of nested directories (NB: all data_paths must be
        directories).
    **kwargs
        See Globus.transfer_data. The transfer label defaults to 'ONE download'.

    Returns
    -------
    pathlib.Path, list of pathlib.Path
        The downloaded file path(s). If recursive is True, a list is always returned.

    Notes
    -----
    - Assumes that the local endpoint root path is NOT POSIX style on Windows.

    TODO Return None for failed files

    Examples
    --------
    Download a single file

    >>> file = Globus().download_file('path/to/file', '0ec47586-3a19-11eb-b173-0ee0d5d9299f')

    Download multiple files and verify checksum

    >>> files = ['relative/file/path.ext', 'foo.bar']
    >>> files = Globus().download_file(files, 'source_endpoint_name', verify_checksum=True)

    Download a folder

    >>> files = Globus().download_file('folder/path', 'source_endpoint_name', recursive=True)

    """
    return_single = isinstance(file_address, str) and recursive is False
    kwargs.setdefault('label', 'ONE download')
    transfer = partial(self.transfer_data, file_address, source_endpoint, 'local',
                       recursive=recursive, **kwargs)
    task_id = self.run_task(transfer)

    root = Path(self.endpoints['local']['root_path'])
    # Length of the local root in Globus form; stripped from each destination path below
    idx = len(self._endpoint_path(PurePosixPath(as_globus_path(root))))
    files = [
        info['destination_path'][idx:].strip('/')
        for info in self.client.task_successful_transfers(task_id)
    ]

    if return_single:
        file = root / files[0]
        assert file.exists()
        return file

    # Order files by input
    def _best_match(x):
        """Return the index of the input file that best matches downloaded file."""
        spans = []
        for frag in ensure_list(file_address):
            spans.append(len(frag) / len(x) if frag in x else 0)
        return spans.index(max(spans))

    files = [root.joinpath(rel_path) for rel_path in sorted(files, key=_best_match)]
    assert all(file.exists() for file in files if file)
    return files
|
|
699
|
-
|
|
700
|
-
@staticmethod
def setup(client_name='default', **kwargs):
    """Run the parameter setup for a client profile and return a new Globus object.

    The module-level `_setup` routine is called for the given profile name with
    `login=False`; a `Globus` instance is then constructed for that same profile.

    Prerequisites:

    1. The client ID of an existing Globus Client (`see this tutorial`_).
    2. `Globus Connect`_ set up on your local device.
    3. Your local device registered as an `endpoint`_ in your Globus Client.

    .. _see this tutorial: https://globus-sdk-python.readthedocs.io/en/stable/tutorial.html
    .. _Globus Connect: https://www.globus.org/globus-connect-personal
    .. _endpoint: https://app.globus.org/

    Parameters
    ----------
    client_name : str
        Name of the parameter profile to set up, e.g. 'default', 'admin'.
    **kwargs
        Extra keyword arguments forwarded to the Globus constructor.

    Returns
    -------
    Globus
        A newly constructed Globus client object for `client_name`.

    """
    _setup(client_name, login=False)
    client = Globus(client_name, **kwargs)
    return client
|
|
729
|
-
|
|
730
|
-
def add_endpoint(self, endpoint, label=None, root_path=None, overwrite=False, alyx=None):
    """Register an endpoint under a label so that other methods can refer to it.

    Parameters
    ----------
    endpoint : uuid.UUID, str
        Either the endpoint UUID, or the name of a database repository from which the
        endpoint ID (and default root path) are looked up.
    label : str
        The key under which the endpoint is stored.  Required when `endpoint` is a UUID;
        when omitted for a repository name, the repository name itself is used.
    root_path : str, pathlib.Path, pathlib.PurePath
        File path to be accessed by Globus on the endpoint.  For a repository name this
        defaults to the repository's 'globus_path' field.
    overwrite : bool
        If False (default) and the label is already registered, an error is logged and
        the existing endpoint is left untouched.
    alyx : one.webclient.AlyxClient
        An AlyxClient instance used for looking up repository information.

    Raises
    ------
    ValueError
        `endpoint` is a UUID and no `label` was provided.

    """
    given_as_uuid = is_uuid(endpoint, versions=(1, 2))  # MAC address UUID
    if given_as_uuid:
        if label is None:
            raise ValueError('If "endpoint" is a UUID, "label" cannot be None.')
        endpoint_id = self._ensure_uuid(endpoint)
    else:
        # Not a UUID: treat the input as a repository name and fetch its record
        repo = self.repo_from_alyx(endpoint, alyx=alyx)
        endpoint_id = UUID(repo['globus_endpoint_id'])
        if not root_path:
            root_path = repo['globus_path']
        if not label:
            label = endpoint

    if label in self.endpoints.keys() and overwrite is False:
        # NB: a clash is only logged, not raised
        _logger.error(f'An endpoint called "{label}" already exists. Choose a different label '
                      'or set overwrite=True')
        return

    record = {'id': endpoint_id}
    if root_path:
        record['root_path'] = root_path
    self.endpoints[label] = record
|
|
764
|
-
|
|
765
|
-
@staticmethod
|
|
766
|
-
def _endpoint_path(path, root_path=None):
|
|
767
|
-
"""Given an absolute path or relative path with a root path, return a Globus path str.
|
|
768
|
-
|
|
769
|
-
Note: Paths must be POSIX or Globus-compliant paths. In other words for Windows systems
|
|
770
|
-
the input root_path or absolute path must be passed through `as_globus_path` before
|
|
771
|
-
calling this method.
|
|
772
|
-
|
|
773
|
-
TODO include globus_path_from_dataset
|
|
774
|
-
|
|
775
|
-
Parameters
|
|
776
|
-
----------
|
|
777
|
-
path : Path, PurePath, str
|
|
778
|
-
An absolute or relative POSIX path
|
|
779
|
-
root_path : Path, PurePath, str
|
|
780
|
-
A root path to prepend. Optional if `path` is absolute.
|
|
781
|
-
|
|
782
|
-
Returns
|
|
783
|
-
-------
|
|
784
|
-
str
|
|
785
|
-
A path string formatted for Globus.
|
|
786
|
-
|
|
787
|
-
See Also
|
|
788
|
-
--------
|
|
789
|
-
as_globus_path
|
|
790
|
-
|
|
791
|
-
Raises
|
|
792
|
-
------
|
|
793
|
-
ValueError
|
|
794
|
-
Path was not absolute and no root path was given. An absolute path must start with
|
|
795
|
-
a slash on *nix systems.
|
|
796
|
-
|
|
797
|
-
"""
|
|
798
|
-
if isinstance(path, str):
|
|
799
|
-
path = PurePosixPath(path)
|
|
800
|
-
if root_path and not str(path).startswith(str(root_path)):
|
|
801
|
-
path = PurePosixPath(root_path) / path
|
|
802
|
-
if not path.is_absolute():
|
|
803
|
-
raise ValueError(f'{path} is relative and no root_path defined')
|
|
804
|
-
return as_globus_path(path)
|
|
805
|
-
|
|
806
|
-
@staticmethod
|
|
807
|
-
def _ensure_uuid(uid):
|
|
808
|
-
"""Ensures UUID object returned.
|
|
809
|
-
|
|
810
|
-
Parameters
|
|
811
|
-
----------
|
|
812
|
-
uid : str, uuid.UUID
|
|
813
|
-
A UUID to cast to UUID object.
|
|
814
|
-
|
|
815
|
-
Returns
|
|
816
|
-
-------
|
|
817
|
-
uuid.UUID
|
|
818
|
-
A UUID object.
|
|
819
|
-
|
|
820
|
-
"""
|
|
821
|
-
return UUID(uid) if not isinstance(uid, UUID) else uid
|
|
822
|
-
|
|
823
|
-
def _endpoint_id_root(self, endpoint):
|
|
824
|
-
"""Return endpoint UUID and root path from a given endpoint identifier.
|
|
825
|
-
|
|
826
|
-
Parameters
|
|
827
|
-
----------
|
|
828
|
-
endpoint : str, uuid.UUID
|
|
829
|
-
An endpoint label or UUID.
|
|
830
|
-
|
|
831
|
-
Returns
|
|
832
|
-
-------
|
|
833
|
-
uuid.UUID
|
|
834
|
-
The endpoint UUID.
|
|
835
|
-
str, None
|
|
836
|
-
The POSIX-style endpoint root path (if defined).
|
|
837
|
-
|
|
838
|
-
Warnings
|
|
839
|
-
--------
|
|
840
|
-
UserWarning
|
|
841
|
-
If endpoint UUID is associated with multiple root paths, it is better to provide the
|
|
842
|
-
endpoint label to avoid this warning and to ensure the intended root path is returned.
|
|
843
|
-
|
|
844
|
-
See Also
|
|
845
|
-
--------
|
|
846
|
-
Globus._sanitize_local
|
|
847
|
-
|
|
848
|
-
"""
|
|
849
|
-
root_path = None
|
|
850
|
-
if endpoint in self.endpoints.keys():
|
|
851
|
-
endpoint_id = self.endpoints[endpoint]['id']
|
|
852
|
-
if 'root_path' in self.endpoints[endpoint].keys():
|
|
853
|
-
root_path = self.endpoints[endpoint]['root_path']
|
|
854
|
-
return self._sanitize_local(endpoint_id, root_path)
|
|
855
|
-
elif is_uuid(endpoint, range(1, 5)):
|
|
856
|
-
# If a UUID was provided, find the first endpoint with a root path with the UUID
|
|
857
|
-
endpoint_id = self._ensure_uuid(endpoint)
|
|
858
|
-
matching = (
|
|
859
|
-
k for k, v in self.endpoints.items() if v['id'] == endpoint_id and 'root_path' in v
|
|
860
|
-
)
|
|
861
|
-
if name := next(matching, None):
|
|
862
|
-
# Warn of ambiguity if multiple endpoints share a UUID
|
|
863
|
-
if next(matching, None) is not None:
|
|
864
|
-
warnings.warn(
|
|
865
|
-
f'Multiple endpoints added with the same UUID, '
|
|
866
|
-
f'using root path from "{name}"')
|
|
867
|
-
root_path = self.endpoints[name]['root_path']
|
|
868
|
-
else:
|
|
869
|
-
root_path = None
|
|
870
|
-
return self._sanitize_local(endpoint_id, root_path)
|
|
871
|
-
else:
|
|
872
|
-
raise ValueError(
|
|
873
|
-
'"endpoint" must be a UUID or the label of an endpoint registered in this '
|
|
874
|
-
'Globus instance. You can add endpoints via the add_endpoints method')
|
|
875
|
-
|
|
876
|
-
def _sanitize_local(self, endpoint_id, root_path):
|
|
877
|
-
"""Ensure local root path on Windows is POSIX-style.
|
|
878
|
-
|
|
879
|
-
Parameters
|
|
880
|
-
----------
|
|
881
|
-
endpoint_id : uuid.UUID
|
|
882
|
-
The endpoint UUID to determine if root path is local.
|
|
883
|
-
root_path : pathlib.Path, str, None
|
|
884
|
-
The root path to sanitize.
|
|
885
|
-
|
|
886
|
-
Returns
|
|
887
|
-
-------
|
|
888
|
-
endpoint_id : uuid.UUID
|
|
889
|
-
The endpoint UUID, returned unchanged to match `Globus._endpoint_id_root` signature.
|
|
890
|
-
str, None
|
|
891
|
-
The root path as a POSIX style string, or None if root_path is None.
|
|
892
|
-
|
|
893
|
-
Examples
|
|
894
|
-
--------
|
|
895
|
-
Providing a local root path on Windows
|
|
896
|
-
|
|
897
|
-
>>> glo = Globus()
|
|
898
|
-
>>> uid = glo.endpoints['local']['id']
|
|
899
|
-
>>> glo._sanitize_local(uid, 'C:\\Data')
|
|
900
|
-
UUID('50282ed5-3124-11ee-b977-482ae33bf6ca'), '/C/Data'
|
|
901
|
-
|
|
902
|
-
Path left unchanged on *nix systems or when endpoint ID is not local
|
|
903
|
-
|
|
904
|
-
>>> uid = UUID('c7c46cec-3124-11ee-bf50-482ae33bf6ca')
|
|
905
|
-
>>> glo._sanitize_local(uid, 'C:\\Data')
|
|
906
|
-
UUID('c7c46cec-3124-11ee-bf50-482ae33bf6ca'), 'C:\\Data'
|
|
907
|
-
|
|
908
|
-
"""
|
|
909
|
-
if not root_path:
|
|
910
|
-
return endpoint_id, None
|
|
911
|
-
# If the local root path is not explicitly a Windows Path and we're on windows, make sure
|
|
912
|
-
# it's converted correctly to a POSIX style path
|
|
913
|
-
if isinstance(root_path, str):
|
|
914
|
-
is_win = sys.platform in ('win32', 'cygwin')
|
|
915
|
-
if endpoint_id == self.endpoints['local']['id'] and is_win:
|
|
916
|
-
root_path = PureWindowsPath(root_path)
|
|
917
|
-
else:
|
|
918
|
-
root_path = PurePosixPath(root_path)
|
|
919
|
-
return endpoint_id, as_globus_path(root_path)
|
|
920
|
-
|
|
921
|
-
@ensure_logged_in
def transfer_data(self, data_path, source_endpoint, destination_endpoint,
                  recursive=False, **kwargs):
    """Transfer one or more paths between endpoints.

    At least one of the endpoints must be a server endpoint. Both file and directory paths
    may be provided, however if recursive is true, all paths must be directories.

    Parameters
    ----------
    data_path : str, list of str
        One or more data paths, relative to the endpoint root path.
    source_endpoint : str, uuid.UUID
        The name or UUID of the source endpoint.
    destination_endpoint : str, uuid.UUID
        The name or UUID of the destination endpoint.
    recursive : bool
        If true, transfer the contents of nested directories (NB: all data_paths must be
        directories).
    **kwargs
        See globus_sdk.TransferData.

    Returns
    -------
    uuid.UUID
        The Globus transfer ID.

    Raises
    ------
    ValueError
        An endpoint is neither a UUID nor the label of a registered endpoint, or a data
        path is relative and the corresponding endpoint has no root path.

    Examples
    --------
    Transfer two files (asynchronous)

    >>> glo = Globus()
    >>> files = ['file.ext', 'foo.bar']
    >>> task_id = glo.transfer_data(files, 'source_endpoint', 'destination_endpoint')

    Transfer a file (synchronous)

    >>> file = 'file.ext'
    >>> task_id = glo.run_task(lambda: glo.transfer_data(file, 'src_endpoint', 'dst_endpoint'))

    Transfer a folder (asynchronous)

    >>> folder = 'path/to/folder'
    >>> task_id = glo.transfer_data(
    ...     folder, 'source_endpoint', 'destination_endpoint', recursive=True)

    """
    # Resolve each endpoint exactly once.  This uses the same label/UUID rules as the
    # other methods and raises a descriptive ValueError for an unknown endpoint.
    source_id, source_root = self._endpoint_id_root(source_endpoint)
    destination_id, destination_root = self._endpoint_id_root(destination_endpoint)
    kwargs['source_endpoint'] = source_id
    kwargs['destination_endpoint'] = destination_id
    transfer_object = globus_sdk.TransferData(self.client, **kwargs)

    # add any number of items to the submission data
    for path in ensure_list(data_path):
        src = self._endpoint_path(path, source_root)
        dst = self._endpoint_path(path, destination_root)
        transfer_object.add_item(src, dst, recursive=recursive)
    response = self.client.submit_transfer(transfer_object)
    return UUID(response.data['task_id'])
|
|
982
|
-
|
|
983
|
-
@ensure_logged_in
def delete_data(self, data_path, endpoint, recursive=False, **kwargs):
    """Delete one or more paths within an endpoint.

    Both file and directory paths may be provided, however if recursive is true, all paths
    must be directories.

    Parameters
    ----------
    data_path : str, list of str
        One or more data paths, relative to the endpoint root path.
    endpoint : str, uuid.UUID
        The name or UUID of the endpoint.
    recursive : bool
        If true, delete the contents of nested directories (NB: all data_paths must be
        directories).
    **kwargs
        See globus_sdk.DeleteData.

    Returns
    -------
    uuid.UUID
        The Globus task ID of the submitted delete request.

    Raises
    ------
    ValueError
        The endpoint is neither a UUID nor the label of a registered endpoint, or a data
        path is relative and the endpoint has no root path.

    Examples
    --------
    Delete two files, ignoring those that don't exist (asynchronous)

    >>> glo = Globus()
    >>> files = ['file.ext', 'foo.bar']
    >>> task_id = glo.delete_data(files, 'endpoint_name', ignore_missing=True)

    Delete a file (synchronous)

    >>> task_id = glo.run_task(lambda: glo.delete_data('file.ext', 'endpoint_name'))

    Recursively delete a folder (asynchronous)

    >>> folder = 'path/to/folder'
    >>> task_id = glo.delete_data(folder, 'endpoint_name', recursive=True)

    """
    # Resolve the endpoint once.  This uses the same label/UUID rules as the other
    # methods and raises a descriptive ValueError for an unknown endpoint.
    endpoint_id, root_path = self._endpoint_id_root(endpoint)
    kwargs['endpoint'] = endpoint_id
    delete_object = globus_sdk.DeleteData(self.client, recursive=recursive, **kwargs)

    # add any number of items to the submission data
    for path in ensure_list(data_path):
        delete_object.add_item(self._endpoint_path(path, root_path))
    response = self.client.submit_delete(delete_object)
    return UUID(response.data['task_id'])
|
|
1036
|
-
|
|
1037
|
-
@ensure_logged_in
def ls(self, endpoint, path, remove_uuid=False, return_size=False, max_retries=1):
    """List the contents of an endpoint directory.

    NB: If you're using ls routinely when transferring or deleting files you're probably
    doing something wrong!

    Parameters
    ----------
    endpoint : uuid.UUID, str
        The Globus endpoint. May be a UUID or a key in the Globus.endpoints attribute.
    path : Path, PurePath, str
        The absolute or relative Globus path to list. Note: if endpoint is a UUID, the
        path must be absolute.
    remove_uuid : bool
        If True, remove the UUID from the returned filenames.
    return_size : bool
        If True, return the size of each listed file in bytes (None for non-file entries).
    max_retries : int
        The number of times to retry the remote operation before raising. Increasing this
        may mitigate unstable network issues.

    Returns
    -------
    list
        A list of PurePosixPath objects of the files and folders listed, or if return_size
        is True, tuples of PurePosixPath objects and the corresponding file sizes.

    """
    # Resolve the endpoint UUID and root path, then build the full Globus path
    endpoint_id, root_path = self._endpoint_id_root(endpoint)
    path = self._endpoint_path(path, root_path)

    # Query the endpoint, re-raising the error only once the final attempt has failed
    response = []
    for attempt in range(max_retries + 1):
        try:
            response = self.client.operation_ls(endpoint_id, path=path)
        except (GlobusConnectionError, GlobusAPIError) as ex:
            if attempt == max_retries:
                raise ex
        else:
            break

    listing = []
    for entry in response:
        name = entry['name']
        if remove_uuid:
            name = remove_uuid_string(name)
        fn = PurePosixPath(name)
        if not return_size:
            listing.append(fn)
            continue
        size = entry['size'] if entry['type'] == 'file' else None
        listing.append((fn, size))
    return listing
|
|
1089
|
-
|
|
1090
|
-
# TODO: allow to move all content of a directory with 'recursive' keyword in add_item
|
|
1091
|
-
@ensure_logged_in
def mv(self, source_endpoint, target_endpoint, source_paths, target_paths,
       timeout=None, **kwargs):
    """Move files from one endpoint to another.

    The transfer is submitted through `run_task`, so this call blocks until the task
    has finished.

    Parameters
    ----------
    source_endpoint : uuid.UUID, str
        The Globus source endpoint. May be a UUID or a key in the Globus.endpoints
        attribute.
    target_endpoint : uuid.UUID, str
        The Globus destination endpoint. May be a UUID or a key in the Globus.endpoints
        attribute.
    source_paths : list of str, pathlib.Path or pathlib.PurePath
        The absolute or relative Globus paths of source files to move. Note: if endpoint
        is a UUID, the path must be absolute.
    target_paths : list of str, Path or PurePath
        The absolute or relative Globus paths of destination files. Must contain one
        entry per source path. Note: if endpoint is a UUID, the path must be absolute.
    timeout : int
        Maximum time in seconds to wait for the task to complete.
    **kwargs
        Optional arguments for globus_sdk.TransferData.  By default verify_checksum=True,
        sync_level='checksum' and label='ONE globus'; any of these may be overridden.

    Returns
    -------
    uuid.UUID
        A Globus task ID.

    Raises
    ------
    ValueError
        The number of source paths differs from the number of target paths.

    """
    source_endpoint, source_root = self._endpoint_id_root(source_endpoint)
    target_endpoint, target_root = self._endpoint_id_root(target_endpoint)
    source_paths = [str(self._endpoint_path(path, source_root)) for path in source_paths]
    target_paths = [str(self._endpoint_path(path, target_root)) for path in target_paths]
    if len(source_paths) != len(target_paths):
        # zip would otherwise silently drop the unmatched paths
        raise ValueError(
            f'Expected one target path per source path, got {len(source_paths)} source '
            f'and {len(target_paths)} target paths')

    # Merge the defaults with the user options so that passing e.g. `label` overrides
    # the default instead of raising a duplicate keyword argument TypeError
    options = {'verify_checksum': True, 'sync_level': 'checksum',
               'label': 'ONE globus', **kwargs}
    tdata = globus_sdk.TransferData(
        self.client, source_endpoint, target_endpoint, **options)
    for source_path, target_path in zip(source_paths, target_paths):
        tdata.add_item(source_path, target_path)

    def wrapper():
        """Function to submit Globus transfer and return the resulting task ID."""
        response = self.client.submit_transfer(tdata)
        return response.get('task_id', None)

    return self.run_task(wrapper, timeout=timeout)
|
|
1138
|
-
|
|
1139
|
-
@ensure_logged_in
def run_task(self, globus_func, retries=3, timeout=None):
    """Block until a Globus task finishes and retry upon Network or REST Errors.

    globus_func needs to submit a task to the client and return a task_id.

    Parameters
    ----------
    globus_func : function, Callable
        A function that returns a Globus task ID, typically it will submit a transfer.
    retries : int
        The number of times to call globus_func again if a Globus error is raised.
    timeout : int
        Maximum time in seconds to wait for the task to complete.  NB: the counter
        advances once per `task_wait` call, each of which is asked to wait up to 5
        seconds, so the time elapsed before timing out may exceed this value.

    Returns
    -------
    uuid.UUID
        Globus task ID.

    Raises
    ------
    IOError
        Timed out waiting for task to complete, or the task finished with a status other
        than SUCCEEDED.

    TODO Add a quick fail option that returns when files missing, etc.
    TODO Add status logging

    """
    try:
        task_id = globus_func()
        assert is_uuid(task_id, versions=(1, 2)), 'invalid UUID returned'
        print(f'Waiting for Globus task {task_id} to complete')
        # While the task is active, print a dot after each wait. Timeout after timeout
        i = 0
        while not self.client.task_wait(task_id, timeout=5, polling_interval=1):
            print('.', end='')
            i += 1
            if timeout and i >= timeout:
                task = self.client.get_task(task_id)
                raise IOError(f'Globus task {task_id} timed out after {timeout} seconds, '
                              f'with task status {task["status"]}')
        task = self.client.get_task(task_id)
        if task['status'] == 'SUCCEEDED':
            # Sometime Globus sets the status to SUCCEEDED but doesn't truly finish.
            # Handle error thrown when querying task_successful_transfers too early
            try:
                # Materialize the results once so that counting them does not exhaust
                # an iterator before the individual transfers are logged below
                successful = list(self.client.task_successful_transfers(task_id))
                skipped = list(self.client.task_skipped_errors(task_id))
                print(f'\nGlobus task {task_id} completed.'
                      f'\nSkipped transfers: {len(skipped)}'
                      f'\nSuccessful transfers: {len(successful)}')
                for info in successful:
                    _logger.debug(f'{info["source_path"]} -> {info["destination_path"]}')
            except TransferAPIError:
                _logger.warning(f'\nGlobus task {task_id} SUCCEEDED but querying transfers '
                                f'was unsuccessful')
        else:
            raise IOError(f'Globus task finished unsuccessfully with status {task["status"]}')
        return self._ensure_uuid(task_id)
    except (GlobusAPIError, NetworkError, GlobusTimeoutError, GlobusConnectionError,
            GlobusConnectionTimeoutError) as e:
        if retries < 1:
            _logger.error('\nMax retries exceeded.')
            raise e
        else:
            _logger.debug('\nGlobus experienced a network error', exc_info=True)
    # if we reach this point without returning or erring, retry
    _logger.warning('\nGlobus experienced a network error, retrying.')
    # Return the outcome of the retry; otherwise a successful retry would yield None
    return self.run_task(globus_func, retries=(retries - 1), timeout=timeout)
|
|
1209
|
-
|
|
1210
|
-
@ensure_logged_in
async def task_wait_async(self, task_id, polling_interval=10, timeout=10):
    """Asynchronously wait until a Task is complete or fails, with a time limit.

    If the task status is ACTIVE after timeout, returns False, otherwise returns True.

    Parameters
    ----------
    task_id : str, uuid.UUID
        A Globus task UUID to wait on for completion.
    polling_interval : float
        Number of seconds between queries to Globus about the task status. Minimum 1
        second.  Capped at `timeout`.
    timeout : float
        Number of seconds to wait in total. Minimum 1 second.

    Returns
    -------
    bool
        True if status not ACTIVE before timeout. False if status still ACTIVE at timeout.

    Raises
    ------
    globus_sdk.GlobusSDKUsageError
        `polling_interval` or `timeout` is less than 1 second.

    Examples
    --------
    Asynchronously await a task to complete

    >>> await Globus().task_wait_async(task_id)

    """
    if polling_interval < 1:
        raise GlobusSDKUsageError('polling_interval must be at least 1 second')
    if timeout < 1:
        raise GlobusSDKUsageError('timeout must be at least 1 second')
    polling_interval = min(timeout, polling_interval)
    waited_time = 0
    while True:
        task = self.client.get_task(task_id)
        if task['status'] != 'ACTIVE':
            return True

        # check if we timed out before sleeping again
        waited_time += polling_interval
        if waited_time >= timeout:
            return False

        await asyncio.sleep(polling_interval)
|
|
1
|
+
"""A module for handling file operations through the Globus SDK.
|
|
2
|
+
|
|
3
|
+
Setup
|
|
4
|
+
-----
|
|
5
|
+
|
|
6
|
+
To set up Globus simply instantiate the `Globus` class for the first time and follow the prompts.
|
|
7
|
+
Providing a client name string to the constructor allows one to set up multiple Globus clients
|
|
8
|
+
(i.e. when switching between different Globus client IDs).
|
|
9
|
+
|
|
10
|
+
In order to use this function you need:
|
|
11
|
+
|
|
12
|
+
1. The client ID of an existing Globus Client (`see this tutorial`_).
|
|
13
|
+
2. Set up `Global Connect`_ on your local device.
|
|
14
|
+
3. Register your local device as an `endpoint`_ in your Globus Client.
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
To modify the settings for a pre-established client, call the `Globus.setup` method with the client
|
|
18
|
+
name:
|
|
19
|
+
|
|
20
|
+
>>> globus = Globus.setup('default')
|
|
21
|
+
|
|
22
|
+
You can update the list of endpoints using the `fetch_endpoints_from_alyx` method:
|
|
23
|
+
|
|
24
|
+
>>> globus = Globus('admin')
|
|
25
|
+
>>> remote_endpoints = globus.fetch_endpoints_from_alyx(alyx=AlyxClient())
|
|
26
|
+
|
|
27
|
+
The endpoints are stored in the `endpoints` property
|
|
28
|
+
|
|
29
|
+
>>> print(globus.endpoints.keys())
|
|
30
|
+
>>> print(globus.endpoints['local'])
|
|
31
|
+
|
|
32
|
+
.. _see this tutorial: https://globus-sdk-python.readthedocs.io/en/stable/tutorial.html
|
|
33
|
+
.. _Global Connect: https://www.globus.org/globus-connect-personal
|
|
34
|
+
.. _endpoint: https://app.globus.org/
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
Examples
|
|
38
|
+
--------
|
|
39
|
+
Get the full Globus file path
|
|
40
|
+
|
|
41
|
+
>>> relative_path = 'subject/2020-01-01/001/alf/_ibl_trials.table.pqt'
|
|
42
|
+
>>> full_path = globus.to_address(relative_path, 'flatiron_cortexlab')
|
|
43
|
+
|
|
44
|
+
Log in with a limited time token
|
|
45
|
+
|
|
46
|
+
>>> globus = Globus('admin')
|
|
47
|
+
>>> globus.login(stay_logged_in=False)
|
|
48
|
+
|
|
49
|
+
Log out of Globus, revoking and deleting all tokens
|
|
50
|
+
|
|
51
|
+
>>> globus.logout()
|
|
52
|
+
>>> assert not globus.is_logged_in
|
|
53
|
+
|
|
54
|
+
Asynchronously transfer data between Alyx repositories
|
|
55
|
+
|
|
56
|
+
>>> alyx = AlyxClient()
|
|
57
|
+
>>> glo = Globus('admin')
|
|
58
|
+
>>> glo.add_endpoint('flatiron_cortexlab', alyx=alyx)
|
|
59
|
+
>>> glo.add_endpoint('cortex_lab_SR', alyx=alyx)
|
|
60
|
+
>>> task_id = glo.transfer_data('path/to/file', 'flatiron_cortexlab', 'cortex_lab_SR')
|
|
61
|
+
|
|
62
|
+
Synchronously transfer data to an alternate local location
|
|
63
|
+
|
|
64
|
+
>>> from functools import partial
|
|
65
|
+
>>> root_path = '/path/to/new/location'
|
|
66
|
+
>>> glo.add_endpoint(get_local_endpoint_id(), label='alternate_local', root_path=root_path)
|
|
67
|
+
>>> folder = 'camera/ZFM-01867/2021-03-23/002' # An example folder to download
|
|
68
|
+
>>> task = partial(glo.transfer_data, folder, 'integration', 'alternate_local',
|
|
69
|
+
... label='alternate data', recursive=True)
|
|
70
|
+
>>> task_id = glo.run_task(task) # Submit task to Globus and await completion
|
|
71
|
+
|
|
72
|
+
Temporarily change local data root path and synchronously download file
|
|
73
|
+
|
|
74
|
+
>>> glo.endpoints['local']['root_path'] = '/path/to/new/location'
|
|
75
|
+
>>> file = glo.download_file('path/to/file.ext', 'source_endpoint')
|
|
76
|
+
Path('/path/to/new/location/path/to/file.ext')
|
|
77
|
+
|
|
78
|
+
Await multiple tasks to complete by passing a list of Globus transfer IDs
|
|
79
|
+
|
|
80
|
+
>>> import asyncio
|
|
81
|
+
>>> tasks = [asyncio.create_task(globus.task_wait_async(task_id)) for task_id in task_ids]
|
|
82
|
+
>>> success = asyncio.run(asyncio.gather(*tasks))
|
|
83
|
+
|
|
84
|
+
"""
|
|
85
|
+
import os
|
|
86
|
+
import re
|
|
87
|
+
import sys
|
|
88
|
+
import asyncio
|
|
89
|
+
import logging
|
|
90
|
+
from uuid import UUID
|
|
91
|
+
from datetime import datetime
|
|
92
|
+
from pathlib import Path, PurePosixPath, PurePath, PureWindowsPath
|
|
93
|
+
import warnings
|
|
94
|
+
from functools import partial, wraps
|
|
95
|
+
|
|
96
|
+
import globus_sdk
|
|
97
|
+
from globus_sdk import TransferAPIError, GlobusAPIError, NetworkError, GlobusTimeoutError, \
|
|
98
|
+
GlobusConnectionError, GlobusConnectionTimeoutError, GlobusSDKUsageError, NullAuthorizer
|
|
99
|
+
from iblutil.io import params as iopar
|
|
100
|
+
from iblutil.util import ensure_list
|
|
101
|
+
|
|
102
|
+
from one.alf.spec import is_uuid
|
|
103
|
+
from one.alf.path import remove_uuid_string
|
|
104
|
+
import one.params
|
|
105
|
+
from one.webclient import AlyxClient
|
|
106
|
+
from .base import DownloadClient, load_client_params, save_client_params
|
|
107
|
+
|
|
108
|
+
# Names exported by `from one.remote.globus import *`
__all__ = ['Globus', 'get_lab_from_endpoint_id', 'as_globus_path']
# Module-level logger, named after this module
_logger = logging.getLogger(__name__)
CLIENT_KEY = 'globus'
"""str: The default key in the remote settings file"""

# Every field defaults to None
DEFAULT_PAR = {'GLOBUS_CLIENT_ID': None, 'local_endpoint': None, 'local_path': None}
"""dict: The default Globus parameter fields"""

# Keys are the "nice" statuses; values are the Globus statuses grouped under them.
# NOTE(review): 'INACTIVE' maps to a bare str whereas the other values are tuples, so a
# membership test (`status in STATUS_MAP['INACTIVE']`) would match substrings - confirm
# against the code that consumes this map before relying on it.
STATUS_MAP = {
    'ACTIVE': ('QUEUED', 'ACTIVE', 'GC_NOT_CONNECTED', 'UNKNOWN'),
    'FAILED': ('ENDPOINT_ERROR', 'PERMISSION_DENIED', 'CONNECT_FAILED'),
    'INACTIVE': 'PAUSED_BY_ADMIN'}
"""dict: A map of Globus status to "nice" status"""
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def ensure_logged_in(func):
    """Decorate a Globus method so that `self.login()` is called before it runs.

    The returned wrapper calls the instance's `login` method with no arguments and then
    delegates to the wrapped method, passing all arguments through and returning its
    result.  What `login` does when already logged in, or when a token has expired, is
    determined by that method and not by this decorator.

    Parameters
    ----------
    func : function
        Method to wrap (e.g. Globus.transfer_data).

    Returns
    -------
    function
        Handle to wrapped method, carrying the metadata of `func`.

    """
    def _login_then_call(self, *args, **kwargs):
        self.login()
        result = func(self, *args, **kwargs)
        return result

    return wraps(func)(_login_then_call)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _setup(par_id=None, login=True, refresh_tokens=True):
    """Sets up Globus as a backend for ONE functions.

    Interactively prompts (via `input`) for the client profile name, the Globus client ID, the
    local endpoint ID and the local endpoint path, then saves the result under the profile name
    in the Globus section of the remote settings file.

    Parameters
    ----------
    par_id : str
        Parameter profile name to set up e.g. 'default', 'admin'. If empty, the user is prompted.
    login : bool
        If true, the user is also asked to authorize via `get_token` and the returned token
        fields are stored with the parameters.
    refresh_tokens : bool
        Passed to `get_token`; only used when `login` is true.

    Returns
    -------
    IBLParams
        A set of Globus parameters.

    Raises
    ------
    ValueError
        No client ID was entered, the client ID is not a UUID, or the local endpoint ID is not
        a version 1 or 2 UUID.

    """
    print('Setting up Globus parameter file. See docstring for help.')
    if not par_id:
        default_par_id = 'default'
        par_id = input(
            f'Enter name for this client or press Enter to keep value "{default_par_id}": '
        )
        par_id = par_id.strip() or default_par_id

    # Read existing globus params if present
    globus_pars = iopar.as_dict(load_client_params(CLIENT_KEY, assert_present=False) or {})
    pars = {**DEFAULT_PAR, **globus_pars.get(par_id, {})}

    # Set GLOBUS_CLIENT_ID
    current_id = pars['GLOBUS_CLIENT_ID']
    if current_id:
        prompt = (f'Found Globus client ID in parameter file ({current_id}). '
                  'Press Enter to keep it, or enter a new ID here: ')
        pars['GLOBUS_CLIENT_ID'] = input(prompt).strip() or current_id
    else:
        new_id = input('Please enter the Globus client ID: ').strip()
        if not new_id:
            raise ValueError('Globus client ID is a required field')
        pars['GLOBUS_CLIENT_ID'] = new_id
    client_id = pars['GLOBUS_CLIENT_ID']
    if not is_uuid(client_id):
        # Exceptions do not interpolate %-style arguments, so format the message explicitly
        raise ValueError(f'Invalid Globus client ID "{client_id}"')

    # Find and set local ID
    message = 'Please enter the local endpoint ID'
    try:
        default_endpoint = str(pars['local_endpoint'] or get_local_endpoint_id())
        message += f' (default: {default_endpoint})'
    except (AssertionError, ValueError):
        # Missing or malformed Globus Connect ID file: fall back to asking the user
        default_endpoint = ''
        warnings.warn(
            'Cannot find local endpoint ID. Beware that this might mean that Globus Connect '
            'is not set up properly.')
    pars['local_endpoint'] = input(message + ':').strip() or default_endpoint
    if not is_uuid(pars['local_endpoint'], (1, 2)):
        raise ValueError('Globus local endpoint ID must be a UUID version 1 or 2')

    # Check for local path
    message = 'Please enter the local endpoint path'
    local_path = pars['local_path'] or one.params.get(silent=True).CACHE_DIR
    message += f' (default: {local_path})'
    pars['local_path'] = input(message + ':').strip() or local_path

    if login:
        # Log in manually and get refresh token to avoid having to login repeatedly
        token = get_token(pars['GLOBUS_CLIENT_ID'], refresh_tokens=refresh_tokens)
        pars.update(token)

    globus_pars[par_id] = pars
    save_client_params(globus_pars, client_key=CLIENT_KEY)
    print('Finished setup.')
    return iopar.from_dict(pars)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def get_token(client_id, refresh_tokens=True):
    """Get a Globus authentication token.

    Starts a native-app OAuth2 flow, prints the authorization URL and waits for the user to
    paste the resulting code. Entering nothing or "c" (any case) cancels.

    Parameters
    ----------
    client_id : str
        A Globus client ID.
    refresh_tokens : bool
        If true, requests a refresh token for repeat logins.

    Returns
    -------
    dict
        A dict containing the keys {'refresh_token', 'access_token', 'expires_at_seconds'}.
        All values are None if the user cancelled.

    """
    fields = ('refresh_token', 'access_token', 'expires_at_seconds')
    auth_client = globus_sdk.NativeAppAuthClient(client_id)
    auth_client.oauth2_start_flow(refresh_tokens=bool(refresh_tokens))
    url = auth_client.oauth2_get_authorize_url()
    print('To get a new token, go to this URL and login: {0}'.format(url))
    code = input('Enter the code you get after login here (press "c" to cancel): ').strip()

    # Guard clause: empty input or "c" means the user backed out
    cancelled = not code or code.casefold() == 'c'
    if cancelled:
        return dict.fromkeys(fields)

    response = auth_client.oauth2_exchange_code_for_tokens(code)
    transfer_tokens = response.by_resource_server['transfer.api.globus.org']
    return {key: transfer_tokens.get(key) for key in fields}
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _remove_token_fields(pars):
    """Return a copy of a parameters object with the token fields stripped.

    Parameters
    ----------
    pars : IBLParams, dict, None
        The Globus parameters containing token fields.

    Returns
    -------
    IBLParams, None
        A copy of the params without the token fields, or None if `pars` is None.

    """
    if pars is None:
        return pars
    token_keys = ('refresh_token', 'access_token', 'expires_at_seconds')
    kept = {}
    for key, value in iopar.as_dict(pars).items():
        if key in token_keys:
            continue  # drop credentials
        kept[key] = value
    return iopar.from_dict(kept)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _save_globus_params(pars, client_name):
    """Write one client's Globus parameters to the remote settings file.

    The existing Globus section (if any) is loaded, the entry for `client_name` is replaced,
    and the whole section is saved back.

    Parameters
    ----------
    pars : IBLParams, dict
        The Globus client parameters to save.
    client_name : str
        The Globus client name, e.g. 'default'.

    """
    stored = load_client_params(CLIENT_KEY, assert_present=False) or {}
    all_clients = iopar.as_dict(stored)
    all_clients[client_name] = iopar.as_dict(pars)
    save_client_params(all_clients, CLIENT_KEY)
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def get_local_endpoint_id():
    """Extracts the ID of the local Globus Connect endpoint.

    Reads `client-id.txt` from `%LOCALAPPDATA%/Globus Connect` on Windows/Cygwin, otherwise
    from `~/.globusonline/lta`.

    Returns
    -------
    uuid.UUID
        The local Globus endpoint ID.

    Raises
    ------
    AssertionError
        The ID file does not exist or does not contain a valid UUID.

    """
    msg = ('Cannot find local endpoint ID, check if Globus Connect is set up correctly, '
           '{} exists and contains a UUID.')
    if sys.platform in ('win32', 'cygwin'):
        id_path = Path(os.environ['LOCALAPPDATA']).joinpath('Globus Connect')
    else:
        id_path = Path.home().joinpath('.globusonline', 'lta')

    id_file = id_path.joinpath('client-id.txt')
    # Raise explicitly instead of using `assert` so the checks are not stripped under
    # `python -O`; AssertionError is kept because callers in this module catch it.
    if not id_file.exists():
        raise AssertionError(msg.format(id_file))
    local_id = id_file.read_text().strip()
    try:
        endpoint_id = UUID(local_id)
    except ValueError as ex:
        # File present but empty or malformed: report it the same way as a missing file
        raise AssertionError(msg.format(id_file)) from ex
    _logger.debug(f'Found local endpoint ID in Globus Connect settings {local_id}')
    return endpoint_id
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def get_local_endpoint_paths():
    """Extracts the local endpoint paths accessible by Globus Connect.

    NB: This is only supported on Linux. On Windows/Cygwin a message is printed and an empty
    list is returned. Elsewhere the comma-separated contents of
    `~/.globusonline/lta/config-paths` are read; a warning is issued if the file is missing.

    Returns
    -------
    list of pathlib.Path
        Local endpoint paths set in Globus Connect.

    """
    if sys.platform in ('win32', 'cygwin'):
        print('On windows the local Globus path needs to be entered manually')
        return []

    path_file = Path.home().joinpath('.globusonline', 'lta', 'config-paths')
    if not path_file.exists():
        msg = ('Cannot find local endpoint path, check if Globus Connect is set up correctly, '
               '{} exists and contains a valid path.')
        warnings.warn(msg.format(path_file))
        return []

    # Empty fields (e.g. from a trailing comma or an empty file) are skipped
    entries = path_file.read_text().strip().split(',')
    local_paths = [Path(entry) for entry in entries if entry]
    _logger.debug('Found local endpoint paths in Globus Connect settings')
    return local_paths
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def get_lab_from_endpoint_id(endpoint=None, alyx=None):
    """Extracts lab names associated with a given endpoint UUID.

    Finds the lab names that are associated to data repositories with the provided Globus endpoint
    UUID.

    Parameters
    ----------
    endpoint : uuid.UUID, str
        Endpoint UUID. Optional: if not given, the local endpoint UUID is looked up with
        `get_local_endpoint_id`.
    alyx : one.webclient.AlyxClient
        An instance of AlyxClient to use. If not given a silent AlyxClient is instantiated.

    Returns
    -------
    list of str, None
        The lab names associated with the endpoint UUID, or None if no lab matched.

    """
    alyx = alyx or AlyxClient(silent=True)
    if not endpoint:
        endpoint = get_local_endpoint_id()
    labs = alyx.rest('labs', 'list', django=f'repositories__globus_endpoint_id,{str(endpoint)}')
    # Handle the empty result explicitly so no local is left unbound on that path
    if not len(labs):
        return None
    return [la['name'] for la in labs]
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def as_globus_path(path):
    """Convert a path into one suitable for the Globus TransferClient.

    Parameters
    ----------
    path : pathlib.Path, pathlib.PurePath, str
        A path to convert to a Globus-compliant path string.

    Returns
    -------
    str
        A formatted path string.

    Notes
    -----
    - If using tilde in path, the home folder of your Globus Connect instance must be the same as
      the OS home dir.
    - If validating a path for another system ensure the input path is a PurePath, in particular,
      on a Linux computer a remote Windows should first be made into a PureWindowsPath.

    Examples
    --------
    A Windows path (on Windows OS)

    >>> as_globus_path('E:\\FlatIron\\integration')
    '/E/FlatIron/integration'

    When explicitly a POSIX path, remains unchanged

    >>> as_globus_path(PurePosixPath('E:\\FlatIron\\integration'))
    'E:\\FlatIron\\integration'

    A relative POSIX path (on *nix OS)

    >>> as_globus_path('../data/integration')
    '/mnt/data/integration'

    A valid Globus path remains unchanged

    >>> as_globus_path('/E/FlatIron/integration')
    '/E/FlatIron/integration'

    """
    # Both flags describe the *input* object, so they are fixed before any str -> Path cast.
    # NOTE(review): pathlib.Path subclasses PurePath, so a concrete Path input is never
    # resolved below - only str input is. Confirm this is intended.
    given_as_path = isinstance(path, PurePath)
    windows_style = isinstance(path, PureWindowsPath) or sys.platform in ('win32', 'cygwin')
    if isinstance(path, str):
        path = Path(path)

    # On Windows a path is already Globus-compliant when it looks like '/<DRIVE LETTER>/...';
    # elsewhere any absolute path is.
    if windows_style:
        already_valid = re.match(r'/[A-Z]($|/)', path.as_posix())
    else:
        already_valid = path.is_absolute()
    if already_valid:
        return path.as_posix()

    if not given_as_path:
        path = path.resolve()
    if not path.drive:
        return str(path)
    # 'C:/foo' -> '/C/foo': drop the first colon and prefix a slash
    return '/' + str(path.as_posix().replace(':', '', 1))
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
class Globus(DownloadClient):
|
|
428
|
+
|
|
429
|
+
def __init__(self, client_name='default', connect=True, headless=False):
    """Wrapper for managing files on Globus endpoints.

    Loads the saved parameters for `client_name`; if none exist, interactive setup is run
    (or an error raised in headless mode). The local endpoint is always registered under the
    label 'local'.

    Parameters
    ----------
    client_name : str
        Parameter profile name to load e.g. 'default', 'admin'.
    connect : bool
        Whether to create the Globus SDK client on init.
    headless : bool
        If true, raises an error if unable to log in automatically. Otherwise the user is
        prompted to enter information.

    Raises
    ------
    RuntimeError
        No parameters are saved for `client_name` and `headless` is true.

    Examples
    --------
    Instantiate without authentication

    >>> globus = Globus(connect=False)

    Instantiate without user prompts

    >>> globus = Globus('server', headless=True)

    """
    super().__init__()
    self.client = None
    self.client_name = client_name
    self.headless = headless

    # Load saved parameters; fall back to interactive setup when the profile is unknown
    self._pars = load_client_params(f'{CLIENT_KEY}.{client_name}', assert_present=False)
    if self._pars is None and self.headless:
        raise RuntimeError(f'Globus not set up for client "{self.client_name}"')
    if self._pars is None:
        self._pars = _setup(self.client_name, login=False)

    if connect:
        self.login()

    # Register the local endpoint: ID first, then its root path
    local = {'id': UUID(self._pars.local_endpoint)}
    self.endpoints = {'local': local}
    _logger.info('Adding local endpoint.')
    local['root_path'] = self._pars.local_path
|
|
473
|
+
|
|
474
|
+
@property
def is_logged_in(self):
    """bool: True if a client exists, has an authorization header and its token is valid."""
    if not self.client:
        # No SDK client yet; return a real bool rather than the falsy client object
        return False
    has_token = self.client.authorizer.get_authorization_header() is not None
    # Token expiry is only checked when there is a token to check
    return has_token and not self._token_expired
|
|
479
|
+
|
|
480
|
+
@property
def _token_expired(self):
    """bool: True if token absent or expired; False if valid.

    A token is treated as expired when fewer than 60 seconds remain before the stored
    'expires_at_seconds' value.

    Note the 'expires_at_seconds' may be greater than `Globus.client.authorizer.expires_at` if
    using refresh tokens. The `login` method will always refresh the token if still valid.
    """
    import time  # local import: not otherwise required by this module

    try:
        authorizer = getattr(self.client, 'authorizer', None)
        has_refresh_token = self._pars.as_dict().get('refresh_token') is not None
        if has_refresh_token and isinstance(authorizer, globus_sdk.RefreshTokenAuthorizer):
            self.client.authorizer.ensure_valid_token()  # Fetch new refresh token if needed
    except Exception as ex:
        # Best effort only: a failed refresh simply leaves the stored expiry to decide
        _logger.debug('Failed to refresh token: %s', ex)
    # A missing or None expiry counts as already expired
    expires_at_seconds = getattr(self._pars, 'expires_at_seconds', 0) or 0
    # Compare against the true POSIX time. `datetime.utcnow().timestamp()` interprets the
    # naive UTC datetime as local time and is therefore wrong by the local UTC offset.
    return expires_at_seconds - time.time() < 60
|
|
496
|
+
|
|
497
|
+
def login(self, stay_logged_in=None):
    """Authenticate Globus client.

    Does nothing if already logged in. Otherwise, if the stored parameters lack any token
    field or the token is considered expired, a new token is requested from the user (or an
    error raised in headless mode) and saved before the SDK client is created.

    Parameters
    ----------
    stay_logged_in : bool, optional
        If True, use refresh token to remain logged in for longer. If False, use an auth
        token without the option of refreshing when expired. If not specified, treated as
        True.

    Raises
    ------
    RuntimeError
        New authorization is required but the instance is in headless mode.

    """
    if self.is_logged_in:
        _logger.debug('Already logged in')
        return

    if stay_logged_in is None:
        stay_logged_in = True

    # With refresh tokens the session only counts as expired when there is no refresh token;
    # without them the access token's own expiry decides.
    if stay_logged_in:
        expired = self._pars.as_dict().get('refresh_token') is None
    else:
        expired = bool(self._token_expired)

    token_fields = {'refresh_token', 'access_token', 'expires_at_seconds'}
    missing_fields = not token_fields.issubset(iopar.as_dict(self._pars))
    if missing_fields or expired:
        if self.headless:
            raise RuntimeError(f'Globus not authenticated for client "{self.client_name}"')
        token = get_token(self._pars.GLOBUS_CLIENT_ID, refresh_tokens=stay_logged_in)
        if not any(token.values()):
            _logger.debug('Login cancelled by user')
            return
        # Merge the fresh token into the parameters and persist them
        self._pars = iopar.from_dict({**self._pars.as_dict(), **token})
        _save_globus_params(self._pars, self.client_name)

    # Ready to authenticate
    self._authenticate(stay_logged_in)
|
|
532
|
+
|
|
533
|
+
def logout(self):
    """Revoke any tokens and delete them from the client and parameter file."""
    authorizer = self.client.authorizer if self.client else None
    if authorizer and not isinstance(authorizer, NullAuthorizer):
        # NOTE(review): oauth2_revoke_token is called without a token argument and assumes
        # the authorizer has an `auth_client` attribute - confirm against the Globus SDK.
        authorizer.auth_client.oauth2_revoke_token()
        del self.client.authorizer
        self.client.authorizer = NullAuthorizer()

    # Strip the token fields from the saved profile (if one exists) ...
    stored = load_client_params(f'{CLIENT_KEY}.{self.client_name}', assert_present=False)
    if stored:
        _save_globus_params(_remove_token_fields(stored), self.client_name)
    # ... and from the in-memory copy
    self._pars = _remove_token_fields(self._pars)
|
|
543
|
+
|
|
544
|
+
def _authenticate(self, stay_logged_in=None):
    """Authenticate and instantiate Globus SDK client.

    Parameters
    ----------
    stay_logged_in : bool, optional
        If not False and a refresh token is stored, a refresh-token authorizer is used.
        Otherwise the stored access token is used directly.

    Raises
    ------
    RuntimeError
        An access-token login was attempted with an expired token.

    """
    use_refresh = self._pars.as_dict().get('refresh_token') and stay_logged_in is not False
    if not use_refresh:
        if stay_logged_in is True:
            warnings.warn('No refresh token. Please log out and back in to remain logged in.')
        if self._token_expired is not False:
            raise RuntimeError(f'token no longer valid for client "{self.client_name}"')
        authorizer = globus_sdk.AccessTokenAuthorizer(self._pars.access_token)
    else:
        auth_client = globus_sdk.NativeAppAuthClient(self._pars.GLOBUS_CLIENT_ID)
        auth_client.oauth2_start_flow(refresh_tokens=True)
        # The callback persists any newly fetched token
        authorizer = globus_sdk.RefreshTokenAuthorizer(
            self._pars.refresh_token, auth_client,
            on_refresh=self._save_refresh_token_callback)
    self.client = globus_sdk.TransferClient(authorizer=authorizer)
|
|
558
|
+
|
|
559
|
+
def _save_refresh_token_callback(self, res):
    """Save a token fetched by the refresh token authorizer.

    This is a callback for the globus_sdk.RefreshTokenAuthorizer to update the parameters.
    Only the first resource server's token in the response is used; nothing happens if the
    response is empty.

    Parameters
    ----------
    res : globus_sdk.services.auth.OAuthTokenResponse
        An Open Authorization response object.

    """
    if not res:
        return
    token = next(iter(res.by_resource_server.values()), None)
    if not token:
        return
    token_fields = {'refresh_token', 'access_token', 'expires_at_seconds'}
    current = self._pars.as_dict()
    refreshed = {key: value for key, value in token.items() if key in token_fields}
    self._pars = iopar.from_dict({**current, **refreshed})
    _save_globus_params(self._pars, self.client_name)
|
|
576
|
+
|
|
577
|
+
def fetch_endpoints_from_alyx(self, alyx=None, overwrite=False):
    """Update endpoints property with Alyx Globus data repositories.

    Every data repository returned by Alyx that has a Globus endpoint ID is passed to
    `add_endpoint`, labelled with the repository name.

    Parameters
    ----------
    alyx : one.webclient.AlyxClient
        An optional AlyxClient.
    overwrite : bool
        Whether existing endpoint with the same label should be replaced.

    Returns
    -------
    dict
        The entries of the endpoints property whose label matches an Alyx repository name.

    """
    if not alyx:
        alyx = AlyxClient()
    repositories = alyx.rest('data-repository', 'list')
    # Repositories without a Globus endpoint ID are ignored
    for repo in (r for r in repositories if r['globus_endpoint_id']):
        self.add_endpoint(
            UUID(repo['globus_endpoint_id']),
            label=repo['name'],
            root_path=repo['globus_path'],
            overwrite=overwrite,
        )
    repo_names = {repo['name'] for repo in repositories}
    return {label: self.endpoints[label] for label in self.endpoints if label in repo_names}
|
|
604
|
+
|
|
605
|
+
def to_address(self, data_path, endpoint):
    """Get full path for a given endpoint.

    Parameters
    ----------
    data_path : Path, PurePath, str
        An absolute or relative POSIX path
    endpoint : str, uuid.UUID
        An endpoint label or UUID.

    Returns
    -------
    str
        A complete path string formatted for Globus.

    Examples
    --------
    >>> glo = Globus()
    >>> glo.add_endpoint('0ec47586-3a19-11eb-b173-0ee0d5d9299f',
    ...                  label='foobar', root_path='/foo')
    >>> glo.to_address('bar/baz.ext', 'foobar')
    '/foo/bar/baz.ext'

    """
    # Only the root path is needed here; the endpoint UUID is discarded
    _endpoint_id, endpoint_root = self._endpoint_id_root(endpoint)
    address = self._endpoint_path(data_path, endpoint_root)
    return address
|
|
631
|
+
|
|
632
|
+
@ensure_logged_in
def download_file(self, file_address, source_endpoint, recursive=False, **kwargs):
    """Download one or more files via Globus.

    Runs a transfer from `source_endpoint` to the 'local' endpoint via `run_task`, then maps
    the successfully transferred destination paths back onto the local root path.

    Parameters
    ----------
    file_address : str, list of str
        One or more relative POSIX paths to download.
    source_endpoint : str, uuid.UUID
        The source endpoint name or uuid.
    recursive : bool
        If true, transfer the contents of nested directories (NB: all data_paths must be
        directories).
    **kwargs
        See Globus.transfer_data. 'label' defaults to 'ONE download'.

    Returns
    -------
    pathlib.Path, list of pathlib.Path
        The downloaded file path(s). If recursive is True, a list is always returned.

    Notes
    -----
    - Assumes that the local endpoint root path is NOT POSIX style on Windows.

    TODO Return None for failed files

    Examples
    --------
    Download a single file

    >>> file = Globus().download_file('path/to/file', '0ec47586-3a19-11eb-b173-0ee0d5d9299f')

    Download multiple files and verify checksum

    >>> files = ['relative/file/path.ext', 'foo.bar']
    >>> files = Globus().download_file(files, 'source_endpoint_name', verify_checksum=True)

    Download a folder

    >>> files = Globus().download_file('folder/path', 'source_endpoint_name', recursive=True)

    """
    return_single = isinstance(file_address, str) and recursive is False
    kwargs.setdefault('label', 'ONE download')
    task = partial(self.transfer_data, file_address, source_endpoint, 'local',
                   recursive=recursive, **kwargs)
    task_id = self.run_task(task)

    root = Path(self.endpoints['local']['root_path'])
    # Length of the Globus-formatted root, used to cut it off each destination path
    idx = len(self._endpoint_path(PurePosixPath(as_globus_path(root))))
    files = [
        info['destination_path'][idx:].strip('/')
        for info in self.client.task_successful_transfers(task_id)
    ]

    if return_single:
        file = root / files[0]
        assert file.exists()
        return file

    # Order files by input
    def _best_match(x):
        """Return the index of the input file that best matches downloaded file."""
        spans = []
        for frag in ensure_list(file_address):
            spans.append(len(frag) / len(x) if frag in x else 0)
        return spans.index(max(spans))

    ordered = sorted(files, key=_best_match)
    files = [root.joinpath(rel_path) for rel_path in ordered]
    assert all(map(Path.exists, filter(None, files)))
    return files
|
|
699
|
+
|
|
700
|
+
@staticmethod
def setup(client_name='default', **kwargs):
    """Setup a Globus client.

    In order to use this function you need:

    1. The client ID of an existing Globus Client (`see this tutorial`_).
    2. Set up `Globus Connect`_ on your local device.
    3. Register your local device as an `endpoint`_ in your Globus Client.

    .. _see this tutorial: https://globus-sdk-python.readthedocs.io/en/stable/tutorial.html
    .. _Globus Connect: https://www.globus.org/globus-connect-personal
    .. _endpoint: https://app.globus.org/

    Parameters
    ----------
    client_name : str
        Parameter profile name to set up e.g. 'default', 'admin'.
    **kwargs
        Optional Globus constructor arguments.

    Returns
    -------
    Globus
        A new Globus client object.

    """
    # Run the interactive prompts without logging in, then build the client from the
    # freshly saved profile.
    _setup(client_name, login=False)
    new_client = Globus(client_name, **kwargs)
    return new_client
|
|
729
|
+
|
|
730
|
+
def add_endpoint(self, endpoint, label=None, root_path=None, overwrite=False, alyx=None):
    """Add an endpoint to the Globus instance to be used by other functions.

    If the label is already registered and `overwrite` is False, an error is logged and the
    existing entry is left unchanged.

    Parameters
    ----------
    endpoint : uuid.UUID, str
        The endpoint UUID or database repository name of the endpoint.
    label : str
        Label to access the endpoint. If endpoint is UUID this has to be set, otherwise is
        optional.
    root_path : str, pathlib.Path, pathlib.PurePath
        File path to be accessed by Globus on the endpoint.
    overwrite : bool
        Whether existing endpoint with the same label should be replaced.
    alyx : one.webclient.AlyxClient
        An AlyxClient instance for looking up repository information.

    Raises
    ------
    ValueError
        `endpoint` is a version 1 or 2 UUID and no label was given.

    """
    if not is_uuid(endpoint, versions=(1, 2)):
        # Not a MAC address UUID: treat the input as a repository name and look it up
        repo = self.repo_from_alyx(endpoint, alyx=alyx)
        endpoint_id = UUID(repo['globus_endpoint_id'])
        root_path = root_path or repo['globus_path']
        label = label or endpoint
    elif label is None:
        raise ValueError('If "endpoint" is a UUID, "label" cannot be None.')
    else:
        endpoint_id = self._ensure_uuid(endpoint)

    if label in self.endpoints.keys() and overwrite is False:
        _logger.error(f'An endpoint called "{label}" already exists. Choose a different label '
                      'or set overwrite=True')
        return

    self.endpoints[label] = {'id': endpoint_id}
    if root_path:
        self.endpoints[label]['root_path'] = root_path
|
|
764
|
+
|
|
765
|
+
@staticmethod
def _endpoint_path(path, root_path=None):
    """Given an absolute path or relative path with a root path, return a Globus path str.

    Note: Paths must be POSIX or Globus-compliant paths. In other words for Windows systems
    the input root_path or absolute path must be passed through `as_globus_path` before
    calling this method.

    TODO include globus_path_from_dataset

    Parameters
    ----------
    path : Path, PurePath, str
        An absolute or relative POSIX path
    root_path : Path, PurePath, str
        A root path to prepend. Optional if `path` is absolute.

    Returns
    -------
    str
        A path string formatted for Globus.

    See Also
    --------
    as_globus_path

    Raises
    ------
    ValueError
        Path was not absolute and no root path was given. An absolute path must start with
        a slash on *nix systems.

    """
    if isinstance(path, str):
        path = PurePosixPath(path)
    # The root is prepended unless the path string already begins with it.
    # NOTE(review): this is a plain string-prefix test, so '/foobar' counts as being under
    # root '/foo' - confirm that is acceptable.
    under_root = str(path).startswith(str(root_path)) if root_path else True
    if not under_root:
        path = PurePosixPath(root_path) / path
    if path.is_absolute():
        return as_globus_path(path)
    raise ValueError(f'{path} is relative and no root_path defined')
|
|
805
|
+
|
|
806
|
+
@staticmethod
def _ensure_uuid(uid):
    """Return the input as a UUID object, casting it if required.

    Parameters
    ----------
    uid : str, uuid.UUID
        A UUID to cast to UUID object.

    Returns
    -------
    uuid.UUID
        A UUID object. An input that is already a UUID is returned as is.

    """
    if isinstance(uid, UUID):
        return uid
    return UUID(uid)
|
|
822
|
+
|
|
823
|
+
def _endpoint_id_root(self, endpoint):
    """Return endpoint UUID and root path from a given endpoint identifier.

    A registered label takes precedence. Otherwise the input must be a UUID, in which case
    the root path of the first registered endpoint with that UUID (and a root path) is used.

    Parameters
    ----------
    endpoint : str, uuid.UUID
        An endpoint label or UUID.

    Returns
    -------
    uuid.UUID
        The endpoint UUID.
    str, None
        The POSIX-style endpoint root path (if defined).

    Raises
    ------
    ValueError
        `endpoint` is neither a registered label nor a UUID.

    Warnings
    --------
    UserWarning
        If endpoint UUID is associated with multiple root paths, it is better to provide the
        endpoint label to avoid this warning and to ensure the intended root path is returned.

    See Also
    --------
    Globus._sanitize_local

    """
    if endpoint in self.endpoints.keys():
        entry = self.endpoints[endpoint]
        return self._sanitize_local(entry['id'], entry.get('root_path'))

    if not is_uuid(endpoint, range(1, 5)):
        raise ValueError(
            '"endpoint" must be a UUID or the label of an endpoint registered in this '
            'Globus instance. You can add endpoints via the add_endpoints method')

    # If a UUID was provided, find the first endpoint with a root path with the UUID
    endpoint_id = self._ensure_uuid(endpoint)
    labels = (
        label for label, info in self.endpoints.items()
        if info['id'] == endpoint_id and 'root_path' in info
    )
    name = next(labels, None)
    root_path = None
    if name:
        # Warn of ambiguity if multiple endpoints share a UUID
        if next(labels, None) is not None:
            warnings.warn(
                f'Multiple endpoints added with the same UUID, '
                f'using root path from "{name}"')
        root_path = self.endpoints[name]['root_path']
    return self._sanitize_local(endpoint_id, root_path)
|
|
875
|
+
|
|
876
|
+
def _sanitize_local(self, endpoint_id, root_path):
    """Ensure local root path on Windows is POSIX-style.

    Parameters
    ----------
    endpoint_id : uuid.UUID
        The endpoint UUID to determine if root path is local.
    root_path : pathlib.Path, str, None
        The root path to sanitize.

    Returns
    -------
    endpoint_id : uuid.UUID
        The endpoint UUID, returned unchanged to match `Globus._endpoint_id_root` signature.
    str, None
        The root path as a POSIX style string, or None if root_path is empty.

    Examples
    --------
    Providing a local root path on Windows

    >>> glo = Globus()
    >>> uid = glo.endpoints['local']['id']
    >>> glo._sanitize_local(uid, 'C:\\Data')
    UUID('50282ed5-3124-11ee-b977-482ae33bf6ca'), '/C/Data'

    Path left unchanged on *nix systems or when endpoint ID is not local

    >>> uid = UUID('c7c46cec-3124-11ee-bf50-482ae33bf6ca')
    >>> glo._sanitize_local(uid, 'C:\\Data')
    UUID('c7c46cec-3124-11ee-bf50-482ae33bf6ca'), 'C:\\Data'

    """
    if not root_path:
        return endpoint_id, None
    if isinstance(root_path, str):
        # A string root is read as a Windows path only for the local endpoint on a Windows
        # host; in every other case it is taken to be POSIX already.
        is_win = sys.platform in ('win32', 'cygwin')
        local_on_windows = endpoint_id == self.endpoints['local']['id'] and is_win
        path_type = PureWindowsPath if local_on_windows else PurePosixPath
        root_path = path_type(root_path)
    sanitized = as_globus_path(root_path)
    return endpoint_id, sanitized
|
|
920
|
+
|
|
921
|
+
@ensure_logged_in
def transfer_data(self, data_path, source_endpoint, destination_endpoint,
                  recursive=False, **kwargs):
    """Submit a transfer of one or more paths from one endpoint to another.

    At least one of the endpoints must be a server endpoint. Both file and directory paths
    may be provided, however if recursive is true, all paths must be directories.

    Parameters
    ----------
    data_path : str, list of str
        One or more data paths, relative to the endpoint root path.
    source_endpoint : str, uuid.UUID
        The name or UUID of the source endpoint.
    destination_endpoint : str, uuid.UUID
        The name or UUID of the destination endpoint.
    recursive : bool
        If true, transfer the contents of nested directories (NB: all data_paths must be
        directories).
    **kwargs
        See globus_sdk.TransferData.

    Returns
    -------
    uuid.UUID
        The Globus transfer ID.

    Examples
    --------
    Transfer two files (asynchronous)

    >>> glo = Globus()
    >>> files = ['file.ext', 'foo.bar']
    >>> task_id = glo.transfer_data(files, 'source_endpoint', 'destination_endpoint')

    Transfer a file (synchronous)

    >>> file = 'file.ext'
    >>> task_id = glo.run_task(lambda: glo.transfer_data(file, 'src_endpoint', 'dst_endpoint'))

    Transfer a folder (asynchronous)

    >>> folder = 'path/to/folder'
    >>> task_id = glo.transfer_data(
    ...     folder, 'source_endpoint', 'destination_endpoint', recursive=True)

    """
    # Resolve each endpoint to an ID: version 1 UUIDs are used as they are, anything else
    # is treated as a key of the endpoints map
    for key, endpoint in (('source_endpoint', source_endpoint),
                          ('destination_endpoint', destination_endpoint)):
        if is_uuid(endpoint, versions=(1,)):
            kwargs[key] = endpoint
        else:
            kwargs[key] = self.endpoints.get(endpoint)['id']
    transfer_object = globus_sdk.TransferData(self.client, **kwargs)

    # Each relative path is resolved against the root path of both endpoints
    for relative_path in ensure_list(data_path):
        source_root = self._endpoint_id_root(source_endpoint)[1]
        source_path = self._endpoint_path(relative_path, source_root)
        destination_root = self._endpoint_id_root(destination_endpoint)[1]
        destination_path = self._endpoint_path(relative_path, destination_root)
        transfer_object.add_item(source_path, destination_path, recursive=recursive)
    submission = self.client.submit_transfer(transfer_object)
    return UUID(submission.data['task_id'])
|
|
982
|
+
|
|
983
|
+
@ensure_logged_in
def delete_data(self, data_path, endpoint, recursive=False, **kwargs):
    """Submit a deletion of one or more paths within an endpoint.

    Both file and directory paths may be provided, however if recursive is true, all paths
    must be directories.

    Parameters
    ----------
    data_path : str, list of str
        One or more data paths, relative to the endpoint root path.
    endpoint : str, uuid.UUID
        The name or UUID of the endpoint.
    recursive : bool
        If true, delete the contents of nested directories (NB: all data_paths must be
        directories).
    **kwargs
        See globus_sdk.DeleteData.

    Returns
    -------
    uuid.UUID
        The Globus task ID of the submitted delete request.

    Examples
    --------
    Delete two files, ignoring those that don't exist (asynchronous)

    >>> glo = Globus()
    >>> files = ['file.ext', 'foo.bar']
    >>> task_id = glo.delete_data(files, 'endpoint_name', ignore_missing=True)

    Delete a file (synchronous)

    >>> task_id = glo.run_task(lambda: glo.delete_data('file.ext', 'endpoint_name'))

    Recursively delete a folder (asynchronous)

    >>> folder = 'path/to/folder'
    >>> task_id = glo.delete_data(folder, 'endpoint_name', recursive=True)

    """
    # Version 1 UUIDs are used as they are; anything else is a key of the endpoints map
    if is_uuid(endpoint, versions=(1,)):
        kwargs['endpoint'] = endpoint
    else:
        kwargs['endpoint'] = self.endpoints.get(endpoint)['id']
    delete_object = globus_sdk.DeleteData(self.client, recursive=recursive, **kwargs)

    # Resolve every relative path against the endpoint root before queueing it
    for relative_path in ensure_list(data_path):
        root = self._endpoint_id_root(endpoint)[1]
        delete_object.add_item(self._endpoint_path(relative_path, root))
    submission = self.client.submit_delete(delete_object)
    return UUID(submission.data['task_id'])
|
|
1036
|
+
|
|
1037
|
+
@ensure_logged_in
def ls(self, endpoint, path, remove_uuid=False, return_size=False, max_retries=1):
    """List the contents of a directory on a Globus endpoint.

    NB: If you're using ls routinely when transferring or deleting files you're probably
    doing something wrong!

    Parameters
    ----------
    endpoint : uuid.UUID, str
        The Globus endpoint. May be a UUID or a key in the Globus.endpoints attribute.
    path : Path, PurePath, str
        The absolute or relative Globus path to list. Note: if endpoint is a UUID, the path
        must be absolute.
    remove_uuid : bool
        If True, remove the UUID from the returned filenames.
    return_size : bool
        If True, return the size of each listed file in bytes.
    max_retries : int
        The number of times to retry the remote operation before raising. Increasing this
        may mitigate unstable network issues.

    Returns
    -------
    list
        A list of PurePosixPath objects of the files and folders listed, or if return_size
        is True, tuples of PurePosixPath objects and the corresponding file sizes (None for
        entries whose type is not 'file').

    """
    # Resolve the endpoint to an ID and prepend its root path where applicable
    endpoint_id, root_path = self._endpoint_id_root(endpoint)
    path = self._endpoint_path(path, root_path)

    # Query Globus, re-raising the error only once the final attempt has failed
    listing = []
    for attempt in range(max_retries + 1):
        try:
            listing = self.client.operation_ls(endpoint_id, path=path)
        except (GlobusConnectionError, GlobusAPIError) as err:
            if attempt == max_retries:
                raise err
        else:
            break

    def _filename(entry):
        """Return the entry name as a PurePosixPath, optionally without its UUID part."""
        name = entry['name']
        return PurePosixPath(remove_uuid_string(name) if remove_uuid else name)

    if return_size:
        return [(_filename(entry), entry['size'] if entry['type'] == 'file' else None)
                for entry in listing]
    return [_filename(entry) for entry in listing]
|
|
1089
|
+
|
|
1090
|
+
# TODO: allow to move all content of a directory with 'recursive' keyword in add_item
|
|
1091
|
+
@ensure_logged_in
def mv(self, source_endpoint, target_endpoint, source_paths, target_paths,
       timeout=None, **kwargs):
    """Move files from one endpoint to another.

    The transfer is submitted through `run_task`, so this call blocks until the task has
    finished.

    Parameters
    ----------
    source_endpoint : uuid.UUID, str
        The Globus source endpoint. May be a UUID or a key in the Globus.endpoints attribute.
    target_endpoint : uuid.UUID, str
        The Globus destination endpoint. May be a UUID or a key in the Globus.endpoints
        attribute.
    source_paths : str, list of str, pathlib.Path or pathlib.PurePath
        The absolute or relative Globus paths of source files to move. Note: if endpoint is
        a UUID, the path must be absolute.
    target_paths : str, list of str, pathlib.Path or pathlib.PurePath
        The absolute or relative Globus paths of destination files. Must contain as many
        paths as source_paths. Note: if endpoint is a UUID, the path must be absolute.
    timeout : int
        Passed to `run_task` as its timeout argument.
    **kwargs
        Optional arguments for globus_sdk.TransferData. These take precedence over the
        defaults used here (verify_checksum=True, sync_level='checksum', label='ONE globus').

    Returns
    -------
    uuid.UUID
        A Globus task ID.

    Raises
    ------
    ValueError
        The number of source paths differs from the number of target paths.

    """
    source_endpoint, source_root = self._endpoint_id_root(source_endpoint)
    target_endpoint, target_root = self._endpoint_id_root(target_endpoint)
    # ensure_list guards against a single path string being iterated character by character
    source_paths = [str(self._endpoint_path(path, source_root))
                    for path in ensure_list(source_paths)]
    target_paths = [str(self._endpoint_path(path, target_root))
                    for path in ensure_list(target_paths)]
    # zip would silently drop the unmatched paths, leaving files unmoved without any error
    if len(source_paths) != len(target_paths):
        raise ValueError(
            f'Number of source paths ({len(source_paths)}) does not match '
            f'number of target paths ({len(target_paths)})')

    # Defaults first so that explicitly passed keyword arguments override them instead of
    # raising a duplicate keyword TypeError
    options = {'verify_checksum': True, 'sync_level': 'checksum',
               'label': 'ONE globus', **kwargs}
    tdata = globus_sdk.TransferData(self.client, source_endpoint, target_endpoint, **options)
    for source_path, target_path in zip(source_paths, target_paths):
        tdata.add_item(source_path, target_path)

    def wrapper():
        """Function to submit Globus transfer and return the resulting task ID."""
        response = self.client.submit_transfer(tdata)
        return response.get('task_id', None)

    return self.run_task(wrapper, timeout=timeout)
|
|
1138
|
+
|
|
1139
|
+
@ensure_logged_in
def run_task(self, globus_func, retries=3, timeout=None):
    """Block until a Globus task finishes and retry upon Network or REST Errors.

    globus_func needs to submit a task to the client and return a task_id.

    Parameters
    ----------
    globus_func : function, Callable
        A function that returns a Globus task ID, typically it will submit a transfer.
    retries : int
        The number of times to call globus_func again if a Globus error is raised.
    timeout : int
        Maximum number of wait rounds before giving up. NB: the counter is incremented once
        per `task_wait` call, each of which is made with a 5 second timeout, so the limit is
        counted in wait rounds rather than in wall-clock seconds.

    Returns
    -------
    uuid.UUID
        Globus task ID.

    Raises
    ------
    IOError
        Timed out waiting for task to complete, or the task finished with a status other
        than SUCCEEDED.

    TODO Add a quick fail option that returns when files missing, etc.
    TODO Add status logging

    """
    try:
        task_id = globus_func()
        assert is_uuid(task_id, versions=(1, 2)), 'invalid UUID returned'
        print(f'Waiting for Globus task {task_id} to complete')
        # While the task is active, print a dot after each wait round and give up once the
        # number of rounds reaches the timeout
        i = 0
        while not self.client.task_wait(task_id, timeout=5, polling_interval=1):
            print('.', end='')
            i += 1
            if timeout and i >= timeout:
                task = self.client.get_task(task_id)
                raise IOError(f'Globus task {task_id} timed out after {timeout} seconds, '
                              f'with task status {task["status"]}')
        task = self.client.get_task(task_id)
        if task['status'] == 'SUCCEEDED':
            # Sometime Globus sets the status to SUCCEEDED but doesn't truly finish.
            # Handle error thrown when querying task_successful_transfers too early
            try:
                successful = self.client.task_successful_transfers(task_id)
                skipped = self.client.task_skipped_errors(task_id)
                print(f'\nGlobus task {task_id} completed.'
                      f'\nSkipped transfers: {len(list(skipped))}'
                      f'\nSuccessful transfers: {len(list(successful))}')
                for info in successful:
                    _logger.debug(f'{info["source_path"]} -> {info["destination_path"]}')
            except TransferAPIError:
                # The two literals are concatenated, hence the trailing space
                _logger.warning(f'\nGlobus task {task_id} SUCCEEDED but querying transfers was '
                                'unsuccessful')
        else:
            raise IOError(f'Globus task finished unsuccessfully with status {task["status"]}')
        return self._ensure_uuid(task_id)
    except (GlobusAPIError, NetworkError, GlobusTimeoutError, GlobusConnectionError,
            GlobusConnectionTimeoutError) as e:
        if retries < 1:
            _logger.error('\nMax retries exceeded.')
            raise e
        else:
            _logger.debug('\nGlobus experienced a network error', exc_info=True)
    # if we reach this point without returning or erring, retry
    _logger.warning('\nGlobus experienced a network error, retrying.')
    # The result of the retry must be returned, otherwise a task that only succeeds on a
    # later attempt would yield None instead of its task ID
    return self.run_task(globus_func, retries=(retries - 1), timeout=timeout)
|
|
1209
|
+
|
|
1210
|
+
@ensure_logged_in
async def task_wait_async(self, task_id, polling_interval=10, timeout=10):
    """Asynchronously wait for a task to leave the ACTIVE state, with a time limit.

    If the task status is still ACTIVE after timeout, returns False, otherwise returns True.

    Parameters
    ----------
    task_id : str, uuid.UUID
        A Globus task UUID to wait on for completion.
    polling_interval : float
        Number of seconds between queries to Globus about the task status. Minimum 1 second.
    timeout : float
        Number of seconds to wait in total. Minimum 1 second.

    Returns
    -------
    bool
        True if status not ACTIVE before timeout. False if status still ACTIVE at timeout.

    Raises
    ------
    GlobusSDKUsageError
        Either polling_interval or timeout is less than 1.

    Examples
    --------
    Asynchronously await a task to complete

    >>> await Globus().task_wait_async(task_id)

    """
    if polling_interval < 1:
        raise GlobusSDKUsageError('polling_interval must be at least 1 second')
    if timeout < 1:
        raise GlobusSDKUsageError('timout must be at least 1 second')
    # Never sleep for longer than the total time allowed
    interval = min(timeout, polling_interval)
    elapsed = 0
    while self.client.get_task(task_id)['status'] == 'ACTIVE':
        # Account for the upcoming sleep first so we bail out before sleeping needlessly
        elapsed += interval
        if elapsed >= timeout:
            return False
        await asyncio.sleep(interval)
    return True
|