ONE-api 3.0b3__py3-none-any.whl → 3.0b5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. {ONE_api-3.0b3.dist-info → ONE_api-3.0b5.dist-info}/LICENSE +21 -21
  2. {ONE_api-3.0b3.dist-info → ONE_api-3.0b5.dist-info}/METADATA +115 -115
  3. ONE_api-3.0b5.dist-info/RECORD +37 -0
  4. one/__init__.py +2 -2
  5. one/alf/__init__.py +1 -1
  6. one/alf/cache.py +640 -653
  7. one/alf/exceptions.py +105 -105
  8. one/alf/io.py +876 -876
  9. one/alf/path.py +1450 -1450
  10. one/alf/spec.py +519 -519
  11. one/api.py +2979 -2973
  12. one/converters.py +850 -850
  13. one/params.py +414 -414
  14. one/registration.py +845 -845
  15. one/remote/__init__.py +1 -1
  16. one/remote/aws.py +313 -313
  17. one/remote/base.py +142 -142
  18. one/remote/globus.py +1254 -1254
  19. one/tests/fixtures/params/.caches +6 -6
  20. one/tests/fixtures/params/.test.alyx.internationalbrainlab.org +8 -8
  21. one/tests/fixtures/rest_responses/1f187d80fd59677b395fcdb18e68e4401bfa1cc9 +1 -1
  22. one/tests/fixtures/rest_responses/47893cf67c985e6361cdee009334963f49fb0746 +1 -1
  23. one/tests/fixtures/rest_responses/535d0e9a1e2c1efbdeba0d673b131e00361a2edb +1 -1
  24. one/tests/fixtures/rest_responses/6dc96f7e9bcc6ac2e7581489b9580a6cd3f28293 +1 -1
  25. one/tests/fixtures/rest_responses/db1731fb8df0208944ae85f76718430813a8bf50 +1 -1
  26. one/tests/fixtures/rest_responses/dcce48259bb929661f60a02a48563f70aa6185b3 +1 -1
  27. one/tests/fixtures/rest_responses/f530d6022f61cdc9e38cc66beb3cb71f3003c9a1 +1 -1
  28. one/tests/fixtures/test_dbs.json +14 -14
  29. one/util.py +524 -524
  30. one/webclient.py +1368 -1354
  31. ONE_api-3.0b3.dist-info/RECORD +0 -37
  32. {ONE_api-3.0b3.dist-info → ONE_api-3.0b5.dist-info}/WHEEL +0 -0
  33. {ONE_api-3.0b3.dist-info → ONE_api-3.0b5.dist-info}/top_level.txt +0 -0
one/remote/globus.py CHANGED
@@ -1,1254 +1,1254 @@
1
- """A module for handling file operations through the Globus SDK.
2
-
3
- Setup
4
- -----
5
-
6
- To set up Globus simply instantiate the `Globus` class for the first time and follow the prompts.
7
- Providing a client name string to the constructor allows one to set up multiple Globus clients
8
- (i.e. when switching between different Globus client IDs).
9
-
10
- In order to use this function you need:
11
-
12
- 1. The client ID of an existing Globus Client (`see this tutorial`_).
13
- 2. Set up `Global Connect`_ on your local device.
14
- 3. Register your local device as an `endpoint`_ in your Globus Client.
15
-
16
-
17
- To modify the settings for a pre-established client, call the `Globus.setup` method with the client
18
- name:
19
-
20
- >>> globus = Globus.setup('default')
21
-
22
- You can update the list of endpoints using the `fetch_endpoints_from_alyx` method:
23
-
24
- >>> globus = Globus('admin')
25
- >>> remote_endpoints = globus.fetch_endpoints_from_alyx(alyx=AlyxClient())
26
-
27
- The endpoints are stored in the `endpoints` property
28
-
29
- >>> print(globus.endpoints.keys())
30
- >>> print(globus.endpoints['local'])
31
-
32
- .. _see this tutorial: https://globus-sdk-python.readthedocs.io/en/stable/tutorial.html
33
- .. _Global Connect: https://www.globus.org/globus-connect-personal
34
- .. _endpoint: https://app.globus.org/
35
-
36
-
37
- Examples
38
- --------
39
- Get the full Globus file path
40
-
41
- >>> relative_path = 'subject/2020-01-01/001/alf/_ibl_trials.table.pqt'
42
- >>> full_path = globus.to_address(relative_path, 'flatiron_cortexlab')
43
-
44
- Log in with a limited time token
45
-
46
- >>> globus = Globus('admin')
47
- >>> globus.login(stay_logged_in=False)
48
-
49
- Log out of Globus, revoking and deleting all tokens
50
-
51
- >>> globus.logout()
52
- >>> assert not globus.is_logged_in
53
-
54
- Asynchronously transfer data between Alyx repositories
55
-
56
- >>> alyx = AlyxClient()
57
- >>> glo = Globus('admin')
58
- >>> glo.add_endpoint('flatiron_cortexlab', alyx=alyx)
59
- >>> glo.add_endpoint('cortex_lab_SR', alyx=alyx)
60
- >>> task_id = glo.transfer_data('path/to/file', 'flatiron_cortexlab', 'cortex_lab_SR')
61
-
62
- Synchronously transfer data to an alternate local location
63
-
64
- >>> from functools import partial
65
- >>> root_path = '/path/to/new/location'
66
- >>> glo.add_endpoint(get_local_endpoint_id(), label='alternate_local', root_path=root_path)
67
- >>> folder = 'camera/ZFM-01867/2021-03-23/002' # An example folder to download
68
- >>> task = partial(glo.transfer_data, folder, 'integration', 'integration_local',
69
- ... label='alternate data', recursive=True)
70
- >>> task_id = glo.run_task(task) # Submit task to Globus and await completion
71
-
72
- Temporarily change local data root path and synchronously download file
73
-
74
- >>> glo.endpoints['local']['root_path'] = '/path/to/new/location'
75
- >>> file = glo.download_file('path/to/file.ext', 'source_endpoint')
76
- Path('/path/to/new/location/path/to/file.ext')
77
-
78
- Await multiple tasks to complete by passing a list of Globus transfer IDs
79
-
80
- >>> import asyncio
81
- >>> tasks = [asyncio.create_task(globus.task_wait_async(task_id)) for task_id in task_ids]
82
- >>> success = asyncio.run(asyncio.gather(*tasks))
83
-
84
- """
85
- import os
86
- import re
87
- import sys
88
- import asyncio
89
- import logging
90
- from uuid import UUID
91
- from datetime import datetime
92
- from pathlib import Path, PurePosixPath, PurePath, PureWindowsPath
93
- import warnings
94
- from functools import partial, wraps
95
-
96
- import globus_sdk
97
- from globus_sdk import TransferAPIError, GlobusAPIError, NetworkError, GlobusTimeoutError, \
98
- GlobusConnectionError, GlobusConnectionTimeoutError, GlobusSDKUsageError, NullAuthorizer
99
- from iblutil.io import params as iopar
100
- from iblutil.util import ensure_list
101
-
102
- from one.alf.spec import is_uuid
103
- from one.alf.path import remove_uuid_string
104
- import one.params
105
- from one.webclient import AlyxClient
106
- from .base import DownloadClient, load_client_params, save_client_params
107
-
108
# Names exported by `from one.remote.globus import *`.
__all__ = ['Globus', 'get_lab_from_endpoint_id', 'as_globus_path']
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)
CLIENT_KEY = 'globus'
"""str: The default key in the remote settings file"""

# Template merged underneath any stored client parameters in `_setup`.
DEFAULT_PAR = {'GLOBUS_CLIENT_ID': None, 'local_endpoint': None, 'local_path': None}
"""dict: The default Globus parameter fields"""

# NOTE(review): the 'INACTIVE' value is a bare string while the other values are tuples, so an
# `in` test against it would be a substring test rather than a membership test - confirm
# against the code that consumes this map before relying on it.
STATUS_MAP = {
    'ACTIVE': ('QUEUED', 'ACTIVE', 'GC_NOT_CONNECTED', 'UNKNOWN'),
    'FAILED': ('ENDPOINT_ERROR', 'PERMISSION_DENIED', 'CONNECT_FAILED'),
    'INACTIVE': 'PAUSED_BY_ADMIN'}
"""dict: A map of Globus status to "nice" status"""
121
-
122
-
123
def ensure_logged_in(func):
    """Wrap a Globus method so that `self.login()` runs before every call.

    The wrapped method first calls the instance's `login` method (with no arguments) and then
    forwards all positional and keyword arguments to the original method, returning its result.
    Any exception raised by `login` propagates before the method body is run.

    Parameters
    ----------
    func : function
        Method to wrap (e.g. Globus.transfer_data).

    Returns
    -------
    function
        Handle to wrapped method, carrying the metadata of `func`.

    """
    @wraps(func)
    def _login_then_call(self, *args, **kwargs):
        self.login()  # authenticate (or re-authenticate) before delegating
        result = func(self, *args, **kwargs)
        return result

    return _login_then_call
147
-
148
-
149
def _setup(par_id=None, login=True, refresh_tokens=True):
    """Sets up Globus as a backend for ONE functions.

    Interactively prompts for the client name (when not given), the Globus client ID, the
    local endpoint ID and the local endpoint path, then saves the result under `par_id` in
    the Globus section of the remote parameters.

    Parameters
    ----------
    par_id : str
        Parameter profile name to set up e.g. 'default', 'admin'. If empty, the user is
        prompted for one.
    login : bool
        If true, a token is requested via `get_token` and stored with the parameters.
    refresh_tokens : bool
        Passed to `get_token`; whether to request a refresh token.

    Returns
    -------
    IBLParams
        A set of Globus parameters.

    Raises
    ------
    ValueError
        No client ID was entered, the client ID is not a UUID, or the local endpoint ID is
        not a version 1 or 2 UUID.

    """
    print('Setting up Globus parameter file. See docstring for help.')
    if not par_id:
        default_par_id = 'default'
        par_id = input(
            f'Enter name for this client or press Enter to keep value "{default_par_id}": '
        )
        par_id = par_id.strip() or default_par_id

    # Read existing globus params if present
    globus_pars = iopar.as_dict(load_client_params(CLIENT_KEY, assert_present=False) or {})
    pars = {**DEFAULT_PAR, **globus_pars.get(par_id, {})}

    # Set GLOBUS_CLIENT_ID
    current_id = pars['GLOBUS_CLIENT_ID']
    if current_id:
        prompt = (f'Found Globus client ID in parameter file ({current_id}). '
                  'Press Enter to keep it, or enter a new ID here: ')
        pars['GLOBUS_CLIENT_ID'] = input(prompt).strip() or current_id
    else:
        new_id = input('Please enter the Globus client ID: ').strip()
        if not new_id:
            raise ValueError('Globus client ID is a required field')
        pars['GLOBUS_CLIENT_ID'] = new_id
    client_id = pars['GLOBUS_CLIENT_ID']
    if not is_uuid(client_id):
        # Exceptions do not apply %-formatting to their arguments, so build the message here
        raise ValueError(f'Invalid Globus client ID "{client_id}"')

    # Find and set local ID
    message = 'Please enter the local endpoint ID'
    try:
        default_endpoint = str(pars['local_endpoint'] or get_local_endpoint_id())
        message += f' (default: {default_endpoint})'
    except AssertionError:
        # get_local_endpoint_id signals a missing Globus Connect ID file with AssertionError
        default_endpoint = ''
        warnings.warn(
            'Cannot find local endpoint ID. Beware that this might mean that Globus Connect '
            'is not set up properly.')
    pars['local_endpoint'] = input(message + ':').strip() or default_endpoint
    if not is_uuid(pars['local_endpoint'], (1, 2)):
        raise ValueError('Globus local endpoint ID must be a UUID version 1 or 2')

    # Check for local path
    message = 'Please enter the local endpoint path'
    local_path = pars['local_path'] or one.params.get(silent=True).CACHE_DIR
    message += f' (default: {local_path})'
    pars['local_path'] = input(message + ':').strip() or local_path

    if login:
        # Log in manually and get refresh token to avoid having to login repeatedly
        token = get_token(pars['GLOBUS_CLIENT_ID'], refresh_tokens=refresh_tokens)
        pars.update(token)

    globus_pars[par_id] = pars
    save_client_params(globus_pars, client_key=CLIENT_KEY)
    print('Finished setup.')
    return iopar.from_dict(pars)
218
-
219
-
220
def get_token(client_id, refresh_tokens=True):
    """Interactively obtain Globus transfer tokens for a native app client.

    Prints an authorization URL, then reads the resulting authorization code from standard
    input. Entering nothing, or "c" in any letter case, cancels the login.

    Parameters
    ----------
    client_id : str
        A Globus client ID.
    refresh_tokens : bool
        If true, requests a refresh token for repeat logins.

    Returns
    -------
    dict
        A dict containing the keys {'refresh_token', 'access_token', 'expires_at_seconds'}.
        All values are None if the user cancelled.

    """
    token_keys = ('refresh_token', 'access_token', 'expires_at_seconds')
    auth_client = globus_sdk.NativeAppAuthClient(client_id)
    auth_client.oauth2_start_flow(refresh_tokens=bool(refresh_tokens))
    url = auth_client.oauth2_get_authorize_url()
    print(f'To get a new token, go to this URL and login: {url}')
    code = input('Enter the code you get after login here (press "c" to cancel): ').strip()
    if not code or code.casefold() == 'c':
        # Cancelled: same keys as a successful login, but with no values
        return dict.fromkeys(token_keys)
    response = auth_client.oauth2_exchange_code_for_tokens(code)
    transfer_tokens = response.by_resource_server['transfer.api.globus.org']
    return {key: transfer_tokens.get(key) for key in token_keys}
250
-
251
-
252
- def _remove_token_fields(pars):
253
- """Remove the token fields from a parameters object.
254
-
255
- Parameters
256
- ----------
257
- pars : IBLParams, dict
258
- The Globus parameters containing token fields.
259
-
260
- Returns
261
- -------
262
- IBLParams
263
- A copy of the params without the token fields.
264
-
265
- """
266
- if pars is None:
267
- return pars
268
- fields = ('refresh_token', 'access_token', 'expires_at_seconds')
269
- return iopar.from_dict({k: v for k, v in iopar.as_dict(pars).items() if k not in fields})
270
-
271
-
272
def _save_globus_params(pars, client_name):
    """Store the parameters of one Globus client alongside any existing clients.

    The current Globus parameters are loaded (an empty set is used if none exist), the entry
    for `client_name` is replaced, and everything is written back.

    Parameters
    ----------
    pars : IBLParams, dict
        The Globus client parameters to save.
    client_name : str
        The Globus client name, e.g. 'default'.

    """
    stored = load_client_params(CLIENT_KEY, assert_present=False)
    all_clients = iopar.as_dict(stored or {})
    all_clients[client_name] = iopar.as_dict(pars)
    save_client_params(all_clients, CLIENT_KEY)
286
-
287
-
288
def get_local_endpoint_id():
    """Extracts the ID of the local Globus Connect endpoint.

    The ID is read from the 'client-id.txt' file in the Globus Connect settings folder:
    '%LOCALAPPDATA%/Globus Connect' on Windows/Cygwin, '~/.globusonline/lta' otherwise.

    Returns
    -------
    uuid.UUID
        The local Globus endpoint ID.

    Raises
    ------
    AssertionError
        The ID file does not exist or is empty. Raised explicitly (rather than with an
        `assert` statement) so that the check also runs under `python -O`.
    ValueError
        The file content is not a valid UUID string.

    """
    msg = ('Cannot find local endpoint ID, check if Globus Connect is set up correctly, '
           '{} exists and contains a UUID.')
    if sys.platform in ('win32', 'cygwin'):
        id_path = Path(os.environ['LOCALAPPDATA']).joinpath('Globus Connect')
    else:
        id_path = Path.home().joinpath('.globusonline', 'lta')

    id_file = id_path.joinpath('client-id.txt')
    if not id_file.exists():
        raise AssertionError(msg.format(id_file))
    local_id = id_file.read_text().strip()
    if not local_id:
        # read_text always returns a str, so test for content rather than type
        raise AssertionError(msg.format(id_file))
    _logger.debug(f'Found local endpoint ID in Globus Connect settings {local_id}')
    return UUID(local_id)
310
-
311
-
312
def get_local_endpoint_paths():
    """List the local folders that Globus Connect is configured to access.

    On Windows/Cygwin a notice is printed and an empty list is returned. Otherwise the
    comma-separated content of '~/.globusonline/lta/config-paths' is split into paths, with
    empty entries ignored. If that file does not exist a warning is issued.

    Returns
    -------
    list of pathlib.Path
        Local endpoint paths set in Globus Connect; empty if none could be determined.

    """
    if sys.platform in ('win32', 'cygwin'):
        print('On windows the local Globus path needs to be entered manually')
        return []

    config_file = Path.home().joinpath('.globusonline', 'lta', 'config-paths')
    if not config_file.exists():
        template = ('Cannot find local endpoint path, check if Globus Connect is set up correctly, '
                    '{} exists and contains a valid path.')
        warnings.warn(template.format(config_file))
        return []

    entries = config_file.read_text().strip().split(',')
    found = [Path(entry) for entry in entries if entry]
    _logger.debug('Found local endpoint paths in Globus Connect settings')
    return found
337
-
338
-
339
def get_lab_from_endpoint_id(endpoint=None, alyx=None):
    """Look up the names of the labs whose repositories use a given Globus endpoint.

    Queries the Alyx 'labs' endpoint for labs with a data repository whose Globus endpoint
    ID equals `endpoint`.

    Parameters
    ----------
    endpoint : uuid.UUID, str
        Endpoint UUID. If not given, the local endpoint UUID is looked up with
        `get_local_endpoint_id`.
    alyx : one.webclient.AlyxClient
        An instance of AlyxClient to use. If not given, a silent client is instantiated.

    Returns
    -------
    list, None
        The lab names associated with the endpoint UUID, or None if no lab matched.

    """
    client = alyx or AlyxClient(silent=True)
    endpoint_id = endpoint if endpoint else get_local_endpoint_id()
    records = client.rest(
        'labs', 'list', django=f'repositories__globus_endpoint_id,{endpoint_id}')
    if len(records) == 0:
        return None
    return [record['name'] for record in records]
365
-
366
-
367
def as_globus_path(path):
    """Format a path as a string suitable for the Globus TransferClient.

    Parameters
    ----------
    path : pathlib.Path, pathlib.PurePath, str
        A path to convert to a Globus-compliant path string.

    Returns
    -------
    str
        A formatted path string.

    Notes
    -----
    - If using a tilde in the path, the home folder of your Globus Connect instance must be
      the same as the OS home dir.
    - If validating a path for another system ensure the input path is a PurePath, in
      particular, on a Linux computer a remote Windows path should first be made into a
      PureWindowsPath.
    - Only string inputs are resolved against the file system; any pathlib object (which is
      always a PurePath instance) is used as given.

    Examples
    --------
    A Windows path (on Windows OS)

    >>> as_globus_path('E:\\FlatIron\\integration')
    '/E/FlatIron/integration'

    When explicitly a POSIX path, remains unchanged

    >>> as_globus_path(PurePosixPath('E:\\FlatIron\\integration'))
    'E:\\FlatIron\\integration'

    A relative POSIX path (on *nix OS)

    >>> as_globus_path('../data/integration')
    '/mnt/data/integration'

    A valid Globus path remains unchanged

    >>> as_globus_path('/E/FlatIron/integration')
    '/E/FlatIron/integration'

    """
    # Both flags describe the input as it was passed in, before any conversion below
    given_as_pathlib = isinstance(path, PurePath)
    windows_style = isinstance(path, PureWindowsPath) or sys.platform in ('win32', 'cygwin')
    if isinstance(path, str):
        path = Path(path)

    if windows_style:
        # A Globus-style Windows path begins with a single upper case drive letter folder
        already_valid = re.match(r'/[A-Z]($|/)', path.as_posix())
    else:
        already_valid = path.is_absolute()
    if already_valid:
        return path.as_posix()

    if not given_as_pathlib:
        path = path.resolve()
    if path.drive:
        # Turn e.g. 'E:/foo' into '/E/foo'
        path = '/' + str(path.as_posix().replace(':', '', 1))
    return str(path)
425
-
426
-
427
- class Globus(DownloadClient):
428
-
429
def __init__(self, client_name='default', connect=True, headless=False):
    """Wrapper for managing files on Globus endpoints.

    Loads the stored parameters for `client_name`, running the interactive set-up if there
    are none, optionally logs in, and registers the local endpoint under the label 'local'.

    Parameters
    ----------
    client_name : str
        Parameter profile name to load e.g. 'default', 'admin'.
    connect : bool
        Whether to create the Globus SDK client on init.
    headless : bool
        If true, raises an error if unable to set up or log in automatically. Otherwise the
        user is prompted to enter information.

    Raises
    ------
    RuntimeError
        In headless mode, no parameters are stored for `client_name`.

    Examples
    --------
    Instantiate without authentication

    >>> globus = Globus(connect=False)

    Instantiate without user prompts

    >>> globus = Globus('server', headless=True)

    """
    super().__init__()
    self.client = None
    self.client_name = client_name
    self.headless = headless

    stored = load_client_params(f'{CLIENT_KEY}.{client_name}', assert_present=False)
    self._pars = stored
    if stored is None:
        # No parameters for this client: set-up is interactive, so impossible when headless
        if self.headless:
            raise RuntimeError(f'Globus not set up for client "{self.client_name}"')
        self._pars = _setup(self.client_name, login=False)

    if connect:
        self.login()

    # Register the local endpoint from the loaded parameters
    local_id = UUID(self._pars.local_endpoint)
    self.endpoints = {'local': {'id': local_id}}
    _logger.info('Adding local endpoint.')
    self.endpoints['local']['root_path'] = self._pars.local_path
473
-
474
@property
def is_logged_in(self):
    """bool: Check if client exists and is authenticated.

    True only when an SDK client exists, its authorizer yields an authorization header, and
    the stored token has not expired. Always a real bool, never None or another object.
    """
    if not self.client:
        return False
    if self.client.authorizer.get_authorization_header() is None:
        return False
    # Only consult the expiry once a token is known to exist (this may refresh the token)
    return not self._token_expired
479
-
480
- @property
481
- def _token_expired(self):
482
- """bool: True if token absent or expired; False if valid.
483
-
484
- Note the 'expires_at_seconds' may be greater than `Globus.client.authorizer.expires_at` if
485
- using refresh tokens. The `login` method will always refresh the token if still valid.
486
- """
487
- try:
488
- authorizer = getattr(self.client, 'authorizer', None)
489
- has_refresh_token = self._pars.as_dict().get('refresh_token') is not None
490
- if has_refresh_token and isinstance(authorizer, globus_sdk.RefreshTokenAuthorizer):
491
- self.client.authorizer.ensure_valid_token() # Fetch new refresh token if needed
492
- except Exception as ex:
493
- _logger.debug('Failed to refresh token: %s', ex)
494
- expires_at_seconds = getattr(self._pars, 'expires_at_seconds', 0)
495
- return expires_at_seconds - datetime.utcnow().timestamp() < 60
496
-
497
def login(self, stay_logged_in=None):
    """Authenticate Globus client.

    Does nothing if already logged in. Otherwise, if the stored parameters lack any token
    field or the token is considered expired, a new token is requested interactively (or an
    error is raised in headless mode) and saved, after which the SDK client is created.

    Parameters
    ----------
    stay_logged_in : bool, optional
        If True, use refresh token to remain logged in for longer. If False, use an auth
        token without the option of refreshing when expired. If not specified, uses the refresh
        token if available.

    Raises
    ------
    RuntimeError
        In headless mode, a new token is required.

    """
    if self.is_logged_in:
        _logger.debug('Already logged in')
        return

    # Default depends on refresh token
    # NOTE(review): an unspecified value is treated as True regardless of whether a refresh
    # token is stored, so a missing refresh token then counts as "expired" below - confirm
    # this matches the documented default.
    stay_logged_in = True if stay_logged_in is None else stay_logged_in
    # With stay_logged_in, "expired" means no refresh token is stored; otherwise it is the
    # expiry state of the stored token.
    expired = bool(
        self._pars.as_dict().get('refresh_token') is None
        if stay_logged_in else self._token_expired
    )
    # If no tokens in parameters, Globus must be authenticated
    required_fields = {'refresh_token', 'access_token', 'expires_at_seconds'}
    if not required_fields.issubset(iopar.as_dict(self._pars)) or expired:
        if self.headless:
            raise RuntimeError(f'Globus not authenticated for client "{self.client_name}"')
        token = get_token(self._pars.GLOBUS_CLIENT_ID, refresh_tokens=stay_logged_in)
        if not any(token.values()):
            # get_token returns all-None values when the user cancels
            _logger.debug('Login cancelled by user')
            return
        self._pars = iopar.from_dict({**self._pars.as_dict(), **token})
        _save_globus_params(self._pars, self.client_name)

    # Ready to authenticate
    self._authenticate(stay_logged_in)
532
-
533
def logout(self):
    """Revoke any tokens and delete them from the client and parameter file.

    The client's authorizer is replaced with a `NullAuthorizer`, and the token fields are
    removed from both the saved parameters and the parameters held by this instance.
    """
    # Only a real (non-null) authorizer has tokens to revoke
    if self.client and self.client.authorizer and \
            not isinstance(self.client.authorizer, NullAuthorizer):
        self.client.authorizer.auth_client.oauth2_revoke_token()
        del self.client.authorizer
        self.client.authorizer = NullAuthorizer()
    # Strip the tokens from the parameters on disk, if any are stored for this client
    if pars := load_client_params(f'{CLIENT_KEY}.{self.client_name}', assert_present=False):
        _save_globus_params(_remove_token_fields(pars), self.client_name)
    self._pars = _remove_token_fields(self._pars)
543
-
544
def _authenticate(self, stay_logged_in=None):
    """Authenticate and instantiate Globus SDK client.

    Parameters
    ----------
    stay_logged_in : bool, optional
        Unless False, a stored refresh token is used to build a refreshing authorizer.
        Otherwise (or if no refresh token is stored) the stored access token is used.

    Raises
    ------
    RuntimeError
        No refresh token is used and the stored access token is expired.

    """
    if self._pars.as_dict().get('refresh_token') and stay_logged_in is not False:
        client = globus_sdk.NativeAppAuthClient(self._pars.GLOBUS_CLIENT_ID)
        client.oauth2_start_flow(refresh_tokens=True)
        # Newly fetched tokens are persisted through the on_refresh callback
        authorizer = globus_sdk.RefreshTokenAuthorizer(
            self._pars.refresh_token, client, on_refresh=self._save_refresh_token_callback)
    else:
        if stay_logged_in is True:
            warnings.warn('No refresh token. Please log out and back in to remain logged in.')
        if self._token_expired is not False:
            raise RuntimeError(f'token no longer valid for client "{self.client_name}"')
        authorizer = globus_sdk.AccessTokenAuthorizer(self._pars.access_token)
    self.client = globus_sdk.TransferClient(authorizer=authorizer)
558
-
559
- def _save_refresh_token_callback(self, res):
560
- """Save a token fetched by the refresh token authorizer.
561
-
562
- This is a callback for the globus_sdk.RefreshTokenAuthorizer to update the parameters.
563
-
564
- Parameters
565
- ----------
566
- res : globus_sdk.services.auth.OAuthTokenResponse
567
- An Open Authorization response object.
568
-
569
- """
570
- if not res or not (token := next(iter(res.by_resource_server.values()), None)):
571
- return
572
- token_fields = {'refresh_token', 'access_token', 'expires_at_seconds'}
573
- self._pars = iopar.from_dict(
574
- {**self._pars.as_dict(), **{k: v for k, v in token.items() if k in token_fields}})
575
- _save_globus_params(self._pars, self.client_name)
576
-
577
def fetch_endpoints_from_alyx(self, alyx=None, overwrite=False):
    """Add every Alyx data repository that has a Globus endpoint ID as an endpoint.

    Each repository is added under its name, with its Globus path as the root path.
    Repositories without a Globus endpoint ID are skipped.

    Parameters
    ----------
    alyx : one.webclient.AlyxClient
        An optional AlyxClient. If not given, a new one is instantiated.
    overwrite : bool
        Whether existing endpoint with the same label should be replaced.

    Returns
    -------
    dict
        The entries of the endpoints property whose label is an Alyx repository name.

    """
    client = alyx or AlyxClient()
    repositories = client.rest('data-repository', 'list')
    repo_names = set()
    for repo in repositories:
        repo_names.add(repo['name'])
        globus_id = repo['globus_endpoint_id']
        if globus_id:
            self.add_endpoint(
                UUID(globus_id),
                label=repo['name'],
                root_path=repo['globus_path'],
                overwrite=overwrite,
            )
    return {label: info for label, info in self.endpoints.items() if label in repo_names}
604
-
605
def to_address(self, data_path, endpoint):
    """Build the full Globus path of a data path on a given endpoint.

    Parameters
    ----------
    data_path : Path, PurePath, str
        An absolute or relative POSIX path
    endpoint : str, uuid.UUID
        An endpoint label or UUID.

    Returns
    -------
    str
        A complete path string formatted for Globus.

    Examples
    --------
    >>> glo = Globus()
    >>> glo.add_endpoint('0ec47586-3a19-11eb-b173-0ee0d5d9299f',
    ...                  label='foobar', root_path='/foo')
    >>> glo.to_address('bar/baz.ext', 'foobar')
    '/foo/bar/baz.ext'

    """
    # Only the root path of the endpoint is needed here, not its UUID
    endpoint_root = self._endpoint_id_root(endpoint)[1]
    return self._endpoint_path(data_path, endpoint_root)
631
-
632
@ensure_logged_in
def download_file(self, file_address, source_endpoint, recursive=False, **kwargs):
    """Download one or more files via Globus.

    Submits a transfer from `source_endpoint` to the 'local' endpoint through `run_task`,
    then builds the local file paths from the destination paths of the successful transfers.

    Parameters
    ----------
    file_address : str, list of str
        One or more relative POSIX paths to download.
    source_endpoint : str, uuid.UUID
        The source endpoint name or uuid.
    recursive : bool
        If true, transfer the contents of nested directories (NB: all data_paths must be
        directories).
    **kwargs
        See Globus.transfer_data. If no 'label' is given, 'ONE download' is used.

    Returns
    -------
    pathlib.Path, list of pathlib.Path
        The downloaded file path(s). If recursive is True, a list is always returned.

    Notes
    -----
    - Assumes that the local endpoint root path is NOT POSIX style on Windows.
    - NOTE(review): for a single file with no successful transfer, `files[0]` looks like it
      would raise IndexError - confirm whether `run_task` can return in that state.

    TODO Return None for failed files

    Examples
    --------
    Download a single file

    >>> file = Globus().download_file('path/to/file', '0ec47586-3a19-11eb-b173-0ee0d5d9299f')

    Download multiple files and verify checksum

    >>> files = ['relative/file/path.ext', 'foo.bar']
    >>> files = Globus().download_file(files, 'source_endpoint_name', verify_checksum=True)

    Download a folder

    >>> files = Globus().download_file('folder/path', 'source_endpoint_name', recursive=True)

    """
    # A single path is returned only for a single, non-recursive string input
    return_single = isinstance(file_address, str) and recursive is False
    kwargs['label'] = kwargs.get('label', 'ONE download')
    task = partial(self.transfer_data, file_address, source_endpoint, 'local',
                   recursive=recursive, **kwargs)
    task_id = self.run_task(task)
    files = []
    root = Path(self.endpoints['local']['root_path'])
    # Length of the local root as a Globus path, used to strip it from destination paths
    idx = len(self._endpoint_path(PurePosixPath(as_globus_path(root))))
    for info in self.client.task_successful_transfers(task_id):
        files.append(info['destination_path'][idx:].strip('/'))

    if return_single:
        file = root / files[0]
        assert file.exists()
        return file

    # Order files by input
    def _best_match(x):
        """Return the index of the input file that best matches downloaded file."""
        # Score each input by the fraction of the downloaded path it spans (0 if absent)
        spans = [len(frag) / len(x) if frag in x else 0 for frag in ensure_list(file_address)]
        return spans.index(max(spans))
    files = list(map(root.joinpath, sorted(files, key=_best_match)))
    assert all(map(Path.exists, filter(None, files)))
    return files
699
-
700
@staticmethod
def setup(client_name='default', **kwargs):
    """Run the interactive parameter set-up for a client, then instantiate it.

    In order to use this function you need:

    1. The client ID of an existing Globus Client (`see this tutorial`_).
    2. Set up `Global Connect`_ on your local device.
    3. Register your local device as an `endpoint`_ in your Globus Client.

    .. _see this tutorial: https://globus-sdk-python.readthedocs.io/en/stable/tutorial.html
    .. _Global Connect: https://www.globus.org/globus-connect-personal
    .. _endpoint: https://app.globus.org/

    Parameters
    ----------
    client_name : str
        Parameter profile name to set up e.g. 'default', 'admin'.
    **kwargs
        Optional Globus constructor arguments.

    Returns
    -------
    Globus
        A new Globus client object.

    """
    # Tokens are not requested during set-up; the constructor handles log in
    _setup(client_name, login=False)
    instance = Globus(client_name, **kwargs)
    return instance
729
-
730
def add_endpoint(self, endpoint, label=None, root_path=None, overwrite=False, alyx=None):
    """Register an endpoint on this Globus instance for use by other methods.

    If `endpoint` is a version 1 or 2 UUID it is used directly and a label is required.
    Otherwise it is treated as a repository name and looked up with `repo_from_alyx`, which
    supplies the endpoint UUID and, unless given, the root path and label.

    If the label is already in use and `overwrite` is False, an error is logged and nothing
    is changed.

    Parameters
    ----------
    endpoint : uuid.UUID, str
        The endpoint UUID or database repository name of the endpoint.
    label : str
        Label to access the endpoint. If endpoint is UUID this has to be set, otherwise is
        optional.
    root_path : str, pathlib.Path, pathlib.PurePath
        File path to be accessed by Globus on the endpoint.
    overwrite : bool
        Whether existing endpoint with the same label should be replaced.
    alyx : one.webclient.AlyxClient
        An AlyxClient instance for looking up repository information.

    Raises
    ------
    ValueError
        `endpoint` is a UUID and no label was given.

    """
    if is_uuid(endpoint, versions=(1, 2)):  # MAC address UUID
        if label is None:
            raise ValueError('If "endpoint" is a UUID, "label" cannot be None.')
        endpoint_id = self._ensure_uuid(endpoint)
    else:
        repo = self.repo_from_alyx(endpoint, alyx=alyx)
        endpoint_id = UUID(repo['globus_endpoint_id'])
        root_path = root_path or repo['globus_path']
        label = label or endpoint

    if label in self.endpoints and overwrite is False:
        _logger.error(f'An endpoint called "{label}" already exists. Choose a different label '
                      'or set overwrite=True')
        return

    self.endpoints[label] = {'id': endpoint_id}
    if root_path:
        self.endpoints[label]['root_path'] = root_path
764
-
765
- @staticmethod
766
- def _endpoint_path(path, root_path=None):
767
- """Given an absolute path or relative path with a root path, return a Globus path str.
768
-
769
- Note: Paths must be POSIX or Globus-compliant paths. In other words for Windows systems
770
- the input root_path or absolute path must be passed through `as_globus_path` before
771
- calling this method.
772
-
773
- TODO include globus_path_from_dataset
774
-
775
- Parameters
776
- ----------
777
- path : Path, PurePath, str
778
- An absolute or relative POSIX path
779
- root_path : Path, PurePath, str
780
- A root path to prepend. Optional if `path` is absolute.
781
-
782
- Returns
783
- -------
784
- str
785
- A path string formatted for Globus.
786
-
787
- See Also
788
- --------
789
- as_globus_path
790
-
791
- Raises
792
- ------
793
- ValueError
794
- Path was not absolute and no root path was given. An absolute path must start with
795
- a slash on *nix systems.
796
-
797
- """
798
- if isinstance(path, str):
799
- path = PurePosixPath(path)
800
- if root_path and not str(path).startswith(str(root_path)):
801
- path = PurePosixPath(root_path) / path
802
- if not path.is_absolute():
803
- raise ValueError(f'{path} is relative and no root_path defined')
804
- return as_globus_path(path)
805
-
806
- @staticmethod
807
- def _ensure_uuid(uid):
808
- """Ensures UUID object returned.
809
-
810
- Parameters
811
- ----------
812
- uid : str, uuid.UUID
813
- A UUID to cast to UUID object.
814
-
815
- Returns
816
- -------
817
- uuid.UUID
818
- A UUID object.
819
-
820
- """
821
- return UUID(uid) if not isinstance(uid, UUID) else uid
822
-
823
- def _endpoint_id_root(self, endpoint):
824
- """Return endpoint UUID and root path from a given endpoint identifier.
825
-
826
- Parameters
827
- ----------
828
- endpoint : str, uuid.UUID
829
- An endpoint label or UUID.
830
-
831
- Returns
832
- -------
833
- uuid.UUID
834
- The endpoint UUID.
835
- str, None
836
- The POSIX-style endpoint root path (if defined).
837
-
838
- Warnings
839
- --------
840
- UserWarning
841
- If endpoint UUID is associated with multiple root paths, it is better to provide the
842
- endpoint label to avoid this warning and to ensure the intended root path is returned.
843
-
844
- See Also
845
- --------
846
- Globus._sanitize_local
847
-
848
- """
849
- root_path = None
850
- if endpoint in self.endpoints.keys():
851
- endpoint_id = self.endpoints[endpoint]['id']
852
- if 'root_path' in self.endpoints[endpoint].keys():
853
- root_path = self.endpoints[endpoint]['root_path']
854
- return self._sanitize_local(endpoint_id, root_path)
855
- elif is_uuid(endpoint, range(1, 5)):
856
- # If a UUID was provided, find the first endpoint with a root path with the UUID
857
- endpoint_id = self._ensure_uuid(endpoint)
858
- matching = (
859
- k for k, v in self.endpoints.items() if v['id'] == endpoint_id and 'root_path' in v
860
- )
861
- if name := next(matching, None):
862
- # Warn of ambiguity if multiple endpoints share a UUID
863
- if next(matching, None) is not None:
864
- warnings.warn(
865
- f'Multiple endpoints added with the same UUID, '
866
- f'using root path from "{name}"')
867
- root_path = self.endpoints[name]['root_path']
868
- else:
869
- root_path = None
870
- return self._sanitize_local(endpoint_id, root_path)
871
- else:
872
- raise ValueError(
873
- '"endpoint" must be a UUID or the label of an endpoint registered in this '
874
- 'Globus instance. You can add endpoints via the add_endpoints method')
875
-
876
- def _sanitize_local(self, endpoint_id, root_path):
877
- """Ensure local root path on Windows is POSIX-style.
878
-
879
- Parameters
880
- ----------
881
- endpoint_id : uuid.UUID
882
- The endpoint UUID to determine if root path is local.
883
- root_path : pathlib.Path, str, None
884
- The root path to sanitize.
885
-
886
- Returns
887
- -------
888
- endpoint_id : uuid.UUID
889
- The endpoint UUID, returned unchanged to match `Globus._endpoint_id_root` signature.
890
- str, None
891
- The root path as a POSIX style string, or None if root_path is None.
892
-
893
- Examples
894
- --------
895
- Providing a local root path on Windows
896
-
897
- >>> glo = Globus()
898
- >>> uid = glo.endpoints['local']['id']
899
- >>> glo._sanitize_local(uid, 'C:\\Data')
900
- UUID('50282ed5-3124-11ee-b977-482ae33bf6ca'), '/C/Data'
901
-
902
- Path left unchanged on *nix systems or when endpoint ID is not local
903
-
904
- >>> uid = UUID('c7c46cec-3124-11ee-bf50-482ae33bf6ca')
905
- >>> glo._sanitize_local(uid, 'C:\\Data')
906
- UUID('c7c46cec-3124-11ee-bf50-482ae33bf6ca'), 'C:\\Data'
907
-
908
- """
909
- if not root_path:
910
- return endpoint_id, None
911
- # If the local root path is not explicitly a Windows Path and we're on windows, make sure
912
- # it's converted correctly to a POSIX style path
913
- if isinstance(root_path, str):
914
- is_win = sys.platform in ('win32', 'cygwin')
915
- if endpoint_id == self.endpoints['local']['id'] and is_win:
916
- root_path = PureWindowsPath(root_path)
917
- else:
918
- root_path = PurePosixPath(root_path)
919
- return endpoint_id, as_globus_path(root_path)
920
-
921
- @ensure_logged_in
922
- def transfer_data(self, data_path, source_endpoint, destination_endpoint,
923
- recursive=False, **kwargs):
924
- """Transfer one or more paths between endpoints.
925
-
926
- At least one of the endpoints must be a server endpoint. Both file and directory paths may
927
- be provided, however if recursive is true, all paths must be directories.
928
-
929
- Parameters
930
- ----------
931
- data_path : str, list of str
932
- One or more data paths, relative to the endpoint root path.
933
- source_endpoint : str, uuid.UUID
934
- The name or UUID of the source endpoint.
935
- destination_endpoint : str, uuid.UUID
936
- The name or UUID of the destination endpoint.
937
- recursive : bool
938
- If true, transfer the contents of nested directories (NB: all data_paths must be
939
- directories).
940
- **kwargs
941
- See globus_sdk.TransferData.
942
-
943
- Returns
944
- -------
945
- uuid.UUID
946
- The Globus transfer ID.
947
-
948
- Examples
949
- --------
950
- Transfer two files (asynchronous)
951
-
952
- >>> glo = Globus()
953
- >>> files = ['file.ext', 'foo.bar']
954
- >>> task_id = glo.transfer_data(files, 'source_endpoint', 'destination_endpoint')
955
-
956
- Transfer a file (synchronous)
957
- >>> file = 'file.ext'
958
- >>> task_id = glo.run_task(lambda: glo.transfer_data(file, 'src_endpoint', 'dst_endpoint'))
959
-
960
- Transfer a folder (asynchronous)
961
-
962
- >>> folder = 'path/to/folder'
963
- >>> task_id = glo.transfer_data(
964
- ... folder, 'source_endpoint', 'destination_endpoint', recursive=True)
965
-
966
- """
967
- kwargs['source_endpoint'] = (source_endpoint
968
- if is_uuid(source_endpoint, versions=(1,))
969
- else self.endpoints.get(source_endpoint)['id'])
970
- kwargs['destination_endpoint'] = (destination_endpoint
971
- if is_uuid(destination_endpoint, versions=(1,))
972
- else self.endpoints.get(destination_endpoint)['id'])
973
- transfer_object = globus_sdk.TransferData(self.client, **kwargs)
974
-
975
- # add any number of items to the submission data
976
- for path in ensure_list(data_path):
977
- src = self._endpoint_path(path, self._endpoint_id_root(source_endpoint)[1])
978
- dst = self._endpoint_path(path, self._endpoint_id_root(destination_endpoint)[1])
979
- transfer_object.add_item(src, dst, recursive=recursive)
980
- response = self.client.submit_transfer(transfer_object)
981
- return UUID(response.data['task_id'])
982
-
983
- @ensure_logged_in
984
- def delete_data(self, data_path, endpoint, recursive=False, **kwargs):
985
- """Delete one or more paths within an endpoint.
986
-
987
- Both file and directory paths may be provided, however if recursive is true, all paths must
988
- be directories.
989
-
990
- Parameters
991
- ----------
992
- data_path : str, list of str
993
- One or more data paths, relative to the endpoint root path.
994
- endpoint : str, uuid.UUID
995
- The name or UUID of the endpoint.
996
- recursive : bool
997
- If true, delete the contents of nested directories (NB: all data_paths must be
998
- directories).
999
- **kwargs
1000
- See globus_sdk.DeleteData.
1001
-
1002
- Returns
1003
- -------
1004
- uuid.UUID
1005
- The Globus transfer ID.
1006
-
1007
- Examples
1008
- --------
1009
- Delete two files, ignoring those that don't exist (asynchronous)
1010
-
1011
- >>> glo = Globus()
1012
- >>> files = ['file.ext', 'foo.bar']
1013
- >>> task_id = glo.delete_data(files, 'endpoint_name', ignore_missing=True)
1014
-
1015
- Delete a file (synchronous)
1016
-
1017
- >>> task_id = glo.run_task(lambda: glo.delete_data('file.ext', 'endpoint_name'))
1018
-
1019
- Recursively delete a folder (asynchronous)
1020
-
1021
- >>> folder = 'path/to/folder'
1022
- >>> task_id = glo.delete_data(folder, 'endpoint_name', recursive=True)
1023
-
1024
- """
1025
- kwargs['endpoint'] = (endpoint
1026
- if is_uuid(endpoint, versions=(1,))
1027
- else self.endpoints.get(endpoint)['id'])
1028
- delete_object = globus_sdk.DeleteData(self.client, recursive=recursive, **kwargs)
1029
-
1030
- # add any number of items to the submission data
1031
- for path in ensure_list(data_path):
1032
- fullpath = self._endpoint_path(path, self._endpoint_id_root(endpoint)[1])
1033
- delete_object.add_item(fullpath)
1034
- response = self.client.submit_delete(delete_object)
1035
- return UUID(response.data['task_id'])
1036
-
1037
- @ensure_logged_in
1038
- def ls(self, endpoint, path, remove_uuid=False, return_size=False, max_retries=1):
1039
- """Return the list of (filename, filesize) in a given endpoint directory.
1040
-
1041
- NB: If you're using ls routinely when transferring or deleting files you're probably doing
1042
- something wrong!
1043
-
1044
- Parameters
1045
- ----------
1046
- endpoint : uuid.UUID, str
1047
- The Globus endpoint. May be a UUID or a key in the Globus.endpoints attribute.
1048
- path : Path, PurePath, str
1049
- The absolute or relative Globus path to list. Note: if endpoint is a UUID, the path
1050
- must be absolute.
1051
- remove_uuid : bool
1052
- If True, remove the UUID from the returned filenames.
1053
- return_size : bool
1054
- If True, return the size of each listed file in bytes.
1055
- max_retries : int
1056
- The number of times to retry the remote operation before raising. Increasing this may
1057
- mitigate unstable network issues.
1058
-
1059
- Returns
1060
- -------
1061
- list
1062
- A list of PurePosixPath objects of the files and folders listed, or if return_size is
1063
- True, tuples of PurePosixPath objects and the corresponding file sizes.
1064
-
1065
- """
1066
- # Check if endpoint is a UUID, if not try to get UUID from registered endpoints
1067
- endpoint_id, root_path = self._endpoint_id_root(endpoint)
1068
- # Check if root_path should be added and if path is absolute
1069
- path = self._endpoint_path(path, root_path)
1070
- # Do the actual listing
1071
- out = []
1072
- response = []
1073
- for i in range(max_retries + 1):
1074
- try:
1075
- response = self.client.operation_ls(endpoint_id, path=path)
1076
- break
1077
- except (GlobusConnectionError, GlobusAPIError) as ex:
1078
- if i == max_retries:
1079
- raise ex
1080
- for entry in response:
1081
- fn = PurePosixPath(remove_uuid_string(entry['name']) if remove_uuid else entry['name'])
1082
- if return_size:
1083
- size = entry['size'] if entry['type'] == 'file' else None
1084
- out.append((fn, size))
1085
- else:
1086
- out.append(fn)
1087
-
1088
- return out
1089
-
1090
- # TODO: allow to move all content of a directory with 'recursive' keyword in add_item
1091
- @ensure_logged_in
1092
- def mv(self, source_endpoint, target_endpoint, source_paths, target_paths,
1093
- timeout=None, **kwargs):
1094
- """Move files from one endpoint to another.
1095
-
1096
- Parameters
1097
- ----------
1098
- source_endpoint : uuid.UUID, str
1099
- The Globus source endpoint. May be a UUID or a key in the Globus.endpoints attribute.
1100
- target_endpoint : uuid.UUID, str
1101
- The Globus destination endpoint. May be a UUID or a key in the Globus.endpoints
1102
- attribute.
1103
- source_paths : list of str, pathlib.Path or pathlib.PurePath
1104
- The absolute or relative Globus paths of source files to move. Note: if endpoint is
1105
- a UUID, the path must be absolute.
1106
- target_paths : list of str, Path or PurePath
1107
- The absolute or relative Globus paths of destination files to move. Note: if endpoint
1108
- is a UUID, the path must be absolute.
1109
- timeout : int
1110
- Maximum time in seconds to wait for the task to complete.
1111
- **kwargs
1112
- Optional arguments for globus_sdk.TransferData.
1113
-
1114
- Returns
1115
- -------
1116
- uuid.UUID
1117
- A Globus task ID.
1118
-
1119
- """
1120
- source_endpoint, source_root = self._endpoint_id_root(source_endpoint)
1121
- target_endpoint, target_root = self._endpoint_id_root(target_endpoint)
1122
- source_paths = [str(self._endpoint_path(path, source_root)) for path in source_paths]
1123
- target_paths = [str(self._endpoint_path(path, target_root)) for path in target_paths]
1124
-
1125
- tdata = globus_sdk.TransferData(self.client, source_endpoint, target_endpoint,
1126
- verify_checksum=True, sync_level='checksum',
1127
- label='ONE globus', **kwargs)
1128
- for source_path, target_path in zip(source_paths, target_paths):
1129
- tdata.add_item(source_path, target_path)
1130
-
1131
- def wrapper():
1132
- """Function to submit Globus transfer and return the resulting task ID."""
1133
- response = self.client.submit_transfer(tdata)
1134
- task_id = response.get('task_id', None)
1135
- return task_id
1136
-
1137
- return self.run_task(wrapper, timeout=timeout)
1138
-
1139
- @ensure_logged_in
1140
- def run_task(self, globus_func, retries=3, timeout=None):
1141
- """Block until a Globus task finishes and retry upon Network or REST Errors.
1142
-
1143
- globus_func needs to submit a task to the client and return a task_id.
1144
-
1145
- Parameters
1146
- ----------
1147
- globus_func : function, Callable
1148
- A function that returns a Globus task ID, typically it will submit a transfer.
1149
- retries : int
1150
- The number of times to call globus_func if it raises a Globus error.
1151
- timeout : int
1152
- Maximum time in seconds to wait for the task to complete.
1153
-
1154
- Returns
1155
- -------
1156
- uuid.UUID
1157
- Globus task ID.
1158
-
1159
- Raises
1160
- ------
1161
- IOError
1162
- Timed out waiting for task to complete.
1163
-
1164
- TODO Add a quick fail option that returns when files missing, etc.
1165
- TODO Add status logging
1166
-
1167
- """
1168
- try:
1169
- task_id = globus_func()
1170
- assert is_uuid(task_id, versions=(1, 2)), 'invalid UUID returned'
1171
- print(f'Waiting for Globus task {task_id} to complete')
1172
- # While the task is active, print a dot after each wait; give up once `timeout` is reached
1173
- i = 0
1174
- while not self.client.task_wait(task_id, timeout=5, polling_interval=1):
1175
- print('.', end='')
1176
- i += 1
1177
- if timeout and i >= timeout:
1178
- task = self.client.get_task(task_id)
1179
- raise IOError(f'Globus task {task_id} timed out after {timeout} seconds, '
1180
- f'with task status {task["status"]}')
1181
- task = self.client.get_task(task_id)
1182
- if task['status'] == 'SUCCEEDED':
1183
- # Sometimes Globus sets the status to SUCCEEDED but doesn't truly finish.
1184
- # Handle error thrown when querying task_successful_transfers too early
1185
- try:
1186
- successful = self.client.task_successful_transfers(task_id)
1187
- skipped = self.client.task_skipped_errors(task_id)
1188
- print(f'\nGlobus task {task_id} completed.'
1189
- f'\nSkipped transfers: {len(list(skipped))}'
1190
- f'\nSuccessful transfers: {len(list(successful))}')
1191
- for info in successful:
1192
- _logger.debug(f'{info["source_path"]} -> {info["destination_path"]}')
1193
- except TransferAPIError:
1194
- _logger.warning(f'\nGlobus task {task_id} SUCCEEDED but querying transfers was'
1195
- f'unsuccessful')
1196
- else:
1197
- raise IOError(f'Globus task finished unsuccessfully with status {task["status"]}')
1198
- return self._ensure_uuid(task_id)
1199
- except (GlobusAPIError, NetworkError, GlobusTimeoutError, GlobusConnectionError,
1200
- GlobusConnectionTimeoutError) as e:
1201
- if retries < 1:
1202
- _logger.error('\nMax retries exceeded.')
1203
- raise e
1204
- else:
1205
- _logger.debug('\nGlobus experienced a network error', exc_info=True)
1206
- # if we reach this point without returning or erring, retry
1207
- _logger.warning('\nGlobus experienced a network error, retrying.')
1208
- self.run_task(globus_func, retries=(retries - 1), timeout=timeout)
1209
-
1210
- @ensure_logged_in
1211
- async def task_wait_async(self, task_id, polling_interval=10, timeout=10):
1212
- """Asynchronously wait until a Task is complete or fails, with a time limit.
1213
-
1214
- If the task status is ACTIVE after timeout, returns False, otherwise returns True.
1215
-
1216
- Parameters
1217
- ----------
1218
- task_id : str, uuid.UUID
1219
- A Globus task UUID to wait on for completion.
1220
- polling_interval : float
1221
- Number of seconds between queries to Globus about the task status. Minimum 1 second.
1222
- timeout : float
1223
- Number of seconds to wait in total. Minimum 1 second.
1224
-
1225
- Returns
1226
- -------
1227
- bool
1228
- True if status not ACTIVE before timeout. False if status still ACTIVE at timeout.
1229
-
1230
- Examples
1231
- --------
1232
- Asynchronously await a task to complete
1233
-
1234
- >>> await Globus().task_wait_async(task_id)
1235
-
1236
- """
1237
- if polling_interval < 1:
1238
- raise GlobusSDKUsageError('polling_interval must be at least 1 second')
1239
- if timeout < 1:
1240
- raise GlobusSDKUsageError('timout must be at least 1 second')
1241
- polling_interval = min(timeout, polling_interval)
1242
- waited_time = 0
1243
- while True:
1244
- task = self.client.get_task(task_id)
1245
- status = task['status']
1246
- if status != 'ACTIVE':
1247
- return True
1248
-
1249
- # check if we timed out before sleeping again
1250
- waited_time += polling_interval
1251
- if waited_time >= timeout:
1252
- return False
1253
-
1254
- await asyncio.sleep(polling_interval)
1
+ """A module for handling file operations through the Globus SDK.
2
+
3
+ Setup
4
+ -----
5
+
6
+ To set up Globus simply instantiate the `Globus` class for the first time and follow the prompts.
7
+ Providing a client name string to the constructor allows one to set up multiple Globus clients
8
+ (i.e. when switching between different Globus client IDs).
9
+
10
+ In order to use this function you need:
11
+
12
+ 1. The client ID of an existing Globus Client (`see this tutorial`_).
13
+ 2. Set up `Global Connect`_ on your local device.
14
+ 3. Register your local device as an `endpoint`_ in your Globus Client.
15
+
16
+
17
+ To modify the settings for a pre-established client, call the `Globus.setup` method with the client
18
+ name:
19
+
20
+ >>> globus = Globus.setup('default')
21
+
22
+ You can update the list of endpoints using the `fetch_endpoints_from_alyx` method:
23
+
24
+ >>> globus = Globus('admin')
25
+ >>> remote_endpoints = globus.fetch_endpoints_from_alyx(alyx=AlyxClient())
26
+
27
+ The endpoints are stored in the `endpoints` property
28
+
29
+ >>> print(globus.endpoints.keys())
30
+ >>> print(globus.endpoints['local'])
31
+
32
+ .. _see this tutorial: https://globus-sdk-python.readthedocs.io/en/stable/tutorial.html
33
+ .. _Global Connect: https://www.globus.org/globus-connect-personal
34
+ .. _endpoint: https://app.globus.org/
35
+
36
+
37
+ Examples
38
+ --------
39
+ Get the full Globus file path
40
+
41
+ >>> relative_path = 'subject/2020-01-01/001/alf/_ibl_trials.table.pqt'
42
+ >>> full_path = globus.to_address(relative_path, 'flatiron_cortexlab')
43
+
44
+ Log in with a limited time token
45
+
46
+ >>> globus = Globus('admin')
47
+ >>> globus.login(stay_logged_in=False)
48
+
49
+ Log out of Globus, revoking and deleting all tokens
50
+
51
+ >>> globus.logout()
52
+ >>> assert not globus.is_logged_in
53
+
54
+ Asynchronously transfer data between Alyx repositories
55
+
56
+ >>> alyx = AlyxClient()
57
+ >>> glo = Globus('admin')
58
+ >>> glo.add_endpoint('flatiron_cortexlab', alyx=alyx)
59
+ >>> glo.add_endpoint('cortex_lab_SR', alyx=alyx)
60
+ >>> task_id = glo.transfer_data('path/to/file', 'flatiron_cortexlab', 'cortex_lab_SR')
61
+
62
+ Synchronously transfer data to an alternate local location
63
+
64
+ >>> from functools import partial
65
+ >>> root_path = '/path/to/new/location'
66
+ >>> glo.add_endpoint(get_local_endpoint_id(), label='alternate_local', root_path=root_path)
67
+ >>> folder = 'camera/ZFM-01867/2021-03-23/002' # An example folder to download
68
+ >>> task = partial(glo.transfer_data, folder, 'integration', 'alternate_local',
69
+ ... label='alternate data', recursive=True)
70
+ >>> task_id = glo.run_task(task) # Submit task to Globus and await completion
71
+
72
+ Temporarily change local data root path and synchronously download file
73
+
74
+ >>> glo.endpoints['local']['root_path'] = '/path/to/new/location'
75
+ >>> file = glo.download_file('path/to/file.ext', 'source_endpoint')
76
+ Path('/path/to/new/location/path/to/file.ext')
77
+
78
+ Await multiple tasks to complete by passing a list of Globus transfer IDs
79
+
80
+ >>> import asyncio
81
+ >>> tasks = [asyncio.create_task(globus.task_wait_async(task_id)) for task_id in task_ids]
82
+ >>> success = asyncio.run(asyncio.gather(*tasks))
83
+
84
+ """
85
+ import os
86
+ import re
87
+ import sys
88
+ import asyncio
89
+ import logging
90
+ from uuid import UUID
91
+ from datetime import datetime
92
+ from pathlib import Path, PurePosixPath, PurePath, PureWindowsPath
93
+ import warnings
94
+ from functools import partial, wraps
95
+
96
+ import globus_sdk
97
+ from globus_sdk import TransferAPIError, GlobusAPIError, NetworkError, GlobusTimeoutError, \
98
+ GlobusConnectionError, GlobusConnectionTimeoutError, GlobusSDKUsageError, NullAuthorizer
99
+ from iblutil.io import params as iopar
100
+ from iblutil.util import ensure_list
101
+
102
+ from one.alf.spec import is_uuid
103
+ from one.alf.path import remove_uuid_string
104
+ import one.params
105
+ from one.webclient import AlyxClient
106
+ from .base import DownloadClient, load_client_params, save_client_params
107
+
108
+ __all__ = ['Globus', 'get_lab_from_endpoint_id', 'as_globus_path']
109
+ _logger = logging.getLogger(__name__)
110
+ CLIENT_KEY = 'globus'
111
+ """str: The default key in the remote settings file"""
112
+
113
+ DEFAULT_PAR = {'GLOBUS_CLIENT_ID': None, 'local_endpoint': None, 'local_path': None}
114
+ """dict: The default Globus parameter fields"""
115
+
116
+ STATUS_MAP = {
117
+ 'ACTIVE': ('QUEUED', 'ACTIVE', 'GC_NOT_CONNECTED', 'UNKNOWN'),
118
+ 'FAILED': ('ENDPOINT_ERROR', 'PERMISSION_DENIED', 'CONNECT_FAILED'),
119
+ 'INACTIVE': 'PAUSED_BY_ADMIN'}
120
+ """dict: A map of Globus status to "nice" status"""
121
+
122
+
123
+ def ensure_logged_in(func):
124
+ """Decorator for the Globus methods.
125
+
126
+ Before calling methods that require authentication, attempts to log in. If the user is already
127
+ logged in, the token may be refreshed to extend the session. If the token has expired and not
128
+ in headless mode, the user is prompted to authorize a new session. If in headless mode and not
129
+ logged in an error is raised.
130
+
131
+ Parameters
132
+ ----------
133
+ func : function
134
+ Method to wrap (e.g. Globus.transfer_data).
135
+
136
+ Returns
137
+ -------
138
+ function
139
+ Handle to wrapped method.
140
+
141
+ """
142
+ @wraps(func)
143
+ def wrapper_decorator(self, *args, **kwargs):
144
+ self.login()
145
+ return func(self, *args, **kwargs)
146
+ return wrapper_decorator
147
+
148
+
149
+ def _setup(par_id=None, login=True, refresh_tokens=True):
150
+ """Sets up Globus as a backend for ONE functions.
151
+
152
+ Parameters
153
+ ----------
154
+ par_id : str
155
+ Parameter profile name to set up e.g. 'default', 'admin'.
156
+
157
+ Returns
158
+ -------
159
+ IBLParams
160
+ A set of Globus parameters.
161
+
162
+ """
163
+ print('Setting up Globus parameter file. See docstring for help.')
164
+ if not par_id:
165
+ default_par_id = 'default'
166
+ par_id = input(
167
+ f'Enter name for this client or press Enter to keep value "{default_par_id}": '
168
+ )
169
+ par_id = par_id.strip() or default_par_id
170
+
171
+ # Read existing globus params if present
172
+ globus_pars = iopar.as_dict(load_client_params(CLIENT_KEY, assert_present=False) or {})
173
+ pars = {**DEFAULT_PAR, **globus_pars.get(par_id, {})}
174
+
175
+ # Set GLOBUS_CLIENT_ID
176
+ current_id = pars['GLOBUS_CLIENT_ID']
177
+ if current_id:
178
+ prompt = (f'Found Globus client ID in parameter file ({current_id}). '
179
+ 'Press Enter to keep it, or enter a new ID here: ')
180
+ pars['GLOBUS_CLIENT_ID'] = input(prompt).strip() or current_id
181
+ else:
182
+ new_id = input('Please enter the Globus client ID: ').strip()
183
+ if not new_id:
184
+ raise ValueError('Globus client ID is a required field')
185
+ pars['GLOBUS_CLIENT_ID'] = new_id
186
+ if not is_uuid(pars['GLOBUS_CLIENT_ID']):
187
+ raise ValueError('Invalid Globus client ID "%s"', pars['GLOBUS_CLIENT_ID'])
188
+
189
+ # Find and set local ID
190
+ message = 'Please enter the local endpoint ID'
191
+ try:
192
+ default_endpoint = str(pars['local_endpoint'] or get_local_endpoint_id())
193
+ message += f' (default: {default_endpoint})'
194
+ except AssertionError:
195
+ default_endpoint = ''
196
+ warnings.warn(
197
+ 'Cannot find local endpoint ID. Beware that this might mean that Globus Connect '
198
+ 'is not set up properly.')
199
+ pars['local_endpoint'] = input(message + ':').strip() or default_endpoint
200
+ if not is_uuid(pars['local_endpoint'], (1, 2)):
201
+ raise ValueError('Globus local endpoint ID must be a UUID version 1 or 2')
202
+
203
+ # Check for local path
204
+ message = 'Please enter the local endpoint path'
205
+ local_path = pars['local_path'] or one.params.get(silent=True).CACHE_DIR
206
+ message += f' (default: {local_path})'
207
+ pars['local_path'] = input(message + ':').strip() or local_path
208
+
209
+ if login:
210
+ # Log in manually and get refresh token to avoid having to login repeatedly
211
+ token = get_token(pars['GLOBUS_CLIENT_ID'], refresh_tokens=refresh_tokens)
212
+ pars.update(token)
213
+
214
+ globus_pars[par_id] = pars
215
+ save_client_params(globus_pars, client_key=CLIENT_KEY)
216
+ print('Finished setup.')
217
+ return iopar.from_dict(pars)
218
+
219
+
220
+ def get_token(client_id, refresh_tokens=True):
221
+ """Get a Globus authentication token.
222
+
223
+ This step requires the user to login to Globus via a browser.
224
+
225
+ Parameters
226
+ ----------
227
+ client_id : str
228
+ A Globus client ID.
229
+ refresh_tokens : bool
230
+ If true, requests a refresh token for repeat logins.
231
+
232
+ Returns
233
+ -------
234
+ dict
235
+ A dict containing the keys {'refresh_token', 'access_token', 'expires_at_seconds'}.
236
+
237
+ """
238
+ client = globus_sdk.NativeAppAuthClient(client_id)
239
+ client.oauth2_start_flow(refresh_tokens=bool(refresh_tokens))
240
+ authorize_url = client.oauth2_get_authorize_url()
241
+ fields = ('refresh_token', 'access_token', 'expires_at_seconds')
242
+ print('To get a new token, go to this URL and login: {0}'.format(authorize_url))
243
+ auth_code = input('Enter the code you get after login here (press "c" to cancel): ').strip()
244
+ if auth_code and auth_code.casefold() != 'c':
245
+ token_response = client.oauth2_exchange_code_for_tokens(auth_code)
246
+ globus_transfer_data = token_response.by_resource_server['transfer.api.globus.org']
247
+ return {k: globus_transfer_data.get(k) for k in fields}
248
+ else:
249
+ return dict.fromkeys(fields)
250
+
251
+
252
+ def _remove_token_fields(pars):
253
+ """Remove the token fields from a parameters object.
254
+
255
+ Parameters
256
+ ----------
257
+ pars : IBLParams, dict
258
+ The Globus parameters containing token fields.
259
+
260
+ Returns
261
+ -------
262
+ IBLParams
263
+ A copy of the params without the token fields.
264
+
265
+ """
266
+ if pars is None:
267
+ return pars
268
+ fields = ('refresh_token', 'access_token', 'expires_at_seconds')
269
+ return iopar.from_dict({k: v for k, v in iopar.as_dict(pars).items() if k not in fields})
270
+
271
+
272
+ def _save_globus_params(pars, client_name):
273
+ """Save Globus client parameters.
274
+
275
+ Parameters
276
+ ----------
277
+ pars : IBLParams, dict
278
+ The Globus client parameters to save.
279
+ client_name : str
280
+ The Globus client name, e.g. 'default'.
281
+
282
+ """
283
+ globus_pars = iopar.as_dict(load_client_params(CLIENT_KEY, assert_present=False) or {})
284
+ globus_pars[client_name] = iopar.as_dict(pars)
285
+ save_client_params(globus_pars, CLIENT_KEY)
286
+
287
+
288
+ def get_local_endpoint_id():
289
+ """Extracts the ID of the local Globus Connect endpoint.
290
+
291
+ Returns
292
+ -------
293
+ uuid.UUID
294
+ The local Globus endpoint ID.
295
+
296
+ """
297
+ msg = ('Cannot find local endpoint ID, check if Globus Connect is set up correctly, '
298
+ '{} exists and contains a UUID.')
299
+ if sys.platform in ('win32', 'cygwin'):
300
+ id_path = Path(os.environ['LOCALAPPDATA']).joinpath('Globus Connect')
301
+ else:
302
+ id_path = Path.home().joinpath('.globusonline', 'lta')
303
+
304
+ id_file = id_path.joinpath('client-id.txt')
305
+ assert id_file.exists(), msg.format(id_file)
306
+ local_id = id_file.read_text().strip()
307
+ assert isinstance(local_id, str), msg.format(id_file)
308
+ _logger.debug(f'Found local endpoint ID in Globus Connect settings {local_id}')
309
+ return UUID(local_id)
310
+
311
+
312
+ def get_local_endpoint_paths():
313
+ """Extracts the local endpoint paths accessible by Globus Connect.
314
+
315
+ NB: This is only supported on Linux.
316
+
317
+ Returns
318
+ -------
319
+ list of pathlib.Path
320
+ Local endpoint paths set in Globus Connect.
321
+
322
+ """
323
+ if sys.platform in ('win32', 'cygwin'):
324
+ print('On windows the local Globus path needs to be entered manually')
325
+ return []
326
+ else:
327
+ path_file = Path.home().joinpath('.globusonline', 'lta', 'config-paths')
328
+ if path_file.exists():
329
+ local_paths = map(Path, filter(None, path_file.read_text().strip().split(',')))
330
+ _logger.debug('Found local endpoint paths in Globus Connect settings')
331
+ else:
332
+ msg = ('Cannot find local endpoint path, check if Globus Connect is set up correctly, '
333
+ '{} exists and contains a valid path.')
334
+ warnings.warn(msg.format(path_file))
335
+ local_paths = []
336
+ return list(local_paths)
337
+
338
+
339
+ def get_lab_from_endpoint_id(endpoint=None, alyx=None):
340
+ """Extracts lab names associated with a given endpoint UUID.
341
+
342
+ Finds the lab names that are associated to data repositories with the provided Globus endpoint
343
+ UUID.
344
+
345
+ Parameters
346
+ ----------
347
+ endpoint : uuid.UUID, str
348
+ Endpoint UUID. Optional; if not given, will attempt to find the local endpoint UUID.
349
+ alyx : one.webclient.AlyxClient
350
+ An instance of AlyxClient to use.
351
+
352
+ Returns
353
+ -------
354
+ list
355
+ The lab names associated with the endpoint UUID.
356
+
357
+ """
358
+ alyx = alyx or AlyxClient(silent=True)
359
+ if not endpoint:
360
+ endpoint = get_local_endpoint_id()
361
+ lab = alyx.rest('labs', 'list', django=f'repositories__globus_endpoint_id,{str(endpoint)}')
362
+ if len(lab):
363
+ lab_names = [la['name'] for la in lab]
364
+ return lab_names
365
+
366
+
367
+ def as_globus_path(path):
368
+ """Convert a path into one suitable for the Globus TransferClient.
369
+
370
+ Parameters
371
+ ----------
372
+ path : pathlib.Path, pathlib.PurePath, str
373
+ A path to convert to a Globus-compliant path string.
374
+
375
+ Returns
376
+ -------
377
+ str
378
+ A formatted path string.
379
+
380
+ Notes
381
+ -----
382
+ - If using tilde in path, the home folder of your Globus Connect instance must be the same as
383
+ the OS home dir.
384
+ - If validating a path for another system ensure the input path is a PurePath, in particular,
385
+ on a Linux computer a remote Windows path should first be made into a PureWindowsPath.
386
+
387
+ Examples
388
+ --------
389
+ A Windows path (on Windows OS)
390
+
391
+ >>> as_globus_path('E:\\FlatIron\\integration')
392
+ '/E/FlatIron/integration'
393
+
394
+ When explicitly a POSIX path, remains unchanged
395
+
396
+ >>> as_globus_path(PurePosixPath('E:\\FlatIron\\integration'))
397
+ 'E:\\FlatIron\\integration'
398
+
399
+ A relative POSIX path (on *nix OS)
400
+
401
+ >>> as_globus_path('../data/integration')
402
+ '/mnt/data/integration'
403
+
404
+ A valid Globus path remains unchanged
405
+
406
+ >>> as_globus_path('/E/FlatIron/integration')
407
+ '/E/FlatIron/integration'
408
+
409
+ """
410
+ is_pure_path = isinstance(path, PurePath)
411
+ is_win = sys.platform in ('win32', 'cygwin') or isinstance(path, PureWindowsPath)
412
+ if isinstance(path, str):
413
+ path = Path(path)
414
+ if (
415
+ re.match(r'/[A-Z]($|/)', path.as_posix())
416
+ if is_win
417
+ else path.is_absolute()
418
+ ):
419
+ return path.as_posix()
420
+ if not is_pure_path:
421
+ path = path.resolve()
422
+ if path.drive:
423
+ path = '/' + str(path.as_posix().replace(':', '', 1))
424
+ return str(path)
425
+
426
+
427
+ class Globus(DownloadClient):
428
+
429
def __init__(self, client_name='default', connect=True, headless=False):
    """Wrapper for managing files on Globus endpoints.

    Parameters
    ----------
    client_name : str
        Parameter profile name to load e.g. 'default', 'admin'.
    connect : bool
        Whether to create the Globus SDK client on init.
    headless : bool
        If true, raises RuntimeError when no parameters are saved for this client instead of
        running the interactive setup.

    Raises
    ------
    RuntimeError
        No saved parameters for `client_name` and `headless` is true.

    Examples
    --------
    Instantiate without authentication

    >>> globus = Globus(connect=False)

    Instantiate without user prompts

    >>> globus = Globus('server', headless=True)

    """
    super().__init__()
    self.client = None  # populated by `login`
    self.client_name = client_name
    self.headless = headless
    self._pars = load_client_params(f'{CLIENT_KEY}.{client_name}', assert_present=False)

    if self._pars is None:
        # Nothing saved for this profile: either fail (headless) or run the setup routine
        if self.headless:
            raise RuntimeError(f'Globus not set up for client "{self.client_name}"')
        self._pars = _setup(self.client_name, login=False)

    if connect:
        self.login()

    # Register the local endpoint from the loaded parameters
    local_endpoint = {'id': UUID(self._pars.local_endpoint)}
    self.endpoints = {'local': local_endpoint}
    _logger.info('Adding local endpoint.')
    local_endpoint['root_path'] = self._pars.local_path
473
+
474
+ @property
475
+ def is_logged_in(self):
476
+ """bool: Check if client exists and is authenticated."""
477
+ has_token = self.client and self.client.authorizer.get_authorization_header() is not None
478
+ return has_token and not self._token_expired
479
+
480
+ @property
481
+ def _token_expired(self):
482
+ """bool: True if token absent or expired; False if valid.
483
+
484
+ Note the 'expires_at_seconds' may be greater than `Globus.client.authorizer.expires_at` if
485
+ using refresh tokens. The `login` method will always refresh the token if still valid.
486
+ """
487
+ try:
488
+ authorizer = getattr(self.client, 'authorizer', None)
489
+ has_refresh_token = self._pars.as_dict().get('refresh_token') is not None
490
+ if has_refresh_token and isinstance(authorizer, globus_sdk.RefreshTokenAuthorizer):
491
+ self.client.authorizer.ensure_valid_token() # Fetch new refresh token if needed
492
+ except Exception as ex:
493
+ _logger.debug('Failed to refresh token: %s', ex)
494
+ expires_at_seconds = getattr(self._pars, 'expires_at_seconds', 0)
495
+ return expires_at_seconds - datetime.utcnow().timestamp() < 60
496
+
497
def login(self, stay_logged_in=None):
    """Authenticate Globus client.

    Does nothing if already logged in. If the stored parameters lack token fields, or the
    relevant token is considered expired, a new token is requested and saved before the SDK
    client is created.

    Parameters
    ----------
    stay_logged_in : bool, optional
        If True, use refresh token to remain logged in for longer. If False, use an auth
        token without the option of refreshing when expired. If not specified, treated as
        True.

    Raises
    ------
    RuntimeError
        New tokens are required but the instance is headless.

    """
    if self.is_logged_in:
        _logger.debug('Already logged in')
        return

    if stay_logged_in is None:
        stay_logged_in = True

    # With refresh tokens, "expired" means no refresh token is stored; otherwise check expiry
    if stay_logged_in:
        expired = self._pars.as_dict().get('refresh_token') is None
    else:
        expired = bool(self._token_expired)

    token_fields = {'refresh_token', 'access_token', 'expires_at_seconds'}
    has_all_fields = token_fields.issubset(iopar.as_dict(self._pars))
    if not has_all_fields or expired:
        if self.headless:
            raise RuntimeError(f'Globus not authenticated for client "{self.client_name}"')
        token = get_token(self._pars.GLOBUS_CLIENT_ID, refresh_tokens=stay_logged_in)
        if not any(token.values()):
            _logger.debug('Login cancelled by user')
            return
        # Merge the new token fields into the parameters and persist them
        self._pars = iopar.from_dict({**self._pars.as_dict(), **token})
        _save_globus_params(self._pars, self.client_name)

    self._authenticate(stay_logged_in)
532
+
533
def logout(self):
    """Revoke any tokens and delete them from the client and parameter file."""
    client = self.client
    if client and client.authorizer and not isinstance(client.authorizer, NullAuthorizer):
        # NOTE(review): `oauth2_revoke_token` is called without a token argument and assumes
        # the authorizer exposes `auth_client` - confirm against the installed globus_sdk.
        client.authorizer.auth_client.oauth2_revoke_token()
        del self.client.authorizer
        self.client.authorizer = NullAuthorizer()

    # Strip token fields from the saved parameters (if any) and from the in-memory copy
    saved = load_client_params(f'{CLIENT_KEY}.{self.client_name}', assert_present=False)
    if saved:
        _save_globus_params(_remove_token_fields(saved), self.client_name)
    self._pars = _remove_token_fields(self._pars)
543
+
544
def _authenticate(self, stay_logged_in=None):
    """Authenticate and instantiate Globus SDK client.

    Parameters
    ----------
    stay_logged_in : bool, optional
        If not False and a refresh token is stored, a RefreshTokenAuthorizer is used.
        Otherwise an AccessTokenAuthorizer is built from the stored access token.

    Raises
    ------
    RuntimeError
        No refresh token is used and the access token is absent or expired.

    """
    use_refresh = self._pars.as_dict().get('refresh_token') and stay_logged_in is not False
    if use_refresh:
        auth_client = globus_sdk.NativeAppAuthClient(self._pars.GLOBUS_CLIENT_ID)
        auth_client.oauth2_start_flow(refresh_tokens=True)
        authorizer = globus_sdk.RefreshTokenAuthorizer(
            self._pars.refresh_token, auth_client,
            on_refresh=self._save_refresh_token_callback)
    else:
        if stay_logged_in is True:
            warnings.warn('No refresh token. Please log out and back in to remain logged in.')
        if self._token_expired is not False:
            raise RuntimeError(f'token no longer valid for client "{self.client_name}"')
        authorizer = globus_sdk.AccessTokenAuthorizer(self._pars.access_token)
    self.client = globus_sdk.TransferClient(authorizer=authorizer)
558
+
559
+ def _save_refresh_token_callback(self, res):
560
+ """Save a token fetched by the refresh token authorizer.
561
+
562
+ This is a callback for the globus_sdk.RefreshTokenAuthorizer to update the parameters.
563
+
564
+ Parameters
565
+ ----------
566
+ res : globus_sdk.services.auth.OAuthTokenResponse
567
+ An Open Authorization response object.
568
+
569
+ """
570
+ if not res or not (token := next(iter(res.by_resource_server.values()), None)):
571
+ return
572
+ token_fields = {'refresh_token', 'access_token', 'expires_at_seconds'}
573
+ self._pars = iopar.from_dict(
574
+ {**self._pars.as_dict(), **{k: v for k, v in token.items() if k in token_fields}})
575
+ _save_globus_params(self._pars, self.client_name)
576
+
577
def fetch_endpoints_from_alyx(self, alyx=None, overwrite=False):
    """Update endpoints property with Alyx Globus data repositories.

    Repositories without a Globus endpoint ID are skipped.

    Parameters
    ----------
    alyx : one.webclient.AlyxClient
        An optional AlyxClient.
    overwrite : bool
        Whether existing endpoint with the same label should be replaced.

    Returns
    -------
    dict
        The entries of `Globus.endpoints` whose label matches an Alyx repository name.

    """
    alyx = alyx or AlyxClient()
    repositories = alyx.rest('data-repository', 'list')
    for repo in repositories:
        globus_id = repo['globus_endpoint_id']
        if globus_id:
            self.add_endpoint(
                UUID(globus_id), label=repo['name'], root_path=repo['globus_path'],
                overwrite=overwrite
            )
    repo_names = {repo['name'] for repo in repositories}
    return {label: ep for label, ep in self.endpoints.items() if label in repo_names}
604
+
605
def to_address(self, data_path, endpoint):
    """Return the full Globus path of a data path on a given endpoint.

    Parameters
    ----------
    data_path : Path, PurePath, str
        An absolute or relative POSIX path
    endpoint : str, uuid.UUID
        An endpoint label or UUID.

    Returns
    -------
    str
        A complete path string formatted for Globus.

    Examples
    --------
    >>> glo = Globus()
    >>> glo.add_endpoint('0ec47586-3a19-11eb-b173-0ee0d5d9299f',
    ...                  label='foobar', root_path='/foo')
    >>> glo.to_address('bar/baz.ext', 'foobar')
    '/foo/bar/baz.ext'

    """
    # Only the root path of the endpoint is needed here; the endpoint ID is discarded
    root_path = self._endpoint_id_root(endpoint)[1]
    return self._endpoint_path(data_path, root_path)
631
+
632
@ensure_logged_in
def download_file(self, file_address, source_endpoint, recursive=False, **kwargs):
    """Download one or more files via Globus.

    The transfer to the 'local' endpoint is submitted through `Globus.run_task`, which blocks
    until the task finishes. The returned paths are built from the destination paths of the
    task's successful transfers.

    Parameters
    ----------
    file_address : str, list of str
        One or more relative POSIX paths to download.
    source_endpoint : str, uuid.UUID
        The source endpoint name or uuid.
    recursive : bool
        If true, transfer the contents of nested directories (NB: all data_paths must be
        directories).
    **kwargs
        See Globus.transfer_data. If no 'label' is given, 'ONE download' is used.

    Returns
    -------
    pathlib.Path, list of pathlib.Path
        The downloaded file path(s). If recursive is True, a list is always returned.

    Notes
    -----
    - Assumes that the local endpoint root path is NOT POSIX style on Windows.
    - The existence of the downloaded file(s) is checked with `assert` statements.

    TODO Return None for failed files

    Examples
    --------
    Download a single file

    >>> file = Globus().download_file('path/to/file', '0ec47586-3a19-11eb-b173-0ee0d5d9299f')

    Download multiple files and verify checksum

    >>> files = ['relative/file/path.ext', 'foo.bar']
    >>> files = Globus().download_file(files, 'source_endpoint_name', verify_checksum=True)

    Download a folder

    >>> files = Globus().download_file('folder/path', 'source_endpoint_name', recursive=True)

    """
    # A single path is returned only for a single str input without recursion
    return_single = isinstance(file_address, str) and recursive is False
    kwargs['label'] = kwargs.get('label', 'ONE download')
    task = partial(self.transfer_data, file_address, source_endpoint, 'local',
                   recursive=recursive, **kwargs)
    task_id = self.run_task(task)
    files = []
    root = Path(self.endpoints['local']['root_path'])
    # Length of the Globus-formatted local root, used to strip it from destination paths
    idx = len(self._endpoint_path(PurePosixPath(as_globus_path(root))))
    for info in self.client.task_successful_transfers(task_id):
        files.append(info['destination_path'][idx:].strip('/'))

    if return_single:
        # NOTE(review): raises IndexError if no transfer was successful - confirm intended
        file = root / files[0]
        assert file.exists()
        return file

    # Order files by input
    def _best_match(x):
        """Return the index of the input file that best matches downloaded file."""
        # Score each input fragment by the fraction of the downloaded path it covers
        spans = [len(frag) / len(x) if frag in x else 0 for frag in ensure_list(file_address)]
        return spans.index(max(spans))
    files = list(map(root.joinpath, sorted(files, key=_best_match)))
    assert all(map(Path.exists, filter(None, files)))
    return files
699
+
700
@staticmethod
def setup(client_name='default', **kwargs):
    """Run the Globus setup routine for a client profile and return a new Globus object.

    In order to use this function you need:

    1. The client ID of an existing Globus Client (`see this tutorial`_).
    2. Set up `Globus Connect`_ on your local device.
    3. Register your local device as an `endpoint`_ in your Globus Client.

    .. _see this tutorial: https://globus-sdk-python.readthedocs.io/en/stable/tutorial.html
    .. _Globus Connect: https://www.globus.org/globus-connect-personal
    .. _endpoint: https://app.globus.org/

    Parameters
    ----------
    client_name : str
        Parameter profile name to set up e.g. 'default', 'admin'.
    **kwargs
        Optional Globus constructor arguments.

    Returns
    -------
    Globus
        A new Globus client object.

    """
    # Parameters are set up first without logging in; the constructor handles login
    _setup(client_name, login=False)
    instance = Globus(client_name, **kwargs)
    return instance
729
+
730
def add_endpoint(self, endpoint, label=None, root_path=None, overwrite=False, alyx=None):
    """Add an endpoint to the Globus instance to be used by other functions.

    Parameters
    ----------
    endpoint : uuid.UUID, str
        The endpoint UUID or database repository name of the endpoint.
    label : str
        Label to access the endpoint. If endpoint is UUID this has to be set, otherwise is
        optional.
    root_path : str, pathlib.Path, pathlib.PurePath
        File path to be accessed by Globus on the endpoint.
    overwrite : bool
        Whether existing endpoint with the same label should be replaced.
    alyx : one.webclient.AlyxClient
        An AlyxClient instance for looking up repository information.

    Raises
    ------
    ValueError
        `endpoint` is a UUID and no `label` was provided.

    Notes
    -----
    If the label already exists and `overwrite` is False, an error is logged and the
    endpoint is not added.

    """
    # Accept the same UUID versions as `_endpoint_id_root` so that any endpoint that can be
    # looked up by UUID can also be added by UUID (previously only versions 1 and 2 were
    # recognised and other UUIDs were mistaken for Alyx repository names).
    if is_uuid(endpoint, versions=range(1, 5)):
        if label is None:
            raise ValueError('If "endpoint" is a UUID, "label" cannot be None.')
        endpoint_id = self._ensure_uuid(endpoint)
    else:
        # Not a UUID: treat as a repository name and look up its details
        repo = self.repo_from_alyx(endpoint, alyx=alyx)
        endpoint_id = UUID(repo['globus_endpoint_id'])
        root_path = root_path or repo['globus_path']
        label = label or endpoint
    if label in self.endpoints and overwrite is False:
        _logger.error(f'An endpoint called "{label}" already exists. Choose a different label '
                      'or set overwrite=True')
    else:
        self.endpoints[label] = {'id': endpoint_id}
        if root_path:
            self.endpoints[label]['root_path'] = root_path
764
+
765
+ @staticmethod
766
+ def _endpoint_path(path, root_path=None):
767
+ """Given an absolute path or relative path with a root path, return a Globus path str.
768
+
769
+ Note: Paths must be POSIX or Globus-compliant paths. In other words for Windows systems
770
+ the input root_path or absolute path must be passed through `as_globus_path` before
771
+ calling this method.
772
+
773
+ TODO include globus_path_from_dataset
774
+
775
+ Parameters
776
+ ----------
777
+ path : Path, PurePath, str
778
+ An absolute or relative POSIX path
779
+ root_path : Path, PurePath, str
780
+ A root path to prepend. Optional if `path` is absolute.
781
+
782
+ Returns
783
+ -------
784
+ str
785
+ A path string formatted for Globus.
786
+
787
+ See Also
788
+ --------
789
+ as_globus_path
790
+
791
+ Raises
792
+ ------
793
+ ValueError
794
+ Path was not absolute and no root path was given. An absolute path must start with
795
+ a slash on *nix systems.
796
+
797
+ """
798
+ if isinstance(path, str):
799
+ path = PurePosixPath(path)
800
+ if root_path and not str(path).startswith(str(root_path)):
801
+ path = PurePosixPath(root_path) / path
802
+ if not path.is_absolute():
803
+ raise ValueError(f'{path} is relative and no root_path defined')
804
+ return as_globus_path(path)
805
+
806
+ @staticmethod
807
+ def _ensure_uuid(uid):
808
+ """Ensures UUID object returned.
809
+
810
+ Parameters
811
+ ----------
812
+ uid : str, uuid.UUID
813
+ A UUID to cast to UUID object.
814
+
815
+ Returns
816
+ -------
817
+ uuid.UUID
818
+ A UUID object.
819
+
820
+ """
821
+ return UUID(uid) if not isinstance(uid, UUID) else uid
822
+
823
+ def _endpoint_id_root(self, endpoint):
824
+ """Return endpoint UUID and root path from a given endpoint identifier.
825
+
826
+ Parameters
827
+ ----------
828
+ endpoint : str, uuid.UUID
829
+ An endpoint label or UUID.
830
+
831
+ Returns
832
+ -------
833
+ uuid.UUID
834
+ The endpoint UUID.
835
+ str, None
836
+ The POSIX-style endpoint root path (if defined).
837
+
838
+ Warnings
839
+ --------
840
+ UserWarning
841
+ If endpoint UUID is associated with multiple root paths, it is better to provide the
842
+ endpoint label to avoid this warning and to ensure the intended root path is returned.
843
+
844
+ See Also
845
+ --------
846
+ Globus._sanitize_local
847
+
848
+ """
849
+ root_path = None
850
+ if endpoint in self.endpoints.keys():
851
+ endpoint_id = self.endpoints[endpoint]['id']
852
+ if 'root_path' in self.endpoints[endpoint].keys():
853
+ root_path = self.endpoints[endpoint]['root_path']
854
+ return self._sanitize_local(endpoint_id, root_path)
855
+ elif is_uuid(endpoint, range(1, 5)):
856
+ # If a UUID was provided, find the first endpoint with a root path with the UUID
857
+ endpoint_id = self._ensure_uuid(endpoint)
858
+ matching = (
859
+ k for k, v in self.endpoints.items() if v['id'] == endpoint_id and 'root_path' in v
860
+ )
861
+ if name := next(matching, None):
862
+ # Warn of ambiguity if multiple endpoints share a UUID
863
+ if next(matching, None) is not None:
864
+ warnings.warn(
865
+ f'Multiple endpoints added with the same UUID, '
866
+ f'using root path from "{name}"')
867
+ root_path = self.endpoints[name]['root_path']
868
+ else:
869
+ root_path = None
870
+ return self._sanitize_local(endpoint_id, root_path)
871
+ else:
872
+ raise ValueError(
873
+ '"endpoint" must be a UUID or the label of an endpoint registered in this '
874
+ 'Globus instance. You can add endpoints via the add_endpoints method')
875
+
876
+ def _sanitize_local(self, endpoint_id, root_path):
877
+ """Ensure local root path on Windows is POSIX-style.
878
+
879
+ Parameters
880
+ ----------
881
+ endpoint_id : uuid.UUID
882
+ The endpoint UUID to determine if root path is local.
883
+ root_path : pathlib.Path, str, None
884
+ The root path to sanitize.
885
+
886
+ Returns
887
+ -------
888
+ endpoint_id : uuid.UUID
889
+ The endpoint UUID, returned unchanged to match `Globus._endpoint_id_root` signature.
890
+ str, None
891
+ The root path as a POSIX style string, or None if root_path is None.
892
+
893
+ Examples
894
+ --------
895
+ Providing a local root path on Windows
896
+
897
+ >>> glo = Globus()
898
+ >>> uid = glo.endpoints['local']['id']
899
+ >>> glo._sanitize_local(uid, 'C:\\Data')
900
+ UUID('50282ed5-3124-11ee-b977-482ae33bf6ca'), '/C/Data'
901
+
902
+ Path left unchanged on *nix systems or when endpoint ID is not local
903
+
904
+ >>> uid = UUID('c7c46cec-3124-11ee-bf50-482ae33bf6ca')
905
+ >>> glo._sanitize_local(uid, 'C:\\Data')
906
+ UUID('c7c46cec-3124-11ee-bf50-482ae33bf6ca'), 'C:\\Data'
907
+
908
+ """
909
+ if not root_path:
910
+ return endpoint_id, None
911
+ # If the local root path is not explicitly a Windows Path and we're on windows, make sure
912
+ # it's converted correctly to a POSIX style path
913
+ if isinstance(root_path, str):
914
+ is_win = sys.platform in ('win32', 'cygwin')
915
+ if endpoint_id == self.endpoints['local']['id'] and is_win:
916
+ root_path = PureWindowsPath(root_path)
917
+ else:
918
+ root_path = PurePosixPath(root_path)
919
+ return endpoint_id, as_globus_path(root_path)
920
+
921
@ensure_logged_in
def transfer_data(self, data_path, source_endpoint, destination_endpoint,
                  recursive=False, **kwargs):
    """Transfer one or more paths between endpoints.

    At least one of the endpoints must be a server endpoint. Both file and directory paths may
    be provided, however if recursive is true, all paths must be directories.

    Parameters
    ----------
    data_path : str, list of str
        One or more data paths, relative to the endpoint root path.
    source_endpoint : str, uuid.UUID
        The name or UUID of the source endpoint.
    destination_endpoint : str, uuid.UUID
        The name or UUID of the destination endpoint.
    recursive : bool
        If true, transfer the contents of nested directories (NB: all data_paths must be
        directories).
    **kwargs
        See globus_sdk.TransferData.

    Returns
    -------
    uuid.UUID
        The Globus transfer ID.

    Raises
    ------
    ValueError
        An endpoint is neither a UUID nor a registered endpoint label.

    Examples
    --------
    Transfer two files (asynchronous)

    >>> glo = Globus()
    >>> files = ['file.ext', 'foo.bar']
    >>> task_id = glo.transfer_data(files, 'source_endpoint', 'destination_endpoint')

    Transfer a file (synchronous)

    >>> file = 'file.ext'
    >>> task_id = glo.run_task(lambda: glo.transfer_data(file, 'src_endpoint', 'dst_endpoint'))

    Transfer a folder (asynchronous)

    >>> folder = 'path/to/folder'
    >>> task_id = glo.transfer_data(
    ...     folder, 'source_endpoint', 'destination_endpoint', recursive=True)

    """
    # Resolve each endpoint once, the same way `ls` and `mv` do. This accepts labels and any
    # UUID handled by `_endpoint_id_root`, and raises a clear ValueError for unknown
    # identifiers instead of failing on `None['id']`.
    source_id, source_root = self._endpoint_id_root(source_endpoint)
    destination_id, destination_root = self._endpoint_id_root(destination_endpoint)
    kwargs['source_endpoint'] = source_id
    kwargs['destination_endpoint'] = destination_id
    transfer_object = globus_sdk.TransferData(self.client, **kwargs)

    # add any number of items to the submission data
    for path in ensure_list(data_path):
        src = self._endpoint_path(path, source_root)
        dst = self._endpoint_path(path, destination_root)
        transfer_object.add_item(src, dst, recursive=recursive)
    response = self.client.submit_transfer(transfer_object)
    return UUID(response.data['task_id'])
982
+
983
@ensure_logged_in
def delete_data(self, data_path, endpoint, recursive=False, **kwargs):
    """Delete one or more paths within an endpoint.

    Both file and directory paths may be provided, however if recursive is true, all paths must
    be directories.

    Parameters
    ----------
    data_path : str, list of str
        One or more data paths, relative to the endpoint root path.
    endpoint : str, uuid.UUID
        The name or UUID of the endpoint.
    recursive : bool
        If true, delete the contents of nested directories (NB: all data_paths must be
        directories).
    **kwargs
        See globus_sdk.DeleteData.

    Returns
    -------
    uuid.UUID
        The Globus transfer ID.

    Raises
    ------
    ValueError
        The endpoint is neither a UUID nor a registered endpoint label.

    Examples
    --------
    Delete two files, ignoring those that don't exist (asynchronous)

    >>> glo = Globus()
    >>> files = ['file.ext', 'foo.bar']
    >>> task_id = glo.delete_data(files, 'endpoint_name', ignore_missing=True)

    Delete a file (synchronous)

    >>> task_id = glo.run_task(lambda: glo.delete_data('file.ext', 'endpoint_name'))

    Recursively delete a folder (asynchronous)

    >>> folder = 'path/to/folder'
    >>> task_id = glo.delete_data(folder, 'endpoint_name', recursive=True)

    """
    # Resolve the endpoint once, the same way `ls` and `mv` do. This accepts labels and any
    # UUID handled by `_endpoint_id_root`, and raises a clear ValueError for unknown
    # identifiers instead of failing on `None['id']`.
    endpoint_id, root_path = self._endpoint_id_root(endpoint)
    kwargs['endpoint'] = endpoint_id
    delete_object = globus_sdk.DeleteData(self.client, recursive=recursive, **kwargs)

    # add any number of items to the submission data
    for path in ensure_list(data_path):
        delete_object.add_item(self._endpoint_path(path, root_path))
    response = self.client.submit_delete(delete_object)
    return UUID(response.data['task_id'])
1036
+
1037
@ensure_logged_in
def ls(self, endpoint, path, remove_uuid=False, return_size=False, max_retries=1):
    """List the contents of a given endpoint directory.

    NB: If you're using ls routinely when transferring or deleting files you're probably doing
    something wrong!

    Parameters
    ----------
    endpoint : uuid.UUID, str
        The Globus endpoint. May be a UUID or a key in the Globus.endpoints attribute.
    path : Path, PurePath, str
        The absolute or relative Globus path to list. Note: if endpoint is a UUID, the path
        must be absolute.
    remove_uuid : bool
        If True, remove the UUID from the returned filenames.
    return_size : bool
        If True, return the size of each listed file in bytes.
    max_retries : int
        The number of times to retry the remote operation before raising. Increasing this may
        mitigate unstable network issues.

    Returns
    -------
    list
        A list of PurePosixPath objects of the files and folders listed, or if return_size is
        True, tuples of PurePosixPath objects and the corresponding file sizes (None for
        entries that are not files).

    """
    # Resolve the endpoint ID and prepend its root path to `path` where applicable
    endpoint_id, root_path = self._endpoint_id_root(endpoint)
    path = self._endpoint_path(path, root_path)

    # Query Globus, re-raising the error only once the final attempt has failed
    response = []
    for attempt in range(max_retries + 1):
        try:
            response = self.client.operation_ls(endpoint_id, path=path)
        except (GlobusConnectionError, GlobusAPIError):
            if attempt == max_retries:
                raise
        else:
            break

    listing = []
    for entry in response:
        name = entry['name']
        if remove_uuid:
            name = remove_uuid_string(name)
        item = PurePosixPath(name)
        if return_size:
            item = (item, entry['size'] if entry['type'] == 'file' else None)
        listing.append(item)
    return listing
1089
+
1090
+ # TODO: allow to move all content of a directory with 'recursive' keyword in add_item
1091
@ensure_logged_in
def mv(self, source_endpoint, target_endpoint, source_paths, target_paths,
       timeout=None, **kwargs):
    """Move files from one endpoint to another.

    The transfer is submitted via `Globus.run_task`, which blocks until the task finishes.

    Parameters
    ----------
    source_endpoint : uuid.UUID, str
        The Globus source endpoint. May be a UUID or a key in the Globus.endpoints attribute.
    target_endpoint : uuid.UUID, str
        The Globus destination endpoint. May be a UUID or a key in the Globus.endpoints
        attribute.
    source_paths : list of str, pathlib.Path or pathlib.PurePath
        The absolute or relative Globus paths of source files to move. Note: if endpoint is
        a UUID, the path must be absolute.
    target_paths : list of str, Path or PurePath
        The absolute or relative Globus paths of destination files to move. Note: if endpoint
        is a UUID, the path must be absolute.
    timeout : int
        Maximum time in seconds to wait for the task to complete.
    **kwargs
        Optional arguments for globus_sdk.TransferData. By default `verify_checksum=True`,
        `sync_level='checksum'` and `label='ONE globus'` are used; any of these may be
        overridden here.

    Returns
    -------
    uuid.UUID
        A Globus task ID.

    Notes
    -----
    Source and target paths are paired in order; if the lists differ in length the surplus
    items of the longer list are not transferred.

    """
    source_id, source_root = self._endpoint_id_root(source_endpoint)
    target_id, target_root = self._endpoint_id_root(target_endpoint)
    source_paths = [str(self._endpoint_path(path, source_root)) for path in source_paths]
    target_paths = [str(self._endpoint_path(path, target_root)) for path in target_paths]

    # Treat the transfer options as defaults so a caller-supplied `label`, `sync_level` or
    # `verify_checksum` overrides them instead of raising a duplicate-keyword TypeError.
    options = {
        'verify_checksum': True, 'sync_level': 'checksum', 'label': 'ONE globus', **kwargs}
    tdata = globus_sdk.TransferData(self.client, source_id, target_id, **options)
    for source_path, target_path in zip(source_paths, target_paths):
        tdata.add_item(source_path, target_path)

    def wrapper():
        """Function to submit Globus transfer and return the resulting task ID."""
        response = self.client.submit_transfer(tdata)
        return response.get('task_id', None)

    return self.run_task(wrapper, timeout=timeout)
1138
+
1139
@ensure_logged_in
def run_task(self, globus_func, retries=3, timeout=None):
    """Block until a Globus task finishes and retry upon Network or REST Errors.

    globus_func needs to submit a task to the client and return a task_id.

    Parameters
    ----------
    globus_func : function, Callable
        A function that returns a Globus task ID, typically it will submit a transfer.
    retries : int
        The number of times to call globus_func again if a Globus error is raised.
    timeout : int
        Maximum time in seconds to wait for the task to complete. The task is polled in
        blocks of 5 seconds, so the effective timeout is rounded up to a multiple of 5.

    Returns
    -------
    uuid.UUID
        Globus task ID.

    Raises
    ------
    IOError
        Timed out waiting for task to complete, or the task finished with a status other
        than SUCCEEDED.

    TODO Add a quick fail option that returns when files missing, etc.
    TODO Add status logging

    """
    wait_block = 5  # seconds that each `task_wait` call may block for
    try:
        task_id = globus_func()
        assert is_uuid(task_id, versions=(1, 2)), 'invalid UUID returned'
        print(f'Waiting for Globus task {task_id} to complete')
        # While the task is active, print a dot after every wait block. The waited time is
        # accumulated in seconds so that `timeout` is honoured as documented (previously each
        # 5 second block was counted as a single second).
        waited = 0
        while not self.client.task_wait(task_id, timeout=wait_block, polling_interval=1):
            print('.', end='')
            waited += wait_block
            if timeout and waited >= timeout:
                task = self.client.get_task(task_id)
                raise IOError(f'Globus task {task_id} timed out after {timeout} seconds, '
                              f'with task status {task["status"]}')
        task = self.client.get_task(task_id)
        if task['status'] == 'SUCCEEDED':
            # Sometime Globus sets the status to SUCCEEDED but doesn't truly finish.
            # Handle error thrown when querying task_successful_transfers too early
            try:
                # Materialize once so the results can be both counted and iterated
                successful = list(self.client.task_successful_transfers(task_id))
                skipped = list(self.client.task_skipped_errors(task_id))
                print(f'\nGlobus task {task_id} completed.'
                      f'\nSkipped transfers: {len(skipped)}'
                      f'\nSuccessful transfers: {len(successful)}')
                for info in successful:
                    _logger.debug(f'{info["source_path"]} -> {info["destination_path"]}')
            except TransferAPIError:
                _logger.warning(f'\nGlobus task {task_id} SUCCEEDED but querying transfers was '
                                f'unsuccessful')
        else:
            raise IOError(f'Globus task finished unsuccessfully with status {task["status"]}')
        return self._ensure_uuid(task_id)
    except (GlobusAPIError, NetworkError, GlobusTimeoutError, GlobusConnectionError,
            GlobusConnectionTimeoutError):
        if retries < 1:
            _logger.error('\nMax retries exceeded.')
            raise
        _logger.debug('\nGlobus experienced a network error', exc_info=True)
    # Reached only after a Globus error with retries remaining. The result of the retry must
    # be returned, otherwise a successful retry would yield None instead of the task ID.
    _logger.warning('\nGlobus experienced a network error, retrying.')
    return self.run_task(globus_func, retries=(retries - 1), timeout=timeout)
1209
+
1210
@ensure_logged_in
async def task_wait_async(self, task_id, polling_interval=10, timeout=10):
    """Asynchronously wait until a Task is complete or fails, with a time limit.

    If the task status is ACTIVE after timeout, returns False, otherwise returns True.

    Parameters
    ----------
    task_id : str, uuid.UUID
        A Globus task UUID to wait on for completion.
    polling_interval : float
        Number of seconds between queries to Globus about the task status. Minimum 1 second.
        Capped at `timeout`.
    timeout : float
        Number of seconds to wait in total. Minimum 1 second.

    Returns
    -------
    bool
        True if status not ACTIVE before timeout. False if status still ACTIVE at timeout.

    Raises
    ------
    GlobusSDKUsageError
        `polling_interval` or `timeout` is less than 1.

    Examples
    --------
    Asynchronously await a task to complete

    >>> await Globus().task_wait_async(task_id)

    """
    if polling_interval < 1:
        raise GlobusSDKUsageError('polling_interval must be at least 1 second')
    if timeout < 1:
        raise GlobusSDKUsageError('timout must be at least 1 second')
    interval = min(timeout, polling_interval)
    elapsed = 0
    while self.client.get_task(task_id)['status'] == 'ACTIVE':
        # Decide whether the time budget is used up before sleeping again
        elapsed += interval
        if elapsed >= timeout:
            return False
        await asyncio.sleep(interval)
    return True