ONE-api 3.0b3__py3-none-any.whl → 3.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/LICENSE +21 -21
  2. {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/METADATA +115 -115
  3. ONE_api-3.0b4.dist-info/RECORD +37 -0
  4. one/__init__.py +2 -2
  5. one/alf/__init__.py +1 -1
  6. one/alf/cache.py +640 -653
  7. one/alf/exceptions.py +105 -105
  8. one/alf/io.py +876 -876
  9. one/alf/path.py +1450 -1450
  10. one/alf/spec.py +519 -519
  11. one/api.py +2949 -2973
  12. one/converters.py +850 -850
  13. one/params.py +414 -414
  14. one/registration.py +845 -845
  15. one/remote/__init__.py +1 -1
  16. one/remote/aws.py +313 -313
  17. one/remote/base.py +142 -142
  18. one/remote/globus.py +1254 -1254
  19. one/tests/fixtures/params/.caches +6 -6
  20. one/tests/fixtures/params/.test.alyx.internationalbrainlab.org +8 -8
  21. one/tests/fixtures/rest_responses/1f187d80fd59677b395fcdb18e68e4401bfa1cc9 +1 -1
  22. one/tests/fixtures/rest_responses/47893cf67c985e6361cdee009334963f49fb0746 +1 -1
  23. one/tests/fixtures/rest_responses/535d0e9a1e2c1efbdeba0d673b131e00361a2edb +1 -1
  24. one/tests/fixtures/rest_responses/6dc96f7e9bcc6ac2e7581489b9580a6cd3f28293 +1 -1
  25. one/tests/fixtures/rest_responses/db1731fb8df0208944ae85f76718430813a8bf50 +1 -1
  26. one/tests/fixtures/rest_responses/dcce48259bb929661f60a02a48563f70aa6185b3 +1 -1
  27. one/tests/fixtures/rest_responses/f530d6022f61cdc9e38cc66beb3cb71f3003c9a1 +1 -1
  28. one/tests/fixtures/test_dbs.json +14 -14
  29. one/util.py +524 -524
  30. one/webclient.py +1366 -1354
  31. ONE_api-3.0b3.dist-info/RECORD +0 -37
  32. {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/WHEEL +0 -0
  33. {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/top_level.txt +0 -0
one/registration.py CHANGED
@@ -1,845 +1,845 @@
1
- """Session creation and datasets registration.
2
-
3
- The RegistrationClient provides a high-level API for creating experimentation sessions on Alyx
4
- and registering associated datasets.
5
-
6
- Summary of methods
7
- ------------------
8
- create_new_session - Create a new local session folder and optionally create session record on Alyx
9
- create_sessions - Create sessions and register files for folder containing a given flag file
10
- register_session - Create a session on Alyx from local path and register any ALF datasets present
11
- register_files - Register a list of files to their respective sessions on Alyx
12
- """
13
- import pathlib
14
- from uuid import UUID
15
- from pathlib import Path, PurePosixPath
16
- import datetime
17
- import logging
18
- import itertools
19
- from collections import defaultdict
20
- from fnmatch import fnmatch
21
- import shutil
22
-
23
- import requests.exceptions
24
-
25
- from iblutil.io import hashfile
26
- from iblutil.util import Bunch, ensure_list
27
-
28
- import one.alf.io as alfio
29
- from one.alf.path import ALFPath, session_path_parts, ensure_alf_path, folder_parts
30
- from one.alf.spec import is_valid
31
- import one.alf.exceptions as alferr
32
- from one.api import ONE
33
- from one.webclient import no_cache
34
-
35
- _logger = logging.getLogger(__name__)
36
-
37
-
38
def get_dataset_type(filename, dtypes):
    """Get the dataset type from a given filename.

    A dataset type is matched one of two ways:

    1. the filename matches the dataset type filename_pattern (case-insensitive glob match);
    2. if filename_pattern is empty (or None), the filename object.attribute matches the
       dataset type name.

    Parameters
    ----------
    filename : str, pathlib.Path
        The filename or filepath.
    dtypes : iterable
        An iterable of dataset type objects with the attributes ('name', 'filename_pattern').

    Returns
    -------
    The matching dataset type object for filename.

    Raises
    ------
    ValueError
        filename doesn't match any of the dataset types
        filename matches multiple dataset types

    """
    filename = ensure_alf_path(filename)
    folded_name = filename.name.casefold()
    obj_attr = None  # object.attribute (or stem) of the file; computed once, on first need

    dataset_types = []
    for dt in dtypes:
        # A null pattern is treated the same as a blank one instead of raising AttributeError
        if (dt.filename_pattern or '').strip():
            # Check whether pattern matches filename
            if fnmatch(folded_name, dt.filename_pattern.casefold()):
                dataset_types.append(dt)
            continue
        # No pattern: check whether the filename object.attribute matches the dataset type name
        if obj_attr is None:
            if is_valid(filename.name):
                obj_attr = '.'.join(filename.dataset_name_parts[1:3])
            else:  # will match name against filename sans extension
                obj_attr = filename.stem
        if dt.name == obj_attr:
            dataset_types.append(dt)

    n = len(dataset_types)
    if n == 0:
        raise ValueError(f'No dataset type found for filename "{filename.name}"')
    elif n >= 2:
        raise ValueError('Multiple matching dataset types found for filename '
                         f'"{filename.name}": \n{", ".join(map(str, dataset_types))}')
    return dataset_types[0]
86
-
87
-
88
- class RegistrationClient:
89
- """Methods to create sessions and register data."""
90
-
91
def __init__(self, one=None):
    """Set up the client and fetch the dataset types and data formats from Alyx.

    Parameters
    ----------
    one : object, optional
        The ONE instance to use. If falsy, a new instance is created with
        `ONE(cache_rest=None)`. If the instance's Alyx client has cache mode 'GET' a warning
        is logged.

    """
    if one:
        if one.alyx.cache_mode == 'GET':
            _logger.warning('AlyxClient REST cache active; '
                            'this may cause issues with registration.')
    else:
        one = ONE(cache_rest=None)
    self.one = one

    alyx = self.one.alyx
    self.dtypes = [Bunch(dt) for dt in alyx.rest('dataset-types', 'list')]
    # Only the non-empty filename patterns are kept
    self.registration_patterns = [
        dt['filename_pattern'] for dt in self.dtypes if dt['filename_pattern']]
    data_formats = alyx.rest('data-formats', 'list', no_cache=True)
    self.file_extensions = [fmt['file_extension'] for fmt in data_formats]
103
-
104
def create_sessions(self, root_data_folder, glob_pattern='**/create_me.flag',
                    register_files=False, dry=False):
    """Create sessions looking recursively for flag files.

    Parameters
    ----------
    root_data_folder : str, pathlib.Path
        Folder to look for sessions.
    glob_pattern : str
        Register valid sessions that contain this pattern.
    register_files : bool
        If true, register all valid datasets within the session folder.
    dry : bool
        If true, each flag file path is printed and no session is created on Alyx; the
        returned records are then None for every session.

    Returns
    -------
    list of pathlib.Paths
        The session paths (parent folders of the flag files found).
    list of dicts
        Alyx session records (None entries when dry).

    """
    flag_files = list(Path(root_data_folder).glob(glob_pattern))
    records = []
    for flag_file in flag_files:
        if dry:
            # Nothing is created in a dry run, so there is no record to store
            print(flag_file)
            records.append(None)
            continue
        session_path = ALFPath(flag_file.parent)
        _logger.info(f'creating session for {session_path}')
        # providing a false flag stops the registration after session creation
        session_info, _ = self.register_session(session_path, file_list=register_files)
        records.append(session_info)
        flag_file.unlink()  # flag is consumed once the session is registered
    session_paths = [ALFPath(ff.parent) for ff in flag_files]
    return session_paths, records
140
-
141
def create_new_session(self, subject, session_root=None, date=None, register=True, **kwargs):
    """Create a new local session folder and optionally create session record on Alyx.

    Parameters
    ----------
    subject : str
        The subject name. Must exist on Alyx.
    session_root : str, pathlib.Path
        The root folder in which to create the subject/date/number folder. Defaults to ONE
        cache directory.
    date : datetime.datetime, datetime.date, str
        An optional date for the session. If None the current time is used.
    register : bool
        If true, create session record on Alyx database.
    kwargs
        Optional arguments for RegistrationClient.register_session.

    Returns
    -------
    pathlib.Path
        New local session path.
    uuid.UUID
        The experiment UUID if register is True, otherwise None.

    Examples
    --------
    Create a local session only

    >>> session_path, _ = RegistrationClient().create_new_session('Ian', register=False)

    Register a session on Alyx in a specific location

    >>> session_path, eid = RegistrationClient().create_new_session('Sy', '/data/lab/Subjects')

    Create a session for a given date

    >>> session_path, eid = RegistrationClient().create_new_session('Ian', date='2020-01-01')

    """
    assert not self.one.offline, 'ONE must be in online mode'
    iso_date = self.ensure_ISO8601(date)  # Format, validate
    # Ensure subject exists on Alyx
    self.assert_exists(subject, 'subjects')

    root = session_root or self.one.alyx.cache_dir
    date_folder = Path(root) / subject / iso_date[:10]  # first 10 chars: YYYY-MM-DD
    session_path = date_folder / alfio.next_num_folder(date_folder)
    session_path.mkdir(exist_ok=True, parents=True)  # Ensure folder exists on disk

    if not register:
        return session_path, None
    session_info, _ = self.register_session(session_path, **kwargs)
    # The experiment ID is the trailing 36 characters of the session URL
    return session_path, UUID(session_info['url'][-36:])
193
-
194
def find_files(self, session_path):
    """Yield the files in a session folder that match a dataset type in Alyx.

    A file is yielded when its name ends with one of the known file extensions and
    `get_dataset_type` finds exactly one matching dataset type for it.

    Parameters
    ----------
    session_path : str, pathlib.Path
        The session path to search.

    Yields
    ------
    pathlib.Path
        File paths that match the dataset type patterns in Alyx.

    """
    for dataset in ALFPath(session_path).iter_datasets(recursive=True):
        if not any(map(dataset.name.endswith, self.file_extensions)):
            continue
        try:
            get_dataset_type(dataset, self.dtypes)
        except ValueError as ex:
            # No (or ambiguous) dataset type: skip the file
            _logger.debug('%s', ex.args[0])
        else:
            yield dataset
216
-
217
def assert_exists(self, member, endpoint):
    """Raise an error if a given member doesn't exist on Alyx database.

    Parameters
    ----------
    member : str, uuid.UUID, list
        The member ID(s) to verify
    endpoint: str
        The endpoint at which to look it up

    Examples
    --------
    >>> client.assert_exists('ALK_036', 'subjects')
    >>> client.assert_exists('user_45', 'users')
    >>> client.assert_exists('local_server', 'repositories')

    Raises
    ------
    one.alf.exceptions.AlyxSubjectNotFound
        Subject does not exist on Alyx
    one.alf.exceptions.ALFError
        Member does not exist on Alyx
    requests.exceptions.HTTPError
        Failed to connect to Alyx database or endpoint not found

    Returns
    -------
    dict, list of dict
        The endpoint data if member exists.

    """
    # Anything that isn't a single ID is treated as an iterable of IDs
    if not isinstance(member, (str, UUID)):
        return [self.assert_exists(item, endpoint) for item in member]
    try:
        return self.one.alyx.rest(endpoint, 'read', id=str(member), no_cache=True)
    except requests.exceptions.HTTPError as ex:
        if ex.response.status_code != 404:
            raise  # only a 404 means the member is missing
        if endpoint == 'subjects':
            raise alferr.AlyxSubjectNotFound(member)
        raise alferr.ALFError(f'Member "{member}" doesn\'t exist in {endpoint}')
260
-
261
- @staticmethod
262
- def ensure_ISO8601(date) -> str:
263
- """Ensure provided date is ISO 8601 compliant.
264
-
265
- Parameters
266
- ----------
267
- date : str, None, datetime.date, datetime.datetime
268
- An optional date to convert to ISO string. If None, the current datetime is used.
269
-
270
- Returns
271
- -------
272
- str
273
- The datetime as an ISO 8601 string
274
-
275
- """
276
- date = date or datetime.datetime.now() # If None get current time
277
- if isinstance(date, str):
278
- # FIXME support timezone aware strings, e.g. '2023-03-09T17:08:12.4465024+00:00'
279
- date = datetime.datetime.fromisoformat(date) # Validate by parsing
280
- elif type(date) is datetime.date:
281
- date = datetime.datetime.fromordinal(date.toordinal())
282
- return datetime.datetime.isoformat(date)
283
-
284
def register_session(self, ses_path, users=None, file_list=True, **kwargs):
    """Register session in Alyx.

    If a session with the same subject, date and number already exists on Alyx it is updated,
    otherwise a new session is created.

    NB: If providing a lab or start_time kwarg, they must match the lab (if there is one)
    and date of the session path.  The subject and number are always taken from the session
    path and may not be passed as kwargs.

    Parameters
    ----------
    ses_path : str, pathlib.Path
        The local session path
    users : str, list
        The user(s) to attribute to the session
    file_list : bool, list
        An optional list of file paths to register.  If True, all valid files within the
        session folder are registered.  If False, no files are registered
    location : str
        The optional location within the lab where the experiment takes place
    procedures : str, list
        An optional list of procedures, e.g. 'Behavior training/tasks'
    n_correct_trials : int
        The number of correct trials (optional)
    n_trials : int
        The total number of completed trials (optional)
    json : dict, str
        Optional JSON data
    projects: str, list
        The project(s) to which the experiment belongs (optional)
    type : str
        The experiment type, e.g. 'Experiment', 'Base'
    task_protocol : str
        The task protocol (optional)
    lab : str
        The name of the lab where the session took place.  If None the lab name will be
        taken from the path.  If no lab name is found in the path (i.e. no <lab>/Subjects)
        the default lab on Alyx will be used.
    start_time : str, datetime.datetime
        The precise start time of the session.  The date must match the date in the session
        path.
    end_time : str, datetime.datetime
        The precise end time of the session.

    Returns
    -------
    dict
        An Alyx session record
    list, None
        Alyx file records (or None if file_list is False)

    Raises
    ------
    AssertionError
        The provided start_time does not match the date in the session path, or 'subject' or
        'number' was passed as a keyword argument.
    one.alf.exceptions.AlyxSubjectNotFound
        Subject does not exist on Alyx.
    one.alf.exceptions.ALFError
        One of the users does not exist on Alyx.
    ValueError
        The provided lab name does not match the one found in the session path or
        start_time/end_time is not a valid ISO date time.
    requests.HTTPError
        A 400 status code means the submitted data was incorrect (e.g. task_protocol was an
        int instead of a str); A 500 status code means there was a server error.
    ConnectionError
        Failed to connect to Alyx, most likely due to a bad internet connection.

    """
    ses_path = ALFPath(ses_path)
    details = session_path_parts(ses_path.as_posix(), as_dict=True, assert_valid=True)
    # query alyx endpoints for subject, error if not found
    self.assert_exists(details['subject'], 'subjects')

    # look for a session from the same subject, same number on the same day
    with no_cache(self.one.alyx):
        session_id, session = self.one.search(subject=details['subject'],
                                              date_range=details['date'],
                                              number=details['number'],
                                              details=True, query_type='remote')
    users = ensure_list(users or self.one.alyx.user)
    self.assert_exists(users, 'users')

    # Session data to submit; remaining kwargs are merged in further down
    ses_ = {'subject': details['subject'],
            'users': users,
            'type': 'Experiment',
            'number': details['number']}
    if kwargs.get('end_time', False):
        ses_['end_time'] = self.ensure_ISO8601(kwargs.pop('end_time'))
    start_time = self.ensure_ISO8601(kwargs.pop('start_time', details['date']))
    assert start_time[:10] == details['date'], 'start_time doesn\'t match session path'
    if kwargs.get('procedures', False):
        ses_['procedures'] = ensure_list(kwargs.pop('procedures'))
    if kwargs.get('projects', False):
        ses_['projects'] = ensure_list(kwargs.pop('projects'))
    # Each key must be tested individually: a tuple membership test on a dict only looks for
    # a key equal to the whole tuple and would therefore always pass.
    clashes = sorted({'subject', 'number'}.intersection(kwargs))
    assert not clashes, f'{clashes} are taken from the session path; do not pass as kwargs'
    if 'lab' not in kwargs and details['lab']:
        kwargs.update({'lab': details['lab']})
    elif details['lab'] and kwargs.get('lab', details['lab']) != details['lab']:
        names = (kwargs['lab'], details['lab'])
        raise ValueError('lab kwarg "%s" does not match lab name in path ("%s")' % names)
    ses_.update(kwargs)

    # start_time is always a validated ISO string at this point (it defaults to the date in
    # the session path), so it is submitted whether creating or updating.
    # NOTE(review): this means updating an existing session without a start_time kwarg
    # resets its start time to midnight of the session date - confirm this is intended.
    ses_['start_time'] = start_time
    if not session:  # Create from scratch
        session = self.one.alyx.rest('sessions', 'create', data=ses_)
    else:  # Update existing
        session = self.one.alyx.rest('sessions', 'update', id=session_id[0], data=ses_)

    _logger.info(session['url'] + ' ')
    # at this point the session has been created. If create only, exit
    if not file_list:
        return session, None
    recs = self.register_files(self.find_files(ses_path) if file_list is True else file_list)
    if recs:  # Update local session data after registering files
        session['data_dataset_session_related'] = ensure_list(recs)
    return session, recs
398
-
399
def prepare_files(self, file_list, versions=None):
    """Validate file list for registration and group files by session path.

    Files are skipped (with a debug log message) when they have no valid session path, when
    their suffix is not one of the known file extensions, or when no unique dataset type
    matches them.

    Parameters
    ----------
    file_list : list, str, pathlib.Path
        A filepath (or list thereof) of ALF datasets to register to Alyx.
    versions : str, list of str
        Optional version tags.  A single string (or None) applies to every file; a list is
        paired with the files in order and cycled.

    Returns
    -------
    dict of lists
        A map of session path to the list of valid files (relative to the session path).
    dict of lists
        A map of session path to the list of versions for those files.
    list
        A list of all input files converted to paths.
    bool
        A boolean indicating if input was a single file.

    """
    single_file = isinstance(file_list, (str, pathlib.Path))
    if single_file:
        file_list = [file_list]
    file_list = [ALFPath(f) for f in file_list]  # Ensure list of path objects

    if versions is None or isinstance(versions, str):
        version_iter = itertools.repeat(versions)
    else:
        version_iter = itertools.cycle(versions)

    files_by_session = defaultdict(list)  # keys will be session paths
    versions_by_session = defaultdict(list)

    # Filter valid files and sort by session
    for path, version in zip(file_list, version_iter):
        session_path = path.session_path()
        if not session_path:
            _logger.debug(f'{path}: Invalid session path')
            continue
        if path.suffix not in self.file_extensions:
            _logger.debug(f'{path}: No matching extension "{path.suffix}" in database')
            continue
        try:
            get_dataset_type(path, self.dtypes)
        except ValueError as ex:
            _logger.debug('%s', ex.args[0])
            continue
        files_by_session[session_path].append(path.relative_to(session_path))
        versions_by_session[session_path].append(version)

    return files_by_session, versions_by_session, file_list, single_file
451
-
452
def check_protected_files(self, file_list, created_by=None):
    """Check whether a set of files associated to a session are protected.

    Parameters
    ----------
    file_list : list, str, pathlib.Path
        A filepath (or list thereof) of ALF datasets to register to Alyx.
    created_by : str
        Name of Alyx user (defaults to whoever is logged in to ONE instance).

    Returns
    -------
    list of dicts, dict, None
        A status for each session whether any of the files specified are protected
        datasets or not. If none of the datasets are protected, a response with status
        200 is returned, if any of the files are protected a response with status
        403 is returned. If a single file is passed in, a single response is returned, or
        None if that file was not valid for registration.

    """
    # Validate files and rearrange into list per session
    F, _, _, single_file = self.prepare_files(file_list)

    # For each unique session, make a separate request
    records = []
    for session_path, files in F.items():
        # this is the generic relative path: subject/yyyy-mm-dd/NNN
        details = session_path_parts(session_path.as_posix(), as_dict=True, assert_valid=True)
        rel_path = PurePosixPath(details['subject'], details['date'], details['number'])

        r_ = {'created_by': created_by or self.one.alyx.user,
              'path': rel_path.as_posix(),
              'filenames': [x.as_posix() for x in files]
              }
        records.append(self.one.alyx.get('/check-protected', data=r_, clobber=True))

    if single_file:
        # The single file may have been filtered out during validation, leaving no records
        return records[0] if records else None
    return records
488
-
489
def register_files(self, file_list,
                   versions=None, default=True, created_by=None, server_only=False,
                   repository=None, exists=True, dry=False, max_md5_size=None, **kwargs):
    """Registers a set of files belonging to a session only on the server.

    Parameters
    ----------
    file_list : list, str, pathlib.Path
        A filepath (or list thereof) of ALF datasets to register to Alyx.
    versions : str, list of str
        Optional version tags.
    default : bool
        Whether to set as default revision (defaults to True).
    created_by : str
        Name of Alyx user (defaults to whoever is logged in to ONE instance).
    server_only : bool
        Will only create file records in the 'online' repositories and skips local repositories
    repository : str
        Name of the repository in Alyx to register to.
    exists : bool
        Whether the files exist on the repository (defaults to True). May be set to False
        when registering files before copying to the repository.
    dry : bool
        When true returns POST data for registration endpoint without submitting the data.
    max_md5_size : int
        Maximum file in bytes to compute md5 sum (always compute if None).
    kwargs
        Extra arguments directly passed as REST request data to /register-files endpoint.

    Returns
    -------
    list of dicts, dict
        A list of newly created Alyx dataset records or the registration data if dry. If
        a single file is passed in, a single dict is returned (or None if that file was not
        valid for registration).

    Notes
    -----
    - The registered files may be automatically moved to new revision folders if they are
      protected on Alyx, therefore it's important to check the relative paths of the output.
    - Protected datasets are not checked in dry mode.
    - In most circumstances a new revision will be added automatically, however if this fails
      a 403 HTTP status may be returned.

    Raises
    ------
    requests.exceptions.HTTPError
        Submitted data not valid (400 status code)
        Server side database error (500 status code)
        Revision protected (403 status code)

    """
    F, V, file_list, single_file = self.prepare_files(file_list, versions=versions)

    # For each unique session, make a separate POST request
    records = [None] * (len(F) if dry else len(file_list))  # If dry return data per session
    for session_path, files in F.items():
        # this is the generic relative path: subject/yyyy-mm-dd/NNN
        details = session_path_parts(session_path.as_posix(), as_dict=True, assert_valid=True)
        rel_path = PurePosixPath(details['subject'], details['date'], details['number'])
        file_sizes = [session_path.joinpath(fn).stat().st_size for fn in files]
        # computing the md5 can be very long, so this is an option to skip if the file is
        # bigger than a certain threshold
        md5s = [hashfile.md5(session_path.joinpath(fn))
                if (max_md5_size is None or sz < max_md5_size) else None
                for fn, sz in zip(files, file_sizes)]

        _logger.info('Registering ' + str(files))

        r_ = {'created_by': created_by or self.one.alyx.user,
              'path': rel_path.as_posix(),
              'filenames': [x.as_posix() for x in files],
              'hashes': md5s,
              'filesizes': file_sizes,
              'name': repository,
              'exists': exists,
              'server_only': server_only,
              'default': default,
              'versions': V[session_path],
              'check_protected': True,
              **kwargs
              }

        # Add optional field
        if details['lab'] and 'labs' not in kwargs:
            r_['labs'] = details['lab']
        # If dry, store POST data, otherwise store resulting file records
        if dry:
            records[list(F).index(session_path)] = r_
            continue
        try:
            response = self.one.alyx.post('/register-file', data=r_)
            # Ensure we keep the order of the output records: the files missing will remain
            # as None type
            for f, r in zip(files, response):
                records[file_list.index(session_path / f)] = r
        except requests.exceptions.HTTPError as err:
            # 403 response when datasets already registered and protected by tags
            err_message = err.response.json()
            if not (err_message.get('status_code') == 403 and
                    err_message.get('error') == 'One or more datasets is protected'):
                raise err  # Some other error occurred; re-raise
            response = err_message['details']
            today_revision = datetime.datetime.today().strftime('%Y-%m-%d')
            new_file_list = []

            for fl, res in zip(files, response):
                (name, prot_info), = res.items()
                # Dataset has not yet been registered
                if not prot_info:
                    new_file_list.append(fl)
                    continue

                # Check to see if the file path already has a revision in it
                file_revision = folder_parts(rel_path / fl, as_dict=True)['revision']
                # Find existing protected revisions
                existing_revisions = [k for pr in prot_info for k, v in pr.items() if v]

                if file_revision:
                    # If the revision explicitly defined by the user doesn't exist or
                    # is not protected, register as is
                    if file_revision not in existing_revisions:
                        revision_path = fl.parent
                    else:
                        # Find the next sub-revision that isn't protected
                        new_revision = self._next_revision(file_revision, existing_revisions)
                        revision_path = fl.parent.parent.joinpath(f'#{new_revision}#')

                    if revision_path != fl.parent:
                        session_path.joinpath(revision_path).mkdir(exist_ok=True)
                        _logger.info('Moving %s -> %s', fl, revision_path.joinpath(fl.name))
                        shutil.move(session_path / fl, session_path / revision_path / fl.name)
                    new_file_list.append(revision_path.joinpath(fl.name))
                    continue

                # The file wasn't in a revision folder but is protected
                fl_path = fl.parent
                assert name == fl_path.joinpath(fl.name).as_posix()

                # Find info about the latest revision
                # N.B on django side prot_info is sorted by latest revisions first
                (latest_revision, protected), = prot_info[0].items()

                # If the latest revision is the original and it is unprotected
                # no need for revision e.g {'clusters.amp.npy': [{'': False}]}
                if latest_revision == '' and not protected:
                    # Use original path
                    revision_path = fl_path

                # If there already is a revision but it is unprotected,
                # move into this revision folder e.g
                # {'clusters.amp.npy':
                #     [{'2022-10-31': False}, {'2022-05-31': True}, {'': True}]}
                elif not protected:
                    # Check that the latest_revision has the date naming convention we expect
                    # i.e. 'YYYY-MM-DD'
                    try:
                        _ = datetime.datetime.strptime(latest_revision[:10], '%Y-%m-%d')
                        revision_path = fl_path.joinpath(f'#{latest_revision}#')
                    # If it doesn't it probably has been made manually so we don't want to
                    # overwrite this and instead use today's date
                    except ValueError:
                        # NB: It's possible that today's date revision is also protected but is
                        # not the most recent revision. In this case it's safer to let fail.
                        revision_path = fl_path.joinpath(f'#{today_revision}#')

                # If protected and the latest protected revision is from today we need to make
                # a sub-revision
                elif protected and today_revision in latest_revision:
                    if latest_revision == today_revision:  # iterate from appending 'a'
                        new_revision = self._next_revision(today_revision, existing_revisions)
                    else:  # assume the revision is date + character, e.g. '2020-01-01c'
                        alpha = latest_revision[-1]  # iterate from this character
                        new_revision = self._next_revision(
                            today_revision, existing_revisions, alpha)
                    revision_path = fl_path.joinpath(f'#{new_revision}#')

                # Otherwise cases move into revision from today
                # e.g {'clusters.amp.npy': [{'': True}]}
                # e.g {'clusters.amp.npy': [{'2022-10-31': True}, {'': True}]}
                else:
                    revision_path = fl_path.joinpath(f'#{today_revision}#')

                # Only move for the cases where a revision folder has been made
                if revision_path != fl_path:
                    session_path.joinpath(revision_path).mkdir(exist_ok=True)
                    _logger.info('Moving %s -> %s', fl, revision_path.joinpath(fl.name))
                    shutil.move(session_path / fl, session_path / revision_path / fl.name)
                new_file_list.append(revision_path.joinpath(fl.name))

            assert len(new_file_list) == len(files)
            r_['filenames'] = [p.as_posix() for p in new_file_list]
            r_['filesizes'] = [session_path.joinpath(p).stat().st_size for p in new_file_list]
            r_['check_protected'] = False  # Speed things up by ignoring server-side checks

            response = self.one.alyx.post('/register-file', data=r_)
            # NB: `files` still holds the pre-move paths here, which are the ones in file_list
            for f, r in zip(files, response):  # Populate records list in correct order
                records[file_list.index(session_path / f)] = r
            files = new_file_list

        # Log file names
        _logger.info(f'ALYX REGISTERED DATA {"!DRY!" if dry else ""}: {rel_path}')
        for p in files:
            _logger.info(f'ALYX REGISTERED DATA: {p}')

    if single_file:
        # In dry mode records holds one entry per valid session, so it is empty when the
        # single input file was filtered out during validation
        return records[0] if records else None
    return records
696
-
697
- @staticmethod
698
- def _next_revision(revision: str, reserved: list = None, alpha: str = 'a') -> str:
699
- """Return the next logical revision that is not already in the provided list.
700
-
701
- Revisions will increment by appending a letter to a date or other identifier.
702
-
703
- Parameters
704
- ----------
705
- revision : str
706
- The revision on which to base the new revision.
707
- reserved : list of str
708
- A list of reserved (i.e. already existing) revision strings.
709
- alpha : str
710
- The starting character as an integer, defaults to 'a'.
711
-
712
- Returns
713
- -------
714
- str
715
- The next logical revision string that's not in the reserved list.
716
-
717
- Examples
718
- --------
719
- >>> RegistrationClient._next_revision('2020-01-01')
720
- '2020-01-01a'
721
- >>> RegistrationClient._next_revision('2020-01-01', ['2020-01-01a', '2020-01-01b'])
722
- '2020-01-01c'
723
- >>> RegistrationClient._next_revision('2020-01-01', ['2020-01-01a', '2020-01-01b'])
724
- '2020-01-01c'
725
-
726
- """
727
- if len(alpha) != 1:
728
- raise TypeError(
729
- f'`alpha` must be a character; received a string of length {len(alpha)}'
730
- )
731
- i = ord(alpha)
732
- new_revision = revision + chr(i)
733
- while new_revision in (reserved or []):
734
- i += 1
735
- new_revision = revision + chr(i)
736
- return new_revision
737
-
738
def register_water_administration(self, subject, volume, **kwargs):
    """Register a water administration to Alyx for a given subject.

    Parameters
    ----------
    subject : str
        A subject nickname that exists on Alyx
    volume : float
        The total volume administrated in ml. Must be greater than zero; the value is
        rounded to 4 significant figures before submission.
    date_time : str, datetime.datetime, datetime.date
        The time of administration. If None, the current time is used.
    water_type : str
        A water type that exists in Alyx; default is 'Water'
    user : str
        The user who administrated the water. Currently logged-in user is the default.
    session : str, UUID, pathlib.Path, dict
        An optional experiment ID to associate
    adlib : bool
        If true, indicates that the subject was given water ad libitum

    Returns
    -------
    dict
        A water administration record

    Raises
    ------
    one.alf.exceptions.AlyxSubjectNotFound
        Subject does not exist on Alyx
    one.alf.exceptions.ALFError
        User does not exist on Alyx
    ValueError
        volume is not greater than zero, date_time is not a valid ISO date time or session ID
        is not valid
    requests.exceptions.HTTPError
        Failed to connect to database, or submitted data not valid (500)

    """
    # Ensure subject exists
    self.assert_exists(subject, 'subjects')
    # Ensure user(s) exist
    user = kwargs.pop('user', self.one.alyx.user)
    self.assert_exists(user, 'users')
    # Ensure volume is positive, as the error message states; written as `not > 0` so that
    # NaN is rejected too
    if not volume > 0:
        raise ValueError('Water volume must be greater than zero')
    # Post water admin
    wa_ = {
        'subject': subject,
        'date_time': self.ensure_ISO8601(kwargs.pop('date_time', None)),
        'water_administered': float(f'{volume:.4g}'),  # Round to 4 s.f.
        'water_type': kwargs.pop('water_type', 'Water'),
        'user': user,
        'adlib': kwargs.pop('adlib', False)
    }
    # Ensure session is valid; convert to eid
    if kwargs.get('session', False):
        wa_['session'] = str(self.one.to_eid(kwargs.pop('session')) or '')
        if not wa_['session']:
            raise ValueError('Failed to parse session ID')

    return self.one.alyx.rest('water-administrations', 'create', data=wa_)
799
-
800
- def register_weight(self, subject, weight, date_time=None, user=None):
801
- """Register a subject weight to Alyx.
802
-
803
- Parameters
804
- ----------
805
- subject : str
806
- A subject nickname that exists on Alyx.
807
- weight : float
808
- The subject weight in grams.
809
- date_time : str, datetime.datetime, datetime.date
810
- The time of weighing. If None, the current time is used.
811
- user : str
812
- The user who performed the weighing. Currently logged-in user is the default.
813
-
814
- Returns
815
- -------
816
- dict
817
- An Alyx weight record
818
-
819
- Raises
820
- ------
821
- one.alf.exceptions.AlyxSubjectNotFound
822
- Subject does not exist on Alyx
823
- one.alf.exceptions.ALFError
824
- User does not exist on Alyx
825
- ValueError
826
- date_time is not a valid ISO date time or weight < 1e-4
827
- requests.exceptions.HTTPError
828
- Failed to connect to database, or submitted data not valid (500)
829
-
830
- """
831
- # Ensure subject exists
832
- self.assert_exists(subject, 'subjects')
833
- # Ensure user(s) exist
834
- user = user or self.one.alyx.user
835
- self.assert_exists(user, 'users')
836
- # Ensure weight not zero
837
- if weight == 0:
838
- raise ValueError('Water volume must be greater than 0')
839
-
840
- # Post water admin
841
- wei_ = {'subject': subject,
842
- 'date_time': self.ensure_ISO8601(date_time),
843
- 'weight': float(f'{weight:.4g}'), # Round to 4 s.f.
844
- 'user': user}
845
- return self.one.alyx.rest('weighings', 'create', data=wei_)
1
+ """Session creation and datasets registration.
2
+
3
+ The RegistrationClient provides a high-level API for creating experimentation sessions on Alyx
4
+ and registering associated datasets.
5
+
6
+ Summary of methods
7
+ ------------------
8
+ create_new_session - Create a new local session folder and optionally create session record on Alyx
9
+ create_sessions - Create sessions and register files for folder containing a given flag file
10
+ register_session - Create a session on Alyx from local path and register any ALF datasets present
11
+ register_files - Register a list of files to their respective sessions on Alyx
12
+ """
13
+ import pathlib
14
+ from uuid import UUID
15
+ from pathlib import Path, PurePosixPath
16
+ import datetime
17
+ import logging
18
+ import itertools
19
+ from collections import defaultdict
20
+ from fnmatch import fnmatch
21
+ import shutil
22
+
23
+ import requests.exceptions
24
+
25
+ from iblutil.io import hashfile
26
+ from iblutil.util import Bunch, ensure_list
27
+
28
+ import one.alf.io as alfio
29
+ from one.alf.path import ALFPath, session_path_parts, ensure_alf_path, folder_parts
30
+ from one.alf.spec import is_valid
31
+ import one.alf.exceptions as alferr
32
+ from one.api import ONE
33
+ from one.webclient import no_cache
34
+
35
# Module-level logger, named after this module
_logger = logging.getLogger(__name__)
36
+
37
+
38
def get_dataset_type(filename, dtypes):
    """Get the dataset type from a given filename.

    A dataset type is matched one of two ways:

    1. the filename matches the dataset type filename_pattern (case-insensitive glob match);
    2. if filename_pattern is empty or null, the filename object.attribute matches the
       dataset type name.

    Parameters
    ----------
    filename : str, pathlib.Path
        The filename or filepath.
    dtypes : iterable
        An iterable of dataset type objects with the attributes ('name', 'filename_pattern').

    Returns
    -------
    The matching dataset type object for filename.

    Raises
    ------
    ValueError
        filename doesn't match any of the dataset types
        filename matches multiple dataset types

    """
    filename = ensure_alf_path(filename)
    folded_name = filename.name.casefold()  # loop-invariant; compute once
    obj_attr = None  # object.attribute of the filename, computed on first use
    dataset_types = []
    for dt in dtypes:
        # A null (None) pattern is treated the same as an empty one instead of raising
        pattern = dt.filename_pattern or ''
        if not pattern.strip():
            # If the filename pattern is null, check whether the filename object.attribute
            # matches the dataset type name.
            if obj_attr is None:
                if is_valid(filename.name):
                    obj_attr = '.'.join(filename.dataset_name_parts[1:3])
                else:  # will match name against filename sans extension
                    obj_attr = filename.stem
            if dt.name == obj_attr:
                dataset_types.append(dt)
        # Check whether pattern matches filename
        elif fnmatch(folded_name, pattern.casefold()):
            dataset_types.append(dt)
    n = len(dataset_types)
    if n == 0:
        raise ValueError(f'No dataset type found for filename "{filename.name}"')
    elif n >= 2:
        raise ValueError('Multiple matching dataset types found for filename '
                         f'"{filename.name}": \n{", ".join(map(str, dataset_types))}')
    return dataset_types[0]
86
+
87
+
88
+ class RegistrationClient:
89
+ """Methods to create sessions and register data."""
90
+
91
def __init__(self, one=None):
    """Set up the client and fetch dataset types and data formats from Alyx.

    Parameters
    ----------
    one : one.api.OneAlyx
        An optional ONE instance. If not provided, a new instance is created with
        `cache_rest=None`. A warning is logged when the provided instance's REST cache mode
        is 'GET'.

    """
    if one:
        self.one = one
        if one.alyx.cache_mode == 'GET':
            _logger.warning(
                'AlyxClient REST cache active; this may cause issues with registration.')
    else:
        self.one = ONE(cache_rest=None)
    # Dataset types are wrapped in Bunch objects for attribute-style access
    self.dtypes = [Bunch(rec) for rec in self.one.alyx.rest('dataset-types', 'list')]
    # Only dataset types with a non-empty filename pattern contribute a pattern
    self.registration_patterns = []
    for dtype in self.dtypes:
        if dtype['filename_pattern']:
            self.registration_patterns.append(dtype['filename_pattern'])
    data_formats = self.one.alyx.rest('data-formats', 'list', no_cache=True)
    self.file_extensions = [fmt['file_extension'] for fmt in data_formats]
103
+
104
def create_sessions(self, root_data_folder, glob_pattern='**/create_me.flag',
                    register_files=False, dry=False):
    """Create a session for each folder containing a flag file, searching recursively.

    Parameters
    ----------
    root_data_folder : str, pathlib.Path
        Folder to look for sessions.
    glob_pattern : str
        Register valid sessions that contain this pattern.
    register_files : bool
        If true, register all valid datasets within the session folder.
    dry : bool
        If true, the flag file paths are printed and nothing is created on Alyx.

    Returns
    -------
    list of pathlib.Paths
        The session paths (parent folders of the flag files found).
    list of dicts
        Alyx session records; one None per flag file when dry is true.

    """
    flags = [*Path(root_data_folder).glob(glob_pattern)]
    records = []
    for flag in flags:
        if dry:
            # Nothing is registered in dry mode; the record for this session is None
            print(flag)
            records.append(None)
        else:
            session_path = ALFPath(flag.parent)
            _logger.info(f'creating session for {session_path}')
            # providing a false flag stops the registration after session creation
            record, _ = self.register_session(session_path, file_list=register_files)
            records.append(record)
            flag.unlink()  # the flag file is removed once the session is registered
    session_paths = [ALFPath(flag.parent) for flag in flags]
    return session_paths, records
140
+
141
def create_new_session(self, subject, session_root=None, date=None, register=True, **kwargs):
    """Make a new local session folder and, optionally, the matching session on Alyx.

    Parameters
    ----------
    subject : str
        The subject name. Must exist on Alyx.
    session_root : str, pathlib.Path
        The root folder in which to create the subject/date/number folder. Defaults to ONE
        cache directory.
    date : datetime.datetime, datetime.date, str
        An optional date for the session. If None the current time is used.
    register : bool
        If true, create session record on Alyx database.
    kwargs
        Optional arguments for RegistrationClient.register_session.

    Returns
    -------
    pathlib.Path
        New local session path.
    uuid.UUID
        The experiment UUID if register is True, otherwise None.

    Examples
    --------
    Create a local session only

    >>> session_path, _ = RegistrationClient().create_new_session('Ian', register=False)

    Register a session on Alyx in a specific location

    >>> session_path, eid = RegistrationClient().create_new_session('Sy', '/data/lab/Subjects')

    Create a session for a given date

    >>> session_path, eid = RegistrationClient().create_new_session('Ian', date='2020-01-01')

    """
    assert not self.one.offline, 'ONE must be in online mode'
    iso_date = self.ensure_ISO8601(date)  # Format, validate
    # The subject must be present on Alyx
    self.assert_exists(subject, 'subjects')
    # <root>/<subject>/<yyyy-mm-dd>; only the date part of the ISO string is used
    root = Path(session_root or self.one.alyx.cache_dir)
    date_folder = root.joinpath(subject, iso_date[:10])
    session_path = date_folder / alfio.next_num_folder(date_folder)
    session_path.mkdir(parents=True, exist_ok=True)  # Ensure folder exists on disk
    if not register:
        return session_path, None
    session_info, _ = self.register_session(session_path, **kwargs)
    # The experiment ID is taken from the last 36 characters of the session URL
    return session_path, UUID(session_info['url'][-36:])
193
+
194
def find_files(self, session_path):
    """Yield the files in a session folder that match exactly one Alyx dataset type.

    Parameters
    ----------
    session_path : str, pathlib.Path
        The session path to search.

    Yields
    ------
    pathlib.Path
        File paths that match the dataset type patterns in Alyx.

    """
    for dataset in ALFPath(session_path).iter_datasets(recursive=True):
        # Skip files whose name doesn't end with a known data format extension
        if not any(map(dataset.name.endswith, self.file_extensions)):
            continue
        try:
            # Raises ValueError when zero or several dataset types match
            get_dataset_type(dataset, self.dtypes)
            yield dataset
        except ValueError as ex:
            _logger.debug('%s', ex.args[0])
216
+
217
+ def assert_exists(self, member, endpoint):
218
+ """Raise an error if a given member doesn't exist on Alyx database.
219
+
220
+ Parameters
221
+ ----------
222
+ member : str, uuid.UUID, list
223
+ The member ID(s) to verify
224
+ endpoint: str
225
+ The endpoint at which to look it up
226
+
227
+ Examples
228
+ --------
229
+ >>> client.assert_exists('ALK_036', 'subjects')
230
+ >>> client.assert_exists('user_45', 'users')
231
+ >>> client.assert_exists('local_server', 'repositories')
232
+
233
+ Raises
234
+ ------
235
+ one.alf.exceptions.AlyxSubjectNotFound
236
+ Subject does not exist on Alyx
237
+ one.alf.exceptions.ALFError
238
+ Member does not exist on Alyx
239
+ requests.exceptions.HTTPError
240
+ Failed to connect to Alyx database or endpoint not found
241
+
242
+ Returns
243
+ -------
244
+ dict, list of dict
245
+ The endpoint data if member exists.
246
+
247
+ """
248
+ if isinstance(member, (str, UUID)):
249
+ try:
250
+ return self.one.alyx.rest(endpoint, 'read', id=str(member), no_cache=True)
251
+ except requests.exceptions.HTTPError as ex:
252
+ if ex.response.status_code != 404:
253
+ raise ex
254
+ elif endpoint == 'subjects':
255
+ raise alferr.AlyxSubjectNotFound(member)
256
+ else:
257
+ raise alferr.ALFError(f'Member "{member}" doesn\'t exist in {endpoint}')
258
+ else:
259
+ return [self.assert_exists(x, endpoint) for x in member]
260
+
261
+ @staticmethod
262
+ def ensure_ISO8601(date) -> str:
263
+ """Ensure provided date is ISO 8601 compliant.
264
+
265
+ Parameters
266
+ ----------
267
+ date : str, None, datetime.date, datetime.datetime
268
+ An optional date to convert to ISO string. If None, the current datetime is used.
269
+
270
+ Returns
271
+ -------
272
+ str
273
+ The datetime as an ISO 8601 string
274
+
275
+ """
276
+ date = date or datetime.datetime.now() # If None get current time
277
+ if isinstance(date, str):
278
+ # FIXME support timezone aware strings, e.g. '2023-03-09T17:08:12.4465024+00:00'
279
+ date = datetime.datetime.fromisoformat(date) # Validate by parsing
280
+ elif type(date) is datetime.date:
281
+ date = datetime.datetime.fromordinal(date.toordinal())
282
+ return datetime.datetime.isoformat(date)
283
+
284
def register_session(self, ses_path, users=None, file_list=True, **kwargs):
    """Register session in Alyx.

    If a session with the same subject, date and number already exists on Alyx it is updated,
    otherwise a new one is created.

    NB: If providing a lab or start_time kwarg, they must match the lab (if there is one)
    and date of the session path. The subject and number are always taken from the session
    path and may not be passed as kwargs.

    Parameters
    ----------
    ses_path : str, pathlib.Path
        The local session path
    users : str, list
        The user(s) to attribute to the session
    file_list : bool, list
        An optional list of file paths to register. If True, all valid files within the
        session folder are registered. If False, no files are registered
    location : str
        The optional location within the lab where the experiment takes place
    procedures : str, list
        An optional list of procedures, e.g. 'Behavior training/tasks'
    n_correct_trials : int
        The number of correct trials (optional)
    n_trials : int
        The total number of completed trials (optional)
    json : dict, str
        Optional JSON data
    projects: str, list
        The project(s) to which the experiment belongs (optional)
    type : str
        The experiment type, e.g. 'Experiment', 'Base'
    task_protocol : str
        The task protocol (optional)
    lab : str
        The name of the lab where the session took place. If None the lab name will be
        taken from the path. If no lab name is found in the path (i.e. no <lab>/Subjects)
        the default lab on Alyx will be used.
    start_time : str, datetime.datetime
        The precise start time of the session. The date must match the date in the session
        path.
    end_time : str, datetime.datetime
        The precise end time of the session.

    Returns
    -------
    dict
        An Alyx session record
    list, None
        Alyx file records (or None if file_list is False)

    Raises
    ------
    AssertionError
        Subject does not exist on Alyx, provided start_time does not match date in
        session path, or a 'subject' or 'number' kwarg was provided.
    ValueError
        The provided lab name does not match the one found in the session path or
        start_time/end_time is not a valid ISO date time.
    requests.HTTPError
        A 400 status code means the submitted data was incorrect (e.g. task_protocol was an
        int instead of a str); A 500 status code means there was a server error.
    ConnectionError
        Failed to connect to Alyx, most likely due to a bad internet connection.

    """
    ses_path = ALFPath(ses_path)
    details = session_path_parts(ses_path.as_posix(), as_dict=True, assert_valid=True)
    # query alyx endpoints for subject, error if not found
    self.assert_exists(details['subject'], 'subjects')

    # look for a session from the same subject, same number on the same day
    with no_cache(self.one.alyx):
        session_id, session = self.one.search(subject=details['subject'],
                                              date_range=details['date'],
                                              number=details['number'],
                                              details=True, query_type='remote')
    users = ensure_list(users or self.one.alyx.user)
    self.assert_exists(users, 'users')

    # Assemble the session data used for both creation and update
    ses_ = {'subject': details['subject'],
            'users': users,
            'type': 'Experiment',
            'number': details['number']}
    if kwargs.get('end_time', False):
        ses_['end_time'] = self.ensure_ISO8601(kwargs.pop('end_time'))
    start_time = self.ensure_ISO8601(kwargs.pop('start_time', details['date']))
    assert start_time[:10] == details['date'], 'start_time doesn\'t match session path'
    if kwargs.get('procedures', False):
        ses_['procedures'] = ensure_list(kwargs.pop('procedures'))
    if kwargs.get('projects', False):
        ses_['projects'] = ensure_list(kwargs.pop('projects'))
    # The subject and number come from the session path and must not be overridden.
    # (The previous check tested a tuple against the string keys and so could never fail.)
    reserved = sorted({'subject', 'number'}.intersection(kwargs))
    assert not reserved, f'{reserved} may not be passed as kwargs; they are set by the path'
    if 'lab' not in kwargs and details['lab']:
        kwargs.update({'lab': details['lab']})
    elif details['lab'] and kwargs.get('lab', details['lab']) != details['lab']:
        names = (kwargs['lab'], details['lab'])
        raise ValueError('lab kwarg "%s" does not match lab name in path ("%s")' % names)
    ses_.update(kwargs)

    # start_time is already a validated ISO 8601 string at this point
    ses_['start_time'] = start_time
    if not session:  # Create from scratch
        session = self.one.alyx.rest('sessions', 'create', data=ses_)
    else:  # Update existing
        session = self.one.alyx.rest('sessions', 'update', id=session_id[0], data=ses_)

    _logger.info(session['url'] + ' ')
    # at this point the session has been created. If create only, exit
    if not file_list:
        return session, None
    recs = self.register_files(self.find_files(ses_path) if file_list is True else file_list)
    if recs:  # Update local session data after registering files
        session['data_dataset_session_related'] = ensure_list(recs)
    return session, recs
398
+
399
def prepare_files(self, file_list, versions=None):
    """Filter a list of files down to registrable datasets and group them by session.

    A file is kept only if it has a valid session path, its suffix is one of the file
    extensions in the database, and it matches exactly one dataset type. Rejected files are
    logged at debug level.

    Parameters
    ----------
    file_list : list, str, pathlib.Path
        A filepath (or list thereof) of ALF datasets to register to Alyx.
    versions : str, list of str
        Optional version tags. A single string (or None) is applied to every file; a list is
        paired with the files in order and repeats if it is shorter than the file list.

    Returns
    -------
    dict of lists
        A map of session path to the list of kept files, relative to that session path.
    dict of lists
        A map of session path to the list of versions of the kept files.
    list
        All input files converted to paths (including any that were rejected).
    bool
        A boolean indicating if input was a single file.

    """
    single_file = isinstance(file_list, (str, pathlib.Path))
    if single_file:
        file_list = [file_list]
    file_list = [ALFPath(file) for file in file_list]  # Ensure list of path objects

    # One version per file: repeat a scalar indefinitely, cycle through a sequence
    is_scalar = versions is None or isinstance(versions, str)
    version_iter = itertools.repeat(versions) if is_scalar else itertools.cycle(versions)

    files_by_session = defaultdict(list)  # keys are session paths
    versions_by_session = defaultdict(list)
    for path, version in zip(file_list, version_iter):
        session_path = path.session_path()
        if not session_path:
            _logger.debug(f'{path}: Invalid session path')
        elif path.suffix not in self.file_extensions:
            _logger.debug(f'{path}: No matching extension "{path.suffix}" in database')
        else:
            try:
                get_dataset_type(path, self.dtypes)
            except ValueError as ex:
                _logger.debug('%s', ex.args[0])
            else:
                files_by_session[session_path].append(path.relative_to(session_path))
                versions_by_session[session_path].append(version)

    return files_by_session, versions_by_session, file_list, single_file
451
+
452
+ def check_protected_files(self, file_list, created_by=None):
453
+ """Check whether a set of files associated to a session are protected.
454
+
455
+ Parameters
456
+ ----------
457
+ file_list : list, str, pathlib.Path
458
+ A filepath (or list thereof) of ALF datasets to register to Alyx.
459
+ created_by : str
460
+ Name of Alyx user (defaults to whoever is logged in to ONE instance).
461
+
462
+ Returns
463
+ -------
464
+ list of dicts, dict
465
+ A status for each session whether any of the files specified are protected
466
+ datasets or not.If none of the datasets are protected, a response with status
467
+ 200 is returned, if any of the files are protected a response with status
468
+ 403 is returned.
469
+
470
+ """
471
+ # Validate files and rearrange into list per session
472
+ F, _, _, single_file = self.prepare_files(file_list)
473
+
474
+ # For each unique session, make a separate POST request
475
+ records = []
476
+ for session_path, files in F.items():
477
+ # this is the generic relative path: subject/yyyy-mm-dd/NNN
478
+ details = session_path_parts(session_path.as_posix(), as_dict=True, assert_valid=True)
479
+ rel_path = PurePosixPath(details['subject'], details['date'], details['number'])
480
+
481
+ r_ = {'created_by': created_by or self.one.alyx.user,
482
+ 'path': rel_path.as_posix(),
483
+ 'filenames': [x.as_posix() for x in files]
484
+ }
485
+ records.append(self.one.alyx.get('/check-protected', data=r_, clobber=True))
486
+
487
+ return records[0] if single_file else records
488
+
489
def register_files(self, file_list,
                   versions=None, default=True, created_by=None, server_only=False,
                   repository=None, exists=True, dry=False, max_md5_size=None, **kwargs):
    """Registers a set of files belonging to a session only on the server.

    Files are validated and grouped by session (see `prepare_files`), then one POST request
    to the /register-file endpoint is made per session. If the server responds that one or
    more datasets are protected, the affected local files are moved into a revision folder
    and the registration is re-submitted with the new relative paths.

    Parameters
    ----------
    file_list : list, str, pathlib.Path
        A filepath (or list thereof) of ALF datasets to register to Alyx.
    versions : str, list of str
        Optional version tags.
    default : bool
        Whether to set as default revision (defaults to True).
    created_by : str
        Name of Alyx user (defaults to whoever is logged in to ONE instance).
    server_only : bool
        Will only create file records in the 'online' repositories and skips local repositories
    repository : str
        Name of the repository in Alyx to register to.
    exists : bool
        Whether the files exist on the repository (defaults to True). May be set to False
        when registering files before copying to the repository.
    dry : bool
        When true returns POST data for registration endpoint without submitting the data.
    max_md5_size : int
        Maximum file size in bytes to compute md5 sum (always compute if None).
    kwargs
        Extra arguments directly passed as REST request data to /register-files endpoint.

    Returns
    -------
    list of dicts, dict
        A list of newly created Alyx dataset records or the registration data if dry. If
        a single file is passed in, a single dict is returned. Unless dry, entries for
        input files that were not valid for registration are None.

    Notes
    -----
    - The registered files may be automatically moved to new revision folders if they are
      protected on Alyx, therefore it's important to check the relative paths of the output.
    - Protected datasets are not checked in dry mode.
    - In most circumstances a new revision will be added automatically, however if this fails
      a 403 HTTP status may be returned.

    Raises
    ------
    requests.exceptions.HTTPError
        Submitted data not valid (400 status code)
        Server side database error (500 status code)
        Revision protected (403 status code)

    """
    F, V, file_list, single_file = self.prepare_files(file_list, versions=versions)

    # For each unique session, make a separate POST request
    records = [None] * (len(F) if dry else len(file_list))  # If dry return data per session
    for session_path, files in F.items():
        # this is the generic relative path: subject/yyyy-mm-dd/NNN
        details = session_path_parts(session_path.as_posix(), as_dict=True, assert_valid=True)
        rel_path = PurePosixPath(details['subject'], details['date'], details['number'])
        file_sizes = [session_path.joinpath(fn).stat().st_size for fn in files]
        # computing the md5 can be very long, so this is an option to skip if the file is
        # bigger than a certain threshold
        md5s = [hashfile.md5(session_path.joinpath(fn))
                if (max_md5_size is None or sz < max_md5_size) else None
                for fn, sz in zip(files, file_sizes)]

        _logger.info('Registering ' + str(files))

        # NB: kwargs are unpacked last so they take precedence over the fields above
        r_ = {'created_by': created_by or self.one.alyx.user,
              'path': rel_path.as_posix(),
              'filenames': [x.as_posix() for x in files],
              'hashes': md5s,
              'filesizes': file_sizes,
              'name': repository,
              'exists': exists,
              'server_only': server_only,
              'default': default,
              'versions': V[session_path],
              'check_protected': True,
              **kwargs
              }

        # Add optional field
        if details['lab'] and 'labs' not in kwargs:
            r_['labs'] = details['lab']
        # If dry, store POST data, otherwise store resulting file records
        if dry:
            records[list(F).index(session_path)] = r_
            continue
        try:
            response = self.one.alyx.post('/register-file', data=r_)
            # Ensure we keep the order of the output records: the files missing will remain
            # as None type
            for f, r in zip(files, response):
                records[file_list.index(session_path / f)] = r
        except requests.exceptions.HTTPError as err:
            # 403 response when datasets already registered and protected by tags
            # NOTE(review): assumes the error response body is JSON; if it isn't, .json()
            # presumably raises and masks the original HTTPError - confirm
            err_message = err.response.json()
            if not (err_message.get('status_code') == 403 and
                    err_message.get('error') == 'One or more datasets is protected'):
                raise err  # Some other error occurred; re-raise
            response = err_message['details']
            today_revision = datetime.datetime.today().strftime('%Y-%m-%d')
            new_file_list = []

            for fl, res in zip(files, response):
                # Each item is expected to be a single-entry map of dataset name to a list
                # of {revision: protected} maps
                (name, prot_info), = res.items()
                # Dataset has not yet been registered
                if not prot_info:
                    new_file_list.append(fl)
                    continue

                # Check to see if the file path already has a revision in it
                file_revision = folder_parts(rel_path / fl, as_dict=True)['revision']
                # Find existing protected revisions
                existing_revisions = [k for pr in prot_info for k, v in pr.items() if v]

                if file_revision:
                    # If the revision explicitly defined by the user doesn't exist or
                    # is not protected, register as is
                    if file_revision not in existing_revisions:
                        revision_path = fl.parent
                    else:
                        # Find the next sub-revision that isn't protected
                        new_revision = self._next_revision(file_revision, existing_revisions)
                        revision_path = fl.parent.parent.joinpath(f'#{new_revision}#')

                    if revision_path != fl.parent:
                        session_path.joinpath(revision_path).mkdir(exist_ok=True)
                        _logger.info('Moving %s -> %s', fl, revision_path.joinpath(fl.name))
                        shutil.move(session_path / fl, session_path / revision_path / fl.name)
                    new_file_list.append(revision_path.joinpath(fl.name))
                    continue

                # The file wasn't in a revision folder but is protected
                fl_path = fl.parent
                assert name == fl_path.joinpath(fl.name).as_posix()

                # Find info about the latest revision
                # N.B on django side prot_info is sorted by latest revisions first
                (latest_revision, protected), = prot_info[0].items()

                # If the latest revision is the original and it is unprotected
                # no need for revision e.g {'clusters.amp.npy': [{'': False}]}
                if latest_revision == '' and not protected:
                    # Use original path
                    revision_path = fl_path

                # If there already is a revision but it is unprotected,
                # move into this revision folder e.g
                # {'clusters.amp.npy':
                #     [{'2022-10-31': False}, {'2022-05-31': True}, {'': True}]}
                elif not protected:
                    # Check that the latest_revision has the date naming convention we expect
                    # i.e. 'YYYY-MM-DD'
                    try:
                        _ = datetime.datetime.strptime(latest_revision[:10], '%Y-%m-%d')
                        revision_path = fl_path.joinpath(f'#{latest_revision}#')
                    # If it doesn't it probably has been made manually so we don't want to
                    # overwrite this and instead use today's date
                    except ValueError:
                        # NB: It's possible that today's date revision is also protected but is
                        # not the most recent revision. In this case it's safer to let fail.
                        revision_path = fl_path.joinpath(f'#{today_revision}#')

                # If protected and the latest protected revision is from today we need to make
                # a sub-revision
                elif protected and today_revision in latest_revision:
                    if latest_revision == today_revision:  # iterate from appending 'a'
                        new_revision = self._next_revision(today_revision, existing_revisions)
                    else:  # assume the revision is date + character, e.g. '2020-01-01c'
                        alpha = latest_revision[-1]  # iterate from this character
                        new_revision = self._next_revision(
                            today_revision, existing_revisions, alpha)
                    revision_path = fl_path.joinpath(f'#{new_revision}#')

                # Otherwise cases move into revision from today
                # e.g {'clusters.amp.npy': [{'': True}]}
                # e.g {'clusters.amp.npy': [{'2022-10-31': True}, {'': True}]}
                else:
                    revision_path = fl_path.joinpath(f'#{today_revision}#')

                # Only move for the cases where a revision folder has been made
                if revision_path != fl_path:
                    session_path.joinpath(revision_path).mkdir(exist_ok=True)
                    _logger.info('Moving %s -> %s', fl, revision_path.joinpath(fl.name))
                    shutil.move(session_path / fl, session_path / revision_path / fl.name)
                new_file_list.append(revision_path.joinpath(fl.name))

            assert len(new_file_list) == len(files)
            r_['filenames'] = [p.as_posix() for p in new_file_list]
            r_['filesizes'] = [session_path.joinpath(p).stat().st_size for p in new_file_list]
            r_['check_protected'] = False  # Speed things up by ignoring server-side checks

            response = self.one.alyx.post('/register-file', data=r_)
            # The output position is looked up using the ORIGINAL (pre-move) file paths
            for f, r in zip(files, response):  # Populate records list in correct order
                records[file_list.index(session_path / f)] = r
            files = new_file_list

        # Log file names
        # NB: dry mode continues before reaching this point, so dry is always False here
        _logger.info(f'ALYX REGISTERED DATA {"!DRY!" if dry else ""}: {rel_path}')
        for p in files:
            _logger.info(f'ALYX REGISTERED DATA: {p}')

    # NOTE(review): in dry mode records has one entry per valid session, so a single invalid
    # file leaves records empty and records[0] raises IndexError - confirm intended
    return records[0] if single_file else records
696
+
697
+ @staticmethod
698
+ def _next_revision(revision: str, reserved: list = None, alpha: str = 'a') -> str:
699
+ """Return the next logical revision that is not already in the provided list.
700
+
701
+ Revisions will increment by appending a letter to a date or other identifier.
702
+
703
+ Parameters
704
+ ----------
705
+ revision : str
706
+ The revision on which to base the new revision.
707
+ reserved : list of str
708
+ A list of reserved (i.e. already existing) revision strings.
709
+ alpha : str
710
+ The starting character as an integer, defaults to 'a'.
711
+
712
+ Returns
713
+ -------
714
+ str
715
+ The next logical revision string that's not in the reserved list.
716
+
717
+ Examples
718
+ --------
719
+ >>> RegistrationClient._next_revision('2020-01-01')
720
+ '2020-01-01a'
721
+ >>> RegistrationClient._next_revision('2020-01-01', ['2020-01-01a', '2020-01-01b'])
722
+ '2020-01-01c'
723
+ >>> RegistrationClient._next_revision('2020-01-01', ['2020-01-01a', '2020-01-01b'])
724
+ '2020-01-01c'
725
+
726
+ """
727
+ if len(alpha) != 1:
728
+ raise TypeError(
729
+ f'`alpha` must be a character; received a string of length {len(alpha)}'
730
+ )
731
+ i = ord(alpha)
732
+ new_revision = revision + chr(i)
733
+ while new_revision in (reserved or []):
734
+ i += 1
735
+ new_revision = revision + chr(i)
736
+ return new_revision
737
+
738
+ def register_water_administration(self, subject, volume, **kwargs):
739
+ """Register a water administration to Alyx for a given subject.
740
+
741
+ Parameters
742
+ ----------
743
+ subject : str
744
+ A subject nickname that exists on Alyx
745
+ volume : float
746
+ The total volume administrated in ml
747
+ date_time : str, datetime.datetime, datetime.date
748
+ The time of administration. If None, the current time is used.
749
+ water_type : str
750
+ A water type that exists in Alyx; default is 'Water'
751
+ user : str
752
+ The user who administrated the water. Currently logged-in user is the default.
753
+ session : str, UUID, pathlib.Path, dict
754
+ An optional experiment ID to associate
755
+ adlib : bool
756
+ If true, indicates that the subject was given water ad libitum
757
+
758
+ Returns
759
+ -------
760
+ dict
761
+ A water administration record
762
+
763
+ Raises
764
+ ------
765
+ one.alf.exceptions.AlyxSubjectNotFound
766
+ Subject does not exist on Alyx
767
+ one.alf.exceptions.ALFError
768
+ User does not exist on Alyx
769
+ ValueError
770
+ date_time is not a valid ISO date time or session ID is not valid
771
+ requests.exceptions.HTTPError
772
+ Failed to connect to database, or submitted data not valid (500)
773
+
774
+ """
775
+ # Ensure subject exists
776
+ self.assert_exists(subject, 'subjects')
777
+ # Ensure user(s) exist
778
+ user = kwargs.pop('user', self.one.alyx.user)
779
+ self.assert_exists(user, 'users')
780
+ # Ensure volume not zero
781
+ if volume == 0:
782
+ raise ValueError('Water volume must be greater than zero')
783
+ # Post water admin
784
+ wa_ = {
785
+ 'subject': subject,
786
+ 'date_time': self.ensure_ISO8601(kwargs.pop('date_time', None)),
787
+ 'water_administered': float(f'{volume:.4g}'), # Round to 4 s.f.
788
+ 'water_type': kwargs.pop('water_type', 'Water'),
789
+ 'user': user,
790
+ 'adlib': kwargs.pop('adlib', False)
791
+ }
792
+ # Ensure session is valid; convert to eid
793
+ if kwargs.get('session', False):
794
+ wa_['session'] = str(self.one.to_eid(kwargs.pop('session')) or '')
795
+ if not wa_['session']:
796
+ raise ValueError('Failed to parse session ID')
797
+
798
+ return self.one.alyx.rest('water-administrations', 'create', data=wa_)
799
+
800
+ def register_weight(self, subject, weight, date_time=None, user=None):
801
+ """Register a subject weight to Alyx.
802
+
803
+ Parameters
804
+ ----------
805
+ subject : str
806
+ A subject nickname that exists on Alyx.
807
+ weight : float
808
+ The subject weight in grams.
809
+ date_time : str, datetime.datetime, datetime.date
810
+ The time of weighing. If None, the current time is used.
811
+ user : str
812
+ The user who performed the weighing. Currently logged-in user is the default.
813
+
814
+ Returns
815
+ -------
816
+ dict
817
+ An Alyx weight record
818
+
819
+ Raises
820
+ ------
821
+ one.alf.exceptions.AlyxSubjectNotFound
822
+ Subject does not exist on Alyx
823
+ one.alf.exceptions.ALFError
824
+ User does not exist on Alyx
825
+ ValueError
826
+ date_time is not a valid ISO date time or weight < 1e-4
827
+ requests.exceptions.HTTPError
828
+ Failed to connect to database, or submitted data not valid (500)
829
+
830
+ """
831
+ # Ensure subject exists
832
+ self.assert_exists(subject, 'subjects')
833
+ # Ensure user(s) exist
834
+ user = user or self.one.alyx.user
835
+ self.assert_exists(user, 'users')
836
+ # Ensure weight not zero
837
+ if weight == 0:
838
+ raise ValueError('Water volume must be greater than 0')
839
+
840
+ # Post water admin
841
+ wei_ = {'subject': subject,
842
+ 'date_time': self.ensure_ISO8601(date_time),
843
+ 'weight': float(f'{weight:.4g}'), # Round to 4 s.f.
844
+ 'user': user}
845
+ return self.one.alyx.rest('weighings', 'create', data=wei_)