ONE-api 3.0b1__py3-none-any.whl → 3.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ONE_api-3.0b1.dist-info → ONE_api-3.0b4.dist-info}/LICENSE +21 -21
- {ONE_api-3.0b1.dist-info → ONE_api-3.0b4.dist-info}/METADATA +115 -115
- ONE_api-3.0b4.dist-info/RECORD +37 -0
- one/__init__.py +2 -2
- one/alf/__init__.py +1 -1
- one/alf/cache.py +640 -653
- one/alf/exceptions.py +105 -105
- one/alf/io.py +876 -876
- one/alf/path.py +1450 -1450
- one/alf/spec.py +519 -504
- one/api.py +2949 -2973
- one/converters.py +850 -850
- one/params.py +414 -414
- one/registration.py +845 -845
- one/remote/__init__.py +1 -1
- one/remote/aws.py +313 -313
- one/remote/base.py +142 -142
- one/remote/globus.py +1254 -1254
- one/tests/fixtures/params/.caches +6 -6
- one/tests/fixtures/params/.test.alyx.internationalbrainlab.org +8 -8
- one/tests/fixtures/rest_responses/1f187d80fd59677b395fcdb18e68e4401bfa1cc9 +1 -1
- one/tests/fixtures/rest_responses/47893cf67c985e6361cdee009334963f49fb0746 +1 -1
- one/tests/fixtures/rest_responses/535d0e9a1e2c1efbdeba0d673b131e00361a2edb +1 -1
- one/tests/fixtures/rest_responses/6dc96f7e9bcc6ac2e7581489b9580a6cd3f28293 +1 -1
- one/tests/fixtures/rest_responses/db1731fb8df0208944ae85f76718430813a8bf50 +1 -1
- one/tests/fixtures/rest_responses/dcce48259bb929661f60a02a48563f70aa6185b3 +1 -1
- one/tests/fixtures/rest_responses/f530d6022f61cdc9e38cc66beb3cb71f3003c9a1 +1 -1
- one/tests/fixtures/test_dbs.json +14 -14
- one/util.py +524 -524
- one/webclient.py +1366 -1354
- ONE_api-3.0b1.dist-info/RECORD +0 -37
- {ONE_api-3.0b1.dist-info → ONE_api-3.0b4.dist-info}/WHEEL +0 -0
- {ONE_api-3.0b1.dist-info → ONE_api-3.0b4.dist-info}/top_level.txt +0 -0
one/registration.py
CHANGED
|
@@ -1,845 +1,845 @@
|
|
|
1
|
-
"""Session creation and datasets registration.
|
|
2
|
-
|
|
3
|
-
The RegistrationClient provides an high-level API for creating experimentation sessions on Alyx
|
|
4
|
-
and registering associated datasets.
|
|
5
|
-
|
|
6
|
-
Summary of methods
|
|
7
|
-
------------------
|
|
8
|
-
create_new_session - Create a new local session folder and optionally create session record on Alyx
|
|
9
|
-
create_sessions - Create sessions and register files for folder containing a given flag file
|
|
10
|
-
register_session - Create a session on Alyx from local path and register any ALF datasets present
|
|
11
|
-
register_files - Register a list of files to their respective sessions on Alyx
|
|
12
|
-
"""
|
|
13
|
-
import pathlib
|
|
14
|
-
from uuid import UUID
|
|
15
|
-
from pathlib import Path, PurePosixPath
|
|
16
|
-
import datetime
|
|
17
|
-
import logging
|
|
18
|
-
import itertools
|
|
19
|
-
from collections import defaultdict
|
|
20
|
-
from fnmatch import fnmatch
|
|
21
|
-
import shutil
|
|
22
|
-
|
|
23
|
-
import requests.exceptions
|
|
24
|
-
|
|
25
|
-
from iblutil.io import hashfile
|
|
26
|
-
from iblutil.util import Bunch, ensure_list
|
|
27
|
-
|
|
28
|
-
import one.alf.io as alfio
|
|
29
|
-
from one.alf.path import ALFPath, session_path_parts, ensure_alf_path, folder_parts
|
|
30
|
-
from one.alf.spec import is_valid
|
|
31
|
-
import one.alf.exceptions as alferr
|
|
32
|
-
from one.api import ONE
|
|
33
|
-
from one.webclient import no_cache
|
|
34
|
-
|
|
35
|
-
_logger = logging.getLogger(__name__)
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def get_dataset_type(filename, dtypes):
    """Look up the dataset type corresponding to a filename.

    A dataset type matches in one of two ways:

    1. its non-empty ``filename_pattern`` glob-matches the filename
       (case-insensitive); or
    2. if ``filename_pattern`` is empty, its ``name`` equals the filename's
       object.attribute pair (or the stem for non-ALF filenames).

    Parameters
    ----------
    filename : str, pathlib.Path
        The filename or filepath to classify.
    dtypes : iterable
        Dataset type objects exposing 'name' and 'filename_pattern' attributes.

    Returns
    -------
    The single dataset type object matching filename.

    Raises
    ------
    ValueError
        No dataset type matched, or more than one matched.

    """
    filename = ensure_alf_path(filename)
    matches = []
    for dtype in dtypes:
        if dtype.filename_pattern.strip():
            # Non-empty pattern: case-insensitive glob match against the name
            if fnmatch(filename.name.casefold(), dtype.filename_pattern.casefold()):
                matches.append(dtype)
            continue
        # Null pattern: compare the type name against object.attribute
        if is_valid(filename.name):
            obj_attr = '.'.join(filename.dataset_name_parts[1:3])
        else:
            # Non-ALF name: match against the filename sans extension
            obj_attr = filename.stem
        if dtype.name == obj_attr:
            matches.append(dtype)
    if not matches:
        raise ValueError(f'No dataset type found for filename "{filename.name}"')
    if len(matches) > 1:
        raise ValueError('Multiple matching dataset types found for filename '
                         f'"{filename.name}": \n{", ".join(map(str, matches))}')
    return matches[0]
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
class RegistrationClient:
|
|
89
|
-
"""Methods to create sessions and register data."""
|
|
90
|
-
|
|
91
|
-
def __init__(self, one=None):
    """Instantiate the registration client.

    Parameters
    ----------
    one : one.api.OneAlyx
        An optional ONE instance; a fresh one (with REST caching off) is
        created when omitted.
    """
    if one:
        self.one = one
        # A GET-mode REST cache may serve stale records during registration
        if one.alyx.cache_mode == 'GET':
            _logger.warning('AlyxClient REST cache active; '
                            'this may cause issues with registration.')
    else:
        self.one = ONE(cache_rest=None)
    # Fetch dataset types and the registrable file extensions from Alyx
    self.dtypes = [Bunch(dt) for dt in self.one.alyx.rest('dataset-types', 'list')]
    self.registration_patterns = [
        dt['filename_pattern'] for dt in self.dtypes if dt['filename_pattern']]
    formats = self.one.alyx.rest('data-formats', 'list', no_cache=True)
    self.file_extensions = [df['file_extension'] for df in formats]
|
|
103
|
-
|
|
104
|
-
def create_sessions(self, root_data_folder, glob_pattern='**/create_me.flag',
                    register_files=False, dry=False):
    """Create sessions looking recursively for flag files.

    Parameters
    ----------
    root_data_folder : str, pathlib.Path
        Folder to look for sessions.
    glob_pattern : str
        Register valid sessions that contain this pattern.
    register_files : bool
        If true, register all valid datasets within the session folder.
    dry : bool
        If true returns list of sessions without creating them on Alyx.

    Returns
    -------
    list of pathlib.Paths
        Newly created session paths.
    list of dicts
        Alyx session records (one None placeholder per flag file when dry).

    """
    flag_files = list(Path(root_data_folder).glob(glob_pattern))
    records = []
    for flag_file in flag_files:
        if dry:
            # Dry run: just report the flag file; keep a None placeholder
            # so the records list stays aligned with the flag files
            print(flag_file)
            records.append(None)
            continue
        session_path = ALFPath(flag_file.parent)
        _logger.info('creating session for ' + str(session_path))
        # providing a false flag stops the registration after session creation
        session_info, _ = self.register_session(session_path, file_list=register_files)
        records.append(session_info)
        flag_file.unlink()
    return [ALFPath(ff.parent) for ff in flag_files], records
|
|
140
|
-
|
|
141
|
-
def create_new_session(self, subject, session_root=None, date=None, register=True, **kwargs):
    """Create a new local session folder and optionally create session record on Alyx.

    Parameters
    ----------
    subject : str
        The subject name. Must exist on Alyx.
    session_root : str, pathlib.Path
        The root folder in which to create the subject/date/number folder. Defaults to ONE
        cache directory.
    date : datetime.datetime, datetime.date, str
        An optional date for the session. If None the current time is used.
    register : bool
        If true, create session record on Alyx database.
    kwargs
        Optional arguments for RegistrationClient.register_session.

    Returns
    -------
    pathlib.Path
        New local session path.
    uuid.UUID
        The experiment UUID if register is True.

    Examples
    --------
    Create a local session only

    >>> session_path, _ = RegistrationClient().create_new_session('Ian', register=False)

    Register a session on Alyx in a specific location

    >>> session_path, eid = RegistrationClient().create_new_session('Sy', '/data/lab/Subjects')

    Create a session for a given date

    >>> session_path, eid = RegistrationClient().create_new_session('Ian', date='2020-01-01')

    """
    assert not self.one.offline, 'ONE must be in online mode'
    date = self.ensure_ISO8601(date)  # Format, validate
    # The subject must already exist on Alyx
    self.assert_exists(subject, 'subjects')
    # Build <root>/<subject>/<yyyy-mm-dd>/<NNN> and create it on disk
    day_root = Path(session_root or self.one.alyx.cache_dir) / subject / date[:10]
    session_path = day_root / alfio.next_num_folder(day_root)
    session_path.mkdir(exist_ok=True, parents=True)
    eid = None
    if register:
        session_info, _ = self.register_session(session_path, **kwargs)
        eid = UUID(session_info['url'][-36:])
    return session_path, eid
|
|
193
|
-
|
|
194
|
-
def find_files(self, session_path):
    """Yield dataset files under a session that match a registered dataset type.

    A file is yielded when its extension is one of the Alyx data formats AND
    `get_dataset_type` resolves it to exactly one dataset type.

    Parameters
    ----------
    session_path : str, pathlib.Path
        The session path to search.

    Yields
    ------
    pathlib.Path
        File paths that match the dataset type patterns in Alyx.

    """
    session_path = ALFPath(session_path)
    extensions = tuple(self.file_extensions)
    for dataset in session_path.iter_datasets(recursive=True):
        if not dataset.name.endswith(extensions):
            continue
        try:
            get_dataset_type(dataset, self.dtypes)
        except ValueError as ex:
            # No (or ambiguous) dataset type: skip quietly
            _logger.debug('%s', ex.args[0])
        else:
            yield dataset
|
|
216
|
-
|
|
217
|
-
def assert_exists(self, member, endpoint):
    """Raise an error if a given member doesn't exist on Alyx database.

    Parameters
    ----------
    member : str, uuid.UUID, list
        The member ID(s) to verify
    endpoint: str
        The endpoint at which to look it up

    Examples
    --------
    >>> client.assert_exists('ALK_036', 'subjects')
    >>> client.assert_exists('user_45', 'users')
    >>> client.assert_exists('local_server', 'repositories')

    Raises
    ------
    one.alf.exceptions.AlyxSubjectNotFound
        Subject does not exist on Alyx
    one.alf.exceptions.ALFError
        Member does not exist on Alyx
    requests.exceptions.HTTPError
        Failed to connect to Alyx database or endpoint not found

    Returns
    -------
    dict, list of dict
        The endpoint data if member exists.

    """
    if not isinstance(member, (str, UUID)):
        # Iterable of IDs: validate each element individually
        return [self.assert_exists(x, endpoint) for x in member]
    try:
        return self.one.alyx.rest(endpoint, 'read', id=str(member), no_cache=True)
    except requests.exceptions.HTTPError as ex:
        # Any non-404 error is a genuine connection/endpoint failure
        if ex.response.status_code != 404:
            raise ex
        if endpoint == 'subjects':
            raise alferr.AlyxSubjectNotFound(member)
        raise alferr.ALFError(f'Member "{member}" doesn\'t exist in {endpoint}')
|
|
260
|
-
|
|
261
|
-
@staticmethod
|
|
262
|
-
def ensure_ISO8601(date) -> str:
|
|
263
|
-
"""Ensure provided date is ISO 8601 compliant.
|
|
264
|
-
|
|
265
|
-
Parameters
|
|
266
|
-
----------
|
|
267
|
-
date : str, None, datetime.date, datetime.datetime
|
|
268
|
-
An optional date to convert to ISO string. If None, the current datetime is used.
|
|
269
|
-
|
|
270
|
-
Returns
|
|
271
|
-
-------
|
|
272
|
-
str
|
|
273
|
-
The datetime as an ISO 8601 string
|
|
274
|
-
|
|
275
|
-
"""
|
|
276
|
-
date = date or datetime.datetime.now() # If None get current time
|
|
277
|
-
if isinstance(date, str):
|
|
278
|
-
# FIXME support timezone aware strings, e.g. '2023-03-09T17:08:12.4465024+00:00'
|
|
279
|
-
date = datetime.datetime.fromisoformat(date) # Validate by parsing
|
|
280
|
-
elif type(date) is datetime.date:
|
|
281
|
-
date = datetime.datetime.fromordinal(date.toordinal())
|
|
282
|
-
return datetime.datetime.isoformat(date)
|
|
283
|
-
|
|
284
|
-
def register_session(self, ses_path, users=None, file_list=True, **kwargs):
    """Register session in Alyx.

    NB: If providing a lab or start_time kwarg, they must match the lab (if there is one)
    and date of the session path.

    Parameters
    ----------
    ses_path : str, pathlib.Path
        The local session path
    users : str, list
        The user(s) to attribute to the session
    file_list : bool, list
        An optional list of file paths to register. If True, all valid files within the
        session folder are registered. If False, no files are registered
    location : str
        The optional location within the lab where the experiment takes place
    procedures : str, list
        An optional list of procedures, e.g. 'Behavior training/tasks'
    n_correct_trials : int
        The number of correct trials (optional)
    n_trials : int
        The total number of completed trials (optional)
    json : dict, str
        Optional JSON data
    projects: str, list
        The project(s) to which the experiment belongs (optional)
    type : str
        The experiment type, e.g. 'Experiment', 'Base'
    task_protocol : str
        The task protocol (optional)
    lab : str
        The name of the lab where the session took place. If None the lab name will be
        taken from the path. If no lab name is found in the path (i.e. no <lab>/Subjects)
        the default lab on Alyx will be used.
    start_time : str, datetime.datetime
        The precise start time of the session. The date must match the date in the session
        path.
    end_time : str, datetime.datetime
        The precise end time of the session.

    Returns
    -------
    dict
        An Alyx session record
    list, None
        Alyx file records (or None if file_list is False)

    Raises
    ------
    AssertionError
        Subject does not exist on Alyx, provided start_time does not match date in
        session path, or 'subject'/'number' passed as kwargs.
    ValueError
        The provided lab name does not match the one found in the session path or
        start_time/end_time is not a valid ISO date time.
    requests.HTTPError
        A 400 status code means the submitted data was incorrect (e.g. task_protocol was an
        int instead of a str); A 500 status code means there was a server error.
    ConnectionError
        Failed to connect to Alyx, most likely due to a bad internet connection.

    """
    ses_path = ALFPath(ses_path)
    details = session_path_parts(ses_path.as_posix(), as_dict=True, assert_valid=True)
    # query alyx endpoints for subject, error if not found
    self.assert_exists(details['subject'], 'subjects')

    # look for a session from the same subject, same number on the same day
    with no_cache(self.one.alyx):
        session_id, session = self.one.search(subject=details['subject'],
                                              date_range=details['date'],
                                              number=details['number'],
                                              details=True, query_type='remote')
    users = ensure_list(users or self.one.alyx.user)
    self.assert_exists(users, 'users')

    # if nothing found create a new session in Alyx
    ses_ = {'subject': details['subject'],
            'users': users,
            'type': 'Experiment',
            'number': details['number']}
    if kwargs.get('end_time', False):
        ses_['end_time'] = self.ensure_ISO8601(kwargs.pop('end_time'))
    start_time = self.ensure_ISO8601(kwargs.pop('start_time', details['date']))
    assert start_time[:10] == details['date'], 'start_time doesn\'t match session path'
    if kwargs.get('procedures', False):
        ses_['procedures'] = ensure_list(kwargs.pop('procedures'))
    if kwargs.get('projects', False):
        ses_['projects'] = ensure_list(kwargs.pop('projects'))
    # 'subject' and 'number' are derived from the session path and must not be
    # passed as kwargs. NB: the previous check, `('subject', 'number') not in
    # kwargs`, tested for a *tuple-valued key* and therefore always passed.
    assert not any(k in kwargs for k in ('subject', 'number'))
    if 'lab' not in kwargs and details['lab']:
        kwargs.update({'lab': details['lab']})
    elif details['lab'] and kwargs.get('lab', details['lab']) != details['lab']:
        names = (kwargs['lab'], details['lab'])
        raise ValueError('lab kwarg "%s" does not match lab name in path ("%s")' % names)
    ses_.update(kwargs)

    if not session:  # Create from scratch
        ses_['start_time'] = start_time
        session = self.one.alyx.rest('sessions', 'create', data=ses_)
    else:  # Update existing
        if start_time:
            ses_['start_time'] = self.ensure_ISO8601(start_time)
        session = self.one.alyx.rest('sessions', 'update', id=session_id[0], data=ses_)

    _logger.info(session['url'] + ' ')
    # at this point the session has been created. If create only, exit
    if not file_list:
        return session, None
    recs = self.register_files(self.find_files(ses_path) if file_list is True else file_list)
    if recs:  # Update local session data after registering files
        session['data_dataset_session_related'] = ensure_list(recs)
    return session, recs
|
|
398
|
-
|
|
399
|
-
def prepare_files(self, file_list, versions=None):
    """Validate file list for registration and group files by session path.

    Files are dropped (with a debug log) when they have no valid session path,
    an extension not registered in Alyx, or no matching dataset type.

    Parameters
    ----------
    file_list : list, str, pathlib.Path
        A filepath (or list thereof) of ALF datasets to register to Alyx.
    versions : str, list of str
        Optional version tags. A single str (or None) is applied to every file;
        a list is cycled over the files in order.

    Returns
    -------
    collections.defaultdict
        A map of session path -> list of file paths relative to that session.
    collections.defaultdict
        A map of session path -> list of version tags (parallel to the files).
    list
        A list of files converted to paths.
    bool
        A boolean indicating if input was a single file.

    """
    F = defaultdict(list)  # empty map whose keys will be session paths
    V = defaultdict(list)  # empty map for versions

    # Remember whether the caller passed a single path so the caller can
    # unwrap single-element results
    if single_file := isinstance(file_list, (str, pathlib.Path)):
        file_list = [file_list]
    file_list = list(map(ALFPath, file_list))  # Ensure list of path objects

    if versions is None or isinstance(versions, str):
        # One tag (or None) for all files
        versions = itertools.repeat(versions)
    else:
        # A sequence of tags: cycle so it never runs short of the file list
        versions = itertools.cycle(versions)

    # Filter valid files and sort by session
    for fn, ver in zip(file_list, versions):
        session_path = fn.session_path()
        if not session_path:
            _logger.debug(f'{fn}: Invalid session path')
            continue
        if fn.suffix not in self.file_extensions:
            _logger.debug(f'{fn}: No matching extension "{fn.suffix}" in database')
            continue
        try:
            # Raises ValueError when zero or multiple dataset types match
            get_dataset_type(fn, self.dtypes)
        except ValueError as ex:
            _logger.debug('%s', ex.args[0])
            continue
        F[session_path].append(fn.relative_to(session_path))
        V[session_path].append(ver)

    return F, V, file_list, single_file
|
|
451
|
-
|
|
452
|
-
def check_protected_files(self, file_list, created_by=None):
    """Check whether a set of files associated to a session are protected.

    Parameters
    ----------
    file_list : list, str, pathlib.Path
        A filepath (or list thereof) of ALF datasets to register to Alyx.
    created_by : str
        Name of Alyx user (defaults to whoever is logged in to ONE instance).

    Returns
    -------
    list of dicts, dict
        One response per session: status 200 when none of the given files are
        protected datasets, status 403 when any of them are. A single response
        is returned when a single file was passed in.

    """
    # Validate files and rearrange into list per session
    grouped, _, _, single_file = self.prepare_files(file_list)

    records = []
    # One POST per unique session
    for session_path, files in grouped.items():
        # this is the generic relative path: subject/yyyy-mm-dd/NNN
        details = session_path_parts(session_path.as_posix(), as_dict=True, assert_valid=True)
        rel_path = PurePosixPath(details['subject'], details['date'], details['number'])
        payload = {'created_by': created_by or self.one.alyx.user,
                   'path': rel_path.as_posix(),
                   'filenames': [x.as_posix() for x in files]
                   }
        records.append(self.one.alyx.get('/check-protected', data=payload, clobber=True))

    return records[0] if single_file else records
|
|
488
|
-
|
|
489
|
-
def register_files(self, file_list,
                   versions=None, default=True, created_by=None, server_only=False,
                   repository=None, exists=True, dry=False, max_md5_size=None, **kwargs):
    """Registers a set of files belonging to a session only on the server.

    Parameters
    ----------
    file_list : list, str, pathlib.Path
        A filepath (or list thereof) of ALF datasets to register to Alyx.
    versions : str, list of str
        Optional version tags.
    default : bool
        Whether to set as default revision (defaults to True).
    created_by : str
        Name of Alyx user (defaults to whoever is logged in to ONE instance).
    server_only : bool
        Will only create file records in the 'online' repositories and skips local repositories
    repository : str
        Name of the repository in Alyx to register to.
    exists : bool
        Whether the files exist on the repository (defaults to True). May be set to False
        when registering files before copying to the repository.
    dry : bool
        When true returns POST data for registration endpoint without submitting the data.
    max_md5_size : int
        Maximum file in bytes to compute md5 sum (always compute if None).
    kwargs
        Extra arguments directly passed as REST request data to /register-files endpoint.

    Returns
    -------
    list of dicts, dict
        A list of newly created Alyx dataset records or the registration data if dry. If
        a single file is passed in, a single dict is returned.

    Notes
    -----
    - The registered files may be automatically moved to new revision folders if they are
      protected on Alyx, therefore it's important to check the relative paths of the output.
    - Protected datasets are not checked in dry mode.
    - In most circumstances a new revision will be added automatically, however if this fails
      a 403 HTTP status may be returned.

    Raises
    ------
    requests.exceptions.HTTPError
        Submitted data not valid (400 status code)
        Server side database error (500 status code)
        Revision protected (403 status code)

    """
    F, V, file_list, single_file = self.prepare_files(file_list, versions=versions)

    # For each unique session, make a separate POST request
    records = [None] * (len(F) if dry else len(file_list))  # If dry return data per session
    for session_path, files in F.items():
        # this is the generic relative path: subject/yyyy-mm-dd/NNN
        details = session_path_parts(session_path.as_posix(), as_dict=True, assert_valid=True)
        rel_path = PurePosixPath(details['subject'], details['date'], details['number'])
        file_sizes = [session_path.joinpath(fn).stat().st_size for fn in files]
        # computing the md5 can be very long, so this is an option to skip if the file is
        # bigger than a certain threshold
        md5s = [hashfile.md5(session_path.joinpath(fn))
                if (max_md5_size is None or sz < max_md5_size) else None
                for fn, sz in zip(files, file_sizes)]

        _logger.info('Registering ' + str(files))

        r_ = {'created_by': created_by or self.one.alyx.user,
              'path': rel_path.as_posix(),
              'filenames': [x.as_posix() for x in files],
              'hashes': md5s,
              'filesizes': file_sizes,
              'name': repository,
              'exists': exists,
              'server_only': server_only,
              'default': default,
              'versions': V[session_path],
              'check_protected': True,
              **kwargs
              }

        # Add optional field
        if details['lab'] and 'labs' not in kwargs:
            r_['labs'] = details['lab']
        # If dry, store POST data, otherwise store resulting file records
        if dry:
            records[list(F).index(session_path)] = r_
            continue
        try:
            response = self.one.alyx.post('/register-file', data=r_)
            # Ensure we keep the order of the output records: the files missing will remain
            # as None type
            for f, r in zip(files, response):
                records[file_list.index(session_path / f)] = r
        except requests.exceptions.HTTPError as err:
            # 403 response when datasets already registered and protected by tags
            err_message = err.response.json()
            if not (err_message.get('status_code') == 403 and
                    err_message.get('error') == 'One or more datasets is protected'):
                raise err  # Some other error occurred; re-raise
            # NOTE(review): from here the server's per-file protection info is
            # used to pick a revision folder for each file, move the file there,
            # and re-POST with check_protected=False.
            response = err_message['details']
            today_revision = datetime.datetime.today().strftime('%Y-%m-%d')
            new_file_list = []

            for fl, res in zip(files, response):
                # res presumably maps one relative filename to its list of
                # {revision: protected} dicts — TODO confirm against Alyx docs
                (name, prot_info), = res.items()
                # Dataset has not yet been registered
                if not prot_info:
                    new_file_list.append(fl)
                    continue

                # Check to see if the file path already has a revision in it
                file_revision = folder_parts(rel_path / fl, as_dict=True)['revision']
                # Find existing protected revisions
                existing_revisions = [k for pr in prot_info for k, v in pr.items() if v]

                if file_revision:
                    # If the revision explicitly defined by the user doesn't exist or
                    # is not protected, register as is
                    if file_revision not in existing_revisions:
                        revision_path = fl.parent
                    else:
                        # Find the next sub-revision that isn't protected
                        new_revision = self._next_revision(file_revision, existing_revisions)
                        revision_path = fl.parent.parent.joinpath(f'#{new_revision}#')

                    if revision_path != fl.parent:
                        session_path.joinpath(revision_path).mkdir(exist_ok=True)
                        _logger.info('Moving %s -> %s', fl, revision_path.joinpath(fl.name))
                        shutil.move(session_path / fl, session_path / revision_path / fl.name)
                    new_file_list.append(revision_path.joinpath(fl.name))
                    continue

                # The file wasn't in a revision folder but is protected
                fl_path = fl.parent
                assert name == fl_path.joinpath(fl.name).as_posix()

                # Find info about the latest revision
                # N.B on django side prot_info is sorted by latest revisions first
                (latest_revision, protected), = prot_info[0].items()

                # If the latest revision is the original and it is unprotected
                # no need for revision e.g {'clusters.amp.npy': [{'': False}]}
                if latest_revision == '' and not protected:
                    # Use original path
                    revision_path = fl_path

                # If there already is a revision but it is unprotected,
                # move into this revision folder e.g
                # {'clusters.amp.npy':
                #  [{'2022-10-31': False}, {'2022-05-31': True}, {'': True}]}
                elif not protected:
                    # Check that the latest_revision has the date naming convention we expect
                    # i.e. 'YYYY-MM-DD'
                    try:
                        _ = datetime.datetime.strptime(latest_revision[:10], '%Y-%m-%d')
                        revision_path = fl_path.joinpath(f'#{latest_revision}#')
                    # If it doesn't it probably has been made manually so we don't want to
                    # overwrite this and instead use today's date
                    except ValueError:
                        # NB: It's possible that today's date revision is also protected but is
                        # not the most recent revision. In this case it's safer to let fail.
                        revision_path = fl_path.joinpath(f'#{today_revision}#')

                # If protected and the latest protected revision is from today we need to make
                # a sub-revision
                elif protected and today_revision in latest_revision:
                    if latest_revision == today_revision:  # iterate from appending 'a'
                        new_revision = self._next_revision(today_revision, existing_revisions)
                    else:  # assume the revision is date + character, e.g. '2020-01-01c'
                        alpha = latest_revision[-1]  # iterate from this character
                        new_revision = self._next_revision(
                            today_revision, existing_revisions, alpha)
                    revision_path = fl_path.joinpath(f'#{new_revision}#')

                # Otherwise cases move into revision from today
                # e.g {'clusters.amp.npy': [{'': True}]}
                # e.g {'clusters.amp.npy': [{'2022-10-31': True}, {'': True}]}
                else:
                    revision_path = fl_path.joinpath(f'#{today_revision}#')

                # Only move for the cases where a revision folder has been made
                if revision_path != fl_path:
                    session_path.joinpath(revision_path).mkdir(exist_ok=True)
                    _logger.info('Moving %s -> %s', fl, revision_path.joinpath(fl.name))
                    shutil.move(session_path / fl, session_path / revision_path / fl.name)
                new_file_list.append(revision_path.joinpath(fl.name))

            assert len(new_file_list) == len(files)
            r_['filenames'] = [p.as_posix() for p in new_file_list]
            r_['filesizes'] = [session_path.joinpath(p).stat().st_size for p in new_file_list]
            r_['check_protected'] = False  # Speed things up by ignoring server-side checks

            response = self.one.alyx.post('/register-file', data=r_)
            for f, r in zip(files, response):  # Populate records list in correct order
                records[file_list.index(session_path / f)] = r
            files = new_file_list

        # Log file names
        _logger.info(f'ALYX REGISTERED DATA {"!DRY!" if dry else ""}: {rel_path}')
        for p in files:
            _logger.info(f'ALYX REGISTERED DATA: {p}')

    return records[0] if single_file else records
|
|
696
|
-
|
|
697
|
-
@staticmethod
|
|
698
|
-
def _next_revision(revision: str, reserved: list = None, alpha: str = 'a') -> str:
|
|
699
|
-
"""Return the next logical revision that is not already in the provided list.
|
|
700
|
-
|
|
701
|
-
Revisions will increment by appending a letter to a date or other identifier.
|
|
702
|
-
|
|
703
|
-
Parameters
|
|
704
|
-
----------
|
|
705
|
-
revision : str
|
|
706
|
-
The revision on which to base the new revision.
|
|
707
|
-
reserved : list of str
|
|
708
|
-
A list of reserved (i.e. already existing) revision strings.
|
|
709
|
-
alpha : str
|
|
710
|
-
The starting character as an integer, defaults to 'a'.
|
|
711
|
-
|
|
712
|
-
Returns
|
|
713
|
-
-------
|
|
714
|
-
str
|
|
715
|
-
The next logical revision string that's not in the reserved list.
|
|
716
|
-
|
|
717
|
-
Examples
|
|
718
|
-
--------
|
|
719
|
-
>>> RegistrationClient._next_revision('2020-01-01')
|
|
720
|
-
'2020-01-01a'
|
|
721
|
-
>>> RegistrationClient._next_revision('2020-01-01', ['2020-01-01a', '2020-01-01b'])
|
|
722
|
-
'2020-01-01c'
|
|
723
|
-
>>> RegistrationClient._next_revision('2020-01-01', ['2020-01-01a', '2020-01-01b'])
|
|
724
|
-
'2020-01-01c'
|
|
725
|
-
|
|
726
|
-
"""
|
|
727
|
-
if len(alpha) != 1:
|
|
728
|
-
raise TypeError(
|
|
729
|
-
f'`alpha` must be a character; received a string of length {len(alpha)}'
|
|
730
|
-
)
|
|
731
|
-
i = ord(alpha)
|
|
732
|
-
new_revision = revision + chr(i)
|
|
733
|
-
while new_revision in (reserved or []):
|
|
734
|
-
i += 1
|
|
735
|
-
new_revision = revision + chr(i)
|
|
736
|
-
return new_revision
|
|
737
|
-
|
|
738
|
-
def register_water_administration(self, subject, volume, **kwargs):
|
|
739
|
-
"""Register a water administration to Alyx for a given subject.
|
|
740
|
-
|
|
741
|
-
Parameters
|
|
742
|
-
----------
|
|
743
|
-
subject : str
|
|
744
|
-
A subject nickname that exists on Alyx
|
|
745
|
-
volume : float
|
|
746
|
-
The total volume administrated in ml
|
|
747
|
-
date_time : str, datetime.datetime, datetime.date
|
|
748
|
-
The time of administration. If None, the current time is used.
|
|
749
|
-
water_type : str
|
|
750
|
-
A water type that exists in Alyx; default is 'Water'
|
|
751
|
-
user : str
|
|
752
|
-
The user who administrated the water. Currently logged-in user is the default.
|
|
753
|
-
session : str, UUID, pathlib.Path, dict
|
|
754
|
-
An optional experiment ID to associate
|
|
755
|
-
adlib : bool
|
|
756
|
-
If true, indicates that the subject was given water ad libitum
|
|
757
|
-
|
|
758
|
-
Returns
|
|
759
|
-
-------
|
|
760
|
-
dict
|
|
761
|
-
A water administration record
|
|
762
|
-
|
|
763
|
-
Raises
|
|
764
|
-
------
|
|
765
|
-
one.alf.exceptions.AlyxSubjectNotFound
|
|
766
|
-
Subject does not exist on Alyx
|
|
767
|
-
one.alf.exceptions.ALFError
|
|
768
|
-
User does not exist on Alyx
|
|
769
|
-
ValueError
|
|
770
|
-
date_time is not a valid ISO date time or session ID is not valid
|
|
771
|
-
requests.exceptions.HTTPError
|
|
772
|
-
Failed to connect to database, or submitted data not valid (500)
|
|
773
|
-
|
|
774
|
-
"""
|
|
775
|
-
# Ensure subject exists
|
|
776
|
-
self.assert_exists(subject, 'subjects')
|
|
777
|
-
# Ensure user(s) exist
|
|
778
|
-
user = kwargs.pop('user', self.one.alyx.user)
|
|
779
|
-
self.assert_exists(user, 'users')
|
|
780
|
-
# Ensure volume not zero
|
|
781
|
-
if volume == 0:
|
|
782
|
-
raise ValueError('Water volume must be greater than zero')
|
|
783
|
-
# Post water admin
|
|
784
|
-
wa_ = {
|
|
785
|
-
'subject': subject,
|
|
786
|
-
'date_time': self.ensure_ISO8601(kwargs.pop('date_time', None)),
|
|
787
|
-
'water_administered': float(f'{volume:.4g}'), # Round to 4 s.f.
|
|
788
|
-
'water_type': kwargs.pop('water_type', 'Water'),
|
|
789
|
-
'user': user,
|
|
790
|
-
'adlib': kwargs.pop('adlib', False)
|
|
791
|
-
}
|
|
792
|
-
# Ensure session is valid; convert to eid
|
|
793
|
-
if kwargs.get('session', False):
|
|
794
|
-
wa_['session'] = str(self.one.to_eid(kwargs.pop('session')) or '')
|
|
795
|
-
if not wa_['session']:
|
|
796
|
-
raise ValueError('Failed to parse session ID')
|
|
797
|
-
|
|
798
|
-
return self.one.alyx.rest('water-administrations', 'create', data=wa_)
|
|
799
|
-
|
|
800
|
-
def register_weight(self, subject, weight, date_time=None, user=None):
|
|
801
|
-
"""Register a subject weight to Alyx.
|
|
802
|
-
|
|
803
|
-
Parameters
|
|
804
|
-
----------
|
|
805
|
-
subject : str
|
|
806
|
-
A subject nickname that exists on Alyx.
|
|
807
|
-
weight : float
|
|
808
|
-
The subject weight in grams.
|
|
809
|
-
date_time : str, datetime.datetime, datetime.date
|
|
810
|
-
The time of weighing. If None, the current time is used.
|
|
811
|
-
user : str
|
|
812
|
-
The user who performed the weighing. Currently logged-in user is the default.
|
|
813
|
-
|
|
814
|
-
Returns
|
|
815
|
-
-------
|
|
816
|
-
dict
|
|
817
|
-
An Alyx weight record
|
|
818
|
-
|
|
819
|
-
Raises
|
|
820
|
-
------
|
|
821
|
-
one.alf.exceptions.AlyxSubjectNotFound
|
|
822
|
-
Subject does not exist on Alyx
|
|
823
|
-
one.alf.exceptions.ALFError
|
|
824
|
-
User does not exist on Alyx
|
|
825
|
-
ValueError
|
|
826
|
-
date_time is not a valid ISO date time or weight < 1e-4
|
|
827
|
-
requests.exceptions.HTTPError
|
|
828
|
-
Failed to connect to database, or submitted data not valid (500)
|
|
829
|
-
|
|
830
|
-
"""
|
|
831
|
-
# Ensure subject exists
|
|
832
|
-
self.assert_exists(subject, 'subjects')
|
|
833
|
-
# Ensure user(s) exist
|
|
834
|
-
user = user or self.one.alyx.user
|
|
835
|
-
self.assert_exists(user, 'users')
|
|
836
|
-
# Ensure weight not zero
|
|
837
|
-
if weight == 0:
|
|
838
|
-
raise ValueError('Water volume must be greater than 0')
|
|
839
|
-
|
|
840
|
-
# Post water admin
|
|
841
|
-
wei_ = {'subject': subject,
|
|
842
|
-
'date_time': self.ensure_ISO8601(date_time),
|
|
843
|
-
'weight': float(f'{weight:.4g}'), # Round to 4 s.f.
|
|
844
|
-
'user': user}
|
|
845
|
-
return self.one.alyx.rest('weighings', 'create', data=wei_)
|
|
1
|
+
"""Session creation and datasets registration.
|
|
2
|
+
|
|
3
|
+
The RegistrationClient provides a high-level API for creating experimentation sessions on Alyx
|
|
4
|
+
and registering associated datasets.
|
|
5
|
+
|
|
6
|
+
Summary of methods
|
|
7
|
+
------------------
|
|
8
|
+
create_new_session - Create a new local session folder and optionally create session record on Alyx
|
|
9
|
+
create_sessions - Create sessions and register files for folder containing a given flag file
|
|
10
|
+
register_session - Create a session on Alyx from local path and register any ALF datasets present
|
|
11
|
+
register_files - Register a list of files to their respective sessions on Alyx
|
|
12
|
+
"""
|
|
13
|
+
import pathlib
|
|
14
|
+
from uuid import UUID
|
|
15
|
+
from pathlib import Path, PurePosixPath
|
|
16
|
+
import datetime
|
|
17
|
+
import logging
|
|
18
|
+
import itertools
|
|
19
|
+
from collections import defaultdict
|
|
20
|
+
from fnmatch import fnmatch
|
|
21
|
+
import shutil
|
|
22
|
+
|
|
23
|
+
import requests.exceptions
|
|
24
|
+
|
|
25
|
+
from iblutil.io import hashfile
|
|
26
|
+
from iblutil.util import Bunch, ensure_list
|
|
27
|
+
|
|
28
|
+
import one.alf.io as alfio
|
|
29
|
+
from one.alf.path import ALFPath, session_path_parts, ensure_alf_path, folder_parts
|
|
30
|
+
from one.alf.spec import is_valid
|
|
31
|
+
import one.alf.exceptions as alferr
|
|
32
|
+
from one.api import ONE
|
|
33
|
+
from one.webclient import no_cache
|
|
34
|
+
|
|
35
|
+
_logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def get_dataset_type(filename, dtypes):
    """Get the dataset type from a given filename.

    A dataset type is matched one of two ways:

    1. the filename matches the dataset type filename_pattern;
    2. if filename_pattern is empty, the filename object.attribute matches the dataset type name.

    Parameters
    ----------
    filename : str, pathlib.Path
        The filename or filepath.
    dtypes : iterable
        An iterable of dataset type objects with the attributes ('name', 'filename_pattern').

    Returns
    -------
    The matching dataset type object for filename.

    Raises
    ------
    ValueError
        filename doesn't match any of the dataset types
        filename matches multiple dataset types

    """
    filename = ensure_alf_path(filename)
    # The object.attribute string to compare against when a type has no pattern
    if is_valid(filename.name):
        obj_attr = '.'.join(filename.dataset_name_parts[1:3])
    else:  # match name against filename sans extension
        obj_attr = filename.stem

    def matches(dt):
        # Null pattern: compare the dataset type name to object.attribute
        if not dt.filename_pattern.strip():
            return dt.name == obj_attr
        # Otherwise do a case-insensitive glob match on the full filename
        return fnmatch(filename.name.casefold(), dt.filename_pattern.casefold())

    candidates = [dt for dt in dtypes if matches(dt)]
    if not candidates:
        raise ValueError(f'No dataset type found for filename "{filename.name}"')
    if len(candidates) > 1:
        raise ValueError('Multiple matching dataset types found for filename '
                         f'"{filename.name}": \n{", ".join(map(str, candidates))}')
    return candidates[0]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class RegistrationClient:
|
|
89
|
+
"""Methods to create sessions and register data."""
|
|
90
|
+
|
|
91
|
+
    def __init__(self, one=None):
        """Create a registration client.

        Parameters
        ----------
        one : one.api.OneAlyx
            An ONE instance with a valid Alyx client. If None, a new instance is
            instantiated with the REST cache disabled.
        """
        self.one = one
        if not one:
            # No instance provided: create one with the REST cache disabled
            self.one = ONE(cache_rest=None)
        elif one.alyx.cache_mode == 'GET':
            # Warn only: per the message, cached GET responses may cause issues
            # with registration
            _logger.warning('AlyxClient REST cache active; '
                            'this may cause issues with registration.')
        # All dataset types on Alyx, used to validate files for registration
        self.dtypes = list(map(Bunch, self.one.alyx.rest('dataset-types', 'list')))
        # Non-empty filename patterns of the dataset types
        self.registration_patterns = [
            dt['filename_pattern'] for dt in self.dtypes if dt['filename_pattern']]
        # Known file extensions from the data-formats endpoint (cache bypassed)
        self.file_extensions = [df['file_extension'] for df in
                                self.one.alyx.rest('data-formats', 'list', no_cache=True)]
|
|
103
|
+
|
|
104
|
+
    def create_sessions(self, root_data_folder, glob_pattern='**/create_me.flag',
                        register_files=False, dry=False):
        """Create sessions looking recursively for flag files.

        Parameters
        ----------
        root_data_folder : str, pathlib.Path
            Folder to look for sessions.
        glob_pattern : str
            Register valid sessions that contain this pattern.
        register_files : bool
            If true, register all valid datasets within the session folder.
        dry : bool
            If true returns list of sessions without creating them on Alyx.

        Returns
        -------
        list of pathlib.Paths
            Newly created session paths.
        list of dicts
            Alyx session records.

        """
        flag_files = list(Path(root_data_folder).glob(glob_pattern))
        records = []
        for flag_file in flag_files:
            if dry:
                # NB: print returns None, so in dry mode the records list is a
                # list of Nones; the flag file path is only echoed to stdout
                records.append(print(flag_file))
                continue
            session_path = ALFPath(flag_file.parent)
            _logger.info('creating session for ' + str(session_path))
            # providing a false flag stops the registration after session creation
            session_info, _ = self.register_session(session_path, file_list=register_files)
            records.append(session_info)
            # Remove the flag so the session is not re-created on subsequent runs
            flag_file.unlink()
        return [ALFPath(ff.parent) for ff in flag_files], records
|
|
140
|
+
|
|
141
|
+
def create_new_session(self, subject, session_root=None, date=None, register=True, **kwargs):
|
|
142
|
+
"""Create a new local session folder and optionally create session record on Alyx.
|
|
143
|
+
|
|
144
|
+
Parameters
|
|
145
|
+
----------
|
|
146
|
+
subject : str
|
|
147
|
+
The subject name. Must exist on Alyx.
|
|
148
|
+
session_root : str, pathlib.Path
|
|
149
|
+
The root folder in which to create the subject/date/number folder. Defaults to ONE
|
|
150
|
+
cache directory.
|
|
151
|
+
date : datetime.datetime, datetime.date, str
|
|
152
|
+
An optional date for the session. If None the current time is used.
|
|
153
|
+
register : bool
|
|
154
|
+
If true, create session record on Alyx database.
|
|
155
|
+
kwargs
|
|
156
|
+
Optional arguments for RegistrationClient.register_session.
|
|
157
|
+
|
|
158
|
+
Returns
|
|
159
|
+
-------
|
|
160
|
+
pathlib.Path
|
|
161
|
+
New local session path.
|
|
162
|
+
uuid.UUID
|
|
163
|
+
The experiment UUID if register is True.
|
|
164
|
+
|
|
165
|
+
Examples
|
|
166
|
+
--------
|
|
167
|
+
Create a local session only
|
|
168
|
+
|
|
169
|
+
>>> session_path, _ = RegistrationClient().create_new_session('Ian', register=False)
|
|
170
|
+
|
|
171
|
+
Register a session on Alyx in a specific location
|
|
172
|
+
|
|
173
|
+
>>> session_path, eid = RegistrationClient().create_new_session('Sy', '/data/lab/Subjects')
|
|
174
|
+
|
|
175
|
+
Create a session for a given date
|
|
176
|
+
|
|
177
|
+
>>> session_path, eid = RegistrationClient().create_new_session('Ian', date='2020-01-01')
|
|
178
|
+
|
|
179
|
+
"""
|
|
180
|
+
assert not self.one.offline, 'ONE must be in online mode'
|
|
181
|
+
date = self.ensure_ISO8601(date) # Format, validate
|
|
182
|
+
# Ensure subject exists on Alyx
|
|
183
|
+
self.assert_exists(subject, 'subjects')
|
|
184
|
+
session_root = Path(session_root or self.one.alyx.cache_dir) / subject / date[:10]
|
|
185
|
+
session_path = session_root / alfio.next_num_folder(session_root)
|
|
186
|
+
session_path.mkdir(exist_ok=True, parents=True) # Ensure folder exists on disk
|
|
187
|
+
if register:
|
|
188
|
+
session_info, _ = self.register_session(session_path, **kwargs)
|
|
189
|
+
eid = UUID(session_info['url'][-36:])
|
|
190
|
+
else:
|
|
191
|
+
eid = None
|
|
192
|
+
return session_path, eid
|
|
193
|
+
|
|
194
|
+
def find_files(self, session_path):
|
|
195
|
+
"""Returns a generator of file names that match one of the dataset type patterns in Alyx.
|
|
196
|
+
|
|
197
|
+
Parameters
|
|
198
|
+
----------
|
|
199
|
+
session_path : str, pathlib.Path
|
|
200
|
+
The session path to search.
|
|
201
|
+
|
|
202
|
+
Yields
|
|
203
|
+
------
|
|
204
|
+
pathlib.Path
|
|
205
|
+
File paths that match the dataset type patterns in Alyx.
|
|
206
|
+
|
|
207
|
+
"""
|
|
208
|
+
session_path = ALFPath(session_path)
|
|
209
|
+
for p in session_path.iter_datasets(recursive=True):
|
|
210
|
+
if any(p.name.endswith(ext) for ext in self.file_extensions):
|
|
211
|
+
try:
|
|
212
|
+
get_dataset_type(p, self.dtypes)
|
|
213
|
+
yield p
|
|
214
|
+
except ValueError as ex:
|
|
215
|
+
_logger.debug('%s', ex.args[0])
|
|
216
|
+
|
|
217
|
+
def assert_exists(self, member, endpoint):
|
|
218
|
+
"""Raise an error if a given member doesn't exist on Alyx database.
|
|
219
|
+
|
|
220
|
+
Parameters
|
|
221
|
+
----------
|
|
222
|
+
member : str, uuid.UUID, list
|
|
223
|
+
The member ID(s) to verify
|
|
224
|
+
endpoint: str
|
|
225
|
+
The endpoint at which to look it up
|
|
226
|
+
|
|
227
|
+
Examples
|
|
228
|
+
--------
|
|
229
|
+
>>> client.assert_exists('ALK_036', 'subjects')
|
|
230
|
+
>>> client.assert_exists('user_45', 'users')
|
|
231
|
+
>>> client.assert_exists('local_server', 'repositories')
|
|
232
|
+
|
|
233
|
+
Raises
|
|
234
|
+
------
|
|
235
|
+
one.alf.exceptions.AlyxSubjectNotFound
|
|
236
|
+
Subject does not exist on Alyx
|
|
237
|
+
one.alf.exceptions.ALFError
|
|
238
|
+
Member does not exist on Alyx
|
|
239
|
+
requests.exceptions.HTTPError
|
|
240
|
+
Failed to connect to Alyx database or endpoint not found
|
|
241
|
+
|
|
242
|
+
Returns
|
|
243
|
+
-------
|
|
244
|
+
dict, list of dict
|
|
245
|
+
The endpoint data if member exists.
|
|
246
|
+
|
|
247
|
+
"""
|
|
248
|
+
if isinstance(member, (str, UUID)):
|
|
249
|
+
try:
|
|
250
|
+
return self.one.alyx.rest(endpoint, 'read', id=str(member), no_cache=True)
|
|
251
|
+
except requests.exceptions.HTTPError as ex:
|
|
252
|
+
if ex.response.status_code != 404:
|
|
253
|
+
raise ex
|
|
254
|
+
elif endpoint == 'subjects':
|
|
255
|
+
raise alferr.AlyxSubjectNotFound(member)
|
|
256
|
+
else:
|
|
257
|
+
raise alferr.ALFError(f'Member "{member}" doesn\'t exist in {endpoint}')
|
|
258
|
+
else:
|
|
259
|
+
return [self.assert_exists(x, endpoint) for x in member]
|
|
260
|
+
|
|
261
|
+
@staticmethod
|
|
262
|
+
def ensure_ISO8601(date) -> str:
|
|
263
|
+
"""Ensure provided date is ISO 8601 compliant.
|
|
264
|
+
|
|
265
|
+
Parameters
|
|
266
|
+
----------
|
|
267
|
+
date : str, None, datetime.date, datetime.datetime
|
|
268
|
+
An optional date to convert to ISO string. If None, the current datetime is used.
|
|
269
|
+
|
|
270
|
+
Returns
|
|
271
|
+
-------
|
|
272
|
+
str
|
|
273
|
+
The datetime as an ISO 8601 string
|
|
274
|
+
|
|
275
|
+
"""
|
|
276
|
+
date = date or datetime.datetime.now() # If None get current time
|
|
277
|
+
if isinstance(date, str):
|
|
278
|
+
# FIXME support timezone aware strings, e.g. '2023-03-09T17:08:12.4465024+00:00'
|
|
279
|
+
date = datetime.datetime.fromisoformat(date) # Validate by parsing
|
|
280
|
+
elif type(date) is datetime.date:
|
|
281
|
+
date = datetime.datetime.fromordinal(date.toordinal())
|
|
282
|
+
return datetime.datetime.isoformat(date)
|
|
283
|
+
|
|
284
|
+
def register_session(self, ses_path, users=None, file_list=True, **kwargs):
|
|
285
|
+
"""Register session in Alyx.
|
|
286
|
+
|
|
287
|
+
NB: If providing a lab or start_time kwarg, they must match the lab (if there is one)
|
|
288
|
+
and date of the session path.
|
|
289
|
+
|
|
290
|
+
Parameters
|
|
291
|
+
----------
|
|
292
|
+
ses_path : str, pathlib.Path
|
|
293
|
+
The local session path
|
|
294
|
+
users : str, list
|
|
295
|
+
The user(s) to attribute to the session
|
|
296
|
+
file_list : bool, list
|
|
297
|
+
An optional list of file paths to register. If True, all valid files within the
|
|
298
|
+
session folder are registered. If False, no files are registered
|
|
299
|
+
location : str
|
|
300
|
+
The optional location within the lab where the experiment takes place
|
|
301
|
+
procedures : str, list
|
|
302
|
+
An optional list of procedures, e.g. 'Behavior training/tasks'
|
|
303
|
+
n_correct_trials : int
|
|
304
|
+
The number of correct trials (optional)
|
|
305
|
+
n_trials : int
|
|
306
|
+
The total number of completed trials (optional)
|
|
307
|
+
json : dict, str
|
|
308
|
+
Optional JSON data
|
|
309
|
+
projects: str, list
|
|
310
|
+
The project(s) to which the experiment belongs (optional)
|
|
311
|
+
type : str
|
|
312
|
+
The experiment type, e.g. 'Experiment', 'Base'
|
|
313
|
+
task_protocol : str
|
|
314
|
+
The task protocol (optional)
|
|
315
|
+
lab : str
|
|
316
|
+
The name of the lab where the session took place. If None the lab name will be
|
|
317
|
+
taken from the path. If no lab name is found in the path (i.e. no <lab>/Subjects)
|
|
318
|
+
the default lab on Alyx will be used.
|
|
319
|
+
start_time : str, datetime.datetime
|
|
320
|
+
The precise start time of the session. The date must match the date in the session
|
|
321
|
+
path.
|
|
322
|
+
end_time : str, datetime.datetime
|
|
323
|
+
The precise end time of the session.
|
|
324
|
+
|
|
325
|
+
Returns
|
|
326
|
+
-------
|
|
327
|
+
dict
|
|
328
|
+
An Alyx session record
|
|
329
|
+
list, None
|
|
330
|
+
Alyx file records (or None if file_list is False)
|
|
331
|
+
|
|
332
|
+
Raises
|
|
333
|
+
------
|
|
334
|
+
AssertionError
|
|
335
|
+
Subject does not exist on Alyx or provided start_time does not match date in
|
|
336
|
+
session path.
|
|
337
|
+
ValueError
|
|
338
|
+
The provided lab name does not match the one found in the session path or
|
|
339
|
+
start_time/end_time is not a valid ISO date time.
|
|
340
|
+
requests.HTTPError
|
|
341
|
+
A 400 status code means the submitted data was incorrect (e.g. task_protocol was an
|
|
342
|
+
int instead of a str); A 500 status code means there was a server error.
|
|
343
|
+
ConnectionError
|
|
344
|
+
Failed to connect to Alyx, most likely due to a bad internet connection.
|
|
345
|
+
|
|
346
|
+
"""
|
|
347
|
+
ses_path = ALFPath(ses_path)
|
|
348
|
+
details = session_path_parts(ses_path.as_posix(), as_dict=True, assert_valid=True)
|
|
349
|
+
# query alyx endpoints for subject, error if not found
|
|
350
|
+
self.assert_exists(details['subject'], 'subjects')
|
|
351
|
+
|
|
352
|
+
# look for a session from the same subject, same number on the same day
|
|
353
|
+
with no_cache(self.one.alyx):
|
|
354
|
+
session_id, session = self.one.search(subject=details['subject'],
|
|
355
|
+
date_range=details['date'],
|
|
356
|
+
number=details['number'],
|
|
357
|
+
details=True, query_type='remote')
|
|
358
|
+
users = ensure_list(users or self.one.alyx.user)
|
|
359
|
+
self.assert_exists(users, 'users')
|
|
360
|
+
|
|
361
|
+
# if nothing found create a new session in Alyx
|
|
362
|
+
ses_ = {'subject': details['subject'],
|
|
363
|
+
'users': users,
|
|
364
|
+
'type': 'Experiment',
|
|
365
|
+
'number': details['number']}
|
|
366
|
+
if kwargs.get('end_time', False):
|
|
367
|
+
ses_['end_time'] = self.ensure_ISO8601(kwargs.pop('end_time'))
|
|
368
|
+
start_time = self.ensure_ISO8601(kwargs.pop('start_time', details['date']))
|
|
369
|
+
assert start_time[:10] == details['date'], 'start_time doesn\'t match session path'
|
|
370
|
+
if kwargs.get('procedures', False):
|
|
371
|
+
ses_['procedures'] = ensure_list(kwargs.pop('procedures'))
|
|
372
|
+
if kwargs.get('projects', False):
|
|
373
|
+
ses_['projects'] = ensure_list(kwargs.pop('projects'))
|
|
374
|
+
assert ('subject', 'number') not in kwargs
|
|
375
|
+
if 'lab' not in kwargs and details['lab']:
|
|
376
|
+
kwargs.update({'lab': details['lab']})
|
|
377
|
+
elif details['lab'] and kwargs.get('lab', details['lab']) != details['lab']:
|
|
378
|
+
names = (kwargs['lab'], details['lab'])
|
|
379
|
+
raise ValueError('lab kwarg "%s" does not match lab name in path ("%s")' % names)
|
|
380
|
+
ses_.update(kwargs)
|
|
381
|
+
|
|
382
|
+
if not session: # Create from scratch
|
|
383
|
+
ses_['start_time'] = start_time
|
|
384
|
+
session = self.one.alyx.rest('sessions', 'create', data=ses_)
|
|
385
|
+
else: # Update existing
|
|
386
|
+
if start_time:
|
|
387
|
+
ses_['start_time'] = self.ensure_ISO8601(start_time)
|
|
388
|
+
session = self.one.alyx.rest('sessions', 'update', id=session_id[0], data=ses_)
|
|
389
|
+
|
|
390
|
+
_logger.info(session['url'] + ' ')
|
|
391
|
+
# at this point the session has been created. If create only, exit
|
|
392
|
+
if not file_list:
|
|
393
|
+
return session, None
|
|
394
|
+
recs = self.register_files(self.find_files(ses_path) if file_list is True else file_list)
|
|
395
|
+
if recs: # Update local session data after registering files
|
|
396
|
+
session['data_dataset_session_related'] = ensure_list(recs)
|
|
397
|
+
return session, recs
|
|
398
|
+
|
|
399
|
+
    def prepare_files(self, file_list, versions=None):
        """Validate file list for registration and group files by session path.

        Parameters
        ----------
        file_list : list, str, pathlib.Path
            A filepath (or list thereof) of ALF datasets to register to Alyx.
        versions : str, list of str
            Optional version tags.

        Returns
        -------
        list of dicts
            A dict containing a list of files for each session.
        list of dicts
            A dict containing a list of versions for each session.
        list
            A list of files converted to paths.
        bool
            A boolean indicating if input was a single file.

        """
        F = defaultdict(list)  # empty map whose keys will be session paths
        V = defaultdict(list)  # empty map for versions

        if single_file := isinstance(file_list, (str, pathlib.Path)):
            file_list = [file_list]
        file_list = list(map(ALFPath, file_list))  # Ensure list of path objects

        # A single (or absent) version tag applies to every file; a list of tags
        # is cycled so it may be shorter than the file list.
        # NOTE(review): an empty `versions` list makes `cycle` an empty iterator,
        # so zip below yields nothing and all files are silently skipped — confirm
        # callers never pass [].
        if versions is None or isinstance(versions, str):
            versions = itertools.repeat(versions)
        else:
            versions = itertools.cycle(versions)

        # Filter valid files and sort by session
        for fn, ver in zip(file_list, versions):
            session_path = fn.session_path()
            if not session_path:
                _logger.debug(f'{fn}: Invalid session path')
                continue
            if fn.suffix not in self.file_extensions:
                _logger.debug(f'{fn}: No matching extension "{fn.suffix}" in database')
                continue
            try:
                # A file must map to exactly one dataset type to be registrable
                get_dataset_type(fn, self.dtypes)
            except ValueError as ex:
                _logger.debug('%s', ex.args[0])
                continue
            # Store paths relative to their session, keeping versions in lockstep
            F[session_path].append(fn.relative_to(session_path))
            V[session_path].append(ver)

        return F, V, file_list, single_file
|
|
451
|
+
|
|
452
|
+
def check_protected_files(self, file_list, created_by=None):
|
|
453
|
+
"""Check whether a set of files associated to a session are protected.
|
|
454
|
+
|
|
455
|
+
Parameters
|
|
456
|
+
----------
|
|
457
|
+
file_list : list, str, pathlib.Path
|
|
458
|
+
A filepath (or list thereof) of ALF datasets to register to Alyx.
|
|
459
|
+
created_by : str
|
|
460
|
+
Name of Alyx user (defaults to whoever is logged in to ONE instance).
|
|
461
|
+
|
|
462
|
+
Returns
|
|
463
|
+
-------
|
|
464
|
+
list of dicts, dict
|
|
465
|
+
A status for each session whether any of the files specified are protected
|
|
466
|
+
datasets or not.If none of the datasets are protected, a response with status
|
|
467
|
+
200 is returned, if any of the files are protected a response with status
|
|
468
|
+
403 is returned.
|
|
469
|
+
|
|
470
|
+
"""
|
|
471
|
+
# Validate files and rearrange into list per session
|
|
472
|
+
F, _, _, single_file = self.prepare_files(file_list)
|
|
473
|
+
|
|
474
|
+
# For each unique session, make a separate POST request
|
|
475
|
+
records = []
|
|
476
|
+
for session_path, files in F.items():
|
|
477
|
+
# this is the generic relative path: subject/yyyy-mm-dd/NNN
|
|
478
|
+
details = session_path_parts(session_path.as_posix(), as_dict=True, assert_valid=True)
|
|
479
|
+
rel_path = PurePosixPath(details['subject'], details['date'], details['number'])
|
|
480
|
+
|
|
481
|
+
r_ = {'created_by': created_by or self.one.alyx.user,
|
|
482
|
+
'path': rel_path.as_posix(),
|
|
483
|
+
'filenames': [x.as_posix() for x in files]
|
|
484
|
+
}
|
|
485
|
+
records.append(self.one.alyx.get('/check-protected', data=r_, clobber=True))
|
|
486
|
+
|
|
487
|
+
return records[0] if single_file else records
|
|
488
|
+
|
|
489
|
+
def register_files(self, file_list,
                   versions=None, default=True, created_by=None, server_only=False,
                   repository=None, exists=True, dry=False, max_md5_size=None, **kwargs):
    """Registers a set of files belonging to a session only on the server.

    Parameters
    ----------
    file_list : list, str, pathlib.Path
        A filepath (or list thereof) of ALF datasets to register to Alyx.
    versions : str, list of str
        Optional version tags.
    default : bool
        Whether to set as default revision (defaults to True).
    created_by : str
        Name of Alyx user (defaults to whoever is logged in to ONE instance).
    server_only : bool
        Will only create file records in the 'online' repositories and skips local repositories
    repository : str
        Name of the repository in Alyx to register to.
    exists : bool
        Whether the files exist on the repository (defaults to True). May be set to False
        when registering files before copying to the repository.
    dry : bool
        When true returns POST data for registration endpoint without submitting the data.
    max_md5_size : int
        Maximum file size in bytes for which to compute the md5 sum (always compute if None).
    kwargs
        Extra arguments directly passed as REST request data to /register-files endpoint.

    Returns
    -------
    list of dicts, dict
        A list of newly created Alyx dataset records or the registration data if dry. If
        a single file is passed in, a single dict is returned.

    Notes
    -----
    - The registered files may be automatically moved to new revision folders if they are
      protected on Alyx, therefore it's important to check the relative paths of the output.
    - Protected datasets are not checked in dry mode.
    - In most circumstances a new revision will be added automatically, however if this fails
      a 403 HTTP status may be returned.

    Raises
    ------
    requests.exceptions.HTTPError
        Submitted data not valid (400 status code)
        Server side database error (500 status code)
        Revision protected (403 status code)

    """
    # prepare_files groups the input paths by session and normalizes version tags
    F, V, file_list, single_file = self.prepare_files(file_list, versions=versions)

    # For each unique session, make a separate POST request
    records = [None] * (len(F) if dry else len(file_list))  # If dry return data per session
    for session_path, files in F.items():
        # this is the generic relative path: subject/yyyy-mm-dd/NNN
        details = session_path_parts(session_path.as_posix(), as_dict=True, assert_valid=True)
        rel_path = PurePosixPath(details['subject'], details['date'], details['number'])
        file_sizes = [session_path.joinpath(fn).stat().st_size for fn in files]
        # computing the md5 can be very long, so this is an option to skip if the file is
        # bigger than a certain threshold
        md5s = [hashfile.md5(session_path.joinpath(fn))
                if (max_md5_size is None or sz < max_md5_size) else None
                for fn, sz in zip(files, file_sizes)]

        _logger.info('Registering ' + str(files))

        r_ = {'created_by': created_by or self.one.alyx.user,
              'path': rel_path.as_posix(),
              'filenames': [x.as_posix() for x in files],
              'hashes': md5s,
              'filesizes': file_sizes,
              'name': repository,
              'exists': exists,
              'server_only': server_only,
              'default': default,
              'versions': V[session_path],
              'check_protected': True,
              **kwargs
              }

        # Add optional field
        if details['lab'] and 'labs' not in kwargs:
            r_['labs'] = details['lab']
        # If dry, store POST data, otherwise store resulting file records
        if dry:
            records[list(F).index(session_path)] = r_
            continue
        try:
            response = self.one.alyx.post('/register-file', data=r_)
            # Ensure we keep the order of the output records: the files missing will remain
            # as None type
            for f, r in zip(files, response):
                records[file_list.index(session_path / f)] = r
        except requests.exceptions.HTTPError as err:
            # 403 response when datasets already registered and protected by tags
            err_message = err.response.json()
            if not (err_message.get('status_code') == 403 and
                    err_message.get('error') == 'One or more datasets is protected'):
                raise err  # Some other error occurred; re-raise
            # The error details map each filename to its existing revisions and
            # protection flags; use them to decide where each file must move.
            response = err_message['details']
            today_revision = datetime.datetime.today().strftime('%Y-%m-%d')
            new_file_list = []

            for fl, res in zip(files, response):
                (name, prot_info), = res.items()
                # Dataset has not yet been registered
                if not prot_info:
                    new_file_list.append(fl)
                    continue

                # Check to see if the file path already has a revision in it
                file_revision = folder_parts(rel_path / fl, as_dict=True)['revision']
                # Find existing protected revisions
                existing_revisions = [k for pr in prot_info for k, v in pr.items() if v]

                if file_revision:
                    # If the revision explicitly defined by the user doesn't exist or
                    # is not protected, register as is
                    if file_revision not in existing_revisions:
                        revision_path = fl.parent
                    else:
                        # Find the next sub-revision that isn't protected
                        new_revision = self._next_revision(file_revision, existing_revisions)
                        revision_path = fl.parent.parent.joinpath(f'#{new_revision}#')

                    if revision_path != fl.parent:
                        session_path.joinpath(revision_path).mkdir(exist_ok=True)
                        _logger.info('Moving %s -> %s', fl, revision_path.joinpath(fl.name))
                        shutil.move(session_path / fl, session_path / revision_path / fl.name)
                    new_file_list.append(revision_path.joinpath(fl.name))
                    continue

                # The file wasn't in a revision folder but is protected
                fl_path = fl.parent
                assert name == fl_path.joinpath(fl.name).as_posix()

                # Find info about the latest revision
                # N.B on django side prot_info is sorted by latest revisions first
                (latest_revision, protected), = prot_info[0].items()

                # If the latest revision is the original and it is unprotected
                # no need for revision e.g {'clusters.amp.npy': [{'': False}]}
                if latest_revision == '' and not protected:
                    # Use original path
                    revision_path = fl_path

                # If there already is a revision but it is unprotected,
                # move into this revision folder e.g
                # {'clusters.amp.npy':
                #  [{'2022-10-31': False}, {'2022-05-31': True}, {'': True}]}
                elif not protected:
                    # Check that the latest_revision has the date naming convention we expect
                    # i.e. 'YYYY-MM-DD'
                    try:
                        _ = datetime.datetime.strptime(latest_revision[:10], '%Y-%m-%d')
                        revision_path = fl_path.joinpath(f'#{latest_revision}#')
                    # If it doesn't it probably has been made manually so we don't want to
                    # overwrite this and instead use today's date
                    except ValueError:
                        # NB: It's possible that today's date revision is also protected but is
                        # not the most recent revision. In this case it's safer to let fail.
                        revision_path = fl_path.joinpath(f'#{today_revision}#')

                # If protected and the latest protected revision is from today we need to make
                # a sub-revision
                elif protected and today_revision in latest_revision:
                    if latest_revision == today_revision:  # iterate from appending 'a'
                        new_revision = self._next_revision(today_revision, existing_revisions)
                    else:  # assume the revision is date + character, e.g. '2020-01-01c'
                        alpha = latest_revision[-1]  # iterate from this character
                        new_revision = self._next_revision(
                            today_revision, existing_revisions, alpha)
                    revision_path = fl_path.joinpath(f'#{new_revision}#')

                # Otherwise cases move into revision from today
                # e.g {'clusters.amp.npy': [{'': True}]}
                # e.g {'clusters.amp.npy': [{'2022-10-31': True}, {'': True}]}
                else:
                    revision_path = fl_path.joinpath(f'#{today_revision}#')

                # Only move for the cases where a revision folder has been made
                if revision_path != fl_path:
                    session_path.joinpath(revision_path).mkdir(exist_ok=True)
                    _logger.info('Moving %s -> %s', fl, revision_path.joinpath(fl.name))
                    shutil.move(session_path / fl, session_path / revision_path / fl.name)
                new_file_list.append(revision_path.joinpath(fl.name))

            assert len(new_file_list) == len(files)
            # Re-submit with the (possibly) relocated filenames; sizes may have
            # changed paths so they are recomputed from disk
            r_['filenames'] = [p.as_posix() for p in new_file_list]
            r_['filesizes'] = [session_path.joinpath(p).stat().st_size for p in new_file_list]
            r_['check_protected'] = False  # Speed things up by ignoring server-side checks

            response = self.one.alyx.post('/register-file', data=r_)
            for f, r in zip(files, response):  # Populate records list in correct order
                records[file_list.index(session_path / f)] = r
            files = new_file_list

        # Log file names
        _logger.info(f'ALYX REGISTERED DATA {"!DRY!" if dry else ""}: {rel_path}')
        for p in files:
            _logger.info(f'ALYX REGISTERED DATA: {p}')

    return records[0] if single_file else records
|
|
696
|
+
|
|
697
|
+
@staticmethod
|
|
698
|
+
def _next_revision(revision: str, reserved: list = None, alpha: str = 'a') -> str:
|
|
699
|
+
"""Return the next logical revision that is not already in the provided list.
|
|
700
|
+
|
|
701
|
+
Revisions will increment by appending a letter to a date or other identifier.
|
|
702
|
+
|
|
703
|
+
Parameters
|
|
704
|
+
----------
|
|
705
|
+
revision : str
|
|
706
|
+
The revision on which to base the new revision.
|
|
707
|
+
reserved : list of str
|
|
708
|
+
A list of reserved (i.e. already existing) revision strings.
|
|
709
|
+
alpha : str
|
|
710
|
+
The starting character as an integer, defaults to 'a'.
|
|
711
|
+
|
|
712
|
+
Returns
|
|
713
|
+
-------
|
|
714
|
+
str
|
|
715
|
+
The next logical revision string that's not in the reserved list.
|
|
716
|
+
|
|
717
|
+
Examples
|
|
718
|
+
--------
|
|
719
|
+
>>> RegistrationClient._next_revision('2020-01-01')
|
|
720
|
+
'2020-01-01a'
|
|
721
|
+
>>> RegistrationClient._next_revision('2020-01-01', ['2020-01-01a', '2020-01-01b'])
|
|
722
|
+
'2020-01-01c'
|
|
723
|
+
>>> RegistrationClient._next_revision('2020-01-01', ['2020-01-01a', '2020-01-01b'])
|
|
724
|
+
'2020-01-01c'
|
|
725
|
+
|
|
726
|
+
"""
|
|
727
|
+
if len(alpha) != 1:
|
|
728
|
+
raise TypeError(
|
|
729
|
+
f'`alpha` must be a character; received a string of length {len(alpha)}'
|
|
730
|
+
)
|
|
731
|
+
i = ord(alpha)
|
|
732
|
+
new_revision = revision + chr(i)
|
|
733
|
+
while new_revision in (reserved or []):
|
|
734
|
+
i += 1
|
|
735
|
+
new_revision = revision + chr(i)
|
|
736
|
+
return new_revision
|
|
737
|
+
|
|
738
|
+
def register_water_administration(self, subject, volume, **kwargs):
    """Register a water administration to Alyx for a given subject.

    Parameters
    ----------
    subject : str
        A subject nickname that exists on Alyx
    volume : float
        The total volume administrated in ml
    date_time : str, datetime.datetime, datetime.date
        The time of administration. If None, the current time is used.
    water_type : str
        A water type that exists in Alyx; default is 'Water'
    user : str
        The user who administrated the water. Currently logged-in user is the default.
    session : str, UUID, pathlib.Path, dict
        An optional experiment ID to associate
    adlib : bool
        If true, indicates that the subject was given water ad libitum

    Returns
    -------
    dict
        A water administration record

    Raises
    ------
    one.alf.exceptions.AlyxSubjectNotFound
        Subject does not exist on Alyx
    one.alf.exceptions.ALFError
        User does not exist on Alyx
    ValueError
        date_time is not a valid ISO date time or session ID is not valid
    requests.exceptions.HTTPError
        Failed to connect to database, or submitted data not valid (500)

    """
    # Both the subject and the administering user must exist on Alyx
    self.assert_exists(subject, 'subjects')
    administrator = kwargs.pop('user', self.one.alyx.user)
    self.assert_exists(administrator, 'users')
    # Guard against registering an empty administration
    if volume == 0:
        raise ValueError('Water volume must be greater than zero')
    # Assemble the POST payload for the water-administrations endpoint
    record = {
        'subject': subject,
        'date_time': self.ensure_ISO8601(kwargs.pop('date_time', None)),
        'water_administered': float(f'{volume:.4g}'),  # round to 4 significant figures
        'water_type': kwargs.pop('water_type', 'Water'),
        'user': administrator,
        'adlib': kwargs.pop('adlib', False)
    }
    # If a session was provided, resolve it to an eid before submitting
    session = kwargs.pop('session', None)
    if session:
        eid = str(self.one.to_eid(session) or '')
        if not eid:
            raise ValueError('Failed to parse session ID')
        record['session'] = eid

    return self.one.alyx.rest('water-administrations', 'create', data=record)
|
|
799
|
+
|
|
800
|
+
def register_weight(self, subject, weight, date_time=None, user=None):
    """Register a subject weight to Alyx.

    Parameters
    ----------
    subject : str
        A subject nickname that exists on Alyx.
    weight : float
        The subject weight in grams.
    date_time : str, datetime.datetime, datetime.date
        The time of weighing. If None, the current time is used.
    user : str
        The user who performed the weighing. Currently logged-in user is the default.

    Returns
    -------
    dict
        An Alyx weight record

    Raises
    ------
    one.alf.exceptions.AlyxSubjectNotFound
        Subject does not exist on Alyx
    one.alf.exceptions.ALFError
        User does not exist on Alyx
    ValueError
        date_time is not a valid ISO date time or weight is zero
    requests.exceptions.HTTPError
        Failed to connect to database, or submitted data not valid (500)

    """
    # Ensure subject exists
    self.assert_exists(subject, 'subjects')
    # Ensure user(s) exist
    user = user or self.one.alyx.user
    self.assert_exists(user, 'users')
    # Ensure weight not zero
    if weight == 0:
        # Message previously read 'Water volume must be greater than 0' — a
        # copy-paste error from register_water_administration
        raise ValueError('Weight must be greater than 0')

    # Post weight record
    wei_ = {'subject': subject,
            'date_time': self.ensure_ISO8601(date_time),
            'weight': float(f'{weight:.4g}'),  # Round to 4 s.f.
            'user': user}
    return self.one.alyx.rest('weighings', 'create', data=wei_)
|