ONE-api 3.0b3__py3-none-any.whl → 3.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/LICENSE +21 -21
- {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/METADATA +115 -115
- ONE_api-3.0b4.dist-info/RECORD +37 -0
- one/__init__.py +2 -2
- one/alf/__init__.py +1 -1
- one/alf/cache.py +640 -653
- one/alf/exceptions.py +105 -105
- one/alf/io.py +876 -876
- one/alf/path.py +1450 -1450
- one/alf/spec.py +519 -519
- one/api.py +2949 -2973
- one/converters.py +850 -850
- one/params.py +414 -414
- one/registration.py +845 -845
- one/remote/__init__.py +1 -1
- one/remote/aws.py +313 -313
- one/remote/base.py +142 -142
- one/remote/globus.py +1254 -1254
- one/tests/fixtures/params/.caches +6 -6
- one/tests/fixtures/params/.test.alyx.internationalbrainlab.org +8 -8
- one/tests/fixtures/rest_responses/1f187d80fd59677b395fcdb18e68e4401bfa1cc9 +1 -1
- one/tests/fixtures/rest_responses/47893cf67c985e6361cdee009334963f49fb0746 +1 -1
- one/tests/fixtures/rest_responses/535d0e9a1e2c1efbdeba0d673b131e00361a2edb +1 -1
- one/tests/fixtures/rest_responses/6dc96f7e9bcc6ac2e7581489b9580a6cd3f28293 +1 -1
- one/tests/fixtures/rest_responses/db1731fb8df0208944ae85f76718430813a8bf50 +1 -1
- one/tests/fixtures/rest_responses/dcce48259bb929661f60a02a48563f70aa6185b3 +1 -1
- one/tests/fixtures/rest_responses/f530d6022f61cdc9e38cc66beb3cb71f3003c9a1 +1 -1
- one/tests/fixtures/test_dbs.json +14 -14
- one/util.py +524 -524
- one/webclient.py +1366 -1354
- ONE_api-3.0b3.dist-info/RECORD +0 -37
- {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/WHEEL +0 -0
- {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/top_level.txt +0 -0
one/converters.py
CHANGED
|
@@ -1,850 +1,850 @@
|
|
|
1
|
-
"""A module for inter-converting experiment identifiers.
|
|
2
|
-
|
|
3
|
-
There are multiple ways to uniquely identify an experiment:
|
|
4
|
-
- eid (UUID) : An experiment UUID (or 36 char hexadecimal string)
|
|
5
|
-
- np (int64) : An experiment UUID encoded as 2 int64s
|
|
6
|
-
- path (Path) : A pathlib ALF path of the form `<lab>/Subjects/<subject>/<date>/<number>`
|
|
7
|
-
- ref (str) : An experiment reference string of the form `yyyy-mm-dd_n_subject`
|
|
8
|
-
- url (str) : A remote http session path of the form `<lab>/Subjects/<subject>/<date>/<number>`
|
|
9
|
-
"""
|
|
10
|
-
import re
|
|
11
|
-
import functools
|
|
12
|
-
import datetime
|
|
13
|
-
import urllib.parse
|
|
14
|
-
from uuid import UUID
|
|
15
|
-
from inspect import unwrap
|
|
16
|
-
from pathlib import Path
|
|
17
|
-
from typing import Optional, Union, Mapping, List, Iterable as Iter
|
|
18
|
-
|
|
19
|
-
import pandas as pd
|
|
20
|
-
from iblutil.util import Bunch, Listable, ensure_list
|
|
21
|
-
|
|
22
|
-
from one.alf.spec import is_session_path, is_uuid_string, is_uuid
|
|
23
|
-
from one.alf.cache import EMPTY_DATASETS_FRAME
|
|
24
|
-
from one.alf.path import (
|
|
25
|
-
ALFPath, PurePosixALFPath, ensure_alf_path, get_session_path, get_alf_path, remove_uuid_string)
|
|
26
|
-
from one.util import LazyId
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def recurse(func):
    """Decorate a method so that it maps over a non-string iterable first argument.

    The wrapped method may then be called with either a single value or a collection of values.
    In the latter case the method is called once per item and the results are returned as a
    list. The decorator is written for methods: the first positional argument is taken to be
    the object (`self`) and the second is the value that may be iterated over. Strings, maps
    and pandas Series/DataFrames are passed through whole; LazyId instances are always
    iterated over.

    Parameters
    ----------
    func : function
        A method to decorate.

    Returns
    -------
    function
        The decorated method.

    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # With only the object (or nothing) passed positionally there is nothing to map over
        if len(args) < 2:
            return func(*args, **kwargs)
        obj, first, *rest = args
        passthrough_types = (str, Mapping, pd.Series, pd.DataFrame)
        map_over = isinstance(first, LazyId) or (
            isinstance(first, Iter) and not isinstance(first, passthrough_types))
        if not map_over:
            return func(obj, first, *rest, **kwargs)
        return [func(obj, item, *rest, **kwargs) for item in first]
    return wrapper
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def parse_values(func):
    """Decorate a function so the str values of its returned reference dict(s) are parsed.

    The decorated function gains a `parse` keyword argument (default True), which is consumed
    by the wrapper and not passed on. When true, the 'date' value of each returned reference
    is converted from an ISO string to a `datetime.date` and the 'sequence' value is cast to
    int. References are modified in place. String return values are returned unchanged.

    Examples
    --------
    >>> parse_values(lambda x: x)({'date': '2020-01-01', 'sequence': '001'}, parse=True)
    {'date': datetime.date(2020, 1, 1), 'sequence': 1}

    """
    def parse_ref(ref):
        """Parse the 'date' and 'sequence' values of a single reference, in place."""
        if not ref:
            return ref  # None or empty: nothing to parse
        date = ref['date']
        if isinstance(date, str):
            if len(date) == 10:  # plain yyyy-mm-dd
                ref['date'] = datetime.date.fromisoformat(date)
            else:  # a longer ISO datetime string; keep only the date part
                ref['date'] = datetime.datetime.fromisoformat(date).date()
        ref['sequence'] = int(ref['sequence'])
        return ref

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        parse = kwargs.pop('parse', True)
        ref = func(*args, **kwargs)
        if not parse or isinstance(ref, str):
            return ref
        if isinstance(ref, (list, LazyId)):
            return [parse_ref(item) for item in ref]
        return parse_ref(ref)
    return wrapper
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
class ConversionMixin:
    """A mixin providing methods to inter-convert experiment identifiers.

    Besides the `_cache` and `_par` attributes initialized here, the methods reference the
    following attributes that are not defined in this class and must be provided by the host
    class: `cache_dir`, `uuid_filenames`, `list_datasets`, `get_details`, `search` and
    (for URL conversion) `_web_client`.
    """

    def __init__(self):
        self._cache = None  # expected to map 'sessions' and 'datasets' to pandas tables
        self._par = None

    @recurse
    def to_eid(self,
               id: Listable(Union[str, Path, UUID, dict]) = None,
               cache_dir: Optional[Union[str, Path]] = None) -> Listable(UUID):
        """Given any kind of experiment identifier, return a corresponding experiment UUID.

        NB: Currently does not support integer IDs.

        Parameters
        ----------
        id : str, pathlib.Path, UUID, dict, tuple, list
            An experiment identifier
        cache_dir : pathlib.Path, str
            An optional cache directory path for intermittent conversion to path

        Returns
        -------
        uuid.UUID, None
            An experiment ID or None if session not in cache

        Raises
        ------
        ValueError
            Input ID invalid
        AssertionError
            Input is a dict (that is not an experiment reference) without the keys
            ('subject', 'number', 'lab')

        """
        # TODO Could add np2str here
        if id is None:
            return
        elif isinstance(id, (UUID, LazyId)):
            return id
        elif self.is_exp_ref(id):
            return self.ref2eid(id)
        elif isinstance(id, dict):
            # A session record dict: build the session path, which is resolved below
            assert {'subject', 'number', 'lab'}.issubset(id)
            root = Path(cache_dir or self.cache_dir)
            id = root.joinpath(
                id['lab'],
                'Subjects', id['subject'],
                str(id.get('date') or id['start_time'][:10]),
                ('%03d' % id['number']))

        if isinstance(id, Path):
            return self.path2eid(id)
        elif isinstance(id, str):
            if is_session_path(id) or get_session_path(id):
                return self.path2eid(id)
            if len(id) > 36:
                id = id[-36:]  # e.g. a URL ending in the UUID
            if not is_uuid_string(id):
                raise ValueError('Invalid experiment ID')
            else:
                return UUID(id)
        else:
            raise ValueError('Unrecognized experiment ID')

    @recurse
    def eid2path(self, eid: str) -> Optional[Listable(ALFPath)]:
        """From an experiment id or a list of experiment ids, gets the local cache path.

        Parameters
        ----------
        eid : str, uuid.UUID
            Experiment ID (UUID) or list of UUIDs.

        Returns
        -------
        one.alf.path.ALFPath
            A session path, or None if the sessions table is empty or the eid is not in it.

        Raises
        ------
        ValueError
            The eid is not a valid UUID.

        """
        if not is_uuid(eid):
            raise ValueError(f"{eid} is not a valid eID/UUID string")
        if isinstance(eid, str):
            eid = UUID(eid)
        if self._cache['sessions'].size == 0:
            return

        # load path from cache
        try:
            ses = self._cache['sessions'].loc[eid].squeeze()
            assert isinstance(ses, pd.Series), 'Duplicate eids in sessions table'
            return session_record2path(ses.to_dict(), self.cache_dir)
        except KeyError:
            return  # eid not in sessions table

    @recurse
    def path2eid(self, path_obj):
        """From a local path, gets the experiment id.

        Parameters
        ----------
        path_obj : pathlib.Path, str
            Local path or list of local paths.

        Returns
        -------
        eid, list
            Experiment ID (eid) or list of eids. None if the path contains no session part,
            or no matching session is found in the sessions table.

        """
        # else ensure the path ends with mouse,date, number
        session_path = get_session_path(path_obj)
        sessions = self._cache['sessions']

        # if path does not have a date and a number, or cache is empty return None
        if session_path is None or sessions.size == 0:
            return None

        # reduce session records from cache
        toDate = datetime.date.fromisoformat
        subject, date, number = session_path.parts[-3:]
        for col, val in zip(('subject', 'date', 'number'), (subject, toDate(date), int(number))):
            sessions = sessions[sessions[col] == val]
            if sessions.size == 0:
                return

        assert len(sessions) == 1

        eid, = sessions.index.values
        return eid

    @recurse
    def path2record(self, path) -> pd.Series:
        """Convert a file or session path to a dataset or session cache record.

        NB: Assumes <lab>/Subjects/<subject>/<date>/<number> pattern.

        Parameters
        ----------
        path : str, pathlib.Path
            Local path or HTTP URL.

        Returns
        -------
        pandas.Series
            A cache file record, or None if no matching record was found.

        """
        path = ALFPath(path)
        is_session = is_session_path(path)
        if self._cache['sessions' if is_session else 'datasets'].empty:
            return  # short circuit: no records in the cache

        if is_session:
            lab, subject, date, number = path.session_parts
            df = self._cache['sessions']
            rec = df[
                (df['lab'] == lab) & (df['subject'] == subject) &
                (df['number'] == int(number)) &
                (df['date'] == datetime.date.fromisoformat(date))
            ]
            return None if rec.empty else rec.squeeze()

        # If there's a UUID in the path, use that to fetch the record
        name_parts = path.stem.split('.')
        if is_uuid_string(uuid := name_parts[-1]):
            try:
                return self._cache['datasets'].loc[pd.IndexSlice[:, UUID(uuid)], :].squeeze()
            except KeyError:
                return

        # Fetch via session record
        eid = self.path2eid(path)
        if not eid:
            return  # no session found: avoid listing datasets without a session filter
        df = self.list_datasets(eid, details=True)
        if df.empty:
            return

        # Find row where relative path matches
        rec = df[df['rel_path'] == path.relative_to_session().as_posix()]
        assert len(rec) < 2, 'Multiple records found'
        if rec.empty:
            return None
        # Convert slice to series and reinstate eid index if dropped
        return rec.squeeze().rename(index=(eid, rec.index.get_level_values('id')[0]))

    @recurse
    def path2url(self, filepath):
        """Given a local file path, constructs the URL of the remote file.

        Parameters
        ----------
        filepath : str, pathlib.Path
            A local file path

        Returns
        -------
        str
            A remote URL string, or None if no cache record was found for the path.

        """
        record = self.path2record(filepath)
        if record is None:
            return
        return self.record2url(record)

    def record2url(self, record):
        """Convert a session or dataset record to a remote URL.

        NB: Requires online instance

        Parameters
        ----------
        record : pd.Series, pd.DataFrame
            A datasets or sessions cache record. If DataFrame, iterate over and returns list.

        Returns
        -------
        str, list
            A dataset URL or list if input is DataFrame

        Raises
        ------
        AssertionError
            The instance has no `_web_client` attribute, or a dataset record without a
            'session_path' field is not named with an (eid, dataset id) tuple.
        TypeError
            The record is neither a pandas Series nor a DataFrame.

        """
        webclient = getattr(self, '_web_client', False)
        assert webclient, 'No Web client found for instance'
        # FIXME Should be OneAlyx converter only
        if isinstance(record, pd.DataFrame):
            return [self.record2url(r) for _, r in record.iterrows()]
        elif isinstance(record, pd.Series):
            # Records without a relative path are treated as session records
            is_session_record = 'rel_path' not in record
            if is_session_record:
                # NB: This assumes the root path is in the webclient URL
                session_spec = '{lab}/Subjects/{subject}/{date}/{number:03d}'
                url = record.get('session_path') or session_spec.format(**record)
                return webclient.rel_path2url(url)
        else:
            raise TypeError(
                f'record must be pandas.DataFrame or pandas.Series, got {type(record)} instead')
        if 'session_path' in record:
            # Check for session_path field (aggregate datasets have no eid in name)
            session_path = record['session_path']
            uuid = record.name if isinstance(record.name, UUID) else record.name[-1]
        else:
            assert isinstance(record.name, tuple) and len(record.name) == 2
            eid, uuid = record.name  # must be (eid, did)
            session_path = get_alf_path(self.eid2path(eid))
        url = PurePosixALFPath(session_path, record['rel_path'])
        return webclient.rel_path2url(url.with_uuid(uuid).as_posix())

    def record2path(self, dataset) -> Optional[ALFPath]:
        """Given a set of dataset records, returns the corresponding paths.

        Parameters
        ----------
        dataset : pd.DataFrame, pd.Series
            A datasets dataframe slice.

        Returns
        -------
        one.alf.path.ALFPath
            File path for the record. If the input is a DataFrame, a list of paths.

        Raises
        ------
        TypeError
            The input is neither a pandas Series nor a DataFrame.
        AssertionError
            The record is not named with an (eid, dataset id) tuple.
        ValueError
            No session path could be determined for the record's eid.

        """
        if isinstance(dataset, pd.DataFrame):
            return [self.record2path(r) for _, r in dataset.iterrows()]
        elif not isinstance(dataset, pd.Series):
            raise TypeError(
                f'record must be pandas.DataFrame or pandas.Series, got {type(dataset)} instead')
        assert isinstance(dataset.name, tuple) and len(dataset.name) == 2
        eid, uuid = dataset.name  # must be (eid, did)
        if not (session_path := self.eid2path(eid)):
            raise ValueError(f'Failed to determine session path for eid "{eid}"')
        file = session_path / dataset['rel_path']
        if self.uuid_filenames:
            file = file.with_uuid(uuid)
        return file

    @recurse
    def eid2ref(self, eid: Union[str, Iter], as_dict=True, parse=True) \
            -> Union[str, Mapping, List]:
        """Get human-readable session ref from an experiment ID.

        Parameters
        ----------
        eid : str, uuid.UUID
            The experiment uuid to find reference for.
        as_dict : bool
            If false a string is returned in the form 'yyyy-mm-dd_n_subject'.
        parse : bool
            If true, the reference date and sequence are parsed from strings to their respective
            data types.

        Returns
        -------
        dict, str, list
            One or more objects with keys ('subject', 'date', 'sequence'), or strings with the
            form yyyy-mm-dd_n_subject.

        Examples
        --------
        >>> eid = '4e0b3320-47b7-416e-b842-c34dc9004cf8'
        >>> one.eid2ref(eid)
        {'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1}
        >>> one.eid2ref(eid, parse=False)
        {'subject': 'flowers', 'date': '2018-07-13', 'sequence': '001'}
        >>> one.eid2ref(eid, as_dict=False)
        '2018-07-13_1_flowers'
        >>> one.eid2ref(eid, as_dict=False, parse=False)
        '2018-07-13_001_flowers'
        >>> one.eid2ref([eid, '7dc3c44b-225f-4083-be3d-07b8562885f4'])
        [{'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1},
         {'subject': 'KS005', 'date': datetime.date(2019, 4, 11), 'sequence': 1}]

        """
        d = self.get_details(eid)
        if parse:
            ref = {'subject': d['subject'], 'date': d['date'], 'sequence': d['number']}
            format_str = '{date:%Y-%m-%d}_{sequence:d}_{subject:s}'
        else:
            ref = {
                'subject': d['subject'], 'date': str(d['date']), 'sequence': '%03d' % d['number']
            }
            format_str = '{date:s}_{sequence:s}_{subject:s}'
        return Bunch(ref) if as_dict else format_str.format(**ref)

    @recurse
    def ref2eid(self, ref: Union[Mapping, str, Iter]) -> Union[str, List]:
        """Returns experiment uuid, given one or more experiment references.

        Parameters
        ----------
        ref : str, dict, list
            One or more objects with keys ('subject', 'date', 'sequence'), or strings with
            the form yyyy-mm-dd_n_subject.

        Returns
        -------
        uuid.UUID, list
            One or more experiment uuids.

        Raises
        ------
        AssertionError
            The search did not return exactly one session for the reference.

        Examples
        --------
        >>> base = 'https://test.alyx.internationalbrainlab.org'
        >>> one = ONE(username='test_user', password='TapetesBloc18', base_url=base)
        Connected to...
        >>> ref = {'date': datetime(2018, 7, 13).date(), 'sequence': 1, 'subject': 'flowers'}
        >>> one.ref2eid(ref)
        UUID('4e0b3320-47b7-416e-b842-c34dc9004cf8')
        >>> one.ref2eid(['2018-07-13_1_flowers', '2019-04-11_1_KS005'])
        [UUID('4e0b3320-47b7-416e-b842-c34dc9004cf8'),
         UUID('7dc3c44b-225f-4083-be3d-07b8562885f4')]

        """
        ref = self.ref2dict(ref, parse=False)  # Ensure dict
        session = self.search(
            subject=ref['subject'],
            date_range=str(ref['date']),
            number=ref['sequence'])
        assert len(session) == 1, 'session not found'
        return session[0]

    @recurse
    def ref2path(self, ref):
        """Convert one or more experiment references to session path(s).

        Parameters
        ----------
        ref : str, dict, list
            One or more objects with keys ('subject', 'date', 'sequence'), or strings with
            the form yyyy-mm-dd_n_subject.

        Returns
        -------
        one.alf.path.ALFPath
            Path object(s) for the experiment session(s).

        Examples
        --------
        >>> base = 'https://test.alyx.internationalbrainlab.org'
        >>> one = ONE(username='test_user', password='TapetesBloc18', base_url=base)
        Connected to...
        >>> ref = {'subject': 'flowers', 'date': datetime(2018, 7, 13).date(), 'sequence': 1}
        >>> one.ref2path(ref)
        WindowsPath('E:/FlatIron/zadorlab/Subjects/flowers/2018-07-13/001')
        >>> one.ref2path(['2018-07-13_1_flowers', '2019-04-11_1_KS005'])
        [WindowsPath('E:/FlatIron/zadorlab/Subjects/flowers/2018-07-13/001'),
         WindowsPath('E:/FlatIron/cortexlab/Subjects/KS005/2019-04-11/001')]

        """
        # This method is itself mapped over lists by `recurse`, so the undecorated converters
        # are called directly with a single reference
        eid2path = unwrap(self.eid2path)
        ref2eid = unwrap(self.ref2eid)
        return eid2path(self, ref2eid(self, ref))

    @staticmethod
    @parse_values
    def path2ref(path_str: Union[str, Path, Iter], as_dict=True) -> Union[Bunch, List]:
        """Returns a human-readable experiment reference, given a session path.

        The path need not exist.

        NB: For list or tuple input `as_dict` is not forwarded to the per-element calls, so
        the elements are returned with the default `as_dict=True`.

        Parameters
        ----------
        path_str : str
            A path to a given session.
        as_dict : bool
            If True a Bunch is returned, otherwise a string.

        Returns
        -------
        dict, str, list
            One or more objects with keys ('subject', 'date', 'sequence'). None if the path
            does not contain a valid subject/date/sequence part.

        Examples
        --------
        >>> path_str = Path('E:/FlatIron/Subjects/zadorlab/flowers/2018-07-13/001')
        >>> path2ref(path_str)
        {'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1}
        >>> path2ref(path_str, parse=False)
        {'subject': 'flowers', 'date': '2018-07-13', 'sequence': '001'}
        >>> path_str2 = Path('E:/FlatIron/Subjects/churchlandlab/CSHL046/2020-06-20/002')
        >>> path2ref([path_str, path_str2])
        [{'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1},
         {'subject': 'CSHL046', 'date': datetime.date(2020, 6, 20), 'sequence': 2}]

        """
        if isinstance(path_str, (list, tuple)):
            # Elements are left unparsed here; the outer `parse_values` wrapper parses them
            return [unwrap(ConversionMixin.path2ref)(x) for x in path_str]
        pattern = r'(?P<subject>[\w-]+)([\\/])(?P<date>\d{4}-\d{2}-\d{2})(\2)(?P<sequence>\d{1,3})'
        match = re.search(pattern, str(path_str))
        if match and not re.match(r'^0\d$', match.groups()[-1]):  # e.g. '02' not valid
            ref = match.groupdict()
            return Bunch(ref) if as_dict else '{date:s}_{sequence:s}_{subject:s}'.format(**ref)

    @staticmethod
    def is_exp_ref(ref: Union[str, Mapping, Iter]) -> Union[bool, List[bool]]:
        """Returns True if ref is a valid experiment reference.

        Parameters
        ----------
        ref : str, dict, list
            One or more objects with keys ('subject', 'date', 'sequence'), or strings with
            the form yyyy-mm-dd_n_subject.

        Returns
        -------
        bool, list of bool
            True if ref is valid.

        Examples
        --------
        >>> ref = {'date': datetime(2018, 7, 13).date(), 'sequence': 1, 'subject': 'flowers'}
        >>> is_exp_ref(ref)
        True
        >>> is_exp_ref('2018-07-13_001_flowers')
        True
        >>> is_exp_ref('invalid_ref')
        False

        """
        if isinstance(ref, (list, tuple)):
            return [ConversionMixin.is_exp_ref(x) for x in ref]
        if isinstance(ref, (Bunch, dict)):
            if not {'subject', 'date', 'sequence'}.issubset(ref):
                return False
            # Validate the dict through its string form
            ref = '{date}_{sequence}_{subject}'.format(**ref)
        elif not isinstance(ref, str):
            return False
        # NB: the pattern is anchored at the start of the string only
        return re.compile(r'\d{4}(-\d{2}){2}_(\d{1,3})_\w+').match(ref) is not None

    @staticmethod
    @parse_values
    def ref2dict(ref: Union[str, Mapping, Iter]) -> Union[Bunch, List]:
        """Returns a Bunch (dict-like) from a reference string (or list thereof).

        Parameters
        ----------
        ref : str, list
            One or more experiment reference strings.

        Returns
        -------
        iblutil.util.Bunch
            A Bunch in with keys ('subject', 'sequence', 'date').

        Examples
        --------
        >>> ref2dict('2018-07-13_1_flowers')
        {'date': datetime.date(2018, 7, 13), 'sequence': 1, 'subject': 'flowers'}
        >>> ref2dict('2018-07-13_001_flowers', parse=False)
        {'date': '2018-07-13', 'sequence': '001', 'subject': 'flowers'}
        >>> ref2dict(['2018-07-13_1_flowers', '2020-01-23_002_ibl_witten_01'])
        [{'date': datetime.date(2018, 7, 13), 'sequence': 1, 'subject': 'flowers'},
         {'date': datetime.date(2020, 1, 23), 'sequence': 2, 'subject': 'ibl_witten_01'}]

        """
        if isinstance(ref, (list, tuple)):
            # Call the undecorated function on each element so that values are parsed exactly
            # once, by the outer `parse_values` wrapper. Calling the decorated function here
            # would always parse the elements, ignoring a caller's `parse=False`.
            return [unwrap(ConversionMixin.ref2dict)(x) for x in ref]
        if isinstance(ref, (Bunch, dict)):
            return Bunch(ref)  # Short circuit
        # Split at most twice so that underscores in the subject name are preserved
        ref = dict(zip(['date', 'sequence', 'subject'], ref.split('_', 2)))
        return Bunch(ref)

    @staticmethod
    def dict2ref(ref_dict) -> Union[str, List]:
        """Convert an experiment reference dict to a string in the format yyyy-mm-dd_n_subject.

        If the 'sequence' key is absent the 'number' key is used instead, and if the 'date'
        key is absent the date is taken from 'start_time'. The input is not modified. Each
        field is formatted according to its own type, so parsed and unparsed values may be
        mixed (e.g. an int number with a str start_time).

        Parameters
        ----------
        ref_dict : dict, Bunch, list, tuple
            A map with the keys ('subject', 'date', 'sequence').

        Returns
        -------
        str, list:
            An experiment reference string, or list thereof. None if the input is empty.

        Raises
        ------
        KeyError
            A required key is missing.

        """
        if isinstance(ref_dict, (list, tuple)):
            return [ConversionMixin.dict2ref(x) for x in ref_dict]
        if not ref_dict:
            return
        if 'sequence' not in ref_dict and 'number' in ref_dict:
            ref_dict = ref_dict.copy()  # avoid modifying the caller's dict
            ref_dict['sequence'] = ref_dict.pop('number')
        if 'date' not in ref_dict and 'start_time' in ref_dict:
            ref_dict = ref_dict.copy()
            if isinstance(ref_dict['start_time'], str):
                ref_dict['date'] = ref_dict['start_time'][:10]
            else:
                ref_dict['date'] = ref_dict['start_time'].date()
        date, sequence, subject = (ref_dict[k] for k in ('date', 'sequence', 'subject'))
        # A date format spec is invalid for str and an integer spec is invalid for str, so a
        # single format string cannot serve a dict with a mix of parsed and unparsed values
        date_str = date if isinstance(date, str) else f'{date:%Y-%m-%d}'
        sequence_str = sequence if isinstance(sequence, str) else f'{sequence:d}'
        return f'{date_str:s}_{sequence_str:s}_{subject:s}'
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
def one_path_from_dataset(dset, one_cache):
    """Return the local ONE file path for one or more dataset records from REST.

    This delegates to `path_from_dataset`, using the cache directory as the root path and
    without adding the dataset UUID to the file name. Unlike `to_eid`, this function does
    not require ONE, and the dataset may not exist.

    Parameters
    ----------
    dset : dict, list
        Dataset dictionary or list of dictionaries from Alyx rest endpoint.
    one_cache : str, pathlib.Path, pathlib.PurePath
        The local ONE data cache directory.

    Returns
    -------
    one.alf.path.ALFPath
        The local path for a given dataset.

    """
    local_path = path_from_dataset(dset, one_cache, uuid=False)
    return local_path
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
def path_from_dataset(dset, root_path=PurePosixALFPath('/'), repository=None, uuid=False):
    """Returns the local file path from a dset record from a REST query.

    Unlike `to_eid`, this function does not require ONE, and the dataset may not exist.

    Parameters
    ----------
    dset : dict, list
        Dataset dictionary or list of dictionaries from Alyx rest endpoint.
    root_path : str, pathlib.Path, pathlib.PurePath
        The prefix path such as the ONE download directory or remote http server root.
    repository : str, None
        Which data repository to use from the file_records list, defaults to the first
        file record with a non-empty 'data_url'.
    uuid : bool
        If True, the file path will contain the dataset UUID.

    Returns
    -------
    one.alf.path.ALFPath, list
        File path or list of paths.

    Raises
    ------
    StopIteration
        No file record matches the requested repository, or (when no repository is given)
        no file record has a data URL.

    """
    if isinstance(dset, list):
        # Forward all options: recursing with the defaults would silently discard the
        # caller's root path, repository and uuid arguments
        return [
            path_from_dataset(d, root_path=root_path, repository=repository, uuid=uuid)
            for d in dset
        ]
    if repository:
        fr = next((fr for fr in dset['file_records'] if fr['data_repository'] == repository))
    else:
        fr = next((fr for fr in dset['file_records'] if fr['data_url']))
    # The dataset UUID is taken from the last 36 characters of the dataset URL
    uuid = dset['url'][-36:] if uuid else None
    return path_from_filerecord(fr, root_path=root_path, uuid=uuid)
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
def path_from_filerecord(fr, root_path=PurePosixALFPath('/'), uuid=None):
    """Returns a data file Path constructed from an Alyx file record.

    The Path type returned depends on the type of root_path: If root_path is a string an ALFPath
    object is returned, otherwise if the root_path is a PurePath, a PureALFPath is returned.

    Parameters
    ----------
    fr : dict, list
        An Alyx file record dict, or a list thereof.
    root_path : str, pathlib.Path
        An optional root path.
    uuid : str, uuid.UUID
        An optional dataset UUID to add to the file name. For list input the same UUID is
        applied to every file record.

    Returns
    -------
    one.alf.path.ALFPath, list
        A filepath as a pathlib object, or a list thereof if the input is a list.

    """
    if isinstance(fr, list):
        # Forward the options: recursing with the defaults would silently discard the
        # caller's root path and UUID
        return [path_from_filerecord(f, root_path=root_path, uuid=uuid) for f in fr]
    repo_path = fr['data_repository_path']
    if repo_path.startswith('/'):
        # Remove the slash at the start, if any, so the path can be joined onto the root
        repo_path = repo_path[1:]
    file_path = PurePosixALFPath(repo_path, fr['relative_path'])
    if root_path:
        # NB: this function won't cast any PurePaths
        root_path = ensure_alf_path(root_path)
        file_path = root_path / file_path
    return file_path.with_uuid(uuid) if uuid else file_path
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
def session_record2path(session, root_dir=None):
    """Build a session path from a session record.

    When the record has a non-empty 'lab' value the path takes the form
    root_dir/lab/Subjects/subject/yyyy-mm-dd/nnn, otherwise root_dir/subject/yyyy-mm-dd/nnn.
    The session number is zero-padded to three characters.

    Parameters
    ----------
    session : Mapping
        A session record with keys ('subject', 'date', 'number'[, 'lab']).
    root_dir : str, pathlib.Path, pathlib.PurePath
        A root directory to prepend.

    Returns
    -------
    one.alf.path.ALFPath, one.alf.path.PureALFPath
        A constructed path of the session. Without a root directory, a relative pure POSIX
        path is returned.

    Examples
    --------
    >>> session_record2path({'subject': 'ALK01', 'date': '2020-01-01', 'number': 1})
    PurePosixPath('ALK01/2020-01-01/001')

    >>> record = {'date': datetime.datetime.fromisoformat('2020-01-01').date(),
    ...           'number': '001', 'lab': 'foo', 'subject': 'ALK01'}
    >>> session_record2path(record, Path('/home/user'))
    Path('/home/user/foo/Subjects/ALK01/2020-01-01/001')

    """
    lab = session.get('lab')
    # Without a lab both leading parts are empty strings
    lab_parts = (lab, 'Subjects') if lab else ('', '')
    rel_path = PurePosixALFPath(
        *lab_parts, session['subject'], str(session['date']), str(session['number']).zfill(3))
    if root_dir:
        return ensure_alf_path(root_dir).joinpath(rel_path)
    return rel_path
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
def ses2records(ses: dict):
    """Split a remote session record into a session cache record and a datasets frame.

    Parameters
    ----------
    ses : dict
        Session dictionary from Alyx REST endpoint.

    Returns
    -------
    pd.Series
        Session record, named by the session UUID.  The 'start_time' field is renamed 'date'
        and reduced to a `datetime.date`; 'projects' is joined into a comma-separated string.
    pd.DataFrame
        Datasets frame indexed by ('eid', 'id').  A copy of the empty datasets frame is
        returned when the session has no related datasets.

    """
    # The session ID is given explicitly, or else taken from the last 36 chars of the URL
    eid = UUID(ses.get('id') or ses['url'][-36:])
    wanted = ('subject', 'start_time', 'lab', 'number', 'task_protocol', 'projects')
    fields = {key: value for key, value in ses.items() if key in wanted}
    session = pd.Series(data=fields, name=eid).rename({'start_time': 'date'})
    session['projects'] = ','.join(session.pop('projects'))
    session['date'] = datetime.datetime.fromisoformat(session['date']).date()

    def _to_record(d):
        """Convert one related-dataset dict into a datasets table row (as a dict)."""
        did = UUID(d['id'])
        rec = {
            'file_size': d['file_size'],
            'hash': d['hash'],
            'exists': True,
            'id': did,
            'eid': session.name,
        }
        url_path = urllib.parse.urlsplit(d['data_url'], allow_fragments=False).path
        file_path = get_alf_path(remove_uuid_string(url_path.strip('/')))
        # The relative path is whatever follows the session part of the ALF path
        n_session_chars = len(get_session_path(file_path).as_posix())
        rec['rel_path'] = file_path[n_session_chars:].strip('/')
        # Only the literal string 'True' is treated as a default revision
        rec['default_revision'] = d['default_revision'] == 'True'
        rec['qc'] = d.get('qc', 'NOT_SET')
        return rec

    if not ses.get('data_dataset_session_related'):
        return session, EMPTY_DATASETS_FRAME.copy()
    rows = [_to_record(d) for d in ses['data_dataset_session_related']]
    frame = pd.DataFrame(rows).astype(EMPTY_DATASETS_FRAME.dtypes)
    datasets = frame.set_index(['eid', 'id']).sort_index()
    return session, datasets
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
def datasets2records(datasets, additional=None) -> pd.DataFrame:
    """Extract datasets DataFrame from one or more Alyx dataset records.

    For each dataset the first file record that has both a data URL and `exists` set is used
    to derive the relative path; datasets without such a file record are skipped.

    Parameters
    ----------
    datasets : dict, list
        One or more records from the Alyx 'datasets' endpoint.
    additional : list of str
        A set of optional fields to extract from dataset records.

    Returns
    -------
    pd.DataFrame
        Datasets frame.  When no dataset is accessible a fresh copy of the empty datasets
        frame is returned, so callers may safely modify the result in place.

    Examples
    --------
    >>> datasets = ONE().alyx.rest('datasets', 'list', subject='foobar')
    >>> df = datasets2records(datasets)

    """
    records = []

    for d in ensure_list(datasets):
        file_record = next((x for x in d['file_records'] if x['data_url'] and x['exists']), None)
        if not file_record:
            continue  # Ignore files that are not accessible
        rec = dict(file_size=d['file_size'], hash=d['hash'], exists=True)
        # Dataset and session IDs are the last 36 characters of their URLs
        rec['id'] = UUID(d['url'][-36:])
        rec['eid'] = UUID(d['session'][-36:]) if d['session'] else pd.NA
        data_url = urllib.parse.urlsplit(file_record['data_url'], allow_fragments=False)
        file_path = get_alf_path(data_url.path.strip('/'))
        file_path = remove_uuid_string(file_path).as_posix()
        # Datasets outside of a session keep their full ALF path as the relative path
        session_path = get_session_path(file_path) or ''
        if session_path:
            session_path = session_path.as_posix()
        rec['rel_path'] = file_path[len(session_path):].strip('/')
        rec['default_revision'] = d['default_dataset']
        rec['qc'] = d.get('qc')
        for field in additional or []:
            rec[field] = d.get(field)
        records.append(rec)

    if not records:
        # Return a copy: handing out the module-level frame would let callers mutate it
        return EMPTY_DATASETS_FRAME.copy()
    index = EMPTY_DATASETS_FRAME.index.names
    return pd.DataFrame(records).set_index(index).sort_index().astype(EMPTY_DATASETS_FRAME.dtypes)
|
|
1
|
+
"""A module for inter-converting experiment identifiers.
|
|
2
|
+
|
|
3
|
+
There are multiple ways to uniquely identify an experiment:
|
|
4
|
+
- eid (UUID) : An experiment UUID (or 36 char hexadecimal string)
|
|
5
|
+
- np (int64) : An experiment UUID encoded as 2 int64s
|
|
6
|
+
- path (Path) : A pathlib ALF path of the form `<lab>/Subjects/<subject>/<date>/<number>`
|
|
7
|
+
- ref (str) : An experiment reference string of the form `yyyy-mm-dd_n_subject`
|
|
8
|
+
- url (str) : A remote http session path of the form `<lab>/Subjects/<subject>/<date>/<number>`
|
|
9
|
+
"""
|
|
10
|
+
import re
|
|
11
|
+
import functools
|
|
12
|
+
import datetime
|
|
13
|
+
import urllib.parse
|
|
14
|
+
from uuid import UUID
|
|
15
|
+
from inspect import unwrap
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Optional, Union, Mapping, List, Iterable as Iter
|
|
18
|
+
|
|
19
|
+
import pandas as pd
|
|
20
|
+
from iblutil.util import Bunch, Listable, ensure_list
|
|
21
|
+
|
|
22
|
+
from one.alf.spec import is_session_path, is_uuid_string, is_uuid
|
|
23
|
+
from one.alf.cache import EMPTY_DATASETS_FRAME
|
|
24
|
+
from one.alf.path import (
|
|
25
|
+
ALFPath, PurePosixALFPath, ensure_alf_path, get_session_path, get_alf_path, remove_uuid_string)
|
|
26
|
+
from one.util import LazyId
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def recurse(func):
    """Decorator that maps a method over its first argument when that is a collection.

    The decorated callable is expected to be a method: the first positional argument is
    taken to be the object and the second the value to (possibly) iterate over.  When the
    second positional argument is a LazyId, or an iterable that is not a string, Mapping,
    pandas Series or pandas DataFrame, the method is called once per item and a list of the
    results is returned.  Otherwise the method is called once, unchanged.  Calls with fewer
    than two positional arguments are passed straight through.

    Parameters
    ----------
    func : function
        A method to decorate.

    Returns
    -------
    function
        The decorated method.

    """
    @functools.wraps(func)
    def wrapper_decorator(*args, **kwargs):
        if len(args) < 2:
            return func(*args, **kwargs)
        obj, first, *rest = args
        if isinstance(first, LazyId):
            iterate = True
        else:
            # Strings, maps and pandas objects are iterable but treated as single values
            atomic = (str, Mapping, pd.Series, pd.DataFrame)
            iterate = isinstance(first, Iter) and not isinstance(first, atomic)
        if not iterate:
            return func(obj, first, *rest, **kwargs)
        return [func(obj, item, *rest, **kwargs) for item in first]
    return wrapper_decorator
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def parse_values(func):
    """Decorator converting str values of a returned reference dict to typed values.

    The wrapped function gains a `parse` keyword argument (default True) which is removed
    before the call.  When `parse` is true and the result is not a string, the 'date' value
    of each returned reference is converted to a `datetime.date` (if it is a string) and the
    'sequence' value is converted to int.  References are modified in place.

    Examples
    --------
    >>> parse_values(lambda x: x)({'date': '2020-01-01', 'sequence': '001'}, parse=True)
    {'date': datetime.date(2020, 1, 1), 'sequence': 1}

    """
    def parse_ref(ref):
        """Parse the date and sequence of a single reference; empty values pass through."""
        if not ref:
            return ref
        date = ref['date']
        if isinstance(date, str):
            if len(date) == 10:  # a plain yyyy-mm-dd string
                ref['date'] = datetime.date.fromisoformat(date)
            else:  # anything longer is treated as an ISO datetime string
                ref['date'] = datetime.datetime.fromisoformat(date).date()
        ref['sequence'] = int(ref['sequence'])
        return ref

    @functools.wraps(func)
    def wrapper_decorator(*args, **kwargs):
        parse = kwargs.pop('parse', True)
        ref = func(*args, **kwargs)
        if not parse or isinstance(ref, str):
            return ref
        if isinstance(ref, (list, LazyId)):
            return [parse_ref(item) for item in ref]
        return parse_ref(ref)
    return wrapper_decorator
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class ConversionMixin:
    """A mixin providing methods to inter-convert experiment identifiers."""

    def __init__(self):
        self._cache = None
        self._par = None

    @recurse
    def to_eid(self,
               id: Listable(Union[str, Path, UUID, dict]) = None,
               cache_dir: Optional[Union[str, Path]] = None) -> Listable(UUID):
        """Given any kind of experiment identifier, return a corresponding experiment UUID.

        NB: Currently does not support integer IDs.

        Parameters
        ----------
        id : str, pathlib.Path, UUID, dict, tuple, list
            An experiment identifier
        cache_dir : pathlib.Path, str
            An optional cache directory path for intermittent conversion to path

        Returns
        -------
        uuid.UUID, None
            An experiment ID or None if session not in cache

        Raises
        ------
        ValueError
            Input ID invalid

        """
        # TODO Could add np2str here
        if id is None:
            return
        elif isinstance(id, (UUID, LazyId)):
            return id
        elif self.is_exp_ref(id):
            return self.ref2eid(id)
        elif isinstance(id, dict):
            # A session dict is first converted to a session path, then handled as a Path
            assert {'subject', 'number', 'lab'}.issubset(id)
            root = Path(cache_dir or self.cache_dir)
            id = root.joinpath(
                id['lab'],
                'Subjects', id['subject'],
                str(id.get('date') or id['start_time'][:10]),
                ('%03d' % id['number']))

        if isinstance(id, Path):
            return self.path2eid(id)
        elif isinstance(id, str):
            if is_session_path(id) or get_session_path(id):
                return self.path2eid(id)
            if len(id) > 36:
                id = id[-36:]  # e.g. a URL ending in the UUID
            if not is_uuid_string(id):
                raise ValueError('Invalid experiment ID')
            else:
                return UUID(id)
        else:
            raise ValueError('Unrecognized experiment ID')

    @recurse
    def eid2path(self, eid: str) -> Optional[Listable(ALFPath)]:
        """From an experiment id or a list of experiment ids, gets the local cache path.

        Parameters
        ----------
        eid : str, uuid.UUID
            Experiment ID (UUID) or list of UUIDs.

        Returns
        -------
        one.alf.path.ALFPath
            A session path, or None if the sessions table is empty or the eid is not in it.

        Raises
        ------
        ValueError
            The eid is not a valid UUID.

        """
        if not is_uuid(eid):
            raise ValueError(f"{eid} is not a valid eID/UUID string")
        if isinstance(eid, str):
            eid = UUID(eid)
        if self._cache['sessions'].size == 0:
            return

        # load path from cache
        try:
            ses = self._cache['sessions'].loc[eid].squeeze()
            assert isinstance(ses, pd.Series), 'Duplicate eids in sessions table'
            return session_record2path(ses.to_dict(), self.cache_dir)
        except KeyError:
            return

    @recurse
    def path2eid(self, path_obj):
        """From a local path, gets the experiment id.

        Parameters
        ----------
        path_obj : pathlib.Path, str
            Local path or list of local paths.

        Returns
        -------
        eid, list
            Experiment ID (eid) or list of eids.  None is returned when the path contains no
            session part, the sessions table is empty, or no session matches.

        """
        session_path = get_session_path(path_obj)
        sessions = self._cache['sessions']

        # if path does not have a date and a number, or cache is empty return None
        if session_path is None or sessions.size == 0:
            return None

        # reduce session records from cache, one column at a time
        subject, date, number = session_path.parts[-3:]
        values = (subject, datetime.date.fromisoformat(date), int(number))
        for col, val in zip(('subject', 'date', 'number'), values):
            sessions = sessions[sessions[col] == val]
            if sessions.size == 0:
                return

        assert len(sessions) == 1

        eid, = sessions.index.values
        return eid

    @recurse
    def path2record(self, path) -> pd.Series:
        """Convert a file or session path to a dataset or session cache record.

        NB: Assumes <lab>/Subjects/<subject>/<date>/<number> pattern.

        Parameters
        ----------
        path : str, pathlib.Path
            Local path or HTTP URL.

        Returns
        -------
        pandas.Series
            A cache file record, or None if no matching record was found.

        """
        path = ALFPath(path)
        is_session = is_session_path(path)
        if self._cache['sessions' if is_session else 'datasets'].empty:
            return  # short circuit: no records in the cache

        if is_session:
            lab, subject, date, number = path.session_parts
            df = self._cache['sessions']
            rec = df[
                (df['lab'] == lab) & (df['subject'] == subject) &
                (df['number'] == int(number)) &
                (df['date'] == datetime.date.fromisoformat(date))
            ]
            return None if rec.empty else rec.squeeze()

        # If there's a UUID in the path, use that to fetch the record
        name_parts = path.stem.split('.')
        if is_uuid_string(uuid := name_parts[-1]):
            try:
                return self._cache['datasets'].loc[pd.IndexSlice[:, UUID(uuid)], :].squeeze()
            except KeyError:
                return

        # Fetch via session record; bail out before querying datasets if no session was found
        eid = self.path2eid(path)
        if not eid:
            return
        df = self.list_datasets(eid, details=True)
        if df.empty:
            return

        # Find row where relative path matches
        rec = df[df['rel_path'] == path.relative_to_session().as_posix()]
        assert len(rec) < 2, 'Multiple records found'
        if rec.empty:
            return None
        # Convert slice to series and reinstate eid index if dropped
        return rec.squeeze().rename(index=(eid, rec.index.get_level_values('id')[0]))

    @recurse
    def path2url(self, filepath):
        """Given a local file path, constructs the URL of the remote file.

        Parameters
        ----------
        filepath : str, pathlib.Path
            A local file path

        Returns
        -------
        str
            A remote URL string, or None if no cache record was found for the path.

        """
        record = self.path2record(filepath)
        if record is None:
            return
        return self.record2url(record)

    def record2url(self, record):
        """Convert a session or dataset record to a remote URL.

        NB: Requires online instance

        Parameters
        ----------
        record : pd.Series, pd.DataFrame
            A datasets or sessions cache record. If DataFrame, iterate over and returns list.

        Returns
        -------
        str, list
            A dataset URL or list if input is DataFrame

        Raises
        ------
        TypeError
            The record is neither a pandas Series nor a DataFrame.

        """
        webclient = getattr(self, '_web_client', False)
        assert webclient, 'No Web client found for instance'
        # FIXME Should be OneAlyx converter only
        if isinstance(record, pd.DataFrame):
            return [self.record2url(r) for _, r in record.iterrows()]
        elif isinstance(record, pd.Series):
            # A record without a relative path is treated as a session record
            is_session_record = 'rel_path' not in record
            if is_session_record:
                # NB: This assumes the root path is in the webclient URL
                session_spec = '{lab}/Subjects/{subject}/{date}/{number:03d}'
                url = record.get('session_path') or session_spec.format(**record)
                return webclient.rel_path2url(url)
        else:
            raise TypeError(
                f'record must be pandas.DataFrame or pandas.Series, got {type(record)} instead')
        if 'session_path' in record:
            # Check for session_path field (aggregate datasets have no eid in name)
            session_path = record['session_path']
            uuid = record.name if isinstance(record.name, UUID) else record.name[-1]
        else:
            assert isinstance(record.name, tuple) and len(record.name) == 2
            eid, uuid = record.name  # must be (eid, did)
            session_path = get_alf_path(self.eid2path(eid))
        url = PurePosixALFPath(session_path, record['rel_path'])
        return webclient.rel_path2url(url.with_uuid(uuid).as_posix())

    def record2path(self, dataset) -> Optional[ALFPath]:
        """Given a set of dataset records, returns the corresponding paths.

        Parameters
        ----------
        dataset : pd.DataFrame, pd.Series
            A datasets dataframe slice.

        Returns
        -------
        one.alf.path.ALFPath
            File path for the record (a list of paths if the input is a DataFrame).

        Raises
        ------
        TypeError
            The input is neither a pandas Series nor a DataFrame.
        ValueError
            No session path could be determined for the record's eid.

        """
        if isinstance(dataset, pd.DataFrame):
            return [self.record2path(r) for _, r in dataset.iterrows()]
        elif not isinstance(dataset, pd.Series):
            raise TypeError(
                f'record must be pandas.DataFrame or pandas.Series, got {type(dataset)} instead')
        assert isinstance(dataset.name, tuple) and len(dataset.name) == 2
        eid, uuid = dataset.name  # must be (eid, did)
        if not (session_path := self.eid2path(eid)):
            raise ValueError(f'Failed to determine session path for eid "{eid}"')
        file = session_path / dataset['rel_path']
        if self.uuid_filenames:
            file = file.with_uuid(uuid)
        return file

    @recurse
    def eid2ref(self, eid: Union[str, Iter], as_dict=True, parse=True) \
            -> Union[str, Mapping, List]:
        """Get human-readable session ref from an experiment ID.

        Parameters
        ----------
        eid : str, uuid.UUID
            The experiment uuid to find reference for.
        as_dict : bool
            If false a string is returned in the form 'yyyy-mm-dd_n_subject'.
        parse : bool
            If true, the reference date and sequence are parsed from strings to their respective
            data types.

        Returns
        -------
        dict, str, list
            One or more objects with keys ('subject', 'date', 'sequence'), or strings with the
            form yyyy-mm-dd_n_subject.

        Examples
        --------
        >>> eid = '4e0b3320-47b7-416e-b842-c34dc9004cf8'
        >>> one.eid2ref(eid)
        {'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1}
        >>> one.eid2ref(eid, parse=False)
        {'subject': 'flowers', 'date': '2018-07-13', 'sequence': '001'}
        >>> one.eid2ref(eid, as_dict=False)
        '2018-07-13_1_flowers'
        >>> one.eid2ref(eid, as_dict=False, parse=False)
        '2018-07-13_001_flowers'
        >>> one.eid2ref([eid, '7dc3c44b-225f-4083-be3d-07b8562885f4'])
        [{'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1},
         {'subject': 'KS005', 'date': datetime.date(2019, 4, 11), 'sequence': 1}]

        """
        d = self.get_details(eid)
        if parse:
            ref = {'subject': d['subject'], 'date': d['date'], 'sequence': d['number']}
            format_str = '{date:%Y-%m-%d}_{sequence:d}_{subject:s}'
        else:
            ref = {
                'subject': d['subject'], 'date': str(d['date']), 'sequence': '%03d' % d['number']
            }
            format_str = '{date:s}_{sequence:s}_{subject:s}'
        return Bunch(ref) if as_dict else format_str.format(**ref)

    @recurse
    def ref2eid(self, ref: Union[Mapping, str, Iter]) -> Union[str, List]:
        """Returns experiment uuid, given one or more experiment references.

        Parameters
        ----------
        ref : str, dict, list
            One or more objects with keys ('subject', 'date', 'sequence'), or strings with
            the form yyyy-mm-dd_n_subject.

        Returns
        -------
        uuid.UUID, list
            One or more experiment uuids.

        Examples
        --------
        >>> ref = {'date': datetime(2018, 7, 13).date(), 'sequence': 1, 'subject': 'flowers'}
        >>> one.ref2eid(ref)
        UUID('4e0b3320-47b7-416e-b842-c34dc9004cf8')
        >>> one.ref2eid(['2018-07-13_1_flowers', '2019-04-11_1_KS005'])
        [UUID('4e0b3320-47b7-416e-b842-c34dc9004cf8'),
         UUID('7dc3c44b-225f-4083-be3d-07b8562885f4')]

        """
        ref = self.ref2dict(ref, parse=False)  # Ensure dict
        session = self.search(
            subject=ref['subject'],
            date_range=str(ref['date']),
            number=ref['sequence'])
        assert len(session) == 1, 'session not found'
        return session[0]

    @recurse
    def ref2path(self, ref):
        """Convert one or more experiment references to session path(s).

        Parameters
        ----------
        ref : str, dict, list
            One or more objects with keys ('subject', 'date', 'sequence'), or strings with
            the form yyyy-mm-dd_n_subject.

        Returns
        -------
        one.alf.path.ALFPath
            Path object(s) for the experiment session(s).

        Examples
        --------
        >>> ref = {'subject': 'flowers', 'date': datetime(2018, 7, 13).date(), 'sequence': 1}
        >>> one.ref2path(ref)
        WindowsPath('E:/FlatIron/zadorlab/Subjects/flowers/2018-07-13/001')
        >>> one.ref2path(['2018-07-13_1_flowers', '2019-04-11_1_KS005'])
        [WindowsPath('E:/FlatIron/zadorlab/Subjects/flowers/2018-07-13/001'),
         WindowsPath('E:/FlatIron/cortexlab/Subjects/KS005/2019-04-11/001')]

        """
        # The list case is already handled by this method's decorator, so call the
        # undecorated converters directly
        eid2path = unwrap(self.eid2path)
        ref2eid = unwrap(self.ref2eid)
        return eid2path(self, ref2eid(self, ref))

    @staticmethod
    @parse_values
    def path2ref(path_str: Union[str, Path, Iter], as_dict=True) -> Union[Bunch, List]:
        """Returns a human-readable experiment reference, given a session path.

        The path need not exist.

        Parameters
        ----------
        path_str : str
            A path to a given session.
        as_dict : bool
            If True a Bunch is returned, otherwise a string.  NB: when a list or tuple of
            paths is given, each element is converted with the default `as_dict=True`.

        Returns
        -------
        dict, str, list
            One or more objects with keys ('subject', 'date', 'sequence').  None is returned
            for a path in which no valid subject/date/number pattern is found.

        Examples
        --------
        >>> path_str = Path('E:/FlatIron/Subjects/zadorlab/flowers/2018-07-13/001')
        >>> path2ref(path_str)
        {'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1}
        >>> path2ref(path_str, parse=False)
        {'subject': 'flowers', 'date': '2018-07-13', 'sequence': '001'}
        >>> path_str2 = Path('E:/FlatIron/Subjects/churchlandlab/CSHL046/2020-06-20/002')
        >>> path2ref([path_str, path_str2])
        [{'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1},
         {'subject': 'CSHL046', 'date': datetime.date(2020, 6, 20), 'sequence': 2}]

        """
        if isinstance(path_str, (list, tuple)):
            # Use the undecorated function; the outer decorator parses the whole list
            return [unwrap(ConversionMixin.path2ref)(x) for x in path_str]
        pattern = r'(?P<subject>[\w-]+)([\\/])(?P<date>\d{4}-\d{2}-\d{2})(\2)(?P<sequence>\d{1,3})'
        match = re.search(pattern, str(path_str))
        if match and not re.match(r'^0\d$', match.groups()[-1]):  # e.g. '02' not valid
            ref = match.groupdict()
            return Bunch(ref) if as_dict else '{date:s}_{sequence:s}_{subject:s}'.format(**ref)
        return None

    @staticmethod
    def is_exp_ref(ref: Union[str, Mapping, Iter]) -> Union[bool, List[bool]]:
        """Returns True if ref is a valid experiment reference.

        Parameters
        ----------
        ref : str, dict, list
            One or more objects with keys ('subject', 'date', 'sequence'), or strings with
            the form yyyy-mm-dd_n_subject.

        Returns
        -------
        bool, list of bool
            True if ref is valid.

        Examples
        --------
        >>> ref = {'date': datetime(2018, 7, 13).date(), 'sequence': 1, 'subject': 'flowers'}
        >>> is_exp_ref(ref)
        True
        >>> is_exp_ref('2018-07-13_001_flowers')
        True
        >>> is_exp_ref('invalid_ref')
        False

        """
        if isinstance(ref, (list, tuple)):
            return [ConversionMixin.is_exp_ref(x) for x in ref]
        if isinstance(ref, (Bunch, dict)):
            if not {'subject', 'date', 'sequence'}.issubset(ref):
                return False
            # Validate the dict by checking its string form
            ref = '{date}_{sequence}_{subject}'.format(**ref)
        elif not isinstance(ref, str):
            return False
        return re.compile(r'\d{4}(-\d{2}){2}_(\d{1,3})_\w+').match(ref) is not None

    @staticmethod
    @parse_values
    def ref2dict(ref: Union[str, Mapping, Iter]) -> Union[Bunch, List]:
        """Returns a Bunch (dict-like) from a reference string (or list thereof).

        Parameters
        ----------
        ref : str, list
            One or more experiment reference strings.

        Returns
        -------
        iblutil.util.Bunch
            A Bunch with keys ('subject', 'sequence', 'date').

        Examples
        --------
        >>> ref2dict('2018-07-13_1_flowers')
        {'date': datetime.date(2018, 7, 13), 'sequence': 1, 'subject': 'flowers'}
        >>> ref2dict('2018-07-13_001_flowers', parse=False)
        {'date': '2018-07-13', 'sequence': '001', 'subject': 'flowers'}
        >>> ref2dict(['2018-07-13_1_flowers', '2020-01-23_002_ibl_witten_01'])
        [{'date': datetime.date(2018, 7, 13), 'sequence': 1, 'subject': 'flowers'},
         {'date': datetime.date(2020, 1, 23), 'sequence': 2, 'subject': 'ibl_witten_01'}]

        """
        if isinstance(ref, (list, tuple)):
            # Recurse through the undecorated function so that elements are not parsed here;
            # the outer decorator parses the list only when `parse` is true
            ref2dict = unwrap(ConversionMixin.ref2dict)
            return [ref2dict(x) for x in ref]
        if isinstance(ref, (Bunch, dict)):
            return Bunch(ref)  # Short circuit
        # The subject may itself contain underscores, hence at most two splits
        ref = dict(zip(['date', 'sequence', 'subject'], ref.split('_', 2)))
        return Bunch(ref)

    @staticmethod
    def dict2ref(ref_dict) -> Union[str, List]:
        """Convert an experiment reference dict to a string in the format yyyy-mm-dd_n_subject.

        The 'number' key is accepted in place of 'sequence', and 'start_time' in place of
        'date'.  String dates and sequences are used verbatim; other values are formatted as
        a yyyy-mm-dd date and a decimal integer respectively, so dicts mixing string and
        parsed values are supported.

        Parameters
        ----------
        ref_dict : dict, Bunch, list, tuple
            A map with the keys ('subject', 'date', 'sequence').

        Returns
        -------
        str, list:
            An experiment reference string, or list thereof.  None for an empty input.

        """
        if isinstance(ref_dict, (list, tuple)):
            return [ConversionMixin.dict2ref(x) for x in ref_dict]
        if not ref_dict:
            return
        if 'sequence' not in ref_dict and 'number' in ref_dict:
            ref_dict = ref_dict.copy()  # avoid modifying the caller's dict
            ref_dict['sequence'] = ref_dict.pop('number')
        if 'date' not in ref_dict and 'start_time' in ref_dict:
            ref_dict = ref_dict.copy()
            if isinstance(ref_dict['start_time'], str):
                ref_dict['date'] = ref_dict['start_time'][:10]
            else:
                ref_dict['date'] = ref_dict['start_time'].date()
        # Format each field according to its own type rather than assuming all fields are
        # either parsed or unparsed
        date, sequence = ref_dict['date'], ref_dict['sequence']
        date_str = date if isinstance(date, str) else format(date, '%Y-%m-%d')
        sequence_str = sequence if isinstance(sequence, str) else format(sequence, 'd')
        return '{}_{}_{:s}'.format(date_str, sequence_str, ref_dict['subject'])
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
def one_path_from_dataset(dset, one_cache):
    """Return the local ONE file path(s) for one or more dataset records from REST.

    This is `path_from_dataset` called with the ONE cache directory as the root path and
    without the dataset UUID in the file name.  Unlike `to_eid`, this function does not
    require ONE, and the dataset may not exist.

    Parameters
    ----------
    dset : dict, list
        Dataset dictionary or list of dictionaries from Alyx rest endpoint.
    one_cache : str, pathlib.Path, pathlib.PurePath
        The local ONE data cache directory.

    Returns
    -------
    one.alf.path.ALFPath
        The local path for a given dataset.

    """
    options = {'root_path': one_cache, 'uuid': False}
    return path_from_dataset(dset, **options)
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
def path_from_dataset(dset, root_path=PurePosixALFPath('/'), repository=None, uuid=False):
    """Returns the local file path from a dset record from a REST query.

    Unlike `to_eid`, this function does not require ONE, and the dataset may not exist.

    Parameters
    ----------
    dset : dict, list
        Dataset dictionary or list of dictionaries from Alyx rest endpoint.
    root_path : str, pathlib.Path, pathlib.PurePath
        The prefix path such as the ONE download directory or remote http server root.
    repository : str, None
        Which data repository to use from the file_records list, defaults to the first
        file record that has a data URL.
    uuid : bool
        If True, the file path will contain the dataset UUID.

    Returns
    -------
    one.alf.path.ALFPath, list
        File path or list of paths.

    Raises
    ------
    StopIteration
        No file record matches the requested repository (or none has a data URL).

    """
    if isinstance(dset, list):
        # Forward all options so that list inputs are treated the same as single records
        return [
            path_from_dataset(d, root_path=root_path, repository=repository, uuid=uuid)
            for d in dset
        ]
    if repository:
        fr = next((fr for fr in dset['file_records'] if fr['data_repository'] == repository))
    else:
        fr = next((fr for fr in dset['file_records'] if fr['data_url']))
    # The dataset UUID is the last 36 characters of the dataset URL
    uuid = dset['url'][-36:] if uuid else None
    return path_from_filerecord(fr, root_path=root_path, uuid=uuid)
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
def path_from_filerecord(fr, root_path=PurePosixALFPath('/'), uuid=None):
    """Returns a data file Path constructed from an Alyx file record.

    The Path type returned depends on the type of root_path: If root_path is a string an ALFPath
    object is returned, otherwise if the root_path is a PurePath, a PureALFPath is returned.

    Parameters
    ----------
    fr : dict, list of dict
        An Alyx file record dict, or a list of such records.
    root_path : str, pathlib.Path
        An optional root path.
    uuid : str, uuid.UUID
        An optional dataset UUID to add to the file name.

    Returns
    -------
    one.alf.path.ALFPath, list
        A filepath as a pathlib object, or a list of them if a list of records was passed.

    """
    if isinstance(fr, list):
        # Forward root_path and uuid so list input behaves the same as single-record input
        return [path_from_filerecord(f, root_path=root_path, uuid=uuid) for f in fr]
    repo_path = fr['data_repository_path']
    # Remove a single slash at the start, if any; startswith also tolerates an empty string
    if repo_path.startswith('/'):
        repo_path = repo_path[1:]
    file_path = PurePosixALFPath(repo_path, fr['relative_path'])
    if root_path:
        # NB: this function won't cast any PurePaths
        root_path = ensure_alf_path(root_path)
        file_path = root_path / file_path
    return file_path.with_uuid(uuid) if uuid else file_path
|
|
714
|
+
|
|
715
|
+
|
|
716
|
+
def session_record2path(session, root_dir=None):
    """Build a session path from a session record.

    When the record has a non-empty lab, the result has the form
    root_dir/lab/Subjects/subject/yyyy-mm-dd/nnn, otherwise root_dir/subject/yyyy-mm-dd/nnn.

    Parameters
    ----------
    session : Mapping
        A session record with keys ('subject', 'date', 'number'[, 'lab']).
    root_dir : str, pathlib.Path, pathlib.PurePath
        A root directory to prepend. If empty or None, a relative path is returned.

    Returns
    -------
    one.alf.path.ALFPath, one.alf.path.PureALFPath
        A constructed path of the session.

    Examples
    --------
    >>> session_record2path({'subject': 'ALK01', 'date': '2020-01-01', 'number': 1})
    PurePosixPath('ALK01/2020-01-01/001')

    >>> record = {'date': datetime.datetime.fromisoformat('2020-01-01').date(),
    ...           'number': '001', 'lab': 'foo', 'subject': 'ALK01'}
    >>> session_record2path(record, Path('/home/user'))
    Path('/home/user/foo/Subjects/ALK01/2020-01-01/001')

    """
    lab = session.get('lab') or ''
    parts = (
        lab,
        'Subjects' if lab else '',  # the 'Subjects' folder is only present alongside a lab
        session['subject'],
        str(session['date']),
        str(session['number']).zfill(3),  # session number is zero-padded to three digits
    )
    session_path = PurePosixALFPath(*parts)
    if root_dir:
        return ensure_alf_path(root_dir).joinpath(session_path)
    return session_path
|
|
753
|
+
|
|
754
|
+
|
|
755
|
+
def ses2records(ses: dict):
    """Convert a remote session data record into a session record and a datasets frame.

    Parameters
    ----------
    ses : dict
        Session dictionary from Alyx REST endpoint.

    Returns
    -------
    pd.Series
        Session record, named by the session UUID.
    pd.DataFrame
        Datasets frame, indexed by ('eid', 'id'). A copy of the empty datasets frame is returned
        when the session has no related datasets.

    """
    # Session record: the ID comes from the 'id' field when present (as in the session_info
    # field of a probe insertion), otherwise from the last 36 characters of the URL
    eid = UUID(ses.get('id') or ses['url'][-36:])
    wanted = ('subject', 'start_time', 'lab', 'number', 'task_protocol', 'projects')
    fields = {key: value for key, value in ses.items() if key in wanted}
    session = pd.Series(data=fields, name=eid)
    session = session.rename({'start_time': 'date'})
    projects = session.pop('projects')
    session['projects'] = ','.join(projects)
    session['date'] = datetime.datetime.fromisoformat(session['date']).date()

    related = ses.get('data_dataset_session_related')
    if not related:
        return session, EMPTY_DATASETS_FRAME.copy()

    # Datasets table: one record per related dataset
    def _dataset_record(dataset):
        dataset_id = UUID(dataset['id'])
        record = {
            'file_size': dataset['file_size'],
            'hash': dataset['hash'],
            'exists': True,
            'id': dataset_id,
            'eid': session.name,
        }
        url_path = urllib.parse.urlsplit(dataset['data_url'], allow_fragments=False).path
        alf_path = get_alf_path(remove_uuid_string(url_path.strip('/')))
        session_part = get_session_path(alf_path).as_posix()
        # The relative path is whatever follows the session part of the ALF path
        record['rel_path'] = alf_path[len(session_part):].strip('/')
        # 'default_revision' is compared against the string 'True' to obtain a bool
        record['default_revision'] = dataset['default_revision'] == 'True'
        record['qc'] = dataset.get('qc', 'NOT_SET')
        return record

    records = [_dataset_record(dataset) for dataset in related]
    datasets = pd.DataFrame(records).astype(EMPTY_DATASETS_FRAME.dtypes)
    datasets = datasets.set_index(['eid', 'id']).sort_index()
    return session, datasets
|
|
802
|
+
|
|
803
|
+
|
|
804
|
+
def datasets2records(datasets, additional=None) -> pd.DataFrame:
    """Extract datasets DataFrame from one or more Alyx dataset records.

    Datasets without a file record that both has a data URL and is marked as existing are
    skipped.

    Parameters
    ----------
    datasets : dict, list
        One or more records from the Alyx 'datasets' endpoint.
    additional : list of str
        A set of optional fields to extract from dataset records.

    Returns
    -------
    pd.DataFrame
        Datasets frame. When no record is accessible, a copy of the empty datasets frame is
        returned.

    Examples
    --------
    >>> datasets = ONE().alyx.rest('datasets', 'list', subject='foobar')
    >>> df = datasets2records(datasets)

    """
    records = []

    for d in ensure_list(datasets):
        # Use the first file record that has a data URL and is flagged as existing
        file_record = next((x for x in d['file_records'] if x['data_url'] and x['exists']), None)
        if not file_record:
            continue  # Ignore files that are not accessible
        rec = dict(file_size=d['file_size'], hash=d['hash'], exists=True)
        # Dataset and session IDs are taken from the last 36 characters of their URLs
        rec['id'] = UUID(d['url'][-36:])
        rec['eid'] = UUID(d['session'][-36:]) if d['session'] else pd.NA
        data_url = urllib.parse.urlsplit(file_record['data_url'], allow_fragments=False)
        file_path = get_alf_path(data_url.path.strip('/'))
        file_path = remove_uuid_string(file_path).as_posix()
        # A dataset may have no session part in its path, in which case the whole path is kept
        session_path = get_session_path(file_path) or ''
        if session_path:
            session_path = session_path.as_posix()
        rec['rel_path'] = file_path[len(session_path):].strip('/')
        rec['default_revision'] = d['default_dataset']
        rec['qc'] = d.get('qc')
        for field in additional or []:
            rec[field] = d.get(field)
        records.append(rec)

    if not records:
        # Return a copy (as ses2records does) so that callers modifying the result in place
        # cannot alter the shared EMPTY_DATASETS_FRAME object
        return EMPTY_DATASETS_FRAME.copy()
    index = EMPTY_DATASETS_FRAME.index.names
    return pd.DataFrame(records).set_index(index).sort_index().astype(EMPTY_DATASETS_FRAME.dtypes)
|