ONE-api 3.0b3__py3-none-any.whl → 3.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/LICENSE +21 -21
  2. {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/METADATA +115 -115
  3. ONE_api-3.0b4.dist-info/RECORD +37 -0
  4. one/__init__.py +2 -2
  5. one/alf/__init__.py +1 -1
  6. one/alf/cache.py +640 -653
  7. one/alf/exceptions.py +105 -105
  8. one/alf/io.py +876 -876
  9. one/alf/path.py +1450 -1450
  10. one/alf/spec.py +519 -519
  11. one/api.py +2949 -2973
  12. one/converters.py +850 -850
  13. one/params.py +414 -414
  14. one/registration.py +845 -845
  15. one/remote/__init__.py +1 -1
  16. one/remote/aws.py +313 -313
  17. one/remote/base.py +142 -142
  18. one/remote/globus.py +1254 -1254
  19. one/tests/fixtures/params/.caches +6 -6
  20. one/tests/fixtures/params/.test.alyx.internationalbrainlab.org +8 -8
  21. one/tests/fixtures/rest_responses/1f187d80fd59677b395fcdb18e68e4401bfa1cc9 +1 -1
  22. one/tests/fixtures/rest_responses/47893cf67c985e6361cdee009334963f49fb0746 +1 -1
  23. one/tests/fixtures/rest_responses/535d0e9a1e2c1efbdeba0d673b131e00361a2edb +1 -1
  24. one/tests/fixtures/rest_responses/6dc96f7e9bcc6ac2e7581489b9580a6cd3f28293 +1 -1
  25. one/tests/fixtures/rest_responses/db1731fb8df0208944ae85f76718430813a8bf50 +1 -1
  26. one/tests/fixtures/rest_responses/dcce48259bb929661f60a02a48563f70aa6185b3 +1 -1
  27. one/tests/fixtures/rest_responses/f530d6022f61cdc9e38cc66beb3cb71f3003c9a1 +1 -1
  28. one/tests/fixtures/test_dbs.json +14 -14
  29. one/util.py +524 -524
  30. one/webclient.py +1366 -1354
  31. ONE_api-3.0b3.dist-info/RECORD +0 -37
  32. {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/WHEEL +0 -0
  33. {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/top_level.txt +0 -0
one/converters.py CHANGED
@@ -1,850 +1,850 @@
1
- """A module for inter-converting experiment identifiers.
2
-
3
- There are multiple ways to uniquely identify an experiment:
4
- - eid (UUID) : An experiment UUID (or 36 char hexadecimal string)
5
- - np (int64) : An experiment UUID encoded as 2 int64s
6
- - path (Path) : A pathlib ALF path of the form `<lab>/Subjects/<subject>/<date>/<number>`
7
- - ref (str) : An experiment reference string of the form `yyyy-mm-dd_n_subject`
8
- - url (str) : A remote http session path of the form `<lab>/Subjects/<subject>/<date>/<number>`
9
- """
10
- import re
11
- import functools
12
- import datetime
13
- import urllib.parse
14
- from uuid import UUID
15
- from inspect import unwrap
16
- from pathlib import Path
17
- from typing import Optional, Union, Mapping, List, Iterable as Iter
18
-
19
- import pandas as pd
20
- from iblutil.util import Bunch, Listable, ensure_list
21
-
22
- from one.alf.spec import is_session_path, is_uuid_string, is_uuid
23
- from one.alf.cache import EMPTY_DATASETS_FRAME
24
- from one.alf.path import (
25
- ALFPath, PurePosixALFPath, ensure_alf_path, get_session_path, get_alf_path, remove_uuid_string)
26
- from one.util import LazyId
27
-
28
-
29
def recurse(func):
    """Decorator that maps a method over its first argument when that is a non-string iterable.

    The decorated method may then be called with either a single value or a collection of
    values; in the latter case a list of results is returned. The wrapper assumes the first
    positional argument is the object the method is bound to, and the second is the value to
    (possibly) iterate over. Strings, mappings, and pandas Series/DataFrames are passed through
    as single values; LazyId objects are always iterated over.

    Parameters
    ----------
    func : function
        A method to decorate.

    Returns
    -------
    function
        The decorated method.

    """
    @functools.wraps(func)
    def wrapper_decorator(*args, **kwargs):
        # With fewer than two positional args there is no value to iterate over
        if len(args) < 2:
            return func(*args, **kwargs)
        obj, first, *rest = args
        if not isinstance(first, LazyId):
            atomic = (str, Mapping, pd.Series, pd.DataFrame)
            if not isinstance(first, Iter) or isinstance(first, atomic):
                # Single value: call the method once, unchanged
                return func(obj, first, *rest, **kwargs)
        # Collection (or LazyId): call the method once per item
        return [func(obj, item, *rest, **kwargs) for item in first]
    return wrapper_decorator
60
-
61
-
62
def parse_values(func):
    """Decorator that converts the str values of a returned reference dict to typed values.

    The wrapped function gains a `parse` keyword argument (default True). When true, the
    'date' value of each returned reference is converted to a `datetime.date` and the
    'sequence' value to an int. String results are returned untouched.

    Examples
    --------
    >>> parse_values(lambda x: x)({'date': '2020-01-01', 'sequence': '001'}, parse=True)
    {'date': datetime.date(2020, 1, 1), 'sequence': 1}

    """
    def parse_ref(ref):
        # Empty or None references are returned as they are
        if not ref:
            return ref
        date = ref['date']
        if isinstance(date, str):
            if len(date) == 10:  # plain 'yyyy-mm-dd'
                ref['date'] = datetime.date.fromisoformat(date)
            else:  # longer ISO string, e.g. including a time component
                ref['date'] = datetime.datetime.fromisoformat(date).date()
        ref['sequence'] = int(ref['sequence'])
        return ref

    @functools.wraps(func)
    def wrapper_decorator(*args, **kwargs):
        # `parse` is consumed here and never reaches the wrapped function
        parse = kwargs.pop('parse', True)
        ref = func(*args, **kwargs)
        if not parse or isinstance(ref, str):
            return ref
        if isinstance(ref, (list, LazyId)):
            return [parse_ref(item) for item in ref]
        return parse_ref(ref)
    return wrapper_decorator
92
-
93
-
94
class ConversionMixin:
    """A mixin providing methods to inter-convert experiment identifiers.

    The methods read `self._cache` (a mapping holding 'sessions' and 'datasets' tables) and
    `self.cache_dir`; some additionally call `self.search`, `self.get_details` and
    `self.list_datasets`, or read `self._web_client` and `self.uuid_filenames`. These are
    expected to be provided by the class this mixin is combined with.
    """

    def __init__(self):
        self._cache = None
        self._par = None

    @recurse
    def to_eid(self,
               id: Listable(Union[str, Path, UUID, dict]) = None,
               cache_dir: Optional[Union[str, Path]] = None) -> Listable(UUID):
        """Given any kind of experiment identifier, return a corresponding eid.

        NB: Currently does not support integer IDs.

        Parameters
        ----------
        id : str, pathlib.Path, UUID, dict, tuple, list
            An experiment identifier
        cache_dir : pathlib.Path, str
            An optional cache directory path for intermittent conversion to path

        Returns
        -------
        uuid.UUID, None
            An experiment ID or None if session not in cache

        Raises
        ------
        ValueError
            Input ID invalid

        """
        # TODO Could add np2str here
        if id is None:
            return
        elif isinstance(id, (UUID, LazyId)):
            return id
        elif self.is_exp_ref(id):
            return self.ref2eid(id)
        elif isinstance(id, dict):
            # A session dict is first converted to a session path, then handled as a Path below
            assert {'subject', 'number', 'lab'}.issubset(id)
            root = Path(cache_dir or self.cache_dir)
            id = root.joinpath(
                id['lab'],
                'Subjects', id['subject'],
                str(id.get('date') or id['start_time'][:10]),
                ('%03d' % id['number']))

        if isinstance(id, Path):
            return self.path2eid(id)
        elif isinstance(id, str):
            if is_session_path(id) or get_session_path(id):
                return self.path2eid(id)
            if len(id) > 36:
                # Keep only the trailing 36 characters, e.g. when the UUID ends a longer string
                id = id[-36:]
            if not is_uuid_string(id):
                raise ValueError('Invalid experiment ID')
            else:
                return UUID(id)
        else:
            raise ValueError('Unrecognized experiment ID')

    @recurse
    def eid2path(self, eid: str) -> Optional[Listable(ALFPath)]:
        """From an experiment id or a list of experiment ids, gets the local cache path.

        Parameters
        ----------
        eid : str, uuid.UUID
            Experiment ID (UUID) or list of UUIDs.

        Returns
        -------
        one.alf.path.ALFPath
            A session path, or None if the sessions table is empty or the eid is not in it.

        Raises
        ------
        ValueError
            The eid is not a valid UUID.

        """
        # An invalid ID is an error, as opposed to a valid ID that is simply not in the cache
        if not is_uuid(eid):
            raise ValueError(f"{eid} is not a valid eID/UUID string")
        if isinstance(eid, str):
            eid = UUID(eid)
        if self._cache['sessions'].size == 0:
            return

        # load path from cache
        try:
            ses = self._cache['sessions'].loc[eid].squeeze()
            assert isinstance(ses, pd.Series), 'Duplicate eids in sessions table'
            return session_record2path(ses.to_dict(), self.cache_dir)
        except KeyError:
            return

    @recurse
    def path2eid(self, path_obj):
        """From a local path, gets the experiment id.

        Parameters
        ----------
        path_obj : pathlib.Path, str
            Local path or list of local paths.

        Returns
        -------
        eid, list
            Experiment ID (eid) or list of eids. None if the path contains no session part,
            or no matching session is in the cache.

        """
        # else ensure the path ends with mouse,date, number
        session_path = get_session_path(path_obj)
        sessions = self._cache['sessions']

        # if path does not have a date and a number, or cache is empty return None
        if session_path is None or sessions.size == 0:
            return None

        # reduce session records from cache
        toDate = datetime.date.fromisoformat
        subject, date, number = session_path.parts[-3:]
        for col, val in zip(('subject', 'date', 'number'), (subject, toDate(date), int(number))):
            sessions = sessions[sessions[col] == val]
            if sessions.size == 0:
                return

        assert len(sessions) == 1

        eid, = sessions.index.values
        return eid

    @recurse
    def path2record(self, path) -> pd.Series:
        """Convert a file or session path to a dataset or session cache record.

        NB: Assumes <lab>/Subjects/<subject>/<date>/<number> pattern.

        Parameters
        ----------
        path : str, pathlib.Path
            Local path or HTTP URL.

        Returns
        -------
        pandas.Series
            A cache file record, or None if no matching record was found.

        """
        path = ALFPath(path)
        is_session = is_session_path(path)
        if self._cache['sessions' if is_session else 'datasets'].empty:
            return  # short circuit: no records in the cache

        if is_session:
            lab, subject, date, number = path.session_parts
            df = self._cache['sessions']
            rec = df[
                (df['lab'] == lab) & (df['subject'] == subject) &
                (df['number'] == int(number)) &
                (df['date'] == datetime.date.fromisoformat(date))
            ]
            return None if rec.empty else rec.squeeze()

        # If there's a UUID in the path, use that to fetch the record
        name_parts = path.stem.split('.')
        if is_uuid_string(uuid := name_parts[-1]):
            try:
                return self._cache['datasets'].loc[pd.IndexSlice[:, UUID(uuid)], :].squeeze()
            except KeyError:
                return

        # Fetch via session record
        eid = self.path2eid(path)
        if not eid:
            return  # session not found; avoid listing datasets without an eid
        df = self.list_datasets(eid, details=True)
        if df.empty:
            return

        # Find row where relative path matches
        rec = df[df['rel_path'] == path.relative_to_session().as_posix()]
        assert len(rec) < 2, 'Multiple records found'
        if rec.empty:
            return None
        # Convert slice to series and reinstate eid index if dropped
        return rec.squeeze().rename(index=(eid, rec.index.get_level_values('id')[0]))

    @recurse
    def path2url(self, filepath):
        """Given a local file path, constructs the URL of the remote file.

        Parameters
        ----------
        filepath : str, pathlib.Path
            A local file path

        Returns
        -------
        str
            A remote URL string, or None if no cache record was found for the path.

        """
        record = self.path2record(filepath)
        if record is None:
            return
        return self.record2url(record)

    def record2url(self, record):
        """Convert a session or dataset record to a remote URL.

        NB: Requires online instance

        Parameters
        ----------
        record : pd.Series, pd.DataFrame
            A datasets or sessions cache record. If DataFrame, iterate over and returns list.

        Returns
        -------
        str, list
            A dataset URL or list if input is DataFrame

        Raises
        ------
        TypeError
            The record is neither a pandas Series nor a DataFrame.

        """
        webclient = getattr(self, '_web_client', False)
        assert webclient, 'No Web client found for instance'
        # FIXME Should be OneAlyx converter only
        if isinstance(record, pd.DataFrame):
            return [self.record2url(r) for _, r in record.iterrows()]
        elif isinstance(record, pd.Series):
            # A record without a 'rel_path' field is treated as a session record
            is_session_record = 'rel_path' not in record
            if is_session_record:
                # NB: This assumes the root path is in the webclient URL
                session_spec = '{lab}/Subjects/{subject}/{date}/{number:03d}'
                url = record.get('session_path') or session_spec.format(**record)
                return webclient.rel_path2url(url)
        else:
            raise TypeError(
                f'record must be pandas.DataFrame or pandas.Series, got {type(record)} instead')
        if 'session_path' in record:
            # Check for session_path field (aggregate datasets have no eid in name)
            session_path = record['session_path']
            uuid = record.name if isinstance(record.name, UUID) else record.name[-1]
        else:
            assert isinstance(record.name, tuple) and len(record.name) == 2
            eid, uuid = record.name  # must be (eid, did)
            session_path = get_alf_path(self.eid2path(eid))
        url = PurePosixALFPath(session_path, record['rel_path'])
        return webclient.rel_path2url(url.with_uuid(uuid).as_posix())

    def record2path(self, dataset) -> Union[ALFPath, List[ALFPath]]:
        """Given a set of dataset records, returns the corresponding paths.

        Parameters
        ----------
        dataset : pd.DataFrame, pd.Series
            A datasets dataframe slice.

        Returns
        -------
        one.alf.path.ALFPath, list
            File path for the record, or a list of paths if input is a DataFrame.

        Raises
        ------
        TypeError
            The input is neither a pandas Series nor a DataFrame.
        ValueError
            The session path could not be determined from the record's eid.

        """
        if isinstance(dataset, pd.DataFrame):
            return [self.record2path(r) for _, r in dataset.iterrows()]
        elif not isinstance(dataset, pd.Series):
            raise TypeError(
                f'record must be pandas.DataFrame or pandas.Series, got {type(dataset)} instead')
        assert isinstance(dataset.name, tuple) and len(dataset.name) == 2
        eid, uuid = dataset.name  # must be (eid, did)
        if not (session_path := self.eid2path(eid)):
            raise ValueError(f'Failed to determine session path for eid "{eid}"')
        file = session_path / dataset['rel_path']
        if self.uuid_filenames:
            file = file.with_uuid(uuid)
        return file

    @recurse
    def eid2ref(self, eid: Union[str, Iter], as_dict=True, parse=True) \
            -> Union[str, Mapping, List]:
        """Get human-readable session ref from an experiment ID.

        Parameters
        ----------
        eid : str, uuid.UUID
            The experiment uuid to find reference for.
        as_dict : bool
            If false a string is returned in the form 'yyyy-mm-dd_n_subject'.
        parse : bool
            If true, the reference date and sequence are parsed from strings to their respective
            data types.

        Returns
        -------
        dict, str, list
            One or more objects with keys ('subject', 'date', 'sequence'), or strings with the
            form yyyy-mm-dd_n_subject.

        Examples
        --------
        >>> eid = '4e0b3320-47b7-416e-b842-c34dc9004cf8'
        >>> one.eid2ref(eid)
        {'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1}
        >>> one.eid2ref(eid, parse=False)
        {'subject': 'flowers', 'date': '2018-07-13', 'sequence': '001'}
        >>> one.eid2ref(eid, as_dict=False)
        '2018-07-13_1_flowers'
        >>> one.eid2ref(eid, as_dict=False, parse=False)
        '2018-07-13_001_flowers'
        >>> one.eid2ref([eid, '7dc3c44b-225f-4083-be3d-07b8562885f4'])
        [{'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1},
         {'subject': 'KS005', 'date': datetime.date(2019, 4, 11), 'sequence': 1}]

        """
        d = self.get_details(eid)
        if parse:
            ref = {'subject': d['subject'], 'date': d['date'], 'sequence': d['number']}
            format_str = '{date:%Y-%m-%d}_{sequence:d}_{subject:s}'
        else:
            # Unparsed: date as string and sequence as a zero-padded, 3-character string
            ref = {
                'subject': d['subject'], 'date': str(d['date']), 'sequence': '%03d' % d['number']
            }
            format_str = '{date:s}_{sequence:s}_{subject:s}'
        return Bunch(ref) if as_dict else format_str.format(**ref)

    @recurse
    def ref2eid(self, ref: Union[Mapping, str, Iter]) -> Union[UUID, List]:
        """Returns experiment uuid, given one or more experiment references.

        Parameters
        ----------
        ref : str, dict, list
            One or more objects with keys ('subject', 'date', 'sequence'), or strings with
            the form yyyy-mm-dd_n_subject.

        Returns
        -------
        uuid.UUID, list
            One or more experiment uuids.

        Examples
        --------
        >>> base = 'https://test.alyx.internationalbrainlab.org'
        >>> one = ONE(username='test_user', password='TapetesBloc18', base_url=base)
        Connected to...
        >>> ref = {'date': datetime(2018, 7, 13).date(), 'sequence': 1, 'subject': 'flowers'}
        >>> one.ref2eid(ref)
        UUID('4e0b3320-47b7-416e-b842-c34dc9004cf8')
        >>> one.ref2eid(['2018-07-13_1_flowers', '2019-04-11_1_KS005'])
        [UUID('4e0b3320-47b7-416e-b842-c34dc9004cf8'),
         UUID('7dc3c44b-225f-4083-be3d-07b8562885f4')]

        """
        ref = self.ref2dict(ref, parse=False)  # Ensure dict
        session = self.search(
            subject=ref['subject'],
            date_range=str(ref['date']),
            number=ref['sequence'])
        assert len(session) == 1, 'session not found'
        return session[0]

    @recurse
    def ref2path(self, ref):
        """Convert one or more experiment references to session path(s).

        Parameters
        ----------
        ref : str, dict, list
            One or more objects with keys ('subject', 'date', 'sequence'), or strings with
            the form yyyy-mm-dd_n_subject.

        Returns
        -------
        one.alf.path.ALFPath
            Path object(s) for the experiment session(s).

        Examples
        --------
        >>> base = 'https://test.alyx.internationalbrainlab.org'
        >>> one = ONE(username='test_user', password='TapetesBloc18', base_url=base)
        Connected to...
        >>> ref = {'subject': 'flowers', 'date': datetime(2018, 7, 13).date(), 'sequence': 1}
        >>> one.ref2path(ref)
        WindowsPath('E:/FlatIron/zadorlab/Subjects/flowers/2018-07-13/001')
        >>> one.ref2path(['2018-07-13_1_flowers', '2019-04-11_1_KS005'])
        [WindowsPath('E:/FlatIron/zadorlab/Subjects/flowers/2018-07-13/001'),
         WindowsPath('E:/FlatIron/cortexlab/Subjects/KS005/2019-04-11/001')]

        """
        # Iteration is already handled by this method's own decorator, so the undecorated
        # converters are called directly (hence `self` is passed explicitly)
        eid2path = unwrap(self.eid2path)
        ref2eid = unwrap(self.ref2eid)
        return eid2path(self, ref2eid(self, ref))

    @staticmethod
    @parse_values
    def path2ref(path_str: Union[str, Path, Iter], as_dict=True) -> Union[Bunch, List]:
        """Returns a human-readable experiment reference, given a session path.

        The path need not exist.

        Parameters
        ----------
        path_str : str
            A path to a given session.
        as_dict : bool
            If True a Bunch is returned, otherwise a string.

        Returns
        -------
        dict, str, list
            One or more objects with keys ('subject', 'date', 'sequence'). None if the path
            does not contain a valid subject/date/sequence pattern.

        Examples
        --------
        >>> path_str = Path('E:/FlatIron/Subjects/zadorlab/flowers/2018-07-13/001')
        >>> path2ref(path_str)
        {'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1}
        >>> path2ref(path_str, parse=False)
        {'subject': 'flowers', 'date': '2018-07-13', 'sequence': '001'}
        >>> path_str2 = Path('E:/FlatIron/Subjects/churchlandlab/CSHL046/2020-06-20/002')
        >>> path2ref([path_str, path_str2])
        [{'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1},
         {'subject': 'CSHL046', 'date': datetime.date(2020, 6, 20), 'sequence': 2}]

        """
        if isinstance(path_str, (list, tuple)):
            # NOTE(review): `as_dict` is not forwarded here, so list input always yields
            # dict-like references - confirm whether this is intended before changing it
            return [unwrap(ConversionMixin.path2ref)(x) for x in path_str]
        pattern = r'(?P<subject>[\w-]+)([\\/])(?P<date>\d{4}-\d{2}-\d{2})(\2)(?P<sequence>\d{1,3})'
        match = re.search(pattern, str(path_str))
        if match and not re.match(r'^0\d$', match.groups()[-1]):  # e.g. '02' not valid
            ref = match.groupdict()
            return Bunch(ref) if as_dict else '{date:s}_{sequence:s}_{subject:s}'.format(**ref)

    @staticmethod
    def is_exp_ref(ref: Union[str, Mapping, Iter]) -> Union[bool, List[bool]]:
        """Returns True if ref is a valid experiment reference.

        Parameters
        ----------
        ref : str, dict, list
            One or more objects with keys ('subject', 'date', 'sequence'), or strings with
            the form yyyy-mm-dd_n_subject.

        Returns
        -------
        bool, list of bool
            True if ref is valid.

        Examples
        --------
        >>> ref = {'date': datetime(2018, 7, 13).date(), 'sequence': 1, 'subject': 'flowers'}
        >>> is_exp_ref(ref)
        True
        >>> is_exp_ref('2018-07-13_001_flowers')
        True
        >>> is_exp_ref('invalid_ref')
        False

        """
        if isinstance(ref, (list, tuple)):
            return [ConversionMixin.is_exp_ref(x) for x in ref]
        if isinstance(ref, (Bunch, dict)):
            if not {'subject', 'date', 'sequence'}.issubset(ref):
                return False
            # Validate dict-like refs through their string representation
            ref = '{date}_{sequence}_{subject}'.format(**ref)
        elif not isinstance(ref, str):
            return False
        return re.compile(r'\d{4}(-\d{2}){2}_(\d{1,3})_\w+').match(ref) is not None

    @staticmethod
    @parse_values
    def ref2dict(ref: Union[str, Mapping, Iter]) -> Union[Bunch, List]:
        """Returns a Bunch (dict-like) from a reference string (or list thereof).

        Parameters
        ----------
        ref : str, list
            One or more experiment reference strings.

        Returns
        -------
        iblutil.util.Bunch
            A Bunch with keys ('subject', 'sequence', 'date').

        Examples
        --------
        >>> ref2dict('2018-07-13_1_flowers')
        {'date': datetime.date(2018, 7, 13), 'sequence': 1, 'subject': 'flowers'}
        >>> ref2dict('2018-07-13_001_flowers', parse=False)
        {'date': '2018-07-13', 'sequence': '001', 'subject': 'flowers'}
        >>> ref2dict(['2018-07-13_1_flowers', '2020-01-23_002_ibl_witten_01'])
        [{'date': datetime.date(2018, 7, 13), 'sequence': 1, 'subject': 'flowers'},
         {'date': datetime.date(2020, 1, 23), 'sequence': 2, 'subject': 'ibl_witten_01'}]

        """
        if isinstance(ref, (list, tuple)):
            return [ConversionMixin.ref2dict(x) for x in ref]
        if isinstance(ref, (Bunch, dict)):
            return Bunch(ref)  # Short circuit
        # Split on the first two underscores only, as the subject may itself contain them
        ref = dict(zip(['date', 'sequence', 'subject'], ref.split('_', 2)))
        return Bunch(ref)

    @staticmethod
    def dict2ref(ref_dict) -> Union[str, List]:
        """Convert an experiment reference dict to a string in the format yyyy-mm-dd_n_subject.

        Parameters
        ----------
        ref_dict : dict, Bunch, list, tuple
            A map with the keys ('subject', 'date', 'sequence').

        Returns
        -------
        str, list:
            An experiment reference string, or list thereof. None if the input is empty.

        """
        if isinstance(ref_dict, (list, tuple)):
            return [ConversionMixin.dict2ref(x) for x in ref_dict]
        if not ref_dict:
            return
        # Accept 'number' and 'start_time' as substitutes for 'sequence' and 'date'.
        # The input is copied before modification so the caller's map is left untouched.
        if 'sequence' not in ref_dict and 'number' in ref_dict:
            ref_dict = ref_dict.copy()
            ref_dict['sequence'] = ref_dict.pop('number')
        if 'date' not in ref_dict and 'start_time' in ref_dict:
            ref_dict = ref_dict.copy()
            if isinstance(ref_dict['start_time'], str):
                ref_dict['date'] = ref_dict['start_time'][:10]
            else:
                ref_dict['date'] = ref_dict['start_time'].date()
        # If any value is not a string, the typed (parsed) format specifiers are used
        parsed = any(not isinstance(k, str) for k in ref_dict.values())
        format_str = ('{date:%Y-%m-%d}_{sequence:d}_{subject:s}'
                      if parsed
                      else '{date:s}_{sequence:s}_{subject:s}')
        return format_str.format(**ref_dict)
628
-
629
-
630
def one_path_from_dataset(dset, one_cache):
    """Return the local ONE file path(s) for one or more dataset records from REST.

    This is a convenience wrapper around `path_from_dataset` that uses the ONE cache directory
    as the root path and never adds the dataset UUID to the file name. Unlike `to_eid`, this
    function does not require ONE, and the dataset may not exist.

    Parameters
    ----------
    dset : dict, list
        Dataset dictionary or list of dictionaries from Alyx rest endpoint.
    one_cache : str, pathlib.Path, pathlib.PurePath
        The local ONE data cache directory.

    Returns
    -------
    one.alf.path.ALFPath
        The local path for a given dataset.

    """
    local_path = path_from_dataset(dset, root_path=one_cache, uuid=False)
    return local_path
649
-
650
-
651
def path_from_dataset(dset, root_path=PurePosixALFPath('/'), repository=None, uuid=False):
    """Returns the local file path from a dset record from a REST query.

    Unlike `to_eid`, this function does not require ONE, and the dataset may not exist.

    Parameters
    ----------
    dset : dict, list
        Dataset dictionary or list of dictionaries from Alyx rest endpoint.
    root_path : str, pathlib.Path, pathlib.PurePath
        The prefix path such as the ONE download directory or remote http server root.
    repository : str, None
        Which data repository to use from the file_records list, defaults to first online
        repository.
    uuid : bool
        If True, the file path will contain the dataset UUID.

    Returns
    -------
    one.alf.path.ALFPath, list
        File path or list of paths.

    Raises
    ------
    StopIteration
        No file record matches the requested repository, or none has a data URL.

    """
    if isinstance(dset, list):
        # Forward all options so that each record is resolved like a single record would be
        return [
            path_from_dataset(d, root_path=root_path, repository=repository, uuid=uuid)
            for d in dset
        ]
    if repository:
        fr = next((fr for fr in dset['file_records'] if fr['data_repository'] == repository))
    else:
        # Default to the first file record with a non-empty data URL
        fr = next((fr for fr in dset['file_records'] if fr['data_url']))
    # The dataset UUID is taken from the last 36 characters of the record's URL
    uuid = dset['url'][-36:] if uuid else None
    return path_from_filerecord(fr, root_path=root_path, uuid=uuid)
682
-
683
-
684
def path_from_filerecord(fr, root_path=PurePosixALFPath('/'), uuid=None):
    """Returns a data file Path constructed from an Alyx file record.

    The Path type returned depends on the type of root_path: If root_path is a string an ALFPath
    object is returned, otherwise if the root_path is a PurePath, a PureALFPath is returned.

    Parameters
    ----------
    fr : dict, list
        An Alyx file record dict, or a list thereof.
    root_path : str, pathlib.Path
        An optional root path.
    uuid : str, uuid.UUID
        An optional dataset UUID to add to the file name.

    Returns
    -------
    one.alf.path.ALFPath, list
        A filepath as a pathlib object, or a list of file paths if input is a list.

    """
    if isinstance(fr, list):
        # Forward the options so that list input is handled the same as a single record
        return [path_from_filerecord(f, root_path=root_path, uuid=uuid) for f in fr]
    repo_path = fr['data_repository_path']
    if repo_path.startswith('/'):
        # Remove slash at start, if any, so the path can be joined onto the root path
        repo_path = repo_path[1:]
    file_path = PurePosixALFPath(repo_path, fr['relative_path'])
    if root_path:
        # NB: this function won't cast any PurePaths
        root_path = ensure_alf_path(root_path)
        file_path = root_path / file_path
    return file_path.with_uuid(uuid) if uuid else file_path
714
-
715
-
716
def session_record2path(session, root_dir=None):
    """Build a session path from a session record.

    When the record has a non-empty 'lab' value the path takes the form
    root_dir/lab/Subjects/subject/yyyy-mm-dd/nnn, otherwise root_dir/subject/yyyy-mm-dd/nnn.
    The session number is zero-padded to three characters.

    Parameters
    ----------
    session : Mapping
        A session record with keys ('subject', 'date', 'number'[, 'lab']).
    root_dir : str, pathlib.Path, pathlib.PurePath
        A root directory to prepend.

    Returns
    -------
    one.alf.path.ALFPath, one.alf.path.PureALFPath
        A constructed path of the session.

    Examples
    --------
    >>> session_record2path({'subject': 'ALK01', 'date': '2020-01-01', 'number': 1})
    PurePosixPath('ALK01/2020-01-01/001')

    >>> record = {'date': datetime.datetime.fromisoformat('2020-01-01').date(),
    ...           'number': '001', 'lab': 'foo', 'subject': 'ALK01'}
    >>> session_record2path(record, Path('/home/user'))
    Path('/home/user/foo/Subjects/ALK01/2020-01-01/001')

    """
    lab = session.get('lab')
    # Without a lab, the two leading parts are empty strings
    lab_parts = (lab, 'Subjects') if lab else ('', '')
    number = str(session['number']).zfill(3)
    rel_path = PurePosixALFPath(*lab_parts, session['subject'], str(session['date']), number)
    if root_dir:
        return ensure_alf_path(root_dir).joinpath(rel_path)
    return rel_path
753
-
754
-
755
def ses2records(ses: dict):
    """Extract session cache record and datasets cache from a remote session data record.

    Parameters
    ----------
    ses : dict
        Session dictionary from Alyx REST endpoint.

    Returns
    -------
    pd.Series
        Session record.
    pd.DataFrame
        Datasets frame.

    """
    # Extract session record
    # id used for session_info field of probe insertion
    eid = UUID(ses.get('id') or ses['url'][-36:])
    session_keys = ('subject', 'start_time', 'lab', 'number', 'task_protocol', 'projects')
    session_data = {k: v for k, v in ses.items() if k in session_keys}
    session = (
        pd.Series(data=session_data, name=eid).rename({'start_time': 'date'})
    )
    # Projects are stored as a single comma-separated string; the date without its time part
    session['projects'] = ','.join(session.pop('projects'))
    session['date'] = datetime.datetime.fromisoformat(session['date']).date()

    # Extract datasets table
    def _to_record(d):
        """Convert one related-dataset dict into a datasets table row (as a dict)."""
        did = UUID(d['id'])
        rec = dict(file_size=d['file_size'], hash=d['hash'], exists=True, id=did)
        rec['eid'] = session.name
        file_path = urllib.parse.urlsplit(d['data_url'], allow_fragments=False).path.strip('/')
        file_path = get_alf_path(remove_uuid_string(file_path))
        session_path = get_session_path(file_path).as_posix()
        # The relative path is what remains of the file path after the session part
        rec['rel_path'] = file_path[len(session_path):].strip('/')
        # Accept the flag both as the string 'True' and as an actual boolean
        rec['default_revision'] = d['default_revision'] in (True, 'True')
        rec['qc'] = d.get('qc', 'NOT_SET')
        return rec

    if not ses.get('data_dataset_session_related'):
        # No datasets: return a copy so the caller cannot modify the shared empty frame
        return session, EMPTY_DATASETS_FRAME.copy()
    records = map(_to_record, ses['data_dataset_session_related'])
    index = ['eid', 'id']
    dtypes = EMPTY_DATASETS_FRAME.dtypes
    datasets = pd.DataFrame(records).astype(dtypes).set_index(index).sort_index()
    return session, datasets
802
-
803
-
804
def datasets2records(datasets, additional=None) -> pd.DataFrame:
    """Extract datasets DataFrame from one or more Alyx dataset records.

    Datasets without an accessible file record (one that has a data URL and is marked as
    existing) are left out of the returned frame.

    Parameters
    ----------
    datasets : dict, list
        One or more records from the Alyx 'datasets' endpoint.
    additional : list of str
        A set of optional fields to extract from dataset records.

    Returns
    -------
    pd.DataFrame
        Datasets frame.

    Examples
    --------
    >>> datasets = ONE().alyx.rest('datasets', 'list', subject='foobar')
    >>> df = datasets2records(datasets)

    """
    records = []

    for d in ensure_list(datasets):
        file_record = next((x for x in d['file_records'] if x['data_url'] and x['exists']), None)
        if not file_record:
            continue  # Ignore files that are not accessible
        rec = dict(file_size=d['file_size'], hash=d['hash'], exists=True)
        # Dataset and session IDs are taken from the last 36 characters of their URLs
        rec['id'] = UUID(d['url'][-36:])
        rec['eid'] = UUID(d['session'][-36:]) if d['session'] else pd.NA
        data_url = urllib.parse.urlsplit(file_record['data_url'], allow_fragments=False)
        file_path = get_alf_path(data_url.path.strip('/'))
        file_path = remove_uuid_string(file_path).as_posix()
        # Not every dataset path contains a session part, in which case nothing is stripped
        session_path = get_session_path(file_path) or ''
        if session_path:
            session_path = session_path.as_posix()
        rec['rel_path'] = file_path[len(session_path):].strip('/')
        rec['default_revision'] = d['default_dataset']
        rec['qc'] = d.get('qc')
        for field in additional or []:
            rec[field] = d.get(field)
        records.append(rec)

    if not records:
        # Return a copy so the caller cannot modify the shared module-level empty frame
        return EMPTY_DATASETS_FRAME.copy()
    index = EMPTY_DATASETS_FRAME.index.names
    return pd.DataFrame(records).set_index(index).sort_index().astype(EMPTY_DATASETS_FRAME.dtypes)
1
+ """A module for inter-converting experiment identifiers.
2
+
3
+ There are multiple ways to uniquely identify an experiment:
4
+ - eid (UUID) : An experiment UUID (or 36 char hexadecimal string)
5
+ - np (int64) : An experiment UUID encoded as 2 int64s
6
+ - path (Path) : A pathlib ALF path of the form `<lab>/Subjects/<subject>/<date>/<number>`
7
+ - ref (str) : An experiment reference string of the form `yyyy-mm-dd_n_subject`
8
+ - url (str) : A remote http session path of the form `<lab>/Subjects/<subject>/<date>/<number>`
9
+ """
10
+ import re
11
+ import functools
12
+ import datetime
13
+ import urllib.parse
14
+ from uuid import UUID
15
+ from inspect import unwrap
16
+ from pathlib import Path
17
+ from typing import Optional, Union, Mapping, List, Iterable as Iter
18
+
19
+ import pandas as pd
20
+ from iblutil.util import Bunch, Listable, ensure_list
21
+
22
+ from one.alf.spec import is_session_path, is_uuid_string, is_uuid
23
+ from one.alf.cache import EMPTY_DATASETS_FRAME
24
+ from one.alf.path import (
25
+ ALFPath, PurePosixALFPath, ensure_alf_path, get_session_path, get_alf_path, remove_uuid_string)
26
+ from one.util import LazyId
27
+
28
+
29
def recurse(func):
    """Decorator that maps a method over its first argument when that is a collection.

    The wrapped method may then be called with either a single value or a list/tuple of values;
    in the latter case a list of results is returned. The decorator is intended for methods, so
    the first positional argument is taken to be the object and the second the value(s).
    Strings, maps and pandas Series/DataFrames are passed through whole; LazyId instances are
    always iterated over.

    Parameters
    ----------
    func : function
        A method to decorate.

    Returns
    -------
    function
        The decorated method.

    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Nothing to iterate over when only the object (or nothing) was passed positionally
        if len(args) < 2:
            return func(*args, **kwargs)
        obj, first, *rest = args
        passthrough = (str, Mapping, pd.Series, pd.DataFrame)
        # LazyId is tested separately so that it is iterated whatever its base classes are
        iterate = isinstance(first, LazyId) or (
            isinstance(first, Iter) and not isinstance(first, passthrough))
        if not iterate:
            return func(obj, first, *rest, **kwargs)
        return [func(obj, item, *rest, **kwargs) for item in first]
    return wrapper
60
+
61
+
62
def parse_values(func):
    """Decorator converting the str values of a returned reference dict to appropriate types.

    The decorated function gains a `parse` keyword argument (default True), which is consumed
    by the wrapper and not passed on. When `parse` is True, the 'date' value of the returned
    reference is converted to a `datetime.date` (if it is a string) and the 'sequence' value to
    an int. Lists and LazyId objects are parsed element-wise. Reference strings and empty
    results (e.g. None) are returned unchanged, whether returned alone or within a list.

    Parameters
    ----------
    func : function
        A function returning a reference dict, a reference string, None, or a list of these.

    Returns
    -------
    function
        The decorated function.

    Examples
    --------
    >>> parse_values(lambda x: x)({'date': '2020-01-01', 'sequence': '001'}, parse=True)
    {'date': datetime.date(2020, 1, 1), 'sequence': 1}

    """
    def parse_ref(ref):
        # Empty results and reference strings have no fields to convert
        if not ref or isinstance(ref, str):
            return ref
        date = ref['date']
        if isinstance(date, str):
            if len(date) == 10:  # plain yyyy-mm-dd date
                ref['date'] = datetime.date.fromisoformat(date)
            else:  # longer ISO string, e.g. with a time part
                ref['date'] = datetime.datetime.fromisoformat(date).date()
        ref['sequence'] = int(ref['sequence'])
        return ref

    @functools.wraps(func)
    def wrapper_decorator(*args, **kwargs):
        parse = kwargs.pop('parse', True)
        ref = func(*args, **kwargs)
        if not parse or isinstance(ref, str):
            return ref
        if isinstance(ref, list) or isinstance(ref, LazyId):
            return [parse_ref(item) for item in ref]
        return parse_ref(ref)
    return wrapper_decorator
92
+
93
+
94
+ class ConversionMixin:
95
+ """A mixin providing methods to inter-convert experiment identifiers."""
96
+
97
+ def __init__(self):
98
+ self._cache = None
99
+ self._par = None
100
+
101
@recurse
def to_eid(self,
           id: Listable(Union[str, Path, UUID, dict]) = None,
           cache_dir: Optional[Union[str, Path]] = None) -> Listable(UUID):
    """Given any kind of experiment identifier, return the corresponding experiment UUID.

    NB: Currently does not support integer IDs.

    Parameters
    ----------
    id : str, pathlib.Path, UUID, dict, tuple, list
        An experiment identifier: a UUID (or UUID string, or a string ending in one), an
        experiment reference, a session path, or a session dict with the keys
        ('subject', 'number', 'lab') and either 'date' or 'start_time'.
    cache_dir : pathlib.Path, str
        An optional cache directory used to build a session path from a session dict.
        Defaults to `self.cache_dir`.

    Returns
    -------
    uuid.UUID, None
        An experiment ID, or None if `id` is None. Paths and references are resolved through
        `path2eid` and `ref2eid` respectively.

    Raises
    ------
    ValueError
        Input ID invalid (not a str, Path, UUID or dict, or a string without a valid UUID).

    """
    # TODO Could add np2str here
    if id is None:
        return None
    if isinstance(id, (UUID, LazyId)):
        return id  # already an experiment ID
    if self.is_exp_ref(id):
        return self.ref2eid(id)
    if isinstance(id, dict):
        # Build the session path from the record, then resolve it below
        assert {'subject', 'number', 'lab'}.issubset(id)
        root = Path(cache_dir or self.cache_dir)
        date = str(id.get('date') or id['start_time'][:10])
        id = root.joinpath(id['lab'], 'Subjects', id['subject'], date, '%03d' % id['number'])

    if isinstance(id, Path):
        return self.path2eid(id)
    if not isinstance(id, str):
        raise ValueError('Unrecognized experiment ID')
    if is_session_path(id) or get_session_path(id):
        return self.path2eid(id)
    # Keep only the trailing 36 characters, e.g. when the ID is the end of a URL
    uuid_str = id[-36:]
    if not is_uuid_string(uuid_str):
        raise ValueError('Invalid experiment ID')
    return UUID(uuid_str)
158
+
159
@recurse
def eid2path(self, eid: str) -> Optional[Listable(ALFPath)]:
    """From an experiment ID or a list of experiment IDs, get the local cache path.

    Parameters
    ----------
    eid : str, uuid.UUID
        Experiment ID (UUID) or list of UUIDs.

    Returns
    -------
    one.alf.path.ALFPath
        A session path, or None if the sessions table is empty or has no record for the
        experiment ID.

    Raises
    ------
    ValueError
        The experiment ID is not a valid UUID.

    """
    # Invalid identifiers are an error rather than a cache miss
    if not is_uuid(eid):
        raise ValueError(f"{eid} is not a valid eID/UUID string")
    if isinstance(eid, str):
        eid = UUID(eid)
    sessions = self._cache['sessions']
    if sessions.size == 0:
        return None

    # Build the path from the session's cache record
    try:
        record = sessions.loc[eid].squeeze()
        assert isinstance(record, pd.Series), 'Duplicate eids in sessions table'
        return session_record2path(record.to_dict(), self.cache_dir)
    except KeyError:
        return None
189
+
190
@recurse
def path2eid(self, path_obj):
    """From a local path, get the experiment ID.

    Parameters
    ----------
    path_obj : pathlib.Path, str
        Local path or list of local paths.

    Returns
    -------
    eid, list
        Experiment ID (eid) or list of eids. None is returned when no session path can be
        extracted from the input, when the sessions table is empty, or when no session record
        matches.

    """
    sessions = self._cache['sessions']
    session_path = get_session_path(path_obj)

    # Without a session path (subject, date, number) or any cached sessions there is no match
    if session_path is None or sessions.size == 0:
        return None

    # Successively narrow down the session records by subject, date and number
    subject, date_str, number = session_path.parts[-3:]
    criteria = {
        'subject': subject,
        'date': datetime.date.fromisoformat(date_str),
        'number': int(number),
    }
    for column, value in criteria.items():
        sessions = sessions[sessions[column] == value]
        if sessions.size == 0:
            return None

    assert len(sessions) == 1

    (eid,) = sessions.index.values
    return eid
225
+
226
@recurse
def path2record(self, path) -> pd.Series:
    """Convert a file or session path to a dataset or session cache record.

    NB: Assumes <lab>/Subjects/<subject>/<date>/<number> pattern.

    Parameters
    ----------
    path : str, pathlib.Path
        Local path or HTTP URL.

    Returns
    -------
    pandas.Series
        A cache file record, or None if the relevant cache table is empty or no matching
        record is found.

    """
    path = ALFPath(path)
    is_session = is_session_path(path)
    if self._cache['sessions' if is_session else 'datasets'].empty:
        return None  # short circuit: no records in the cache

    if is_session:
        lab, subject, date, number = path.session_parts
        df = self._cache['sessions']
        rec = df[
            (df['lab'] == lab) & (df['subject'] == subject) &
            (df['number'] == int(number)) &
            (df['date'] == datetime.date.fromisoformat(date))
        ]
        return None if rec.empty else rec.squeeze()

    # If there's a UUID in the path, use that to fetch the record
    name_parts = path.stem.split('.')
    if is_uuid_string(uuid := name_parts[-1]):
        try:
            return self._cache['datasets'].loc[pd.IndexSlice[:, UUID(uuid)], :].squeeze()
        except KeyError:
            return None

    # Fetch via session record; only list the datasets once the session is known
    eid = self.path2eid(path)
    if not eid:
        return None
    df = self.list_datasets(eid, details=True)
    if df.empty:
        return None

    # Find row where relative path matches
    rec = df[df['rel_path'] == path.relative_to_session().as_posix()]
    assert len(rec) < 2, 'Multiple records found'
    if rec.empty:
        return None
    # Convert slice to series and reinstate eid index if dropped
    return rec.squeeze().rename(index=(eid, rec.index.get_level_values('id')[0]))
279
+
280
@recurse
def path2url(self, filepath):
    """Given a local file path, construct the URL of the remote file.

    The path is first converted to a cache record with `path2record`, which is then converted
    to a URL with `record2url`.

    Parameters
    ----------
    filepath : str, pathlib.Path
        A local file path

    Returns
    -------
    str
        A remote URL string, or None if no cache record was found for the path.

    """
    record = self.path2record(filepath)
    return None if record is None else self.record2url(record)
299
+
300
def record2url(self, record):
    """Convert a session or dataset record to a remote URL.

    NB: Requires an instance with a web client (a truthy `_web_client` attribute).

    Parameters
    ----------
    record : pd.Series, pd.DataFrame
        A datasets or sessions cache record. If DataFrame, iterate over and returns list.
        A Series without a 'rel_path' field is treated as a session record.

    Returns
    -------
    str, list
        A dataset or session URL, or a list of URLs if input is DataFrame.

    Raises
    ------
    TypeError
        The record is neither a pandas DataFrame nor a pandas Series.

    """
    webclient = getattr(self, '_web_client', False)
    assert webclient, 'No Web client found for instance'
    # FIXME Should be OneAlyx converter only
    if isinstance(record, pd.DataFrame):
        return [self.record2url(row) for _, row in record.iterrows()]
    if not isinstance(record, pd.Series):
        raise TypeError(
            f'record must be pandas.DataFrame or pandas.Series, got {type(record)} instead')

    if 'rel_path' not in record:
        # Session record. NB: This assumes the root path is in the webclient URL
        session_spec = '{lab}/Subjects/{subject}/{date}/{number:03d}'
        rel_url = record.get('session_path') or session_spec.format(**record)
        return webclient.rel_path2url(rel_url)

    # Dataset record
    if 'session_path' in record:
        # Check for session_path field (aggregate datasets have no eid in name)
        session_path = record['session_path']
        uuid = record.name if isinstance(record.name, UUID) else record.name[-1]
    else:
        assert isinstance(record.name, tuple) and len(record.name) == 2
        eid, uuid = record.name  # must be (eid, did)
        session_path = get_alf_path(self.eid2path(eid))
    dataset_path = PurePosixALFPath(session_path, record['rel_path'])
    return webclient.rel_path2url(dataset_path.with_uuid(uuid).as_posix())
341
+
342
def record2path(self, dataset) -> Optional[ALFPath]:
    """Given one or more dataset records, return the corresponding file path(s).

    Parameters
    ----------
    dataset : pd.DataFrame, pd.Series
        A datasets dataframe slice. A Series must be named with an (eid, dataset id) tuple.

    Returns
    -------
    one.alf.path.ALFPath
        File path for the record, or a list of paths if the input is a DataFrame. The dataset
        UUID is added to the file name when `self.uuid_filenames` is true.

    Raises
    ------
    TypeError
        The input is neither a pandas DataFrame nor a pandas Series.
    ValueError
        No session path could be determined for the record's experiment ID.

    """
    if isinstance(dataset, pd.DataFrame):
        return [self.record2path(row) for _, row in dataset.iterrows()]
    if not isinstance(dataset, pd.Series):
        raise TypeError(
            f'record must be pandas.DataFrame or pandas.Series, got {type(dataset)} instead')
    assert isinstance(dataset.name, tuple) and len(dataset.name) == 2
    eid, uuid = dataset.name  # must be (eid, did)
    session_path = self.eid2path(eid)
    if not session_path:
        raise ValueError(f'Failed to determine session path for eid "{eid}"')
    file = session_path / dataset['rel_path']
    return file.with_uuid(uuid) if self.uuid_filenames else file
369
+
370
@recurse
def eid2ref(self, eid: Union[str, Iter], as_dict=True, parse=True) \
        -> Union[str, Mapping, List]:
    """Get a human-readable session reference from an experiment ID.

    The subject, date and number are taken from the session details returned by
    `self.get_details`.

    Parameters
    ----------
    eid : str, uuid.UUID
        The experiment uuid to find reference for.
    as_dict : bool
        If false a string is returned in the form 'yyyy-mm-dd_n_subject'.
    parse : bool
        If true, the reference date and sequence keep the types found in the session details;
        otherwise the date is converted to a string and the sequence to a zero-padded,
        three-digit string.

    Returns
    -------
    dict, str, list
        One or more objects with keys ('subject', 'date', 'sequence'), or strings with the
        form yyyy-mm-dd_n_subject.

    Examples
    --------
    >>> eid = '4e0b3320-47b7-416e-b842-c34dc9004cf8'
    >>> one.eid2ref(eid)
    {'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1}
    >>> one.eid2ref(eid, parse=False)
    {'subject': 'flowers', 'date': '2018-07-13', 'sequence': '001'}
    >>> one.eid2ref(eid, as_dict=False)
    '2018-07-13_1_flowers'
    >>> one.eid2ref(eid, as_dict=False, parse=False)
    '2018-07-13_001_flowers'
    >>> one.eid2ref([eid, '7dc3c44b-225f-4083-be3d-07b8562885f4'])
    [{'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1},
     {'subject': 'KS005', 'date': datetime.date(2019, 4, 11), 'sequence': 1}]

    """
    details = self.get_details(eid)
    subject, date, number = details['subject'], details['date'], details['number']
    if parse:
        template = '{date:%Y-%m-%d}_{sequence:d}_{subject:s}'
    else:
        # Unparsed references hold strings only
        date, number = str(date), '%03d' % number
        template = '{date:s}_{sequence:s}_{subject:s}'
    ref = {'subject': subject, 'date': date, 'sequence': number}
    if as_dict:
        return Bunch(ref)
    return template.format(**ref)
417
+
418
@recurse
def ref2eid(self, ref: Union[Mapping, str, Iter]) -> Union[str, List]:
    """Return the experiment uuid for one or more experiment references.

    The reference is converted to a dict and the session looked up with `self.search`, using
    the subject, date and sequence number. Exactly one session must match.

    Parameters
    ----------
    ref : str, dict, list
        One or more objects with keys ('subject', 'date', 'sequence'), or strings with
        the form yyyy-mm-dd_n_subject.

    Returns
    -------
    uuid.UUID, list
        One or more experiment uuids.

    Examples
    --------
    >>> ref = {'date': datetime(2018, 7, 13).date(), 'sequence': 1, 'subject': 'flowers'}
    >>> one.ref2eid(ref)
    UUID('4e0b3320-47b7-416e-b842-c34dc9004cf8')
    >>> one.ref2eid(['2018-07-13_1_flowers', '2019-04-11_1_KS005'])
    [UUID('4e0b3320-47b7-416e-b842-c34dc9004cf8'),
     UUID('7dc3c44b-225f-4083-be3d-07b8562885f4')]

    """
    ref_dict = self.ref2dict(ref, parse=False)  # Ensure dict
    query = {
        'subject': ref_dict['subject'],
        'date_range': str(ref_dict['date']),
        'number': ref_dict['sequence'],
    }
    matches = self.search(**query)
    assert len(matches) == 1, 'session not found'
    return matches[0]
453
+
454
@recurse
def ref2path(self, ref):
    """Convert one or more experiment references to session path(s).

    Parameters
    ----------
    ref : str, dict, list
        One or more objects with keys ('subject', 'date', 'sequence'), or strings with
        the form yyyy-mm-dd_n_subject.

    Returns
    -------
    one.alf.path.ALFPath
        Path object(s) for the experiment session(s).

    Examples
    --------
    >>> ref = {'subject': 'flowers', 'date': datetime(2018, 7, 13).date(), 'sequence': 1}
    >>> one.ref2path(ref)
    WindowsPath('E:/FlatIron/zadorlab/Subjects/flowers/2018-07-13/001')
    >>> one.ref2path(['2018-07-13_1_flowers', '2019-04-11_1_KS005'])
    [WindowsPath('E:/FlatIron/zadorlab/Subjects/flowers/2018-07-13/001'),
     WindowsPath('E:/FlatIron/cortexlab/Subjects/KS005/2019-04-11/001')]

    """
    # This method already handles lists, so call the undecorated converters directly
    to_path = unwrap(self.eid2path)
    to_eid = unwrap(self.ref2eid)
    eid = to_eid(self, ref)
    return to_path(self, eid)
485
+
486
@staticmethod
@parse_values
def path2ref(path_str: Union[str, Path, Iter], as_dict=True) -> Union[Bunch, List]:
    """Return a human-readable experiment reference, given a session path.

    The path need not exist.

    Parameters
    ----------
    path_str : str
        A path to a given session.
    as_dict : bool
        If True a Bunch is returned, otherwise a string.

    Returns
    -------
    dict, str, list
        One or more objects with keys ('subject', 'date', 'sequence'). None is returned if no
        subject/date/sequence pattern is found in the path.

    Examples
    --------
    >>> path_str = Path('E:/FlatIron/Subjects/zadorlab/flowers/2018-07-13/001')
    >>> path2ref(path_str)
    {'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1}
    >>> path2ref(path_str, parse=False)
    {'subject': 'flowers', 'date': '2018-07-13', 'sequence': '001'}
    >>> path_str2 = Path('E:/FlatIron/Subjects/churchlandlab/CSHL046/2020-06-20/002')
    >>> path2ref([path_str, path_str2])
    [{'subject': 'flowers', 'date': datetime.date(2018, 7, 13), 'sequence': 1},
     {'subject': 'CSHL046', 'date': datetime.date(2020, 6, 20), 'sequence': 2}]

    """
    if isinstance(path_str, (list, tuple)):
        # NOTE(review): `as_dict` is not forwarded here, so list elements are always Bunches
        convert = unwrap(ConversionMixin.path2ref)
        return [convert(x) for x in path_str]
    # subject, date and sequence folders, joined by the same kind of separator
    pattern = r'(?P<subject>[\w-]+)([\\/])(?P<date>\d{4}-\d{2}-\d{2})(\2)(?P<sequence>\d{1,3})'
    found = re.search(pattern, str(path_str))
    if not found:
        return None
    if re.match(r'^0\d$', found.groups()[-1]):  # e.g. '02' not valid
        return None
    ref = found.groupdict()
    if as_dict:
        return Bunch(ref)
    return '{date:s}_{sequence:s}_{subject:s}'.format(**ref)
525
+
526
@staticmethod
def is_exp_ref(ref: Union[str, Mapping, Iter]) -> Union[bool, List[bool]]:
    """Return True if ref is a valid experiment reference.

    Parameters
    ----------
    ref : str, dict, list
        One or more objects with keys ('subject', 'date', 'sequence'), or strings with
        the form yyyy-mm-dd_n_subject.

    Returns
    -------
    bool, list of bool
        True if ref is valid. For a list or tuple, one bool per element.

    Examples
    --------
    >>> ref = {'date': datetime(2018, 7, 13).date(), 'sequence': 1, 'subject': 'flowers'}
    >>> is_exp_ref(ref)
    True
    >>> is_exp_ref('2018-07-13_001_flowers')
    True
    >>> is_exp_ref('invalid_ref')
    False

    """
    if isinstance(ref, (list, tuple)):
        return list(map(ConversionMixin.is_exp_ref, ref))
    if isinstance(ref, (Bunch, dict)):
        if not {'subject', 'date', 'sequence'} <= set(ref):
            return False
        # Validate the string form of the reference dict
        ref = '{date}_{sequence}_{subject}'.format(**ref)
    elif not isinstance(ref, str):
        return False
    # Only the start of the string is matched against the pattern
    ref_pattern = re.compile(r'\d{4}(-\d{2}){2}_(\d{1,3})_\w+')
    return ref_pattern.match(ref) is not None
561
+
562
@staticmethod
@parse_values
def ref2dict(ref: Union[str, Mapping, Iter]) -> Union[Bunch, List]:
    """Return a Bunch (dict-like) from a reference string (or list thereof).

    Parameters
    ----------
    ref : str, dict, list
        One or more experiment reference strings of the form yyyy-mm-dd_n_subject. Dicts are
        returned as (copied) Bunch objects.

    Returns
    -------
    iblutil.util.Bunch
        A Bunch with keys ('subject', 'sequence', 'date'), or a list thereof.

    Examples
    --------
    >>> ref2dict('2018-07-13_1_flowers')
    {'date': datetime.date(2018, 7, 13), 'sequence': 1, 'subject': 'flowers'}
    >>> ref2dict('2018-07-13_001_flowers', parse=False)
    {'date': '2018-07-13', 'sequence': '001', 'subject': 'flowers'}
    >>> ref2dict(['2018-07-13_1_flowers', '2020-01-23_002_ibl_witten_01'])
    [{'date': datetime.date(2018, 7, 13), 'sequence': 1, 'subject': 'flowers'},
     {'date': datetime.date(2020, 1, 23), 'sequence': 2, 'subject': 'ibl_witten_01'}]

    """
    if isinstance(ref, (list, tuple)):
        # Call the undecorated function on each element so that values are parsed only once,
        # by the outer call, which is the one that received the caller's `parse` argument
        to_dict = unwrap(ConversionMixin.ref2dict)
        return [to_dict(x) for x in ref]
    if isinstance(ref, (Bunch, dict)):
        return Bunch(ref)  # Short circuit
    # Split on the first two underscores only; the subject may itself contain underscores
    ref = dict(zip(['date', 'sequence', 'subject'], ref.split('_', 2)))
    return Bunch(ref)
594
+
595
+ @staticmethod
596
+ def dict2ref(ref_dict) -> Union[str, List]:
597
+ """Convert an experiment reference dict to a string in the format yyyy-mm-dd_n_subject.
598
+
599
+ Parameters
600
+ ----------
601
+ ref_dict : dict, Bunch, list, tuple
602
+ A map with the keys ('subject', 'date', 'sequence').
603
+
604
+ Returns
605
+ -------
606
+ str, list:
607
+ An experiment reference string, or list thereof.
608
+
609
+ """
610
+ if isinstance(ref_dict, (list, tuple)):
611
+ return [ConversionMixin.dict2ref(x) for x in ref_dict]
612
+ if not ref_dict:
613
+ return
614
+ if 'sequence' not in ref_dict and 'number' in ref_dict:
615
+ ref_dict = ref_dict.copy()
616
+ ref_dict['sequence'] = ref_dict.pop('number')
617
+ if 'date' not in ref_dict and 'start_time' in ref_dict:
618
+ ref_dict = ref_dict.copy()
619
+ if isinstance(ref_dict['start_time'], str):
620
+ ref_dict['date'] = ref_dict['start_time'][:10]
621
+ else:
622
+ ref_dict['date'] = ref_dict['start_time'].date()
623
+ parsed = any(not isinstance(k, str) for k in ref_dict.values())
624
+ format_str = ('{date:%Y-%m-%d}_{sequence:d}_{subject:s}'
625
+ if parsed
626
+ else '{date:s}_{sequence:s}_{subject:s}')
627
+ return format_str.format(**ref_dict)
628
+
629
+
630
def one_path_from_dataset(dset, one_cache):
    """Return the local ONE file path for a dataset record, or list of records, from REST.

    This is a thin wrapper around `path_from_dataset`, called with the ONE cache directory as
    the root path and without the dataset UUID in the file name.

    Unlike `to_eid`, this function does not require ONE, and the dataset may not exist.

    Parameters
    ----------
    dset : dict, list
        Dataset dictionary or list of dictionaries from Alyx rest endpoint.
    one_cache : str, pathlib.Path, pathlib.PurePath
        The local ONE data cache directory.

    Returns
    -------
    one.alf.path.ALFPath
        The local path for a given dataset.

    """
    options = {'root_path': one_cache, 'uuid': False}
    return path_from_dataset(dset, **options)
649
+
650
+
651
def path_from_dataset(dset, root_path=PurePosixALFPath('/'), repository=None, uuid=False):
    """Return the local file path from a dset record from a REST query.

    Unlike `to_eid`, this function does not require ONE, and the dataset may not exist.

    Parameters
    ----------
    dset : dict, list
        Dataset dictionary or list of dictionaries from Alyx rest endpoint.
    root_path : str, pathlib.Path, pathlib.PurePath
        The prefix path such as the ONE download directory or remote http server root.
    repository : str, None
        Which data repository to use from the file_records list, defaults to the first file
        record with a data URL.
    uuid : bool
        If True, the file path will contain the dataset UUID.

    Returns
    -------
    one.alf.path.ALFPath, list
        File path or list of paths.

    Raises
    ------
    StopIteration
        No file record matches the requested repository, or none has a data URL.

    """
    if isinstance(dset, list):
        # Forward every option so that list input is treated exactly like a single record
        return [
            path_from_dataset(d, root_path=root_path, repository=repository, uuid=uuid)
            for d in dset
        ]
    if repository:
        fr = next((fr for fr in dset['file_records'] if fr['data_repository'] == repository))
    else:
        fr = next((fr for fr in dset['file_records'] if fr['data_url']))
    # The dataset UUID is the last 36 characters of the dataset URL
    uuid = dset['url'][-36:] if uuid else None
    return path_from_filerecord(fr, root_path=root_path, uuid=uuid)
682
+
683
+
684
def path_from_filerecord(fr, root_path=PurePosixALFPath('/'), uuid=None):
    """Return a data file Path constructed from an Alyx file record.

    The returned path is the root path joined with the file record's repository path and
    relative path. The root path is passed through `ensure_alf_path`, so the type of the
    returned path follows from the type of `root_path`.

    Parameters
    ----------
    fr : dict, list
        An Alyx file record dict, or a list thereof.
    root_path : str, pathlib.Path
        An optional root path.
    uuid : str, uuid.UUID
        An optional dataset UUID to add to the file name.

    Returns
    -------
    one.alf.path.ALFPath
        A filepath as a pathlib object, or a list of paths if `fr` is a list.

    """
    if isinstance(fr, list):
        # Forward the options so that list input is treated exactly like a single record
        return [path_from_filerecord(f, root_path=root_path, uuid=uuid) for f in fr]
    repo_path = fr['data_repository_path']
    if repo_path.startswith('/'):
        # Remove one slash at start, if any, so the path can be joined onto the root
        repo_path = repo_path[1:]
    file_path = PurePosixALFPath(repo_path, fr['relative_path'])
    if root_path:
        # NB: this function won't cast any PurePaths
        root_path = ensure_alf_path(root_path)
        file_path = root_path / file_path
    return file_path.with_uuid(uuid) if uuid else file_path
714
+
715
+
716
def session_record2path(session, root_dir=None):
    """Convert a session record into a path.

    If a lab key is present (and not empty), the path will be in the form
    root_dir/lab/Subjects/subject/yyyy-mm-dd/nnn, otherwise root_dir/subject/yyyy-mm-dd/nnn.

    Parameters
    ----------
    session : Mapping
        A session record with keys ('subject', 'date', 'number'[, 'lab']).
    root_dir : str, pathlib.Path, pathlib.PurePath
        A root directory to prepend.

    Returns
    -------
    one.alf.path.ALFPath, one.alf.path.PureALFPath
        A constructed path of the session. Without a root directory, the relative session
        path is returned.

    Examples
    --------
    >>> session_record2path({'subject': 'ALK01', 'date': '2020-01-01', 'number': 1})
    PurePosixPath('ALK01/2020-01-01/001')

    >>> record = {'date': datetime.datetime.fromisoformat('2020-01-01').date(),
    ...           'number': '001', 'lab': 'foo', 'subject': 'ALK01'}
    >>> session_record2path(record, Path('/home/user'))
    Path('/home/user/foo/Subjects/ALK01/2020-01-01/001')

    """
    lab = session.get('lab')
    # The lab and 'Subjects' folders are only included when the record has a lab
    lab_parts = (lab, 'Subjects') if lab else ('', '')
    number = str(session['number']).zfill(3)  # zero-pad to three digits
    rel_path = PurePosixALFPath(*lab_parts, session['subject'], str(session['date']), number)
    if root_dir:
        return ensure_alf_path(root_dir).joinpath(rel_path)
    return rel_path
753
+
754
+
755
def ses2records(ses: dict):
    """Extract the session cache record and datasets cache from a remote session data record.

    Parameters
    ----------
    ses : dict
        Session dictionary from Alyx REST endpoint.

    Returns
    -------
    pd.Series
        Session record, named with the experiment UUID.
    pd.DataFrame
        Datasets frame, indexed by ('eid', 'id'). A copy of the empty datasets frame is
        returned if the session has no related datasets.

    """
    # Extract session record
    # id used for session_info field of probe insertion
    eid = UUID(ses.get('id') or ses['url'][-36:])
    wanted = ('subject', 'start_time', 'lab', 'number', 'task_protocol', 'projects')
    fields = {key: value for key, value in ses.items() if key in wanted}
    session = pd.Series(data=fields, name=eid).rename({'start_time': 'date'})
    # Projects are stored as one comma-separated string; only the start date is kept
    session['projects'] = ','.join(session.pop('projects'))
    session['date'] = datetime.datetime.fromisoformat(session['date']).date()

    # Extract datasets table
    def _to_record(d):
        """Build a datasets cache record from a session's related dataset dict."""
        rec = dict(file_size=d['file_size'], hash=d['hash'], exists=True, id=UUID(d['id']))
        rec['eid'] = session.name
        url_path = urllib.parse.urlsplit(d['data_url'], allow_fragments=False).path
        file_path = get_alf_path(remove_uuid_string(url_path.strip('/')))
        session_path = get_session_path(file_path).as_posix()
        # The relative path is whatever follows the session path
        rec['rel_path'] = file_path[len(session_path):].strip('/')
        rec['default_revision'] = d['default_revision'] == 'True'
        rec['qc'] = d.get('qc', 'NOT_SET')
        return rec

    if not ses.get('data_dataset_session_related'):
        return session, EMPTY_DATASETS_FRAME.copy()
    records = [_to_record(d) for d in ses['data_dataset_session_related']]
    datasets = (
        pd.DataFrame(records)
        .astype(EMPTY_DATASETS_FRAME.dtypes)
        .set_index(['eid', 'id'])
        .sort_index()
    )
    return session, datasets
802
+
803
+
804
def datasets2records(datasets, additional=None) -> pd.DataFrame:
    """Extract a datasets DataFrame from one or more Alyx dataset records.

    Dataset records without an accessible file record (one that has a data URL and is marked
    as existing) are ignored.

    Parameters
    ----------
    datasets : dict, list
        One or more records from the Alyx 'datasets' endpoint.
    additional : list of str
        A set of optional fields to extract from dataset records.

    Returns
    -------
    pd.DataFrame
        Datasets frame. When no record is accessible, a fresh copy of the empty datasets frame
        is returned so that callers may modify the result safely.

    Examples
    --------
    >>> datasets = ONE().alyx.rest('datasets', 'list', subject='foobar')
    >>> df = datasets2records(datasets)

    """
    extra_fields = additional or []
    records = []

    for d in ensure_list(datasets):
        file_record = next((x for x in d['file_records'] if x['data_url'] and x['exists']), None)
        if not file_record:
            continue  # Ignore files that are not accessible
        rec = dict(file_size=d['file_size'], hash=d['hash'], exists=True)
        # The dataset and session UUIDs are the last 36 characters of their URLs
        rec['id'] = UUID(d['url'][-36:])
        rec['eid'] = UUID(d['session'][-36:]) if d['session'] else pd.NA
        data_url = urllib.parse.urlsplit(file_record['data_url'], allow_fragments=False)
        file_path = get_alf_path(data_url.path.strip('/'))
        file_path = remove_uuid_string(file_path).as_posix()
        # Datasets outside of a session folder keep their full ALF path as the relative path
        session_path = get_session_path(file_path) or ''
        if session_path:
            session_path = session_path.as_posix()
        rec['rel_path'] = file_path[len(session_path):].strip('/')
        rec['default_revision'] = d['default_dataset']
        rec['qc'] = d.get('qc')
        for field in extra_fields:
            rec[field] = d.get(field)
        records.append(rec)

    if not records:
        # Return a copy: handing out the module-level frame would let callers mutate it
        return EMPTY_DATASETS_FRAME.copy()
    index = EMPTY_DATASETS_FRAME.index.names
    return pd.DataFrame(records).set_index(index).sort_index().astype(EMPTY_DATASETS_FRAME.dtypes)