ONE-api 3.0b3-py3-none-any.whl → 3.0b4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/LICENSE +21 -21
  2. {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/METADATA +115 -115
  3. ONE_api-3.0b4.dist-info/RECORD +37 -0
  4. one/__init__.py +2 -2
  5. one/alf/__init__.py +1 -1
  6. one/alf/cache.py +640 -653
  7. one/alf/exceptions.py +105 -105
  8. one/alf/io.py +876 -876
  9. one/alf/path.py +1450 -1450
  10. one/alf/spec.py +519 -519
  11. one/api.py +2949 -2973
  12. one/converters.py +850 -850
  13. one/params.py +414 -414
  14. one/registration.py +845 -845
  15. one/remote/__init__.py +1 -1
  16. one/remote/aws.py +313 -313
  17. one/remote/base.py +142 -142
  18. one/remote/globus.py +1254 -1254
  19. one/tests/fixtures/params/.caches +6 -6
  20. one/tests/fixtures/params/.test.alyx.internationalbrainlab.org +8 -8
  21. one/tests/fixtures/rest_responses/1f187d80fd59677b395fcdb18e68e4401bfa1cc9 +1 -1
  22. one/tests/fixtures/rest_responses/47893cf67c985e6361cdee009334963f49fb0746 +1 -1
  23. one/tests/fixtures/rest_responses/535d0e9a1e2c1efbdeba0d673b131e00361a2edb +1 -1
  24. one/tests/fixtures/rest_responses/6dc96f7e9bcc6ac2e7581489b9580a6cd3f28293 +1 -1
  25. one/tests/fixtures/rest_responses/db1731fb8df0208944ae85f76718430813a8bf50 +1 -1
  26. one/tests/fixtures/rest_responses/dcce48259bb929661f60a02a48563f70aa6185b3 +1 -1
  27. one/tests/fixtures/rest_responses/f530d6022f61cdc9e38cc66beb3cb71f3003c9a1 +1 -1
  28. one/tests/fixtures/test_dbs.json +14 -14
  29. one/util.py +524 -524
  30. one/webclient.py +1366 -1354
  31. ONE_api-3.0b3.dist-info/RECORD +0 -37
  32. {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/WHEEL +0 -0
  33. {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/top_level.txt +0 -0
one/util.py CHANGED
@@ -1,524 +1,524 @@
- """Decorators and small standalone functions for api module."""
- import re
- from uuid import UUID
- import logging
- import fnmatch
- import warnings
- from functools import wraps, partial
- from typing import Iterable, Optional, List
- from collections.abc import Mapping
-
- import pandas as pd
- import numpy as np
- from iblutil.util import ensure_list
-
- import one.alf.exceptions as alferr
- from one.alf.path import rel_path_parts
- from one.alf.spec import QC, FILE_SPEC, regex as alf_regex
-
- logger = logging.getLogger(__name__)
-
-
- def parse_id(method):
-     """Ensure the input experiment identifier is an experiment UUID.
-
-     Parameters
-     ----------
-     method : function
-         An ONE method whose second arg is an experiment ID.
-
-     Returns
-     -------
-     function
-         A wrapper function that parses the ID to the expected string.
-
-     Raises
-     ------
-     ValueError
-         Unable to convert input to a valid experiment ID.
-
-     """
-
-     @wraps(method)
-     def wrapper(self, id, *args, **kwargs):
-         eid = self.to_eid(id)
-         if eid is None:
-             raise ValueError(f'Cannot parse session ID "{id}" (session may not exist)')
-         return method(self, eid, *args, **kwargs)
-
-     return wrapper
-
-
- def validate_date_range(date_range) -> (pd.Timestamp, pd.Timestamp):
-     """Validate and arrange date range in a two-element list.
-
-     Parameters
-     ----------
-     date_range : str, datetime.date, datetime.datetime, pd.Timestamp, np.datetime64, list, None
-         A single date or tuple/list of two dates. None represents no bound.
-
-     Returns
-     -------
-     tuple of pd.Timestamp
-         The start and end timestamps.
-
-     Examples
-     --------
-     >>> validate_date_range('2020-01-01')  # On this day
-     >>> validate_date_range(datetime.date(2020, 1, 1))
-     >>> validate_date_range(np.array(['2022-01-30', '2022-01-30'], dtype='datetime64[D]'))
-     >>> validate_date_range(pd.Timestamp(2020, 1, 1))
-     >>> validate_date_range(np.datetime64(2021, 3, 11))
-     >>> validate_date_range(['2020-01-01'])  # from date
-     >>> validate_date_range(['2020-01-01', None])  # from date
-     >>> validate_date_range([None, '2020-01-01'])  # up to date
-
-     Raises
-     ------
-     ValueError
-         Size of date range tuple must be 1 or 2.
-
-     """
-     if date_range is None:
-         return
-
-     # Ensure we have exactly two values
-     if isinstance(date_range, str) or not isinstance(date_range, Iterable):
-         # date_range = (date_range, pd.Timestamp(date_range) + pd.Timedelta(days=1))
-         dt = pd.Timedelta(days=1) - pd.Timedelta(milliseconds=1)
-         date_range = (date_range, pd.Timestamp(date_range) + dt)
-     elif len(date_range) == 1:
-         date_range = [date_range[0], pd.Timestamp.max]
-     elif len(date_range) != 2:
-         raise ValueError
-
-     # For comparisons, ensure both values are pd.Timestamp (datetime, date and datetime64
-     # objects will be converted)
-     start, end = date_range
-     start = start or pd.Timestamp.min  # Convert None to lowest possible date
-     end = end or pd.Timestamp.max  # Convert None to highest possible date
-
-     # Convert to timestamp
-     if not isinstance(start, pd.Timestamp):
-         start = pd.Timestamp(start)
-     if not isinstance(end, pd.Timestamp):
-         end = pd.Timestamp(end)
-
-     return start, end
-
-
- def _collection_spec(collection=None, revision=None) -> str:
-     """Return a template string for a collection/revision regular expression.
-
-     Because both are optional in the ALF spec, None will match any (including absent), while an
-     empty string will match absent.
-
-     Parameters
-     ----------
-     collection : None, str
-         An optional collection regular expression.
-     revision : None, str
-         An optional revision regular expression.
-
-     Returns
-     -------
-     str
-         A string format for matching the collection/revision.
-
-     """
-     spec = ''
-     for value, default in zip((collection, revision), ('{collection}/', '#{revision}#/')):
-         if not value:
-             default = f'({default})?' if value is None else ''
-         spec += default
-     return spec
-
-
- def _file_spec(**kwargs):
-     """Return a template string for an ALF dataset regular expression.
-
-     Because 'namespace', 'timescale', and 'extra' are optional, None will match any
-     (including absent). This function removes the regex flags from the file spec string that make
-     certain parts optional.
-
-     TODO an empty string should only match absent; this could be achieved by removing parts from
-     spec string
-
-     Parameters
-     ----------
-     namespace : None, str
-         If namespace is not None, the namespace section of the returned file spec will not be
-         optional.
-     timescale : None, str
-         If timescale is not None, the timescale section of the returned file spec will not be
-         optional.
-     extra : None, str
-         If extra is not None, the extra section of the returned file spec will not be
-         optional.
-
-     Returns
-     -------
-     str
-         A string format for matching an ALF dataset.
-
-     """
-     OPTIONAL = {'namespace': '?', 'timescale': '?', 'extra': '*'}
-     filespec = FILE_SPEC
-     for k, v in kwargs.items():
-         if k in OPTIONAL and v is not None:
-             i = filespec.find(k) + len(k)
-             i += filespec[i:].find(OPTIONAL[k])
-             filespec = filespec[:i] + filespec[i:].replace(OPTIONAL[k], '', 1)
-     return filespec
-
-
- def filter_datasets(
-         all_datasets, filename=None, collection=None, revision=None, revision_last_before=True,
-         qc=QC.FAIL, ignore_qc_not_set=False, assert_unique=True, wildcards=False):
-     """Filter the datasets cache table by relative path (dataset name, collection and revision).
-
-     When None is passed, all values will match. To match on empty parts, use an empty string.
-     When revision_last_before is true, None means return latest revision.
-
-     Parameters
-     ----------
-     all_datasets : pandas.DataFrame
-         A datasets cache table.
-     filename : str, dict, None
-         A filename str or a dict of alf parts. Regular expressions permitted.
-     collection : str, None
-         A collection string. Regular expressions permitted.
-     revision : str, None
-         A revision string to match. If revision_last_before is true, regular expressions are
-         not permitted.
-     revision_last_before : bool
-         When true and no exact match exists, the (lexicographically) previous revision is used
-         instead. When false the revision string is matched like collection and filename,
-         with regular expressions permitted. NB: When true and `revision` is None the default
-         revision is returned which may not be the last revision. If no default is defined, the
-         last revision is returned.
-     qc : str, int, one.alf.spec.QC
-         Returns datasets at or below this QC level. Integer values should correspond to the QC
-         enumeration NOT the qc category column codes in the pandas table.
-     ignore_qc_not_set : bool
-         When true, do not return datasets for which QC is NOT_SET.
-     assert_unique : bool
-         When true an error is raised if multiple collections or datasets are found.
-     wildcards : bool
-         If true, use unix shell style matching instead of regular expressions.
-
-     Returns
-     -------
-     pd.DataFrame
-         A slice of all_datasets that match the filters.
-
-     Examples
-     --------
-     Filter by dataset name and collection
-
-     >>> datasets = filter_datasets(all_datasets, '.*spikes.times.*', 'alf/probe00')
-
-     Filter datasets not in a collection
-
-     >>> datasets = filter_datasets(all_datasets, collection='')
-
-     Filter by matching revision
-
-     >>> datasets = filter_datasets(all_datasets, 'spikes.times.npy',
-     ...                            revision='2020-01-12', revision_last_before=False)
-
-     Filter by filename parts
-
-     >>> datasets = filter_datasets(all_datasets, dict(object='spikes', attribute='times'))
-
-     Filter by QC outcome - datasets with WARNING or better
-
-     >>> datasets = filter_datasets(all_datasets, qc='WARNING')
-
-     Filter by QC outcome and ignore datasets with unset QC - datasets with PASS only
-
-     >>> datasets = filter_datasets(all_datasets, qc='PASS', ignore_qc_not_set=True)
-
-     Raises
-     ------
-     one.alf.exceptions.ALFMultipleCollectionsFound
-         The matching list of datasets have more than one unique collection and `assert_unique` is
-         True.
-     one.alf.exceptions.ALFMultipleRevisionsFound
-         When `revision_last_before` is false, the matching list of datasets have more than one
-         unique revision. When `revision_last_before` is true, a 'default_revision' column exists,
-         and no revision is passed, this error means that one or more matching datasets have
-         multiple revisions specified as the default. This is typically an error in the cache table
-         itself as all datasets should have one and only one default revision specified.
-     one.alf.exceptions.ALFMultipleObjectsFound
-         The matching list of datasets have more than one unique filename and both `assert_unique`
-         and `revision_last_before` are true.
-     one.alf.exceptions.ALFError
-         When both `assert_unique` and `revision_last_before` are true, and a 'default_revision'
-         column exists but `revision` is None; one or more matching datasets have no default
-         revision specified. This is typically an error in the cache table itself as all datasets
-         should have one and only one default revision specified.
-
-     Notes
-     -----
-     - It is not possible to match datasets that are in a given collection OR NOT in ANY collection.
-       e.g. filter_datasets(dsets, collection=['alf', '']) will not match the latter. For this you
-       must use two separate queries.
-     - It is not possible to match datasets with no revision when wildcards=True.
-
-     """
-     # Create a regular expression string to match relative path against
-     filename = filename or {}
-     regex_args = {'collection': collection}
-     spec_str = _collection_spec(collection, None if revision_last_before else revision)
-
-     if isinstance(filename, dict):
-         spec_str += _file_spec(**filename)
-         regex_args.update(**filename)
-     else:
-         # Convert to regex if necessary and assert end of string
-         flagless_token = re.escape(r'(?s:')  # fnmatch.translate may wrap input in flagless group
-         # If there is a wildcard at the start of the filename we must exclude capture of slashes to
-         # avoid capture of collection part, e.g. * -> .* -> [^/]* (zero or more non-slash chars)
-         exclude_slash = partial(re.sub, fr'^({flagless_token})?\.\*', r'\g<1>[^/]*')
-         spec_str += '|'.join(
-             exclude_slash(fnmatch.translate(x)) if wildcards else x + '$'
-             for x in ensure_list(filename)
-         )
-
-     # If matching revision name, add to regex string
-     if not revision_last_before:
-         regex_args.update(revision=revision)
-
-     for k, v in regex_args.items():
-         if v is None:
-             continue
-         if wildcards:
-             # Convert to regex, remove \\Z which asserts end of string
-             v = (fnmatch.translate(x).replace('\\Z', '') for x in ensure_list(v))
-         if not isinstance(v, str):
-             regex_args[k] = '|'.join(v)  # logical OR
-
-     # Build regex string
-     pattern = alf_regex('^' + spec_str, **regex_args)
-     path_match = all_datasets['rel_path'].str.match(pattern)
-
-     # Test on QC outcome
-     qc = QC.validate(qc)
-     qc_match = all_datasets['qc'].le(qc.name)
-     if ignore_qc_not_set:
-         qc_match &= all_datasets['qc'].ne('NOT_SET')
-
-     # Filter datasets on path and QC
-     match = all_datasets[path_match & qc_match].copy()
-     if len(match) == 0 or not (revision_last_before or assert_unique):
-         return match
-
-     # Extract revision to separate column
-     if 'revision' not in match.columns:
-         match['revision'] = match.rel_path.map(lambda x: rel_path_parts(x)[1] or '')
-     if assert_unique:
-         collections = set(rel_path_parts(x)[0] or '' for x in match.rel_path.values)
-         if len(collections) > 1:
-             _list = '"' + '", "'.join(collections) + '"'
-             raise alferr.ALFMultipleCollectionsFound(_list)
-         if not revision_last_before:
-             if len(set(match['revision'])) > 1:
-                 _list = '"' + '", "'.join(set(match['revision'])) + '"'
-                 raise alferr.ALFMultipleRevisionsFound(_list)
-             if len(match) > 1:
-                 _list = '"' + '", "'.join(match['rel_path']) + '"'
-                 raise alferr.ALFMultipleObjectsFound(_list)
-             else:
-                 return match
-
-     match = filter_revision_last_before(match, revision, assert_unique=assert_unique)
-     if assert_unique and len(match) > 1:
-         _list = '"' + '", "'.join(match['rel_path']) + '"'
-         raise alferr.ALFMultipleObjectsFound(_list)
-     return match
-
-
- def filter_revision_last_before(
-         datasets, revision=None, assert_unique=True, assert_consistent=False):
-     """Filter datasets by revision, returning previous revision if no exact match is found.
-
-     Parameters
-     ----------
-     datasets : pandas.DataFrame
-         A datasets cache table.
-     revision : str
-         A revision string to match (regular expressions not permitted).
-     assert_unique : bool
-         When true an alferr.ALFMultipleRevisionsFound exception is raised when multiple
-         default revisions are found; an alferr.ALFError when no default revision is found.
-     assert_consistent : bool
-         Will raise alferr.ALFMultipleRevisionsFound if matching revision is different between
-         datasets.
-
-     Returns
-     -------
-     pd.DataFrame
-         A datasets DataFrame with 0 or 1 row per unique dataset.
-
-     Raises
-     ------
-     one.alf.exceptions.ALFMultipleRevisionsFound
-         When the 'default_revision' column exists and no revision is passed, this error means that
-         one or more matching datasets have multiple revisions specified as the default. This is
-         typically an error in the cache table itself as all datasets should have one and only one
-         default revision specified.
-         When `assert_consistent` is True, this error may mean that the matching datasets have
-         mixed revisions.
-     one.alf.exceptions.ALFMultipleObjectsFound
-         The matching list of datasets have more than one unique filename and both `assert_unique`
-         and `revision_last_before` are true.
-     one.alf.exceptions.ALFError
-         When both `assert_unique` and `revision_last_before` are true, and a 'default_revision'
-         column exists but `revision` is None; one or more matching datasets have no default
-         revision specified. This is typically an error in the cache table itself as all datasets
-         should have one and only one default revision specified.
-
-     Notes
-     -----
-     - When `revision` is not None, the default revision value is not used. If an older revision is
-       the default one (uncommon), passing in a revision may lead to a newer revision being returned
-       than if revision is None.
-     - A view is returned if a revision column is present, otherwise a copy is returned.
-
-     """
-     def _last_before(df):
-         """Takes a DataFrame with only one dataset and multiple revisions, returns matching row."""
-         if revision is None:
-             dset_name = df['rel_path'].iloc[0]
-             if 'default_revision' in df.columns:
-                 if assert_unique and sum(df.default_revision) > 1:
-                     revisions = df['revision'][df.default_revision.values]
-                     rev_list = '"' + '", "'.join(revisions) + '"'
-                     raise alferr.ALFMultipleRevisionsFound(rev_list)
-                 if sum(df.default_revision) == 1:
-                     return df[df.default_revision]
-                 if len(df) == 1:  # This may be the case when called from load_datasets
-                     return df  # It's not the default but there's only one available revision
-                 # default_revision column all False; default isn't copied to remote repository
-                 if assert_unique:
-                     raise alferr.ALFError(f'No default revision for dataset {dset_name}')
-             warnings.warn(
-                 f'No default revision for dataset {dset_name}; using most recent',
-                 alferr.ALFWarning)
-         # Compare revisions lexicographically
-         idx = index_last_before(df['revision'].tolist(), revision)
-         # Square brackets forces 1 row DataFrame returned instead of Series
-         return df.iloc[slice(0, 0) if idx is None else [idx], :]
-
-     # Extract revision to separate column
-     if 'revision' not in datasets.columns:
-         with pd.option_context('mode.chained_assignment', None):  # FIXME Explicitly copy?
-             datasets['revision'] = datasets.rel_path.map(lambda x: rel_path_parts(x)[1] or '')
-     # Group by relative path (sans revision)
-     groups = datasets.rel_path.str.replace('#.*#/', '', regex=True).values
-     grouped = datasets.groupby(groups, group_keys=False)
-     filtered = grouped.apply(_last_before)
-     # Raise if matching revision is different between datasets
-     if len(filtered['revision'].unique()) > 1:
-         rev_list = '"' + '", "'.join(filtered['revision'].unique()) + '"'
-         if assert_consistent:
-             raise alferr.ALFMultipleRevisionsFound(rev_list)
-         else:
-             warnings.warn(f'Multiple revisions: {rev_list}', alferr.ALFWarning)
-     return filtered
-
-
- def index_last_before(revisions: List[str], revision: Optional[str]) -> Optional[int]:
-     """Return index of string occurring directly before provided revision string when sorted.
-
-     Revisions are lexicographically sorted. If `revision` is None, the index of the most recent
-     revision is returned.
-
-     Parameters
-     ----------
-     revisions : list of strings
-         A list of revision strings.
-     revision : None, str
-         The revision string to match on.
-
-     Returns
-     -------
-     int, None
-         Index of revision before matching string in sorted list or None.
-
-     Examples
-     --------
-     >>> idx = index_last_before([], '2020-08-01')
-
-     """
-     if len(revisions) == 0:
-         return  # No revisions, just return
-     revisions_sorted = sorted(revisions, reverse=True)
-     if revision is None:  # Return most recent revision
-         return revisions.index(revisions_sorted[0])
-     lt = np.array(revisions_sorted) <= revision
-     return revisions.index(revisions_sorted[lt.argmax()]) if any(lt) else None
-
-
- def autocomplete(term, search_terms) -> str:
-     """Validate search term and return complete name.
-
-     Examples
-     --------
-     >>> autocomplete('subj')
-     'subject'
-
-     """
-     term = term.casefold()
-     # Check if term already complete
-     if term in search_terms:
-         return term
-     full_key = (x for x in search_terms if x.casefold().startswith(term))
-     key_ = next(full_key, None)
-     if not key_:
-         raise ValueError(f'Invalid search term "{term}", see `one.search_terms()`')
-     elif next(full_key, None):
-         raise ValueError(f'Ambiguous search term "{term}"')
-     return key_
-
-
- class LazyId(Mapping):
-     """Return UUID from records when indexed.
-
-     Uses a paginated response object or list of Alyx REST records.
-     """
-
-     def __init__(self, pg, func=None):
-         self._pg = pg
-         self.func = func or self.ses2eid
-
-     def __getitem__(self, item):
-         return self.func(self._pg.__getitem__(item))
-
-     def __len__(self):
-         return self._pg.__len__()
-
-     def __iter__(self):
-         return map(self.func, self._pg.__iter__())
-
-     @staticmethod
-     def ses2eid(ses):
-         """Given one or more session dictionaries, extract and return the session UUID.
-
-         Parameters
-         ----------
-         ses : one.webclient._PaginatedResponse, dict, list
-             A collection of Alyx REST sessions endpoint records.
-
-         Returns
-         -------
-         str, list
-             One or more experiment ID strings.
-
-         """
-         if isinstance(ses, list):
-             return [LazyId.ses2eid(x) for x in ses]
-         else:
-             eid = ses.get('id', None) or ses['url'].split('/').pop()
-             return UUID(eid)
+ """Decorators and small standalone functions for api module."""
+ import re
+ from uuid import UUID
+ import logging
+ import fnmatch
+ import warnings
+ from functools import wraps, partial
+ from typing import Iterable, Optional, List
+ from collections.abc import Mapping
+
+ import pandas as pd
+ import numpy as np
+ from iblutil.util import ensure_list
+
+ import one.alf.exceptions as alferr
+ from one.alf.path import rel_path_parts
+ from one.alf.spec import QC, FILE_SPEC, regex as alf_regex
+
+ logger = logging.getLogger(__name__)
+
+
+ def parse_id(method):
+     """Ensure the input experiment identifier is an experiment UUID.
+
+     Parameters
+     ----------
+     method : function
+         An ONE method whose second arg is an experiment ID.
+
+     Returns
+     -------
+     function
+         A wrapper function that parses the ID to the expected string.
+
+     Raises
+     ------
+     ValueError
+         Unable to convert input to a valid experiment ID.
+
+     """
+
+     @wraps(method)
+     def wrapper(self, id, *args, **kwargs):
+         eid = self.to_eid(id)
+         if eid is None:
+             raise ValueError(f'Cannot parse session ID "{id}" (session may not exist)')
+         return method(self, eid, *args, **kwargs)
+
+     return wrapper
+
+
+ def validate_date_range(date_range) -> (pd.Timestamp, pd.Timestamp):
+     """Validate and arrange date range in a two-element list.
+
+     Parameters
+     ----------
+     date_range : str, datetime.date, datetime.datetime, pd.Timestamp, np.datetime64, list, None
+         A single date or tuple/list of two dates. None represents no bound.
+
+     Returns
+     -------
+     tuple of pd.Timestamp
+         The start and end timestamps.
+
+     Examples
+     --------
+     >>> validate_date_range('2020-01-01')  # On this day
+     >>> validate_date_range(datetime.date(2020, 1, 1))
+     >>> validate_date_range(np.array(['2022-01-30', '2022-01-30'], dtype='datetime64[D]'))
+     >>> validate_date_range(pd.Timestamp(2020, 1, 1))
+     >>> validate_date_range(np.datetime64(2021, 3, 11))
+     >>> validate_date_range(['2020-01-01'])  # from date
+     >>> validate_date_range(['2020-01-01', None])  # from date
+     >>> validate_date_range([None, '2020-01-01'])  # up to date
+
+     Raises
+     ------
+     ValueError
+         Size of date range tuple must be 1 or 2.
+
+     """
+     if date_range is None:
+         return
+
+     # Ensure we have exactly two values
+     if isinstance(date_range, str) or not isinstance(date_range, Iterable):
+         # date_range = (date_range, pd.Timestamp(date_range) + pd.Timedelta(days=1))
+         dt = pd.Timedelta(days=1) - pd.Timedelta(milliseconds=1)
+         date_range = (date_range, pd.Timestamp(date_range) + dt)
+     elif len(date_range) == 1:
+         date_range = [date_range[0], pd.Timestamp.max]
+     elif len(date_range) != 2:
+         raise ValueError
+
+     # For comparisons, ensure both values are pd.Timestamp (datetime, date and datetime64
+     # objects will be converted)
+     start, end = date_range
+     start = start or pd.Timestamp.min  # Convert None to lowest possible date
+     end = end or pd.Timestamp.max  # Convert None to highest possible date
+
+     # Convert to timestamp
+     if not isinstance(start, pd.Timestamp):
+         start = pd.Timestamp(start)
+     if not isinstance(end, pd.Timestamp):
+         end = pd.Timestamp(end)
+
+     return start, end
+
+
+ def _collection_spec(collection=None, revision=None) -> str:
+     """Return a template string for a collection/revision regular expression.
+
+     Because both are optional in the ALF spec, None will match any (including absent), while an
+     empty string will match absent.
+
+     Parameters
+     ----------
+     collection : None, str
+         An optional collection regular expression.
+     revision : None, str
+         An optional revision regular expression.
+
+     Returns
+     -------
+     str
+         A string format for matching the collection/revision.
+
+     """
+     spec = ''
+     for value, default in zip((collection, revision), ('{collection}/', '#{revision}#/')):
+         if not value:
+             default = f'({default})?' if value is None else ''
+         spec += default
+     return spec
+
+
+ def _file_spec(**kwargs):
+     """Return a template string for an ALF dataset regular expression.
+
+     Because 'namespace', 'timescale', and 'extra' are optional, None will match any
+     (including absent). This function removes the regex flags from the file spec string that make
+     certain parts optional.
+
+     TODO an empty string should only match absent; this could be achieved by removing parts from
+     spec string
+
+     Parameters
+     ----------
+     namespace : None, str
+         If namespace is not None, the namespace section of the returned file spec will not be
+         optional.
+     timescale : None, str
+         If timescale is not None, the timescale section of the returned file spec will not be
+         optional.
+     extra : None, str
+         If extra is not None, the extra section of the returned file spec will not be
+         optional.
+
+     Returns
+     -------
+     str
+         A string format for matching an ALF dataset.
+
+     """
+     OPTIONAL = {'namespace': '?', 'timescale': '?', 'extra': '*'}
+     filespec = FILE_SPEC
+     for k, v in kwargs.items():
+         if k in OPTIONAL and v is not None:
+             i = filespec.find(k) + len(k)
+             i += filespec[i:].find(OPTIONAL[k])
+             filespec = filespec[:i] + filespec[i:].replace(OPTIONAL[k], '', 1)
+     return filespec
+
+
+ def filter_datasets(
+         all_datasets, filename=None, collection=None, revision=None, revision_last_before=True,
+         qc=QC.FAIL, ignore_qc_not_set=False, assert_unique=True, wildcards=False):
+     """Filter the datasets cache table by relative path (dataset name, collection and revision).
+
+     When None is passed, all values will match. To match on empty parts, use an empty string.
+     When revision_last_before is true, None means return latest revision.
+
+     Parameters
+     ----------
+     all_datasets : pandas.DataFrame
+         A datasets cache table.
+     filename : str, dict, None
+         A filename str or a dict of alf parts. Regular expressions permitted.
+     collection : str, None
+         A collection string. Regular expressions permitted.
+     revision : str, None
+         A revision string to match. If revision_last_before is true, regular expressions are
+         not permitted.
+     revision_last_before : bool
+         When true and no exact match exists, the (lexicographically) previous revision is used
+         instead. When false the revision string is matched like collection and filename,
+         with regular expressions permitted. NB: When true and `revision` is None the default
+         revision is returned which may not be the last revision. If no default is defined, the
+         last revision is returned.
+     qc : str, int, one.alf.spec.QC
+         Returns datasets at or below this QC level. Integer values should correspond to the QC
+         enumeration NOT the qc category column codes in the pandas table.
+     ignore_qc_not_set : bool
+         When true, do not return datasets for which QC is NOT_SET.
+     assert_unique : bool
+         When true an error is raised if multiple collections or datasets are found.
+     wildcards : bool
+         If true, use unix shell style matching instead of regular expressions.
+
+     Returns
+     -------
+     pd.DataFrame
+         A slice of all_datasets that match the filters.
+
+     Examples
+     --------
+     Filter by dataset name and collection
+
+     >>> datasets = filter_datasets(all_datasets, '.*spikes.times.*', 'alf/probe00')
+
+     Filter datasets not in a collection
+
+     >>> datasets = filter_datasets(all_datasets, collection='')
+
+     Filter by matching revision
+
+     >>> datasets = filter_datasets(all_datasets, 'spikes.times.npy',
+     ...                            revision='2020-01-12', revision_last_before=False)
+
+     Filter by filename parts
+
+     >>> datasets = filter_datasets(all_datasets, dict(object='spikes', attribute='times'))
+
+     Filter by QC outcome - datasets with WARNING or better
+
+     >>> datasets = filter_datasets(all_datasets, qc='WARNING')
+
+     Filter by QC outcome and ignore datasets with unset QC - datasets with PASS only
+
+     >>> datasets = filter_datasets(all_datasets, qc='PASS', ignore_qc_not_set=True)
+
+     Raises
+     ------
+     one.alf.exceptions.ALFMultipleCollectionsFound
+         The matching list of datasets have more than one unique collection and `assert_unique` is
+         True.
+     one.alf.exceptions.ALFMultipleRevisionsFound
+         When `revision_last_before` is false, the matching list of datasets have more than one
+         unique revision. When `revision_last_before` is true, a 'default_revision' column exists,
+         and no revision is passed, this error means that one or more matching datasets have
+         multiple revisions specified as the default. This is typically an error in the cache table
+         itself as all datasets should have one and only one default revision specified.
+     one.alf.exceptions.ALFMultipleObjectsFound
+         The matching list of datasets have more than one unique filename and both `assert_unique`
+         and `revision_last_before` are true.
+     one.alf.exceptions.ALFError
+         When both `assert_unique` and `revision_last_before` are true, and a 'default_revision'
+         column exists but `revision` is None; one or more matching datasets have no default
+         revision specified. This is typically an error in the cache table itself as all datasets
+         should have one and only one default revision specified.
+
+     Notes
+     -----
+     - It is not possible to match datasets that are in a given collection OR NOT in ANY collection.
+       e.g. filter_datasets(dsets, collection=['alf', '']) will not match the latter. For this you
+       must use two separate queries.
+     - It is not possible to match datasets with no revision when wildcards=True.
+
+     """
+     # Create a regular expression string to match relative path against
+     filename = filename or {}
+     regex_args = {'collection': collection}
+     spec_str = _collection_spec(collection, None if revision_last_before else revision)
+
+     if isinstance(filename, dict):
+         spec_str += _file_spec(**filename)
+         regex_args.update(**filename)
+     else:
+         # Convert to regex if necessary and assert end of string
+         flagless_token = re.escape(r'(?s:')  # fnmatch.translate may wrap input in flagless group
+         # If there is a wildcard at the start of the filename we must exclude capture of slashes to
+         # avoid capture of collection part, e.g. * -> .* -> [^/]* (zero or more non-slash chars)
+         exclude_slash = partial(re.sub, fr'^({flagless_token})?\.\*', r'\g<1>[^/]*')
+         spec_str += '|'.join(
+             exclude_slash(fnmatch.translate(x)) if wildcards else x + '$'
+             for x in ensure_list(filename)
+         )
+
+     # If matching revision name, add to regex string
+     if not revision_last_before:
+         regex_args.update(revision=revision)
+
+     for k, v in regex_args.items():
+         if v is None:
+             continue
+         if wildcards:
+             # Convert to regex, remove \\Z which asserts end of string
+             v = (fnmatch.translate(x).replace('\\Z', '') for x in ensure_list(v))
+         if not isinstance(v, str):
+             regex_args[k] = '|'.join(v)  # logical OR
+
+     # Build regex string
+     pattern = alf_regex('^' + spec_str, **regex_args)
+     path_match = all_datasets['rel_path'].str.match(pattern)
+
+     # Test on QC outcome
+     qc = QC.validate(qc)
+     qc_match = all_datasets['qc'].le(qc.name)
+     if ignore_qc_not_set:
+         qc_match &= all_datasets['qc'].ne('NOT_SET')
+
+     # Filter datasets on path and QC
+     match = all_datasets[path_match & qc_match].copy()
+     if len(match) == 0 or not (revision_last_before or assert_unique):
+         return match
+
+     # Extract revision to separate column
+     if 'revision' not in match.columns:
+         match['revision'] = match.rel_path.map(lambda x: rel_path_parts(x)[1] or '')
+     if assert_unique:
+         collections = set(rel_path_parts(x)[0] or '' for x in match.rel_path.values)
+         if len(collections) > 1:
+             _list = '"' + '", "'.join(collections) + '"'
+             raise alferr.ALFMultipleCollectionsFound(_list)
+         if not revision_last_before:
+             if len(set(match['revision'])) > 1:
+                 _list = '"' + '", "'.join(set(match['revision'])) + '"'
+                 raise alferr.ALFMultipleRevisionsFound(_list)
+             if len(match) > 1:
+                 _list = '"' + '", "'.join(match['rel_path']) + '"'
+                 raise alferr.ALFMultipleObjectsFound(_list)
+             else:
+                 return match
+
+     match = filter_revision_last_before(match, revision, assert_unique=assert_unique)
+     if assert_unique and len(match) > 1:
+         _list = '"' + '", "'.join(match['rel_path']) + '"'
+         raise alferr.ALFMultipleObjectsFound(_list)
+     return match
+
+
+ def filter_revision_last_before(
+         datasets, revision=None, assert_unique=True, assert_consistent=False):
+     """Filter datasets by revision, returning previous revision if no exact match is found.
+
+     Parameters
+     ----------
+     datasets : pandas.DataFrame
+         A datasets cache table.
+     revision : str
+         A revision string to match (regular expressions not permitted).
+     assert_unique : bool
+         When true an alferr.ALFMultipleRevisionsFound exception is raised when multiple
+         default revisions are found; an alferr.ALFError when no default revision is found.
+     assert_consistent : bool
+         Will raise alferr.ALFMultipleRevisionsFound if matching revision is different between
+         datasets.
+
+     Returns
+     -------
+     pd.DataFrame
+         A datasets DataFrame with 0 or 1 row per unique dataset.
+
+     Raises
+     ------
+     one.alf.exceptions.ALFMultipleRevisionsFound
+         When the 'default_revision' column exists and no revision is passed, this error means that
+         one or more matching datasets have multiple revisions specified as the default. This is
+         typically an error in the cache table itself as all datasets should have one and only one
+         default revision specified.
+         When `assert_consistent` is True, this error may mean that the matching datasets have
+         mixed revisions.
+     one.alf.exceptions.ALFMultipleObjectsFound
+         The matching list of datasets have more than one unique filename and both `assert_unique`
+         and `revision_last_before` are true.
+     one.alf.exceptions.ALFError
+         When both `assert_unique` and `revision_last_before` are true, and a 'default_revision'
+         column exists but `revision` is None; one or more matching datasets have no default
+         revision specified. This is typically an error in the cache table itself as all datasets
+         should have one and only one default revision specified.
+
+     Notes
+     -----
+     - When `revision` is not None, the default revision value is not used. If an older revision is
+       the default one (uncommon), passing in a revision may lead to a newer revision being returned
+       than if revision is None.
+     - A view is returned if a revision column is present, otherwise a copy is returned.
+
+     """
+     def _last_before(df):
+         """Takes a DataFrame with only one dataset and multiple revisions, returns matching row."""
+         if revision is None:
+             dset_name = df['rel_path'].iloc[0]
+             if 'default_revision' in df.columns:
+                 if assert_unique and sum(df.default_revision) > 1:
+                     revisions = df['revision'][df.default_revision.values]
+                     rev_list = '"' + '", "'.join(revisions) + '"'
+                     raise alferr.ALFMultipleRevisionsFound(rev_list)
+                 if sum(df.default_revision) == 1:
+                     return df[df.default_revision]
+                 if len(df) == 1:  # This may be the case when called from load_datasets
+                     return df  # It's not the default but there's only one available revision
+                 # default_revision column all False; default isn't copied to remote repository
+                 if assert_unique:
+                     raise alferr.ALFError(f'No default revision for dataset {dset_name}')
+             warnings.warn(
+                 f'No default revision for dataset {dset_name}; using most recent',
+                 alferr.ALFWarning)
+         # Compare revisions lexicographically
+         idx = index_last_before(df['revision'].tolist(), revision)
+         # Square brackets forces 1 row DataFrame returned instead of Series
+         return df.iloc[slice(0, 0) if idx is None else [idx], :]
+
+     # Extract revision to separate column
+     if 'revision' not in datasets.columns:
+         with pd.option_context('mode.chained_assignment', None):  # FIXME Explicitly copy?
+             datasets['revision'] = datasets.rel_path.map(lambda x: rel_path_parts(x)[1] or '')
+     # Group by relative path (sans revision)
+     groups = datasets.rel_path.str.replace('#.*#/', '', regex=True).values
+     grouped = datasets.groupby(groups, group_keys=False)
+     filtered = grouped.apply(_last_before)
+     # Raise if matching revision is different between datasets
+     if len(filtered['revision'].unique()) > 1:
+         rev_list = '"' + '", "'.join(filtered['revision'].unique()) + '"'
+         if assert_consistent:
+             raise alferr.ALFMultipleRevisionsFound(rev_list)
+         else:
+             warnings.warn(f'Multiple revisions: {rev_list}', alferr.ALFWarning)
+     return filtered
+
+
+ def index_last_before(revisions: List[str], revision: Optional[str]) -> Optional[int]:
+     """Return index of string occurring directly before provided revision string when sorted.
+
+     Revisions are lexicographically sorted. If `revision` is None, the index of the most recent
+     revision is returned.
+
+     Parameters
+     ----------
+     revisions : list of strings
+         A list of revision strings.
+     revision : None, str
+         The revision string to match on.
+
+     Returns
+     -------
+     int, None
+         Index of revision before matching string in sorted list or None.
+
+     Examples
+     --------
+     >>> idx = index_last_before([], '2020-08-01')
+
+     """
+     if len(revisions) == 0:
+         return  # No revisions, just return
+     revisions_sorted = sorted(revisions, reverse=True)
+     if revision is None:  # Return most recent revision
+         return revisions.index(revisions_sorted[0])
+     lt = np.array(revisions_sorted) <= revision
+     return revisions.index(revisions_sorted[lt.argmax()]) if any(lt) else None
+
+
+ def autocomplete(term, search_terms) -> str:
+     """Validate search term and return complete name.
+
+     Examples
+     --------
+     >>> autocomplete('subj')
+     'subject'
+
+     """
+     term = term.casefold()
+     # Check if term already complete
+     if term in search_terms:
+         return term
+     full_key = (x for x in search_terms if x.casefold().startswith(term))
+     key_ = next(full_key, None)
+     if not key_:
+         raise ValueError(f'Invalid search term "{term}", see `one.search_terms()`')
+     elif next(full_key, None):
+         raise ValueError(f'Ambiguous search term "{term}"')
+     return key_
+
+
+ class LazyId(Mapping):
+     """Return UUID from records when indexed.
+
+     Uses a paginated response object or list of Alyx REST records.
+     """
+
+     def __init__(self, pg, func=None):
+         self._pg = pg
+         self.func = func or self.ses2eid
+
+     def __getitem__(self, item):
+         return self.func(self._pg.__getitem__(item))
+
+     def __len__(self):
+         return self._pg.__len__()
+
+     def __iter__(self):
+         return map(self.func, self._pg.__iter__())
+
+     @staticmethod
+     def ses2eid(ses):
+         """Given one or more session dictionaries, extract and return the session UUID.
+
+         Parameters
+         ----------
+         ses : one.webclient._PaginatedResponse, dict, list
+             A collection of Alyx REST sessions endpoint records.
+
+         Returns
+         -------
+         str, list
+             One or more experiment ID strings.
+
+         """
+         if isinstance(ses, list):
+             return [LazyId.ses2eid(x) for x in ses]
+         else:
+             eid = ses.get('id', None) or ses['url'].split('/').pop()
+             return UUID(eid)
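
A minimal sketch of how the helpers in one/util.py above behave, assuming the package is installed and importable as `one`; the dates, revision strings, search terms, and session record below are invented for illustration.

from uuid import UUID

import pandas as pd

from one.util import validate_date_range, index_last_before, autocomplete, LazyId

# A single date expands to a (start, end) pair spanning that day;
# the end bound is one millisecond before the following midnight.
start, end = validate_date_range('2020-01-01')
assert start == pd.Timestamp('2020-01-01 00:00:00')
assert end == pd.Timestamp('2020-01-01 23:59:59.999000')

# index_last_before returns the index of the revision at or directly before
# the given string under lexicographic ordering; None means "most recent".
revisions = ['2020-01-08', '2021-07-06', '2019-12-31']
assert index_last_before(revisions, '2020-08-01') == 0  # -> '2020-01-08'
assert index_last_before(revisions, None) == 1  # -> '2021-07-06'

# autocomplete expands an unambiguous prefix against known search terms.
assert autocomplete('subj', ('subject', 'date_range')) == 'subject'

# LazyId converts Alyx session records to UUIDs only when indexed.
records = [{'id': '01234567-89ab-cdef-0123-456789abcdef'}]
assert LazyId(records)[0] == UUID('01234567-89ab-cdef-0123-456789abcdef')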