ONE-api 3.0b3-py3-none-any.whl → 3.0b4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/LICENSE +21 -21
- {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/METADATA +115 -115
- ONE_api-3.0b4.dist-info/RECORD +37 -0
- one/__init__.py +2 -2
- one/alf/__init__.py +1 -1
- one/alf/cache.py +640 -653
- one/alf/exceptions.py +105 -105
- one/alf/io.py +876 -876
- one/alf/path.py +1450 -1450
- one/alf/spec.py +519 -519
- one/api.py +2949 -2973
- one/converters.py +850 -850
- one/params.py +414 -414
- one/registration.py +845 -845
- one/remote/__init__.py +1 -1
- one/remote/aws.py +313 -313
- one/remote/base.py +142 -142
- one/remote/globus.py +1254 -1254
- one/tests/fixtures/params/.caches +6 -6
- one/tests/fixtures/params/.test.alyx.internationalbrainlab.org +8 -8
- one/tests/fixtures/rest_responses/1f187d80fd59677b395fcdb18e68e4401bfa1cc9 +1 -1
- one/tests/fixtures/rest_responses/47893cf67c985e6361cdee009334963f49fb0746 +1 -1
- one/tests/fixtures/rest_responses/535d0e9a1e2c1efbdeba0d673b131e00361a2edb +1 -1
- one/tests/fixtures/rest_responses/6dc96f7e9bcc6ac2e7581489b9580a6cd3f28293 +1 -1
- one/tests/fixtures/rest_responses/db1731fb8df0208944ae85f76718430813a8bf50 +1 -1
- one/tests/fixtures/rest_responses/dcce48259bb929661f60a02a48563f70aa6185b3 +1 -1
- one/tests/fixtures/rest_responses/f530d6022f61cdc9e38cc66beb3cb71f3003c9a1 +1 -1
- one/tests/fixtures/test_dbs.json +14 -14
- one/util.py +524 -524
- one/webclient.py +1366 -1354
- ONE_api-3.0b3.dist-info/RECORD +0 -37
- {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/WHEEL +0 -0
- {ONE_api-3.0b3.dist-info → ONE_api-3.0b4.dist-info}/top_level.txt +0 -0
one/util.py
CHANGED
@@ -1,524 +1,524 @@

The 524 removed lines and the 524 added lines of this hunk are textually identical, so the change appears to be whitespace or line-ending only. The file content is shown once below.

```python
"""Decorators and small standalone functions for api module."""
import re
from uuid import UUID
import logging
import fnmatch
import warnings
from functools import wraps, partial
from typing import Iterable, Optional, List
from collections.abc import Mapping

import pandas as pd
import numpy as np
from iblutil.util import ensure_list

import one.alf.exceptions as alferr
from one.alf.path import rel_path_parts
from one.alf.spec import QC, FILE_SPEC, regex as alf_regex

logger = logging.getLogger(__name__)


def parse_id(method):
    """Ensure the input experiment identifier is an experiment UUID.

    Parameters
    ----------
    method : function
        An ONE method whose second arg is an experiment ID.

    Returns
    -------
    function
        A wrapper function that parses the ID to the expected string.

    Raises
    ------
    ValueError
        Unable to convert input to a valid experiment ID.

    """

    @wraps(method)
    def wrapper(self, id, *args, **kwargs):
        eid = self.to_eid(id)
        if eid is None:
            raise ValueError(f'Cannot parse session ID "{id}" (session may not exist)')
        return method(self, eid, *args, **kwargs)

    return wrapper


def validate_date_range(date_range) -> (pd.Timestamp, pd.Timestamp):
    """Validate and arrange date range in a two-element list.

    Parameters
    ----------
    date_range : str, datetime.date, datetime.datetime, pd.Timestamp, np.datetime64, list, None
        A single date or tuple/list of two dates. None represents no bound.

    Returns
    -------
    tuple of pd.Timestamp
        The start and end timestamps.

    Examples
    --------
    >>> validate_date_range('2020-01-01')  # On this day
    >>> validate_date_range(datetime.date(2020, 1, 1))
    >>> validate_date_range(np.array(['2022-01-30', '2022-01-30'], dtype='datetime64[D]'))
    >>> validate_date_range(pd.Timestamp(2020, 1, 1))
    >>> validate_date_range(np.datetime64('2021-03-11'))
    >>> validate_date_range(['2020-01-01'])  # from date
    >>> validate_date_range(['2020-01-01', None])  # from date
    >>> validate_date_range([None, '2020-01-01'])  # up to date

    Raises
    ------
    ValueError
        Size of date range tuple must be 1 or 2.

    """
    if date_range is None:
        return

    # Ensure we have exactly two values
    if isinstance(date_range, str) or not isinstance(date_range, Iterable):
        # date_range = (date_range, pd.Timestamp(date_range) + pd.Timedelta(days=1))
        dt = pd.Timedelta(days=1) - pd.Timedelta(milliseconds=1)
        date_range = (date_range, pd.Timestamp(date_range) + dt)
    elif len(date_range) == 1:
        date_range = [date_range[0], pd.Timestamp.max]
    elif len(date_range) != 2:
        raise ValueError

    # For comparisons, ensure both values are pd.Timestamp (datetime, date and datetime64
    # objects will be converted)
    start, end = date_range
    start = start or pd.Timestamp.min  # Convert None to lowest possible date
    end = end or pd.Timestamp.max  # Convert None to highest possible date

    # Convert to timestamp
    if not isinstance(start, pd.Timestamp):
        start = pd.Timestamp(start)
    if not isinstance(end, pd.Timestamp):
        end = pd.Timestamp(end)

    return start, end


def _collection_spec(collection=None, revision=None) -> str:
    """Return a template string for a collection/revision regular expression.

    Because both are optional in the ALF spec, None will match any (including absent), while an
    empty string will match absent.

    Parameters
    ----------
    collection : None, str
        An optional collection regular expression.
    revision : None, str
        An optional revision regular expression.

    Returns
    -------
    str
        A string format for matching the collection/revision.

    """
    spec = ''
    for value, default in zip((collection, revision), ('{collection}/', '#{revision}#/')):
        if not value:
            default = f'({default})?' if value is None else ''
        spec += default
    return spec


def _file_spec(**kwargs):
    """Return a template string for an ALF dataset regular expression.

    Because 'namespace', 'timescale', and 'extra' are optional, None will match any
    (including absent). This function removes the regex flags from the file spec string that make
    certain parts optional.

    TODO an empty string should only match absent; this could be achieved by removing parts from
    spec string

    Parameters
    ----------
    namespace : None, str
        If namespace is not None, the namespace section of the returned file spec will not be
        optional.
    timescale : None, str
        If timescale is not None, the timescale section of the returned file spec will not be
        optional.
    extra : None, str
        If extra is not None, the extra section of the returned file spec will not be
        optional.

    Returns
    -------
    str
        A string format for matching an ALF dataset.

    """
    OPTIONAL = {'namespace': '?', 'timescale': '?', 'extra': '*'}
    filespec = FILE_SPEC
    for k, v in kwargs.items():
        if k in OPTIONAL and v is not None:
            i = filespec.find(k) + len(k)
            i += filespec[i:].find(OPTIONAL[k])
            filespec = filespec[:i] + filespec[i:].replace(OPTIONAL[k], '', 1)
    return filespec


def filter_datasets(
        all_datasets, filename=None, collection=None, revision=None, revision_last_before=True,
        qc=QC.FAIL, ignore_qc_not_set=False, assert_unique=True, wildcards=False):
    """Filter the datasets cache table by relative path (dataset name, collection and revision).

    When None is passed, all values will match. To match on empty parts, use an empty string.
    When revision_last_before is true, None means return latest revision.

    Parameters
    ----------
    all_datasets : pandas.DataFrame
        A datasets cache table.
    filename : str, dict, None
        A filename str or a dict of alf parts. Regular expressions permitted.
    collection : str, None
        A collection string. Regular expressions permitted.
    revision : str, None
        A revision string to match. If revision_last_before is true, regular expressions are
        not permitted.
    revision_last_before : bool
        When true and no exact match exists, the (lexicographically) previous revision is used
        instead. When false the revision string is matched like collection and filename,
        with regular expressions permitted. NB: When true and `revision` is None the default
        revision is returned which may not be the last revision. If no default is defined, the
        last revision is returned.
    qc : str, int, one.alf.spec.QC
        Returns datasets at or below this QC level. Integer values should correspond to the QC
        enumeration NOT the qc category column codes in the pandas table.
    ignore_qc_not_set : bool
        When true, do not return datasets for which QC is NOT_SET.
    assert_unique : bool
        When true an error is raised if multiple collections or datasets are found.
    wildcards : bool
        If true, use unix shell style matching instead of regular expressions.

    Returns
    -------
    pd.DataFrame
        A slice of all_datasets that match the filters.

    Examples
    --------
    Filter by dataset name and collection

    >>> datasets = filter_datasets(all_datasets, '.*spikes.times.*', 'alf/probe00')

    Filter datasets not in a collection

    >>> datasets = filter_datasets(all_datasets, collection='')

    Filter by matching revision

    >>> datasets = filter_datasets(all_datasets, 'spikes.times.npy',
    ...                            revision='2020-01-12', revision_last_before=False)

    Filter by filename parts

    >>> datasets = filter_datasets(all_datasets, dict(object='spikes', attribute='times'))

    Filter by QC outcome - datasets with WARNING or better

    >>> datasets = filter_datasets(all_datasets, qc='WARNING')

    Filter by QC outcome and ignore datasets with unset QC - datasets with PASS only

    >>> datasets = filter_datasets(all_datasets, qc='PASS', ignore_qc_not_set=True)

    Raises
    ------
    one.alf.exceptions.ALFMultipleCollectionsFound
        The matching list of datasets have more than one unique collection and `assert_unique` is
        True.
    one.alf.exceptions.ALFMultipleRevisionsFound
        When `revision_last_before` is false, the matching list of datasets have more than one
        unique revision. When `revision_last_before` is true, a 'default_revision' column exists,
        and no revision is passed, this error means that one or more matching datasets have
        multiple revisions specified as the default. This is typically an error in the cache table
        itself as all datasets should have one and only one default revision specified.
    one.alf.exceptions.ALFMultipleObjectsFound
        The matching list of datasets have more than one unique filename and both `assert_unique`
        and `revision_last_before` are true.
    one.alf.exceptions.ALFError
        When both `assert_unique` and `revision_last_before` is true, and a 'default_revision'
        column exists but `revision` is None; one or more matching datasets have no default
        revision specified. This is typically an error in the cache table itself as all datasets
        should have one and only one default revision specified.

    Notes
    -----
    - It is not possible to match datasets that are in a given collection OR NOT in ANY collection.
      e.g. filter_datasets(dsets, collection=['alf', '']) will not match the latter. For this you
      must use two separate queries.
    - It is not possible to match datasets with no revision when wildcards=True.

    """
    # Create a regular expression string to match relative path against
    filename = filename or {}
    regex_args = {'collection': collection}
    spec_str = _collection_spec(collection, None if revision_last_before else revision)

    if isinstance(filename, dict):
        spec_str += _file_spec(**filename)
        regex_args.update(**filename)
    else:
        # Convert to regex if necessary and assert end of string
        flagless_token = re.escape(r'(?s:')  # fnmatch.translate may wrap input in flagless group
        # If there is a wildcard at the start of the filename we must exclude capture of slashes to
        # avoid capture of collection part, e.g. * -> .* -> [^/]* (one or more non-slash chars)
        exclude_slash = partial(re.sub, fr'^({flagless_token})?\.\*', r'\g<1>[^/]*')
        spec_str += '|'.join(
            exclude_slash(fnmatch.translate(x)) if wildcards else x + '$'
            for x in ensure_list(filename)
        )

    # If matching revision name, add to regex string
    if not revision_last_before:
        regex_args.update(revision=revision)

    for k, v in regex_args.items():
        if v is None:
            continue
        if wildcards:
            # Convert to regex, remove \\Z which asserts end of string
            v = (fnmatch.translate(x).replace('\\Z', '') for x in ensure_list(v))
        if not isinstance(v, str):
            regex_args[k] = '|'.join(v)  # logical OR

    # Build regex string
    pattern = alf_regex('^' + spec_str, **regex_args)
    path_match = all_datasets['rel_path'].str.match(pattern)

    # Test on QC outcome
    qc = QC.validate(qc)
    qc_match = all_datasets['qc'].le(qc.name)
    if ignore_qc_not_set:
        qc_match &= all_datasets['qc'].ne('NOT_SET')

    # Filter datasets on path and QC
    match = all_datasets[path_match & qc_match].copy()
    if len(match) == 0 or not (revision_last_before or assert_unique):
        return match

    # Extract revision to separate column
    if 'revision' not in match.columns:
        match['revision'] = match.rel_path.map(lambda x: rel_path_parts(x)[1] or '')
    if assert_unique:
        collections = set(rel_path_parts(x)[0] or '' for x in match.rel_path.values)
        if len(collections) > 1:
            _list = '"' + '", "'.join(collections) + '"'
            raise alferr.ALFMultipleCollectionsFound(_list)
        if not revision_last_before:
            if len(set(match['revision'])) > 1:
                _list = '"' + '", "'.join(set(match['revision'])) + '"'
                raise alferr.ALFMultipleRevisionsFound(_list)
            if len(match) > 1:
                _list = '"' + '", "'.join(match['rel_path']) + '"'
                raise alferr.ALFMultipleObjectsFound(_list)
            else:
                return match

    match = filter_revision_last_before(match, revision, assert_unique=assert_unique)
    if assert_unique and len(match) > 1:
        _list = '"' + '", "'.join(match['rel_path']) + '"'
        raise alferr.ALFMultipleObjectsFound(_list)
    return match


def filter_revision_last_before(
        datasets, revision=None, assert_unique=True, assert_consistent=False):
    """Filter datasets by revision, returning previous revision if no exact match is found.

    Parameters
    ----------
    datasets : pandas.DataFrame
        A datasets cache table.
    revision : str
        A revision string to match (regular expressions not permitted).
    assert_unique : bool
        When true an alferr.ALFMultipleRevisionsFound exception is raised when multiple
        default revisions are found; an alferr.ALFError when no default revision is found.
    assert_consistent : bool
        Will raise alferr.ALFMultipleRevisionsFound if matching revision is different between
        datasets.

    Returns
    -------
    pd.DataFrame
        A datasets DataFrame with 0 or 1 row per unique dataset.

    Raises
    ------
    one.alf.exceptions.ALFMultipleRevisionsFound
        When the 'default_revision' column exists and no revision is passed, this error means that
        one or more matching datasets have multiple revisions specified as the default. This is
        typically an error in the cache table itself as all datasets should have one and only one
        default revision specified.
        When `assert_consistent` is True, this error may mean that the matching datasets have
        mixed revisions.
    one.alf.exceptions.ALFMultipleObjectsFound
        The matching list of datasets have more than one unique filename and both `assert_unique`
        and `revision_last_before` are true.
    one.alf.exceptions.ALFError
        When both `assert_unique` and `revision_last_before` is true, and a 'default_revision'
        column exists but `revision` is None; one or more matching datasets have no default
        revision specified. This is typically an error in the cache table itself as all datasets
        should have one and only one default revision specified.

    Notes
    -----
    - When `revision` is not None, the default revision value is not used. If an older revision is
      the default one (uncommon), passing in a revision may lead to a newer revision being returned
      than if revision is None.
    - A view is returned if a revision column is present, otherwise a copy is returned.

    """
    def _last_before(df):
        """Takes a DataFrame with only one dataset and multiple revisions, returns matching row."""
        if revision is None:
            dset_name = df['rel_path'].iloc[0]
            if 'default_revision' in df.columns:
                if assert_unique and sum(df.default_revision) > 1:
                    revisions = df['revision'][df.default_revision.values]
                    rev_list = '"' + '", "'.join(revisions) + '"'
                    raise alferr.ALFMultipleRevisionsFound(rev_list)
                if sum(df.default_revision) == 1:
                    return df[df.default_revision]
                if len(df) == 1:  # This may be the case when called from load_datasets
                    return df  # It's not the default but there's only one available revision
                # default_revision column all False; default isn't copied to remote repository
                if assert_unique:
                    raise alferr.ALFError(f'No default revision for dataset {dset_name}')
            warnings.warn(
                f'No default revision for dataset {dset_name}; using most recent',
                alferr.ALFWarning)
        # Compare revisions lexicographically
        idx = index_last_before(df['revision'].tolist(), revision)
        # Square brackets forces 1 row DataFrame returned instead of Series
        return df.iloc[slice(0, 0) if idx is None else [idx], :]

    # Extract revision to separate column
    if 'revision' not in datasets.columns:
        with pd.option_context('mode.chained_assignment', None):  # FIXME Explicitly copy?
            datasets['revision'] = datasets.rel_path.map(lambda x: rel_path_parts(x)[1] or '')
    # Group by relative path (sans revision)
    groups = datasets.rel_path.str.replace('#.*#/', '', regex=True).values
    grouped = datasets.groupby(groups, group_keys=False)
    filtered = grouped.apply(_last_before)
    # Raise if matching revision is different between datasets
    if len(filtered['revision'].unique()) > 1:
        rev_list = '"' + '", "'.join(filtered['revision'].unique()) + '"'
        if assert_consistent:
            raise alferr.ALFMultipleRevisionsFound(rev_list)
        else:
            warnings.warn(f'Multiple revisions: {rev_list}', alferr.ALFWarning)
    return filtered


def index_last_before(revisions: List[str], revision: Optional[str]) -> Optional[int]:
    """Return index of string occurring directly before provided revision string when sorted.

    Revisions are lexicographically sorted. If `revision` is None, the index of the most recent
    revision is returned.

    Parameters
    ----------
    revisions : list of strings
        A list of revision strings.
    revision : None, str
        The revision string to match on.

    Returns
    -------
    int, None
        Index of revision before matching string in sorted list or None.

    Examples
    --------
    >>> idx = index_last_before([], '2020-08-01')

    """
    if len(revisions) == 0:
        return  # No revisions, just return
    revisions_sorted = sorted(revisions, reverse=True)
    if revision is None:  # Return most recent revision
        return revisions.index(revisions_sorted[0])
    lt = np.array(revisions_sorted) <= revision
    return revisions.index(revisions_sorted[lt.argmax()]) if any(lt) else None


def autocomplete(term, search_terms) -> str:
    """Validate search term and return complete name.

    Examples
    --------
    >>> autocomplete('subj')
    'subject'

    """
    term = term.casefold()
    # Check if term already complete
    if term in search_terms:
        return term
    full_key = (x for x in search_terms if x.casefold().startswith(term))
    key_ = next(full_key, None)
    if not key_:
        raise ValueError(f'Invalid search term "{term}", see `one.search_terms()`')
    elif next(full_key, None):
        raise ValueError(f'Ambiguous search term "{term}"')
    return key_


class LazyId(Mapping):
    """Return UUID from records when indexed.

    Uses a paginated response object or list of Alyx REST records.
    """

    def __init__(self, pg, func=None):
        self._pg = pg
        self.func = func or self.ses2eid

    def __getitem__(self, item):
        return self.func(self._pg.__getitem__(item))

    def __len__(self):
        return self._pg.__len__()

    def __iter__(self):
        return map(self.func, self._pg.__iter__())

    @staticmethod
    def ses2eid(ses):
        """Given one or more session dictionaries, extract and return the session UUID.

        Parameters
        ----------
        ses : one.webclient._PaginatedResponse, dict, list
            A collection of Alyx REST sessions endpoint records.

        Returns
        -------
        uuid.UUID, list
            One or more experiment UUIDs.

        """
        if isinstance(ses, list):
            return [LazyId.ses2eid(x) for x in ses]
        else:
            eid = ses.get('id', None) or ses['url'].split('/').pop()
            return UUID(eid)
```
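
To illustrate the behaviour of the pure helpers in this module, here is a minimal usage sketch based on the code above. The input values are hypothetical; the assertions follow directly from the implementations of `validate_date_range`, `index_last_before`, and `autocomplete` shown in the hunk.

```python
import pandas as pd
from one.util import validate_date_range, index_last_before, autocomplete

# A single date string expands to a range spanning that whole day:
# the end bound is start + 1 day - 1 ms.
start, end = validate_date_range('2020-01-01')
assert start == pd.Timestamp('2020-01-01 00:00:00')
assert end == pd.Timestamp('2020-01-01 23:59:59.999')

# index_last_before returns the index (in the original list) of the
# latest revision lexicographically <= the query string.
revisions = ['2020-01-08', '2021-07-06', '2020-01-01']
assert index_last_before(revisions, '2020-12-31') == 0  # -> '2020-01-08'
assert index_last_before(revisions, None) == 1          # most recent, '2021-07-06'
assert index_last_before(revisions, '2019-12-31') is None  # nothing at or before

# autocomplete expands an unambiguous prefix against the search terms.
assert autocomplete('subj', ['subject', 'date_range']) == 'subject'
```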