ONE-api 3.0b3-py3-none-any.whl → 3.0b5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {ONE_api-3.0b3.dist-info → ONE_api-3.0b5.dist-info}/LICENSE +21 -21
  2. {ONE_api-3.0b3.dist-info → ONE_api-3.0b5.dist-info}/METADATA +115 -115
  3. ONE_api-3.0b5.dist-info/RECORD +37 -0
  4. one/__init__.py +2 -2
  5. one/alf/__init__.py +1 -1
  6. one/alf/cache.py +640 -653
  7. one/alf/exceptions.py +105 -105
  8. one/alf/io.py +876 -876
  9. one/alf/path.py +1450 -1450
  10. one/alf/spec.py +519 -519
  11. one/api.py +2979 -2973
  12. one/converters.py +850 -850
  13. one/params.py +414 -414
  14. one/registration.py +845 -845
  15. one/remote/__init__.py +1 -1
  16. one/remote/aws.py +313 -313
  17. one/remote/base.py +142 -142
  18. one/remote/globus.py +1254 -1254
  19. one/tests/fixtures/params/.caches +6 -6
  20. one/tests/fixtures/params/.test.alyx.internationalbrainlab.org +8 -8
  21. one/tests/fixtures/rest_responses/1f187d80fd59677b395fcdb18e68e4401bfa1cc9 +1 -1
  22. one/tests/fixtures/rest_responses/47893cf67c985e6361cdee009334963f49fb0746 +1 -1
  23. one/tests/fixtures/rest_responses/535d0e9a1e2c1efbdeba0d673b131e00361a2edb +1 -1
  24. one/tests/fixtures/rest_responses/6dc96f7e9bcc6ac2e7581489b9580a6cd3f28293 +1 -1
  25. one/tests/fixtures/rest_responses/db1731fb8df0208944ae85f76718430813a8bf50 +1 -1
  26. one/tests/fixtures/rest_responses/dcce48259bb929661f60a02a48563f70aa6185b3 +1 -1
  27. one/tests/fixtures/rest_responses/f530d6022f61cdc9e38cc66beb3cb71f3003c9a1 +1 -1
  28. one/tests/fixtures/test_dbs.json +14 -14
  29. one/util.py +524 -524
  30. one/webclient.py +1368 -1354
  31. ONE_api-3.0b3.dist-info/RECORD +0 -37
  32. {ONE_api-3.0b3.dist-info → ONE_api-3.0b5.dist-info}/WHEEL +0 -0
  33. {ONE_api-3.0b3.dist-info → ONE_api-3.0b5.dist-info}/top_level.txt +0 -0
one/alf/spec.py CHANGED
@@ -1,519 +1,519 @@
+ """The complete ALF specification descriptors and validators."""
+ import re
+ import textwrap
+ from enum import IntEnum
+ from uuid import UUID
+ from typing import Union
+
+ from iblutil.util import flatten
+
+ SPEC_DESCRIPTION = {
+     'lab': 'The name of the lab where the data were collected (optional).',
+     'Subjects': 'An optional directory to indicate that the experiment data are divided by '
+                 'subject. If organizing by lab, this directory is required.',
+     'subject': 'The subject name, typically an arbitrary label',
+     'date': 'The date on which the experiment session took place, in ISO format, i.e. yyyy-mm-dd',
+     'number': 'The sequential session number of the day, optionally zero-padded to three '
+               'digits, e.g. 001, 002, etc.',
+     'collection': 'An optional folder to group data by modality, device, etc. This is necessary '
+                   'when a session contains multiple measurements of the same type, for example '
+                   'spike times from multiple probes. Label examples include "probe00", '
+                   '"raw_video_data".',
+     'revision': 'An optional folder to organize data by version. The version label is arbitrary, '
+                 'however the folder must start and end with pound signs, e.g. "#v1.0.0#". '
+                 'Unlike collections, if a specified revision is not found, the previous revision '
+                 'will be returned. The revisions are ordered lexicographically.',
+     'namespace': 'An optional filename prefix for data that are not expected to be a community '
+                  'standard, for example task specific events. The namespace may also be used to '
+                  'indicate data unique to a given piece of hardware or software, and is '
+                  'identified by underscores, e.g. "_iblrig_", "_phy_".',
+     'object': 'Every file describing a given object has the same number of rows (i.e. the 1st '
+               'dimension of an npy file, number of frames in a video file, etc). You can '
+               'therefore think of the files for an object as together defining a table, with '
+               'column headings given by the attribute in the file names, and values given by the '
+               'file contents. Object names should be in Haskell case and pluralized, '
+               'e.g. "wheelMoves", "sparseNoise", "trials".\nEncoding of relations between objects '
+               'can be achieved by a simplified relational model. If the attribute name of one '
+               'file matches the object name of a second, then the first file is guaranteed to '
+               'contain integers referring to the rows of the second. For example, '
+               '"spikes.clusters.npy" would contain integer references to the rows of '
+               '"clusters.brain_location.json" and "clusters.probes.npy"; and '
+               '"clusters.probes.npy" would contain integer references to "probes.insertion.json". '
+               '\nBe careful of plurals ("clusters.probe.npy" would not correspond to '
+               '"probes.insertion.json") and remember we count arrays starting from 0.',
+     'attribute': 'Together with the object, the attribute represents the type of data in the '
+                  'file, for example "times", "amplitudes", "clusters". The names should be in '
+                  'Haskell case, however the following three attributes may be separated by an '
+                  'underscore, e.g. "stimOn_times".\nThe attribute "times" is reserved for '
+                  'discrete event times and comprises a numerical array containing times of the '
+                  'events in seconds, relative to a universal timescale common to all files.\n'
+                  'The attribute "intervals" should have two columns, indicating the start and end '
+                  'times of each interval relative to the universal timescale.\n'
+                  'Continuous timeseries are represented by the "timestamps" attribute. The file '
+                  'may contain a vector of times in universal seconds if unevenly sampled, or two '
+                  'rows each representing a synchronization point, the first column giving the '
+                  'sample number (counting from 0), and the second column giving the '
+                  'corresponding time in universal seconds. The times corresponding to all '
+                  'samples are then found by linear interpolation. NB: the "timestamps" file is '
+                  'an exception to the rule that all files representing a continuous timeseries '
+                  'object must have one row per sample, as it will often have substantially fewer.',
+     'timescale': 'If you want to represent times relative to another (non-universal) timescale, '
+                  'a timescale can be appended after an underscore e.g. '
+                  '"spikes.times_ephysClock.npy", "trials.intervals_nidaq", '
+                  '"wheel.timestamps_bpod.csv".',
+     'extra': 'File names could have as many optional parts as you like: '
+              '"object.attribute.x1.x2.[…].xN.extension". The extra name parts play no formal '
+              'role, but can serve several additional purposes. For example, it could be a UUID or '
+              'file hash for archiving purposes. If there are multiple files with the same '
+              'object, attribute, and extensions but different extra parts, these should be '
+              'treated as files to be concatenated, for example to allow multiple-part tif files '
+              'as produced by scanimage to be encoded in ALF. The concatenation would happen in '
+              'hierarchical lexicographical order: i.e. by lexicographic order of x1, '
+              'then x2, etc.',
+     'extension': 'ALF can deal with any sort of file, as long as it has a concept of a number of '
+                  'rows (or primary dimension). The type of file is recognized by its extension.\n'
+                  'Preferred choices:\n\n.npy: numpy array file. This is recommended over flat '
+                  'binary since datatype and shape are stored in the file. If you have an array of '
+                  '3 or more dimensions, the first dimension counts as the number of rows.\n\n'
+                  '.tsv: tab-delimited text file. This is recommended over comma-separated files '
+                  'since text fields often contain commas. All rows should have the same number '
+                  'of columns. The first row contains tab-separated names for each column.\n\n'
+                  '.bin: flat binary file. It’s better to use .npy for storing binary data but '
+                  'some recording systems save in flat binary. Rather than convert them, '
+                  'you can ALFize a flat binary file by adding a metadata file, which specifies '
+                  'the number of columns (as the size of the "columns" array) and the binary '
+                  'datatype as a top-level key "dtype", using numpy naming conventions.'
+ }
+ """dict: The ALF part names and their definitions."""
+
+ # ========================================================== #
+ # The following are the specifications and patterns for ALFs #
+ # ========================================================== #
+
+ SESSION_SPEC = '({lab}/Subjects/)?{subject}/{date}/{number}'
+ """str: The session specification pattern"""
+
+ COLLECTION_SPEC = r'({collection}/)?(#{revision}#/)?'
+ """str: The collection and revision specification pattern"""
+
+ FILE_SPEC = r'_?{namespace}?_?{object}\.{attribute}(?:_{timescale})?(?:\.{extra})*\.{extension}$'
+ """str: The filename specification pattern"""
+
+ REL_PATH_SPEC = f'{COLLECTION_SPEC}{FILE_SPEC}'
+ """str: The collection, revision and filename specification pattern"""
+
+ FULL_SPEC = f'{SESSION_SPEC}/{REL_PATH_SPEC}'
+ """str: The full ALF path specification pattern"""
+
+ _DEFAULT = (
+     ('lab', r'\w+'),
+     ('subject', r'[\w.-]+'),
+     ('date', r'\d{4}-\d{2}-\d{2}'),
+     ('number', r'\d{1,3}'),
+     ('collection', r'[\w./-]+'),
+     ('revision', r'[\w.-]+'),  # brackets
+     # to include underscores: r'(?P<namespace>(?:^_)\w+(?:_))?'
+     ('namespace', '(?<=_)[a-zA-Z0-9]+'),  # brackets
+     ('object', r'\w+'),
+     # to treat _times and _intervals as timescale: (?P<attribute>[a-zA-Z]+)_?
+     # (?:_[a-z]+_)? allows attribute level namespaces (deprecated)
+     ('attribute', r'(?:_[a-z]+_)?[a-zA-Z0-9]+(?:_times(?=[_.])|_intervals(?=[_.]))?'),  # brackets
+     ('timescale', r'\w+'),  # brackets
+     ('extra', r'[.\w-]+'),  # brackets
+     ('extension', r'\w+')
+ )
+
+
+ class QC(IntEnum):
+     """Data QC outcomes.
+
+     This enumeration is used by the Alyx database. NB: Pandas cache tables use different codes.
+     """
+
+     CRITICAL = 50
+     """Dataset practically unusable, e.g. clock can't be aligned; data missing or inaccurate."""
+     FAIL = 40
+     """Dataset does not meet expected standards, e.g. trial event timings different to protocol."""
+     WARNING = 30
+     """
+     Dataset has minor quality issues, e.g. relatively high SNR, that should not affect most
+     analyses.
+     """
+     NOT_SET = 0
+     """Dataset quality has not been assessed."""
+     PASS = 10
+     """Dataset considered 'gold-standard', e.g. tight trial event timings, low recorded SNR."""
+
+     @staticmethod
+     def validate(v):
+         """Validate QC input and return equivalent enumeration.
+
+         Parameters
+         ----------
+         v : int, str, QC
+             A QC enumeration, or equivalent int value or name.
+
+         Returns
+         -------
+         QC
+             The enumeration.
+
+         Raises
+         ------
+         ValueError
+             An invalid QC value was passed.
+
+         """
+         if isinstance(v, QC):
+             return v
+         elif isinstance(v, str):
+             if v.isnumeric():
+                 return QC(int(v))
+             try:
+                 return QC[v.upper()]
+             except KeyError:
+                 raise ValueError(f'{v} is not a valid QC')
+         else:
+             return QC(v)
+
+
+ def path_pattern() -> str:
+     """Returns a template string representing where the ALF parts lie in an ALF path.
+
+     Brackets denote optional parts. This is used for documentation purposes only.
+     """
+     return ''.join(filter(lambda c: c not in '{}?*\\$', FULL_SPEC))
+
+
+ def describe(part=None, width=99):
+     """Print a description of an ALF part.
+
+     Prints the path pattern along with a description of the given ALF part (or all parts if None).
+
+     Parameters
+     ----------
+     part : str
+         ALF part to describe. One from `SPEC_DESCRIPTION.keys()`. If None, all parts are
+         described.
+     width : int
+         The max line length.
+
+     Returns
+     -------
+     None
+
+     Examples
+     --------
+     >>> describe()
+     >>> describe('collection')
+     >>> describe('extension', width=120)
+
+     """
+     full_spec = path_pattern()
+     print(full_spec)
+     if part:
+         if part not in SPEC_DESCRIPTION.keys():
+             all_parts = '"' + '", "'.join(SPEC_DESCRIPTION.keys()) + '"'
+             raise ValueError(f'Unknown ALF part "{part}", should be one of {all_parts}')
+         parts = [part]
+         span = re.search(part, full_spec).span()
+         # Underline the part within the printed pattern
+         print(' ' * span[0] + '^' * (span[1] - span[0]) + ' ' * (len(full_spec) - span[1]))
+     else:
+         parts = SPEC_DESCRIPTION.keys()
+     for part in parts:
+         print('\n' + part.upper())
+         # Split by max width
+         lines = flatten(textwrap.wrap(ln, width, replace_whitespace=False)
+                         for ln in SPEC_DESCRIPTION[part].splitlines())
+         for ln in lines:
+             print(ln)
+
+
+ def _dromedary(string) -> str:
+     """Convert a string to camel case. Acronyms/initialisms are preserved.
+
+     Parameters
+     ----------
+     string : str
+         To be converted to camel case
+
+     Returns
+     -------
+     str
+         The string in camel case
+
+     Examples
+     --------
+     >>> _dromedary('Hello world') == 'helloWorld'
+     >>> _dromedary('motion_energy') == 'motionEnergy'
+     >>> _dromedary('passive_RFM') == 'passiveRFM'
+     >>> _dromedary('FooBarBaz') == 'fooBarBaz'
+     >>> _dromedary('mpci ROIs') == 'mpciROIs'
+
+     See Also
+     --------
+     readableALF
+
+     """
+     def _capitalize(x):
+         return x if re.match(r'^[A-Z]+s?$', x) else x.capitalize()
+     if not string:  # short circuit on None and ''
+         return string
+     first, *other = re.split(r'[_\s]', string)
+     if len(other) == 0:
+         # Already camel/Pascal case, ensure first letter lower case
+         return first[0].lower() + first[1:]
+     # Convert to camel case, preserving all-uppercase elements
+     first = first if re.match(r'^[A-Z]+s?$', first) else first.lower()
+     return ''.join([first, *map(_capitalize, other)])
+
+
+ def _named(pattern, name):
+     """Wraps a regex pattern in a named capture group."""
+     return f'(?P<{name}>{pattern})'
+
+
+ def regex(spec: str = FULL_SPEC, **kwargs) -> re.Pattern:
+     """Construct a regular expression pattern for parsing or validating an ALF.
+
+     Parameters
+     ----------
+     spec : str
+         The spec string to construct the regular expression from
+     kwargs : dict[str]
+         Optional patterns to replace the defaults
+
+     Returns
+     -------
+     re.Pattern
+         A regular expression Pattern object
+
+     Examples
+     --------
+     Regex for a filename
+
+     >>> pattern = regex(spec=FILE_SPEC)
+
+     Regex for a complete path (including root)
+
+     >>> pattern = '.*' + regex(spec=FULL_SPEC).pattern
+
+     Regex pattern for specific object name
+
+     >>> pattern = regex(object='trials')
+
+     """
+     fields = dict(_DEFAULT)
+     if not fields.keys() >= kwargs.keys():
+         unknown = next(k for k in kwargs.keys() if k not in fields.keys())
+         raise KeyError(f'Unknown field "{unknown}"')
+     fields.update({k: v for k, v in kwargs.items() if v is not None})
+     spec_str = spec.format(**{k: _named(fields[k], k) for k in re.findall(r'(?<={)\w+', spec)})
+     return re.compile(spec_str)
+
+
+ def is_valid(filename):
+     """Returns True for a given file name if it is an ALF file, otherwise returns False.
+
+     Parameters
+     ----------
+     filename : str
+         The name of the file to evaluate
+
+     Returns
+     -------
+     bool
+         True if filename is valid ALF
+
+     Examples
+     --------
+     >>> is_valid('trials.feedbackType.npy')
+     True
+     >>> is_valid('_ns_obj.attr1.2622b17c-9408-4910-99cb-abf16d9225b9.metadata.json')
+     True
+     >>> is_valid('spike_train.npy')
+     False
+     >>> is_valid('channels._phy_ids.csv')  # WARNING: attribute level namespaces are deprecated
+     True
+
+     """
+     return regex(FILE_SPEC).match(filename) is not None
+
+
+ def is_session_path(path_object):
+     """Checks if the syntax corresponds to a session path.
+
+     Note that this checks the syntax only; there is no check of physical existence nor contents.
+
+     Parameters
+     ----------
+     path_object : str, pathlib.Path
+         The path object to validate
+
+     Returns
+     -------
+     bool
+         True if the path is a valid ALF session path
+
+     """
+     session_spec = re.compile(regex(SESSION_SPEC).pattern + '$')
+     if hasattr(path_object, 'as_posix'):
+         path_object = path_object.as_posix()
+     path_object = path_object.strip('/')
+     return session_spec.search(path_object) is not None
+
+
+ def is_uuid_string(string: str) -> bool:
+     """Bool test for randomly generated hexadecimal uuid validity.
+
+     NB: unlike is_uuid, is_uuid_string checks that the uuid is correctly hyphen separated.
+     """
+     return isinstance(string, str) and is_uuid(string, (3, 4, 5)) and str(UUID(string)) == string
+
+
+ def is_uuid(uuid: Union[str, int, bytes, UUID], versions=(4, 3)) -> bool:
+     """Bool test for randomly generated hexadecimal uuid validity.
+
+     By default only uuid versions 3 and 4 are considered valid. Version 4 uuids are generated by
+     Alyx while version 3 uuids are generated by :mod:`one.alf.cache`.
+
+     Unlike `is_uuid_string`, this function accepts UUID objects and compatible representations.
+
+     Parameters
+     ----------
+     uuid : str, int, bytes, UUID
+         An object to test for UUID validity.
+     versions : tuple of int
+         The UUID versions considered valid, by default (4, 3).
+
+     Returns
+     -------
+     bool
+         True if the input can be cast to a UUID object and is of the specified version(s).
+     """
+     if not isinstance(uuid, (UUID, str, bytes, int)):
+         return False
+     elif not isinstance(uuid, UUID):
+         try:
+             uuid = UUID(uuid) if isinstance(uuid, str) else UUID(**{type(uuid).__name__: uuid})
+         except ValueError:
+             return False
+     return isinstance(uuid, UUID) and uuid.version in versions
+
+
+ def to_alf(object, attribute, extension, namespace=None, timescale=None, extra=None):
+     """Given a set of ALF file parts, return a valid ALF file name.
+
+     Essential periods and underscores are added by the function.
+
+     Parameters
+     ----------
+     object : str
+         The ALF object name
+     attribute : str
+         The ALF object attribute name
+     extension : str
+         The file extension
+     namespace : str
+         An optional namespace
+     timescale : str, tuple
+         An optional timescale
+     extra : str, tuple
+         One or more optional extra ALF attributes
+
+     Returns
+     -------
+     str
+         A file name string built from the ALF parts
+
+     Examples
+     --------
+     >>> to_alf('spikes', 'times', 'ssv')
+     'spikes.times.ssv'
+     >>> to_alf('spikes', 'times', 'ssv', namespace='ibl')
+     '_ibl_spikes.times.ssv'
+     >>> to_alf('spikes', 'times', 'ssv', namespace='ibl', timescale='ephysClock')
+     '_ibl_spikes.times_ephysClock.ssv'
+     >>> to_alf('spikes', 'times', 'ssv', namespace='ibl', timescale=('ephys clock', 'minutes'))
+     '_ibl_spikes.times_ephysClock_minutes.ssv'
+     >>> to_alf('spikes', 'times', 'npy', namespace='ibl', timescale='ephysClock', extra='raw')
+     '_ibl_spikes.times_ephysClock.raw.npy'
+     >>> to_alf('wheel', 'timestamps', 'npy', 'ibl', 'bpod', ('raw', 'v12'))
+     '_ibl_wheel.timestamps_bpod.raw.v12.npy'
+
+     """
+     # Validate inputs
+     if not extension:
+         raise TypeError('An extension must be provided')
+     elif extension.startswith('.'):
+         extension = extension[1:]
+     if any(pt is not None and '.' in pt for pt in
+            (object, attribute, namespace, extension, timescale)):
+         raise ValueError('ALF parts must not contain a period (`.`)')
+     if '_' in (namespace or ''):
+         raise ValueError('Namespace must not contain extra underscores')
+     if object[0] == '_':
+         raise ValueError('Objects must not contain underscores; use namespace arg instead')
+     # Ensure parts are camel case (converts whitespace and snake case)
+     if timescale:
+         timescale = filter(None, [timescale] if isinstance(timescale, str) else timescale)
+         timescale = '_'.join(map(_dromedary, timescale))
+     # Convert attribute to camel case, leaving '_times', etc. intact
+     times_re = re.search('_(times|timestamps|intervals)$', attribute)
+     idx = times_re.start() if times_re else len(attribute)
+     attribute = _dromedary(attribute[:idx]) + attribute[idx:]
+     object = _dromedary(object)
+
+     # Optional extras may be provided as string or tuple of strings
+     if not extra:
+         extra = ()
+     elif isinstance(extra, str):
+         extra = extra.split('.')
+
+     # Construct ALF file
+     parts = (('_%s_' % namespace if namespace else '') + object,
+              attribute + ('_%s' % timescale if timescale else ''),
+              *extra,
+              extension)
+     return '.'.join(parts)
+
+
+ def readableALF(name: str, capitalize: bool = False) -> str:
+     """Convert camel case string to space separated string.
+
+     Given an ALF object name or attribute, return a string where the camel case words are space
+     separated. Acronyms/initialisms are preserved.
+
+     Parameters
+     ----------
+     name : str
+         The ALF part to format (e.g. object name or attribute).
+     capitalize : bool
+         If true, return with the first letter capitalized.
+
+     Returns
+     -------
+     str
+         The name formatted for display, with spaces and capitalization.
+
+     Examples
+     --------
+     >>> readableALF('sparseNoise') == 'sparse noise'
+     >>> readableALF('someROIDataset') == 'some ROI dataset'
+     >>> readableALF('someROIDataset', capitalize=True) == 'Some ROI dataset'
+
+     See Also
+     --------
+     _dromedary
+
+     """
+     words = []
+     i = 0
+     matches = re.finditer(r'[A-Z](?=[a-rt-z0-9])|(?<=[a-z0-9])[A-Z]', name)
+     for j in map(re.Match.start, matches):
+         words.append(name[i:j])
+         i = j
+     words.append(name[i:])
+     display_str = ' '.join(map(lambda s: s if re.match(r'^[A-Z]+s?$', s) else s.lower(), words))
+     return display_str[0].upper() + display_str[1:] if capitalize else display_str
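
To make the spec patterns above concrete, the following sketch parses a full ALF path with the compiled regular expression. The path and all of its parts (lab, subject, revision, etc.) are made up for illustration:

from one.alf.spec import regex, FULL_SPEC

# A hypothetical ALF path exercising every optional part of the spec
path = 'mylab/Subjects/SW_042/2023-05-02/001/alf/#2023-05-03#/_ibl_trials.intervals_bpod.npy'
parsed = regex(FULL_SPEC).match(path)
# groupdict() maps each named part of the spec to its value (None where absent):
# {'lab': 'mylab', 'subject': 'SW_042', 'date': '2023-05-02', 'number': '001',
#  'collection': 'alf', 'revision': '2023-05-03', 'namespace': 'ibl',
#  'object': 'trials', 'attribute': 'intervals', 'timescale': 'bpod',
#  'extra': None, 'extension': 'npy'}
print(parsed.groupdict())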
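
Going the other way, to_alf assembles parts into a file name that passes is_valid. The 'custom' namespace and the attribute below are invented for the example:

from one.alf.spec import to_alf, is_valid

# Snake case attributes are converted to camel case, while the reserved
# '_times' suffix is left intact
filename = to_alf('trials', 'stim_on_times', '.npy', namespace='custom')
assert filename == '_custom_trials.stimOn_times.npy'
assert is_valid(filename)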
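
The QC enumeration accepts several equivalent representations via QC.validate:

from one.alf.spec import QC

assert QC.validate('critical') is QC.CRITICAL  # by name, case-insensitive
assert QC.validate('40') is QC.FAIL            # numeric strings are cast to int
assert QC.validate(10) is QC.PASS              # plain ints map to enumeration values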
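
Finally, a few quick checks with the lighter-weight validators; the session paths here are again hypothetical:

from one.alf.spec import is_session_path, is_uuid_string

assert is_session_path('mylab/Subjects/SW_042/2023-05-02/001')
assert is_session_path('SW_042/2023-05-02/1')    # the lab/Subjects/ prefix is optional
assert not is_session_path('SW_042/2023-05-02')  # the session number is required
# is_uuid_string additionally requires canonical hyphenation
assert is_uuid_string('2622b17c-9408-4910-99cb-abf16d9225b9')
assert not is_uuid_string('2622b17c9408491099cbabf16d9225b9')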