ONE-api 3.0b4__tar.gz → 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {one_api-3.0b4 → one_api-3.1.0/ONE_api.egg-info}/PKG-INFO +3 -2
- {one_api-3.0b4/ONE_api.egg-info → one_api-3.1.0}/PKG-INFO +3 -2
- {one_api-3.0b4 → one_api-3.1.0}/one/__init__.py +1 -1
- {one_api-3.0b4 → one_api-3.1.0}/one/alf/cache.py +58 -16
- {one_api-3.0b4 → one_api-3.1.0}/one/api.py +110 -47
- {one_api-3.0b4 → one_api-3.1.0}/one/remote/globus.py +1 -1
- {one_api-3.0b4 → one_api-3.1.0}/one/webclient.py +9 -3
- {one_api-3.0b4 → one_api-3.1.0}/LICENSE +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/MANIFEST.in +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/ONE_api.egg-info/SOURCES.txt +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/ONE_api.egg-info/dependency_links.txt +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/ONE_api.egg-info/requires.txt +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/ONE_api.egg-info/top_level.txt +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/README.md +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/alf/__init__.py +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/alf/exceptions.py +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/alf/io.py +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/alf/path.py +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/alf/spec.py +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/converters.py +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/params.py +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/registration.py +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/remote/__init__.py +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/remote/aws.py +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/remote/base.py +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/datasets.pqt +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/params/.caches +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/params/.test.alyx.internationalbrainlab.org +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/1f187d80fd59677b395fcdb18e68e4401bfa1cc9 +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/3f51aa2e0baa42438467906f56a457c91a221898 +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/47893cf67c985e6361cdee009334963f49fb0746 +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/535d0e9a1e2c1efbdeba0d673b131e00361a2edb +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/5618bea3484a52cd893616f07903f0e49e023ba1 +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/6dc96f7e9bcc6ac2e7581489b9580a6cd3f28293 +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/db1731fb8df0208944ae85f76718430813a8bf50 +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/dcce48259bb929661f60a02a48563f70aa6185b3 +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/f530d6022f61cdc9e38cc66beb3cb71f3003c9a1 +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/sessions.pqt +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/test_dbs.json +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/test_img.png +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/one/util.py +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/pyproject.toml +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/requirements.txt +0 -0
- {one_api-3.0b4 → one_api-3.1.0}/setup.cfg +0 -0
{one_api-3.0b4 → one_api-3.1.0}/PKG-INFO (the identical change also applies to ONE_api.egg-info/PKG-INFO)

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: ONE-api
-Version: 3.
+Version: 3.1.0
 Summary: Open Neurophysiology Environment
 Author: IBL Staff
 License: MIT
@@ -21,6 +21,7 @@ Requires-Dist: iblutil>=1.14.0
 Requires-Dist: packaging
 Requires-Dist: boto3
 Requires-Dist: pyyaml
+Dynamic: license-file

 # Open Neurophysiology Environment
 [](https://coveralls.io/github/int-brain-lab/ONE?branch=main)
{one_api-3.0b4 → one_api-3.1.0}/one/__init__.py

@@ -1,2 +1,2 @@
 """The Open Neurophysiology Environment (ONE) API."""
-__version__ = '3.
+__version__ = '3.1.0'
{one_api-3.0b4 → one_api-3.1.0}/one/alf/cache.py

@@ -26,7 +26,7 @@ import logging
 import pandas as pd
 import numpy as np
 from packaging import version
-from iblutil.util import Bunch
+from iblutil.util import Bunch, ensure_list
 from iblutil.io import parquet
 from iblutil.io.hashfile import md5

@@ -35,8 +35,9 @@ from one.alf.io import iter_sessions
 from one.alf.path import session_path_parts, get_alf_path

 __all__ = [
-    'make_parquet_db', '
-    '
+    'make_parquet_db', 'load_tables', 'patch_tables', 'merge_tables',
+    'remove_table_files', 'remove_missing_datasets', 'default_cache',
+    'QC_TYPE', 'EMPTY_DATASETS_FRAME', 'EMPTY_SESSIONS_FRAME']
 _logger = logging.getLogger(__name__)

 # -------------------------------------------------------------------------------------------------
@@ -259,6 +260,33 @@ def _make_datasets_df(root_dir, hash_files=False) -> pd.DataFrame:
     return pd.DataFrame(rows, columns=DATASETS_COLUMNS).astype(DATASETS_COLUMNS)


+def default_cache(origin=''):
+    """Returns an empty cache dictionary with the default tables.
+
+    Parameters
+    ----------
+    origin : str, optional
+        The origin of the cache (e.g. a computer name or database name).
+
+    Returns
+    -------
+    Bunch
+        A Bunch object containing the loaded cache tables and associated metadata.
+
+    """
+    table_meta = _metadata(origin)
+    return Bunch({
+        'datasets': EMPTY_DATASETS_FRAME.copy(),
+        'sessions': EMPTY_SESSIONS_FRAME.copy(),
+        '_meta': {
+            'created_time': None,
+            'loaded_time': None,
+            'modified_time': None,
+            'saved_time': None,
+            'raw': {k: table_meta.copy() for k in ('datasets', 'sessions')}}
+    })
+
+
 def make_parquet_db(root_dir, out_dir=None, hash_ids=True, hash_files=False, lab=None):
     """Given a data directory, index the ALF datasets and save the generated cache tables.

@@ -375,17 +403,8 @@ def load_tables(tables_dir, glob_pattern='*.pqt'):
         A Bunch object containing the loaded cache tables and associated metadata.

     """
-
-
-        'loaded_time': None,
-        'modified_time': None,
-        'saved_time': None,
-        'raw': {}
-    }
-    caches = Bunch({
-        'datasets': EMPTY_DATASETS_FRAME.copy(),
-        'sessions': EMPTY_SESSIONS_FRAME.copy(),
-        '_meta': meta})
+    caches = default_cache()
+    meta = caches['_meta']
     INDEX_KEY = '.?id'
     for cache_file in Path(tables_dir).glob(glob_pattern):
         table = cache_file.stem
@@ -425,9 +444,12 @@ def load_tables(tables_dir, glob_pattern='*.pqt'):
     return caches


-def merge_tables(cache, strict=False, **kwargs):
+def merge_tables(cache, strict=False, origin=None, **kwargs):
     """Update the cache tables with new records.

+    Note: A copy of the tables in cache may be returned if the original tables are immutable.
+    This can happen when tables are loaded from a parquet file.
+
     Parameters
     ----------
     dict
@@ -435,6 +457,8 @@ def merge_tables(cache, strict=False, **kwargs):
     strict : bool
         If not True, the columns don't need to match. Extra columns in input tables are
         dropped and missing columns are added and filled with np.nan.
+    origin : str
+        The origin of the cache (e.g. a computer name or database name).
     kwargs
         pandas.DataFrame or pandas.Series to insert/update for each table.

@@ -488,13 +512,31 @@ def merge_tables(cache, strict=False, **kwargs):
         records = records.astype(cache[table].dtypes)
         # Update existing rows
         to_update = records.index.isin(cache[table].index)
-
+        try:
+            cache[table].loc[records.index[to_update], :] = records[to_update]
+        except ValueError as e:
+            if 'assignment destination is read-only' in str(e):
+                # NB: nullable integer and categorical dtypes may be backed by immutable arrays
+                # after loading from parquet and therefore must be copied before assignment
+                cache[table] = cache[table].copy()
+                cache[table].loc[records.index[to_update], :] = records[to_update]
+            else:
+                raise e  # pragma: no cover
+
         # Assign new rows
         to_assign = records[~to_update]
         frames = [cache[table], to_assign]
         # Concatenate and sort
         cache[table] = pd.concat(frames).sort_index()
         updated = datetime.datetime.now()
+        # Update the table metadata with the origin
+        if origin is not None:
+            table_meta = cache['_meta']['raw'].get(table, {})
+            if not table_meta.get('origin'):
+                table_meta['origin'] = origin
+            else:
+                table_meta['origin'] = set((*ensure_list(table_meta['origin']), origin))
+            cache['_meta']['raw'][table] = table_meta
     cache['_meta']['modified_time'] = updated
     return updated
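For reference, a minimal sketch of how the new `default_cache` helper and the `origin` keyword of `merge_tables` fit together (based only on the code shown above; the origin strings are illustrative and `merge_tables` may perform additional validation not visible in this diff):

```python
from one.alf.cache import default_cache, merge_tables

# An empty cache Bunch with 'datasets', 'sessions' and '_meta' keys;
# _meta['raw'] now holds per-table metadata stamped with the given origin.
cache = default_cache(origin='my-machine')
print(sorted(cache))                 # ['_meta', 'datasets', 'sessions']
print(list(cache['_meta']['raw']))   # ['datasets', 'sessions']

# merge_tables now accepts an `origin` keyword; when provided it is recorded in the
# metadata of each table passed via kwargs (merged into a set if one is already set), e.g.
# merge_tables(cache, sessions=new_sessions, origin='https://alyx.example.org')
```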
{one_api-3.0b4 → one_api-3.1.0}/one/api.py

@@ -6,6 +6,7 @@ import logging
 from weakref import WeakMethod
 from datetime import datetime, timedelta
 from functools import lru_cache, partial
+from itertools import chain
 from inspect import unwrap
 from pathlib import Path, PurePosixPath
 from typing import Any, Union, Optional, List
@@ -31,7 +32,7 @@ import one.alf.exceptions as alferr
 from one.alf.path import ALFPath
 from .alf.cache import (
     make_parquet_db, load_tables, remove_table_files, merge_tables,
-
+    default_cache, cast_index_object)
 from .alf.spec import is_uuid, is_uuid_string, QC, to_alf
 from . import __version__
 from one.converters import ConversionMixin, session_record2path, ses2records, datasets2records
@@ -39,15 +40,17 @@ from one import util

 _logger = logging.getLogger(__name__)
 __all__ = ['ONE', 'One', 'OneAlyx']
-
-"""
+SAVE_ON_DELETE = (os.environ.get('ONE_SAVE_ON_DELETE') or '1').casefold() in ('true', '1')
+"""bool: Whether to save modified cache tables on delete."""
+
+_logger.debug('ONE_SAVE_ON_DELETE: %s', SAVE_ON_DELETE)


 class One(ConversionMixin):
     """An API for searching and loading data on a local filesystem."""

     _search_terms = (
-        '
+        'datasets', 'date_range', 'laboratory', 'number',
         'projects', 'subject', 'task_protocol', 'dataset_qc_lte'
     )

@@ -97,7 +100,8 @@ class One(ConversionMixin):

     def __del__(self):
         """Save cache tables to disk before deleting the object."""
-
+        if SAVE_ON_DELETE:
+            self.save_cache()

     @property
     def offline(self):
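`SAVE_ON_DELETE` is read from the `ONE_SAVE_ON_DELETE` environment variable when `one.api` is first imported, so saving on `__del__` can be opted out of beforehand. A small sketch, assuming `one.api` has not been imported yet:

```python
import os

# Any value other than 'true' or '1' (case-insensitive) disables saving on delete.
os.environ['ONE_SAVE_ON_DELETE'] = '0'

import one.api  # noqa: E402 - must run after the environment variable is set
print(one.api.SAVE_ON_DELETE)  # False
```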
@@ -110,16 +114,7 @@ class One(ConversionMixin):

     def _reset_cache(self):
         """Replace the cache object with a Bunch that contains the right fields."""
-        self._cache =
-            'datasets': EMPTY_DATASETS_FRAME.copy(),
-            'sessions': EMPTY_SESSIONS_FRAME.copy(),
-            '_meta': {
-                'created_time': None,
-                'loaded_time': None,
-                'modified_time': None,
-                'saved_time': None,
-                'raw': {}}  # map of original table metadata
-        })
+        self._cache = default_cache()

     def _remove_table_files(self, tables=None):
         """Delete cache tables on disk.
@@ -162,6 +157,25 @@ class One(ConversionMixin):
         if self.offline:  # In online mode, the cache tables should be downloaded later
             warnings.warn(f'No cache tables found in {self._tables_dir}')

+        # If in remote mode and loading old tables generated on Alyx,
+        # prompt the user to delete them to improve load times
+        raw_meta = self._cache['_meta'].get('raw', {}).values() or [{}]
+        tagged = any(filter(None, flatten(x.get('database_tags') for x in raw_meta)))
+        origin = set(x['origin'] for x in raw_meta if 'origin' in x)
+        older = (self._cache['_meta']['created_time'] or datetime.now()) < datetime(2025, 2, 13)
+        remote = not self.offline and self.mode == 'remote'
+        if remote and origin == {'alyx'} and older and not self._web_client.silent and not tagged:
+            message = ('Old Alyx cache tables detected on disk. '
+                       'It\'s recomended to remove these tables as they '
+                       'negatively affect performance.\nDelete these tables? [Y/n]: ')
+            if (input(message).casefold().strip() or 'y')[0] == 'y':
+                self._remove_table_files()
+                self._reset_cache()
+        elif len(self._cache.datasets) > 1e6:
+            warnings.warn(
+                'Large cache tables affect performance. '
+                'Consider removing them by calling the `_remove_table_files` method.')
+
         return self._cache['_meta']['loaded_time']

     def save_cache(self, save_dir=None, clobber=False):
@@ -194,6 +208,14 @@ class One(ConversionMixin):
             caches = load_tables(save_dir)
             merge_tables(
                 caches, **{k: v for k, v in self._cache.items() if not k.startswith('_')})
+            # Ensure we use the minimum created date for each table
+            for table in caches['_meta']['raw']:
+                raw_meta = [x['_meta']['raw'].get(table, {}) for x in (caches, self._cache)]
+                created = filter(None, (x.get('date_created') for x in raw_meta))
+                if any(created := list(created)):
+                    created = min(map(datetime.fromisoformat, created))
+                    created = created.isoformat(sep=' ', timespec='minutes')
+                    meta['raw'][table]['date_created'] = created

         with FileLock(save_dir, log=_logger, timeout=TIMEOUT, timeout_action='delete'):
             _logger.info('Saving cache tables...')
@@ -297,10 +319,8 @@ class One(ConversionMixin):

         Parameters
         ----------
-
-            One or more dataset names. Returns sessions containing all these datasets.
-            A dataset matches if it contains the search string e.g. 'wheel.position' matches
-            '_ibl_wheel.position.npy'.
+        datasets : str, list
+            One or more (exact) dataset names. Returns sessions containing all of these datasets.
         dataset_qc_lte : str, int, one.alf.spec.QC
             A dataset QC value, returns sessions with datasets at or below this QC value, including
             those with no QC set. If `dataset` not passed, sessions with any passing QC datasets
@@ -348,7 +368,9 @@ class One(ConversionMixin):

         Search for sessions on a given date, in a given lab, containing trials and spike data.

-        >>> eids = one.search(
+        >>> eids = one.search(
+        ...     date='2023-01-01', lab='churchlandlab',
+        ...     datasets=['trials.table.pqt', 'spikes.times.npy'])

         Search for sessions containing trials and spike data where QC for both are WARNING or less.

@@ -375,13 +397,14 @@ class One(ConversionMixin):

         def all_present(x, dsets, exists=True):
             """Returns true if all datasets present in Series."""
-
+            name = x.str.rsplit('/', n=1, expand=True).iloc[:, -1]
+            return all(any(name.str.fullmatch(y) & exists) for y in dsets)

         # Iterate over search filters, reducing the sessions table
         sessions = self._cache['sessions']

         # Ensure sessions filtered in a particular order, with datasets last
-        search_order = ('date_range', 'number', '
+        search_order = ('date_range', 'number', 'datasets')

         def sort_fcn(itm):
             return -1 if itm[0] not in search_order else search_order.index(itm[0])
@@ -408,12 +431,15 @@ class One(ConversionMixin):
                 query = ensure_list(value)
                 sessions = sessions[sessions[key].isin(map(int, query))]
             # Dataset/QC check is biggest so this should be done last
-            elif key == '
+            elif key == 'datasets' or (key == 'dataset_qc_lte' and 'datasets' not in queries):
                 datasets = self._cache['datasets']
                 qc = QC.validate(queries.get('dataset_qc_lte', 'FAIL')).name  # validate value
                 has_dset = sessions.index.isin(datasets.index.get_level_values('eid'))
+                if not has_dset.any():
+                    sessions = sessions.iloc[0:0]  # No datasets for any sessions
+                    continue
                 datasets = datasets.loc[(sessions.index.values[has_dset], ), :]
-                query = ensure_list(value if key == '
+                query = ensure_list(value if key == 'datasets' else '')
                 # For each session check any dataset both contains query and exists
                 mask = (
                     (datasets
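Because `all_present` now strips the collection with `rsplit('/')` and matches with `Series.str.fullmatch`, the `datasets` search argument matches complete dataset file names rather than substrings in local mode. A hedged sketch of what this means for callers (assumes an already configured ONE instance; the dataset names are examples from the IBL naming scheme):

```python
from one.api import ONE

one = ONE()  # assumes ONE has already been set up

# Exact (full) dataset file names match; sessions must contain all listed datasets.
eids = one.search(datasets=['_ibl_trials.table.pqt', '_ibl_wheel.position.npy'])

# A partial name such as 'wheel.position' no longer matches '_ibl_wheel.position.npy';
# with one.wildcards enabled (the default) a regular expression can be used instead.
eids = one.search(datasets=r'.*wheel\.position.*')
```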
@@ -1003,7 +1029,7 @@ class One(ConversionMixin):
         """
         query_type = query_type or self.mode
         datasets = self.list_datasets(
-            eid, details=True, query_type=query_type, keep_eid_index=True
+            eid, details=True, query_type=query_type, keep_eid_index=True)

         if len(datasets) == 0:
             raise alferr.ALFObjectNotFound(obj)
@@ -1643,6 +1669,11 @@ class OneAlyx(One):
         tag : str
             An optional Alyx dataset tag for loading cache tables containing a subset of datasets.

+        Returns
+        -------
+        datetime.datetime
+            A timestamp of when the cache was loaded.
+
         Examples
         --------
         To load the cache tables for a given release tag
@@ -1666,6 +1697,8 @@ class OneAlyx(One):
         different_tag = any(x != tag for x in current_tags)
         if not (clobber or different_tag):
             super(OneAlyx, self).load_cache(tables_dir)  # Load any present cache
+        cache_meta = self._cache.get('_meta', {})
+        raw_meta = cache_meta.get('raw', {}).values() or [{}]

         try:
             # Determine whether a newer cache is available
@@ -1676,15 +1709,21 @@ class OneAlyx(One):
             min_version = packaging.version.parse(cache_info.get('min_api_version', '0.0.0'))
             if packaging.version.parse(one.__version__) < min_version:
                 warnings.warn(f'Newer cache tables require ONE version {min_version} or greater')
-                return
+                return cache_meta['loaded_time']

             # Check whether remote cache more recent
             remote_created = datetime.fromisoformat(cache_info['date_created'])
             local_created = cache_meta.get('created_time', None)
             fresh = local_created and (remote_created - local_created) < timedelta(minutes=1)
-
+            # The local cache may have been created locally more recently, but if it doesn't
+            # contain the same tag or origin, we need to download the remote one.
+            origin = cache_info.get('origin', 'unknown')
+            local_origin = (x.get('origin', []) for x in raw_meta)
+            local_origin = set(chain.from_iterable(map(ensure_list, local_origin)))
+            different_origin = origin not in local_origin
+            if fresh and not (different_tag or different_origin):
                 _logger.info('No newer cache available')
-                return
+                return cache_meta['loaded_time']

             # Set the cache table directory location
             if tables_dir:  # If tables directory specified, use that
@@ -1696,19 +1735,27 @@ class OneAlyx(One):
                 self._tables_dir = self._tables_dir or self.cache_dir

             # Check if the origin has changed. This is to warn users if downloading from a
-            # different database to the one currently loaded.
-
-            origin
-            if
+            # different database to the one currently loaded. When building the cache from
+            # remote queries the origin is set to the Alyx database URL. If the cache info
+            # origin name and URL are different, warn the user.
+            if different_origin and local_origin and self.alyx.base_url not in local_origin:
                 warnings.warn(
                     'Downloading cache tables from another origin '
-                    f'("{origin}" instead of "{", ".join(
+                    f'("{origin}" instead of "{", ".join(local_origin)}")')

             # Download the remote cache files
             _logger.info('Downloading remote caches...')
             files = self.alyx.download_cache_tables(cache_info.get('location'), self._tables_dir)
             assert any(files)
-
+            # Reload cache after download
+            loaded_time = super(OneAlyx, self).load_cache(self._tables_dir)
+            # Add db URL to origin set so we know where the cache came from
+            for raw_meta in self._cache['_meta']['raw'].values():
+                table_origin = set(filter(None, ensure_list(raw_meta.get('origin', []))))
+                if origin in table_origin:
+                    table_origin.add(self.alyx.base_url)
+                raw_meta['origin'] = table_origin
+            return loaded_time
         except (requests.exceptions.HTTPError, wc.HTTPError, requests.exceptions.SSLError) as ex:
             _logger.debug(ex)
             _logger.error(f'{type(ex).__name__}: Failed to load the remote cache file')
@@ -1725,6 +1772,7 @@ class OneAlyx(One):
                 'Please provide valid tables_dir / cache_dir kwargs '
                 'or run ONE.setup to update the default directory.'
             )
+        return cache_meta['loaded_time']

     @property
     def alyx(self):
@@ -1817,7 +1865,8 @@ class OneAlyx(One):
             return self._cache['datasets'].iloc[0:0] if details else []  # Return empty
         session, datasets = ses2records(self.alyx.rest('sessions', 'read', id=eid))
         # Add to cache tables
-        merge_tables(
+        merge_tables(
+            self._cache, sessions=session, datasets=datasets.copy(), origin=self.alyx.base_url)
         if datasets is None or datasets.empty:
             return self._cache['datasets'].iloc[0:0] if details else []  # Return empty
         assert set(datasets.index.unique('eid')) == {eid}
@@ -1969,7 +2018,7 @@ class OneAlyx(One):
         rec = self.alyx.rest('insertions', 'read', id=str(pid))
         return UUID(rec['session']), rec['name']

-    def eid2pid(self, eid, query_type=None, details=False):
+    def eid2pid(self, eid, query_type=None, details=False, **kwargs) -> (UUID, str, list):
         """Given an experiment UUID (eID), return the probe IDs and labels (i.e. ALF collection).

         NB: Requires a connection to the Alyx database.
@@ -1983,6 +2032,8 @@ class OneAlyx(One):
             Query mode - options include 'remote', and 'refresh'.
         details : bool
             Additionally return the complete Alyx records from insertions endpoint.
+        kwargs
+            Additional parameters to filter insertions Alyx endpoint.

         Returns
         -------
@@ -1993,6 +2044,15 @@ class OneAlyx(One):
         list of dict (optional)
             If details is true, returns the Alyx records from insertions endpoint.

+        Examples
+        --------
+        Get the probe IDs and details for a given session ID
+
+        >>> pids, labels, recs = one.eid2pid(eid, details=True)
+
+        Get the probe ID for a given session ID and label
+
+        >>> (pid,), _ = one.eid2pid(eid, details=False, name='probe00')
         """
         query_type = query_type or self.mode
         if query_type == 'local' and 'insertions' not in self._cache.keys():
@@ -2000,7 +2060,7 @@ class OneAlyx(One):
         eid = self.to_eid(eid)  # Ensure we have a UUID str
         if not eid:
             return (None,) * (3 if details else 2)
-        recs = self.alyx.rest('insertions', 'list', session=eid)
+        recs = self.alyx.rest('insertions', 'list', session=eid, **kwargs)
         pids = [UUID(x['id']) for x in recs]
         labels = [x['name'] for x in recs]
         if details:
@@ -2143,7 +2203,8 @@ class OneAlyx(One):
         # Build sessions table
         session_records = (x['session_info'] for x in insertions_records)
         sessions_df = pd.DataFrame(next(zip(*map(ses2records, session_records))))
-        return merge_tables(
+        return merge_tables(
+            self._cache, insertions=df, sessions=sessions_df, origin=self.alyx.base_url)

     def search(self, details=False, query_type=None, **kwargs):
         """Searches sessions matching the given criteria and returns a list of matching eids.
@@ -2162,10 +2223,8 @@ class OneAlyx(One):

         Parameters
         ----------
-
-
-            A dataset matches if it contains the search string e.g. 'wheel.position' matches
-            '_ibl_wheel.position.npy'. C.f. `datasets` argument.
+        datasets : str, list
+            One or more (exact) dataset names. Returns sessions containing all of these datasets.
         date_range : str, list, datetime.datetime, datetime.date, pandas.timestamp
             A single date to search or a list of 2 dates that define the range (inclusive). To
             define only the upper or lower date bound, set the other element to None.
@@ -2192,11 +2251,12 @@ class OneAlyx(One):
             A str or list of lab location (as per Alyx definition) name.
             Note: this corresponds to the specific rig, not the lab geographical location per se.
         dataset_types : str, list
-            One or more of dataset_types.
-
-
+            One or more of dataset_types. Unlike with `datasets`, the dataset types for the
+            sessions returned may not be reachable (i.e. for recent sessions the datasets may not
+            yet be available).
         dataset_qc_lte : int, str, one.alf.spec.QC
-            The maximum QC value for associated datasets.
+            The maximum QC value for associated datasets. NB: Without `datasets`, not all
+            associated datasets with the matching QC values are guarenteed to be reachable.
         details : bool
             If true also returns a dict of dataset details.
         query_type : str, None
@@ -2241,6 +2301,9 @@ class OneAlyx(One):
         - In default and local mode, when the one.wildcards flag is True (default), queries are
           interpreted as regular expressions. To turn this off set one.wildcards to False.
         - In remote mode regular expressions are only supported using the `django` argument.
+        - In remote mode, only the `datasets` argument returns sessions where datasets are
+          registered *and* exist. Using `dataset_types` or `dataset_qc_lte` without `datasets`
+          will not check that the datasets are reachable.

         """
         query_type = query_type or self.mode
@@ -2310,7 +2373,7 @@ class OneAlyx(One):

         """
         df = pd.DataFrame(next(zip(*map(ses2records, session_records))))
-        return merge_tables(self._cache, sessions=df)
+        return merge_tables(self._cache, sessions=df, origin=self.alyx.base_url)

     def _download_datasets(self, dsets, **kwargs) -> List[ALFPath]:
         """Download a single or multitude of datasets if stored on AWS.
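With these changes `OneAlyx.load_cache` returns the load timestamp on every path, including the early returns taken when no newer cache is available. A rough usage sketch (the database URL and release tag are illustrative, and only the `tag` parameter is documented in the hunk above):

```python
from one.api import ONE

one = ONE(base_url='https://openalyx.internationalbrainlab.org')  # illustrative database

# Returns a datetime.datetime timestamp of when the tables were loaded.
loaded_time = one.load_cache(tag='2022_Q2_IBL_et_al_RepeatedSite')
print(loaded_time)
```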
{one_api-3.0b4 → one_api-3.1.0}/one/remote/globus.py

@@ -1211,7 +1211,7 @@ class Globus(DownloadClient):
     async def task_wait_async(self, task_id, polling_interval=10, timeout=10):
         """Asynchronously wait until a Task is complete or fails, with a time limit.

-        If the task status is ACTIVE after
+        If the task status is ACTIVE after timeout, returns False, otherwise returns True.

         Parameters
         ----------
{one_api-3.0b4 → one_api-3.1.0}/one/webclient.py

@@ -34,6 +34,7 @@ from uuid import UUID
 import json
 import logging
 import math
+import os
 import re
 import functools
 import urllib.request
@@ -61,6 +62,8 @@ from iblutil.io.params import set_hidden
 from iblutil.util import ensure_list
 import concurrent.futures
 _logger = logging.getLogger(__name__)
+N_THREADS = int(os.environ.get('ONE_HTTP_DL_THREADS', 4))
+"""int: The number of download threads."""


 class _JSONEncoder(json.JSONEncoder):
@@ -355,7 +358,6 @@ def http_download_file_list(links_to_file_list, **kwargs):

     """
     links_to_file_list = list(links_to_file_list)  # In case generator was passed
-    n_threads = 4  # Max number of threads
     outputs = []
     target_dir = kwargs.pop('target_dir', None)
     # Ensure target dir the length of url list
@@ -364,7 +366,7 @@ def http_download_file_list(links_to_file_list, **kwargs):
     assert len(target_dir) == len(links_to_file_list)
     # using with statement to ensure threads are cleaned up promptly
     zipped = zip(links_to_file_list, target_dir)
-    with concurrent.futures.ThreadPoolExecutor(max_workers=
+    with concurrent.futures.ThreadPoolExecutor(max_workers=N_THREADS) as executor:
         # Multithreading load operations
         futures = [executor.submit(
             http_download_file, link, target_dir=target, **kwargs) for link, target in zipped]
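The download thread count is no longer hard-coded: `N_THREADS` is read from the `ONE_HTTP_DL_THREADS` environment variable when `one.webclient` is first imported. A short sketch, assuming the module has not been imported yet (the value 8 is arbitrary):

```python
import os

# Must be set before one.webclient is first imported, since N_THREADS is read at import time.
os.environ['ONE_HTTP_DL_THREADS'] = '8'

import one.webclient as wc  # noqa: E402
print(wc.N_THREADS)  # 8, used as max_workers when downloading file lists over HTTP
```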
@@ -1153,7 +1155,8 @@ class AlyxClient:
         assert endpoint_scheme[action]['action'] == 'get'
         # add to url data if it is a string
         if id:
-            # this is a special case of the list where we query a uuid
+            # this is a special case of the list where we query a uuid
+            # usually read is better but list may return fewer data and therefore be faster
             if 'django' in kwargs.keys():
                 kwargs['django'] = kwargs['django'] + ','
             else:
@@ -1161,6 +1164,9 @@ class AlyxClient:
                 kwargs['django'] = f"{kwargs['django']}pk,{id}"
         # otherwise, look for a dictionary of filter terms
         if kwargs:
+            # if django arg is present but is None, server will return a cryptic 500 status
+            if 'django' in kwargs and kwargs['django'] is None:
+                del kwargs['django']
             # Convert all lists in query params to comma separated list
             query_params = {k: ','.join(map(str, ensure_list(v))) for k, v in kwargs.items()}
             url = update_url_params(url, query_params)
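The new guard means a `django` keyword that is explicitly `None` is dropped before the query string is built, rather than being forwarded to Alyx where it previously produced a cryptic 500 response. A hedged sketch of the call pattern this protects (the endpoint and filter values are illustrative):

```python
from one.api import ONE

one = ONE()  # assumes Alyx credentials are already configured

# Passing django=None is now a no-op filter instead of a server error.
sessions = one.alyx.rest('sessions', 'list', subject='KS023', django=None)
```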
All remaining files listed above are renamed for the new package version without content changes.