ONE-api 3.0b4__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {one_api-3.0b4 → one_api-3.1.0/ONE_api.egg-info}/PKG-INFO +3 -2
  2. {one_api-3.0b4/ONE_api.egg-info → one_api-3.1.0}/PKG-INFO +3 -2
  3. {one_api-3.0b4 → one_api-3.1.0}/one/__init__.py +1 -1
  4. {one_api-3.0b4 → one_api-3.1.0}/one/alf/cache.py +58 -16
  5. {one_api-3.0b4 → one_api-3.1.0}/one/api.py +110 -47
  6. {one_api-3.0b4 → one_api-3.1.0}/one/remote/globus.py +1 -1
  7. {one_api-3.0b4 → one_api-3.1.0}/one/webclient.py +9 -3
  8. {one_api-3.0b4 → one_api-3.1.0}/LICENSE +0 -0
  9. {one_api-3.0b4 → one_api-3.1.0}/MANIFEST.in +0 -0
  10. {one_api-3.0b4 → one_api-3.1.0}/ONE_api.egg-info/SOURCES.txt +0 -0
  11. {one_api-3.0b4 → one_api-3.1.0}/ONE_api.egg-info/dependency_links.txt +0 -0
  12. {one_api-3.0b4 → one_api-3.1.0}/ONE_api.egg-info/requires.txt +0 -0
  13. {one_api-3.0b4 → one_api-3.1.0}/ONE_api.egg-info/top_level.txt +0 -0
  14. {one_api-3.0b4 → one_api-3.1.0}/README.md +0 -0
  15. {one_api-3.0b4 → one_api-3.1.0}/one/alf/__init__.py +0 -0
  16. {one_api-3.0b4 → one_api-3.1.0}/one/alf/exceptions.py +0 -0
  17. {one_api-3.0b4 → one_api-3.1.0}/one/alf/io.py +0 -0
  18. {one_api-3.0b4 → one_api-3.1.0}/one/alf/path.py +0 -0
  19. {one_api-3.0b4 → one_api-3.1.0}/one/alf/spec.py +0 -0
  20. {one_api-3.0b4 → one_api-3.1.0}/one/converters.py +0 -0
  21. {one_api-3.0b4 → one_api-3.1.0}/one/params.py +0 -0
  22. {one_api-3.0b4 → one_api-3.1.0}/one/registration.py +0 -0
  23. {one_api-3.0b4 → one_api-3.1.0}/one/remote/__init__.py +0 -0
  24. {one_api-3.0b4 → one_api-3.1.0}/one/remote/aws.py +0 -0
  25. {one_api-3.0b4 → one_api-3.1.0}/one/remote/base.py +0 -0
  26. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/datasets.pqt +0 -0
  27. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/params/.caches +0 -0
  28. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/params/.test.alyx.internationalbrainlab.org +0 -0
  29. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/1f187d80fd59677b395fcdb18e68e4401bfa1cc9 +0 -0
  30. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/3f51aa2e0baa42438467906f56a457c91a221898 +0 -0
  31. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/47893cf67c985e6361cdee009334963f49fb0746 +0 -0
  32. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/535d0e9a1e2c1efbdeba0d673b131e00361a2edb +0 -0
  33. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/5618bea3484a52cd893616f07903f0e49e023ba1 +0 -0
  34. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/6dc96f7e9bcc6ac2e7581489b9580a6cd3f28293 +0 -0
  35. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/db1731fb8df0208944ae85f76718430813a8bf50 +0 -0
  36. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/dcce48259bb929661f60a02a48563f70aa6185b3 +0 -0
  37. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/f530d6022f61cdc9e38cc66beb3cb71f3003c9a1 +0 -0
  38. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/sessions.pqt +0 -0
  39. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/test_dbs.json +0 -0
  40. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/test_img.png +0 -0
  41. {one_api-3.0b4 → one_api-3.1.0}/one/util.py +0 -0
  42. {one_api-3.0b4 → one_api-3.1.0}/pyproject.toml +0 -0
  43. {one_api-3.0b4 → one_api-3.1.0}/requirements.txt +0 -0
  44. {one_api-3.0b4 → one_api-3.1.0}/setup.cfg +0 -0
ONE_api.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: ONE-api
- Version: 3.0b4
+ Version: 3.1.0
  Summary: Open Neurophysiology Environment
  Author: IBL Staff
  License: MIT
@@ -21,6 +21,7 @@ Requires-Dist: iblutil>=1.14.0
  Requires-Dist: packaging
  Requires-Dist: boto3
  Requires-Dist: pyyaml
+ Dynamic: license-file

  # Open Neurophysiology Environment
  [![Coverage Status](https://coveralls.io/repos/github/int-brain-lab/ONE/badge.svg?branch=main)](https://coveralls.io/github/int-brain-lab/ONE?branch=main)
PKG-INFO

@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: ONE-api
- Version: 3.0b4
+ Version: 3.1.0
  Summary: Open Neurophysiology Environment
  Author: IBL Staff
  License: MIT
@@ -21,6 +21,7 @@ Requires-Dist: iblutil>=1.14.0
  Requires-Dist: packaging
  Requires-Dist: boto3
  Requires-Dist: pyyaml
+ Dynamic: license-file

  # Open Neurophysiology Environment
  [![Coverage Status](https://coveralls.io/repos/github/int-brain-lab/ONE/badge.svg?branch=main)](https://coveralls.io/github/int-brain-lab/ONE?branch=main)
one/__init__.py

@@ -1,2 +1,2 @@
  """The Open Neurophysiology Environment (ONE) API."""
- __version__ = '3.0b4'
+ __version__ = '3.1.0'
one/alf/cache.py

@@ -26,7 +26,7 @@ import logging
  import pandas as pd
  import numpy as np
  from packaging import version
- from iblutil.util import Bunch
+ from iblutil.util import Bunch, ensure_list
  from iblutil.io import parquet
  from iblutil.io.hashfile import md5

@@ -35,8 +35,9 @@ from one.alf.io import iter_sessions
  from one.alf.path import session_path_parts, get_alf_path

  __all__ = [
- 'make_parquet_db', 'patch_tables', 'merge_tables', 'QC_TYPE', 'remove_table_files',
- 'remove_missing_datasets', 'load_tables', 'EMPTY_DATASETS_FRAME', 'EMPTY_SESSIONS_FRAME']
+ 'make_parquet_db', 'load_tables', 'patch_tables', 'merge_tables',
+ 'remove_table_files', 'remove_missing_datasets', 'default_cache',
+ 'QC_TYPE', 'EMPTY_DATASETS_FRAME', 'EMPTY_SESSIONS_FRAME']
  _logger = logging.getLogger(__name__)

  # -------------------------------------------------------------------------------------------------
@@ -259,6 +260,33 @@ def _make_datasets_df(root_dir, hash_files=False) -> pd.DataFrame:
  return pd.DataFrame(rows, columns=DATASETS_COLUMNS).astype(DATASETS_COLUMNS)


+ def default_cache(origin=''):
+ """Returns an empty cache dictionary with the default tables.
+
+ Parameters
+ ----------
+ origin : str, optional
+ The origin of the cache (e.g. a computer name or database name).
+
+ Returns
+ -------
+ Bunch
+ A Bunch object containing the loaded cache tables and associated metadata.
+
+ """
+ table_meta = _metadata(origin)
+ return Bunch({
+ 'datasets': EMPTY_DATASETS_FRAME.copy(),
+ 'sessions': EMPTY_SESSIONS_FRAME.copy(),
+ '_meta': {
+ 'created_time': None,
+ 'loaded_time': None,
+ 'modified_time': None,
+ 'saved_time': None,
+ 'raw': {k: table_meta.copy() for k in ('datasets', 'sessions')}}
+ })
+
+
  def make_parquet_db(root_dir, out_dir=None, hash_ids=True, hash_files=False, lab=None):
  """Given a data directory, index the ALF datasets and save the generated cache tables.

@@ -375,17 +403,8 @@ def load_tables(tables_dir, glob_pattern='*.pqt'):
  A Bunch object containing the loaded cache tables and associated metadata.

  """
- meta = {
- 'created_time': None,
- 'loaded_time': None,
- 'modified_time': None,
- 'saved_time': None,
- 'raw': {}
- }
- caches = Bunch({
- 'datasets': EMPTY_DATASETS_FRAME.copy(),
- 'sessions': EMPTY_SESSIONS_FRAME.copy(),
- '_meta': meta})
+ caches = default_cache()
+ meta = caches['_meta']
  INDEX_KEY = '.?id'
  for cache_file in Path(tables_dir).glob(glob_pattern):
  table = cache_file.stem
@@ -425,9 +444,12 @@ def load_tables(tables_dir, glob_pattern='*.pqt'):
  return caches


- def merge_tables(cache, strict=False, **kwargs):
+ def merge_tables(cache, strict=False, origin=None, **kwargs):
  """Update the cache tables with new records.

+ Note: A copy of the tables in cache may be returned if the original tables are immutable.
+ This can happen when tables are loaded from a parquet file.
+
  Parameters
  ----------
  dict
@@ -435,6 +457,8 @@ def merge_tables(cache, strict=False, **kwargs):
  strict : bool
  If not True, the columns don't need to match. Extra columns in input tables are
  dropped and missing columns are added and filled with np.nan.
+ origin : str
+ The origin of the cache (e.g. a computer name or database name).
  kwargs
  pandas.DataFrame or pandas.Series to insert/update for each table.

@@ -488,13 +512,31 @@ def merge_tables(cache, strict=False, **kwargs):
  records = records.astype(cache[table].dtypes)
  # Update existing rows
  to_update = records.index.isin(cache[table].index)
- cache[table].loc[records.index[to_update], :] = records[to_update]
+ try:
+ cache[table].loc[records.index[to_update], :] = records[to_update]
+ except ValueError as e:
+ if 'assignment destination is read-only' in str(e):
+ # NB: nullable integer and categorical dtypes may be backed by immutable arrays
+ # after loading from parquet and therefore must be copied before assignment
+ cache[table] = cache[table].copy()
+ cache[table].loc[records.index[to_update], :] = records[to_update]
+ else:
+ raise e # pragma: no cover
+
  # Assign new rows
  to_assign = records[~to_update]
  frames = [cache[table], to_assign]
  # Concatenate and sort
  cache[table] = pd.concat(frames).sort_index()
  updated = datetime.datetime.now()
+ # Update the table metadata with the origin
+ if origin is not None:
+ table_meta = cache['_meta']['raw'].get(table, {})
+ if not table_meta.get('origin'):
+ table_meta['origin'] = origin
+ else:
+ table_meta['origin'] = set((*ensure_list(table_meta['origin']), origin))
+ cache['_meta']['raw'][table] = table_meta
  cache['_meta']['modified_time'] = updated
  return updated
one/api.py

@@ -6,6 +6,7 @@ import logging
  from weakref import WeakMethod
  from datetime import datetime, timedelta
  from functools import lru_cache, partial
+ from itertools import chain
  from inspect import unwrap
  from pathlib import Path, PurePosixPath
  from typing import Any, Union, Optional, List
@@ -31,7 +32,7 @@ import one.alf.exceptions as alferr
  from one.alf.path import ALFPath
  from .alf.cache import (
  make_parquet_db, load_tables, remove_table_files, merge_tables,
- EMPTY_DATASETS_FRAME, EMPTY_SESSIONS_FRAME, cast_index_object)
+ default_cache, cast_index_object)
  from .alf.spec import is_uuid, is_uuid_string, QC, to_alf
  from . import __version__
  from one.converters import ConversionMixin, session_record2path, ses2records, datasets2records
@@ -39,15 +40,17 @@ from one import util

  _logger = logging.getLogger(__name__)
  __all__ = ['ONE', 'One', 'OneAlyx']
- N_THREADS = os.environ.get('ONE_HTTP_DL_THREADS', 4)
- """int: The number of download threads."""
+ SAVE_ON_DELETE = (os.environ.get('ONE_SAVE_ON_DELETE') or '1').casefold() in ('true', '1')
+ """bool: Whether to save modified cache tables on delete."""
+
+ _logger.debug('ONE_SAVE_ON_DELETE: %s', SAVE_ON_DELETE)


  class One(ConversionMixin):
  """An API for searching and loading data on a local filesystem."""

  _search_terms = (
- 'dataset', 'date_range', 'laboratory', 'number',
+ 'datasets', 'date_range', 'laboratory', 'number',
  'projects', 'subject', 'task_protocol', 'dataset_qc_lte'
  )

@@ -97,7 +100,8 @@ class One(ConversionMixin):

  def __del__(self):
  """Save cache tables to disk before deleting the object."""
- self.save_cache()
+ if SAVE_ON_DELETE:
+ self.save_cache()

  @property
  def offline(self):
@@ -110,16 +114,7 @@ class One(ConversionMixin):

  def _reset_cache(self):
  """Replace the cache object with a Bunch that contains the right fields."""
- self._cache = Bunch({
- 'datasets': EMPTY_DATASETS_FRAME.copy(),
- 'sessions': EMPTY_SESSIONS_FRAME.copy(),
- '_meta': {
- 'created_time': None,
- 'loaded_time': None,
- 'modified_time': None,
- 'saved_time': None,
- 'raw': {}} # map of original table metadata
- })
+ self._cache = default_cache()

  def _remove_table_files(self, tables=None):
  """Delete cache tables on disk.
@@ -162,6 +157,25 @@ class One(ConversionMixin):
  if self.offline: # In online mode, the cache tables should be downloaded later
  warnings.warn(f'No cache tables found in {self._tables_dir}')

+ # If in remote mode and loading old tables generated on Alyx,
+ # prompt the user to delete them to improve load times
+ raw_meta = self._cache['_meta'].get('raw', {}).values() or [{}]
+ tagged = any(filter(None, flatten(x.get('database_tags') for x in raw_meta)))
+ origin = set(x['origin'] for x in raw_meta if 'origin' in x)
+ older = (self._cache['_meta']['created_time'] or datetime.now()) < datetime(2025, 2, 13)
+ remote = not self.offline and self.mode == 'remote'
+ if remote and origin == {'alyx'} and older and not self._web_client.silent and not tagged:
+ message = ('Old Alyx cache tables detected on disk. '
+ 'It\'s recomended to remove these tables as they '
+ 'negatively affect performance.\nDelete these tables? [Y/n]: ')
+ if (input(message).casefold().strip() or 'y')[0] == 'y':
+ self._remove_table_files()
+ self._reset_cache()
+ elif len(self._cache.datasets) > 1e6:
+ warnings.warn(
+ 'Large cache tables affect performance. '
+ 'Consider removing them by calling the `_remove_table_files` method.')
+
  return self._cache['_meta']['loaded_time']

  def save_cache(self, save_dir=None, clobber=False):
@@ -194,6 +208,14 @@ class One(ConversionMixin):
  caches = load_tables(save_dir)
  merge_tables(
  caches, **{k: v for k, v in self._cache.items() if not k.startswith('_')})
+ # Ensure we use the minimum created date for each table
+ for table in caches['_meta']['raw']:
+ raw_meta = [x['_meta']['raw'].get(table, {}) for x in (caches, self._cache)]
+ created = filter(None, (x.get('date_created') for x in raw_meta))
+ if any(created := list(created)):
+ created = min(map(datetime.fromisoformat, created))
+ created = created.isoformat(sep=' ', timespec='minutes')
+ meta['raw'][table]['date_created'] = created

  with FileLock(save_dir, log=_logger, timeout=TIMEOUT, timeout_action='delete'):
  _logger.info('Saving cache tables...')
@@ -297,10 +319,8 @@ class One(ConversionMixin):

  Parameters
  ----------
- dataset : str, list
- One or more dataset names. Returns sessions containing all these datasets.
- A dataset matches if it contains the search string e.g. 'wheel.position' matches
- '_ibl_wheel.position.npy'.
+ datasets : str, list
+ One or more (exact) dataset names. Returns sessions containing all of these datasets.
  dataset_qc_lte : str, int, one.alf.spec.QC
  A dataset QC value, returns sessions with datasets at or below this QC value, including
  those with no QC set. If `dataset` not passed, sessions with any passing QC datasets
@@ -348,7 +368,9 @@ class One(ConversionMixin):

  Search for sessions on a given date, in a given lab, containing trials and spike data.

- >>> eids = one.search(date='2023-01-01', lab='churchlandlab', dataset=['trials', 'spikes'])
+ >>> eids = one.search(
+ ... date='2023-01-01', lab='churchlandlab',
+ ... datasets=['trials.table.pqt', 'spikes.times.npy'])

  Search for sessions containing trials and spike data where QC for both are WARNING or less.

@@ -375,13 +397,14 @@ class One(ConversionMixin):

  def all_present(x, dsets, exists=True):
  """Returns true if all datasets present in Series."""
- return all(any(x.str.contains(y, regex=self.wildcards) & exists) for y in dsets)
+ name = x.str.rsplit('/', n=1, expand=True).iloc[:, -1]
+ return all(any(name.str.fullmatch(y) & exists) for y in dsets)

  # Iterate over search filters, reducing the sessions table
  sessions = self._cache['sessions']

  # Ensure sessions filtered in a particular order, with datasets last
- search_order = ('date_range', 'number', 'dataset')
+ search_order = ('date_range', 'number', 'datasets')

  def sort_fcn(itm):
  return -1 if itm[0] not in search_order else search_order.index(itm[0])
@@ -408,12 +431,15 @@ class One(ConversionMixin):
  query = ensure_list(value)
  sessions = sessions[sessions[key].isin(map(int, query))]
  # Dataset/QC check is biggest so this should be done last
- elif key == 'dataset' or (key == 'dataset_qc_lte' and 'dataset' not in queries):
+ elif key == 'datasets' or (key == 'dataset_qc_lte' and 'datasets' not in queries):
  datasets = self._cache['datasets']
  qc = QC.validate(queries.get('dataset_qc_lte', 'FAIL')).name # validate value
  has_dset = sessions.index.isin(datasets.index.get_level_values('eid'))
+ if not has_dset.any():
+ sessions = sessions.iloc[0:0] # No datasets for any sessions
+ continue
  datasets = datasets.loc[(sessions.index.values[has_dset], ), :]
- query = ensure_list(value if key == 'dataset' else '')
+ query = ensure_list(value if key == 'datasets' else '')
  # For each session check any dataset both contains query and exists
  mask = (
  (datasets
@@ -1003,7 +1029,7 @@ class One(ConversionMixin):
  """
  query_type = query_type or self.mode
  datasets = self.list_datasets(
- eid, details=True, query_type=query_type, keep_eid_index=True, revision=revision)
+ eid, details=True, query_type=query_type, keep_eid_index=True)

  if len(datasets) == 0:
  raise alferr.ALFObjectNotFound(obj)
@@ -1643,6 +1669,11 @@ class OneAlyx(One):
  tag : str
  An optional Alyx dataset tag for loading cache tables containing a subset of datasets.

+ Returns
+ -------
+ datetime.datetime
+ A timestamp of when the cache was loaded.
+
  Examples
  --------
  To load the cache tables for a given release tag
@@ -1666,6 +1697,8 @@ class OneAlyx(One):
  different_tag = any(x != tag for x in current_tags)
  if not (clobber or different_tag):
  super(OneAlyx, self).load_cache(tables_dir) # Load any present cache
+ cache_meta = self._cache.get('_meta', {})
+ raw_meta = cache_meta.get('raw', {}).values() or [{}]

  try:
  # Determine whether a newer cache is available
@@ -1676,15 +1709,21 @@ class OneAlyx(One):
  min_version = packaging.version.parse(cache_info.get('min_api_version', '0.0.0'))
  if packaging.version.parse(one.__version__) < min_version:
  warnings.warn(f'Newer cache tables require ONE version {min_version} or greater')
- return
+ return cache_meta['loaded_time']

  # Check whether remote cache more recent
  remote_created = datetime.fromisoformat(cache_info['date_created'])
  local_created = cache_meta.get('created_time', None)
  fresh = local_created and (remote_created - local_created) < timedelta(minutes=1)
- if fresh and not different_tag:
+ # The local cache may have been created locally more recently, but if it doesn't
+ # contain the same tag or origin, we need to download the remote one.
+ origin = cache_info.get('origin', 'unknown')
+ local_origin = (x.get('origin', []) for x in raw_meta)
+ local_origin = set(chain.from_iterable(map(ensure_list, local_origin)))
+ different_origin = origin not in local_origin
+ if fresh and not (different_tag or different_origin):
  _logger.info('No newer cache available')
- return
+ return cache_meta['loaded_time']

  # Set the cache table directory location
  if tables_dir: # If tables directory specified, use that
@@ -1696,19 +1735,27 @@ class OneAlyx(One):
  self._tables_dir = self._tables_dir or self.cache_dir

  # Check if the origin has changed. This is to warn users if downloading from a
- # different database to the one currently loaded.
- prev_origin = list(set(filter(None, (x.get('origin') for x in raw_meta))))
- origin = cache_info.get('origin', 'unknown')
- if prev_origin and origin not in prev_origin:
+ # different database to the one currently loaded. When building the cache from
+ # remote queries the origin is set to the Alyx database URL. If the cache info
+ # origin name and URL are different, warn the user.
+ if different_origin and local_origin and self.alyx.base_url not in local_origin:
  warnings.warn(
  'Downloading cache tables from another origin '
- f'("{origin}" instead of "{", ".join(prev_origin)}")')
+ f'("{origin}" instead of "{", ".join(local_origin)}")')

  # Download the remote cache files
  _logger.info('Downloading remote caches...')
  files = self.alyx.download_cache_tables(cache_info.get('location'), self._tables_dir)
  assert any(files)
- super(OneAlyx, self).load_cache(self._tables_dir) # Reload cache after download
+ # Reload cache after download
+ loaded_time = super(OneAlyx, self).load_cache(self._tables_dir)
+ # Add db URL to origin set so we know where the cache came from
+ for raw_meta in self._cache['_meta']['raw'].values():
+ table_origin = set(filter(None, ensure_list(raw_meta.get('origin', []))))
+ if origin in table_origin:
+ table_origin.add(self.alyx.base_url)
+ raw_meta['origin'] = table_origin
+ return loaded_time
  except (requests.exceptions.HTTPError, wc.HTTPError, requests.exceptions.SSLError) as ex:
  _logger.debug(ex)
  _logger.error(f'{type(ex).__name__}: Failed to load the remote cache file')
@@ -1725,6 +1772,7 @@ class OneAlyx(One):
  'Please provide valid tables_dir / cache_dir kwargs '
  'or run ONE.setup to update the default directory.'
  )
+ return cache_meta['loaded_time']

  @property
  def alyx(self):
@@ -1817,7 +1865,8 @@ class OneAlyx(One):
  return self._cache['datasets'].iloc[0:0] if details else [] # Return empty
  session, datasets = ses2records(self.alyx.rest('sessions', 'read', id=eid))
  # Add to cache tables
- merge_tables(self._cache, sessions=session, datasets=datasets.copy())
+ merge_tables(
+ self._cache, sessions=session, datasets=datasets.copy(), origin=self.alyx.base_url)
  if datasets is None or datasets.empty:
  return self._cache['datasets'].iloc[0:0] if details else [] # Return empty
  assert set(datasets.index.unique('eid')) == {eid}
@@ -1969,7 +2018,7 @@ class OneAlyx(One):
  rec = self.alyx.rest('insertions', 'read', id=str(pid))
  return UUID(rec['session']), rec['name']

- def eid2pid(self, eid, query_type=None, details=False):
+ def eid2pid(self, eid, query_type=None, details=False, **kwargs) -> (UUID, str, list):
  """Given an experiment UUID (eID), return the probe IDs and labels (i.e. ALF collection).

  NB: Requires a connection to the Alyx database.
@@ -1983,6 +2032,8 @@ class OneAlyx(One):
  Query mode - options include 'remote', and 'refresh'.
  details : bool
  Additionally return the complete Alyx records from insertions endpoint.
+ kwargs
+ Additional parameters to filter insertions Alyx endpoint.

  Returns
  -------
@@ -1993,6 +2044,15 @@ class OneAlyx(One):
  list of dict (optional)
  If details is true, returns the Alyx records from insertions endpoint.

+ Examples
+ --------
+ Get the probe IDs and details for a given session ID
+
+ >>> pids, labels, recs = one.eid2pid(eid, details=True)
+
+ Get the probe ID for a given session ID and label
+
+ >>> (pid,), _ = one.eid2pid(eid, details=False, name='probe00')
  """
  query_type = query_type or self.mode
  if query_type == 'local' and 'insertions' not in self._cache.keys():
@@ -2000,7 +2060,7 @@ class OneAlyx(One):
  eid = self.to_eid(eid) # Ensure we have a UUID str
  if not eid:
  return (None,) * (3 if details else 2)
- recs = self.alyx.rest('insertions', 'list', session=eid)
+ recs = self.alyx.rest('insertions', 'list', session=eid, **kwargs)
  pids = [UUID(x['id']) for x in recs]
  labels = [x['name'] for x in recs]
  if details:
@@ -2143,7 +2203,8 @@ class OneAlyx(One):
  # Build sessions table
  session_records = (x['session_info'] for x in insertions_records)
  sessions_df = pd.DataFrame(next(zip(*map(ses2records, session_records))))
- return merge_tables(self._cache, insertions=df, sessions=sessions_df)
+ return merge_tables(
+ self._cache, insertions=df, sessions=sessions_df, origin=self.alyx.base_url)

  def search(self, details=False, query_type=None, **kwargs):
  """Searches sessions matching the given criteria and returns a list of matching eids.
@@ -2162,10 +2223,8 @@ class OneAlyx(One):

  Parameters
  ----------
- dataset : str
- A (partial) dataset name. Returns sessions containing matching datasets.
- A dataset matches if it contains the search string e.g. 'wheel.position' matches
- '_ibl_wheel.position.npy'. C.f. `datasets` argument.
+ datasets : str, list
+ One or more (exact) dataset names. Returns sessions containing all of these datasets.
  date_range : str, list, datetime.datetime, datetime.date, pandas.timestamp
  A single date to search or a list of 2 dates that define the range (inclusive). To
  define only the upper or lower date bound, set the other element to None.
@@ -2192,11 +2251,12 @@ class OneAlyx(One):
  A str or list of lab location (as per Alyx definition) name.
  Note: this corresponds to the specific rig, not the lab geographical location per se.
  dataset_types : str, list
- One or more of dataset_types.
- datasets : str, list
- One or more (exact) dataset names. Returns sessions containing all of these datasets.
+ One or more of dataset_types. Unlike with `datasets`, the dataset types for the
+ sessions returned may not be reachable (i.e. for recent sessions the datasets may not
+ yet be available).
  dataset_qc_lte : int, str, one.alf.spec.QC
- The maximum QC value for associated datasets.
+ The maximum QC value for associated datasets. NB: Without `datasets`, not all
+ associated datasets with the matching QC values are guarenteed to be reachable.
  details : bool
  If true also returns a dict of dataset details.
  query_type : str, None
@@ -2241,6 +2301,9 @@ class OneAlyx(One):
  - In default and local mode, when the one.wildcards flag is True (default), queries are
  interpreted as regular expressions. To turn this off set one.wildcards to False.
  - In remote mode regular expressions are only supported using the `django` argument.
+ - In remote mode, only the `datasets` argument returns sessions where datasets are
+ registered *and* exist. Using `dataset_types` or `dataset_qc_lte` without `datasets`
+ will not check that the datasets are reachable.

  """
  query_type = query_type or self.mode
@@ -2310,7 +2373,7 @@ class OneAlyx(One):

  """
  df = pd.DataFrame(next(zip(*map(ses2records, session_records))))
- return merge_tables(self._cache, sessions=df)
+ return merge_tables(self._cache, sessions=df, origin=self.alyx.base_url)

  def _download_datasets(self, dsets, **kwargs) -> List[ALFPath]:
  """Download a single or multitude of datasets if stored on AWS.
one/remote/globus.py

@@ -1211,7 +1211,7 @@ class Globus(DownloadClient):
  async def task_wait_async(self, task_id, polling_interval=10, timeout=10):
  """Asynchronously wait until a Task is complete or fails, with a time limit.

- If the task status is ACTIVE after timout, returns False, otherwise returns True.
+ If the task status is ACTIVE after timeout, returns False, otherwise returns True.

  Parameters
  ----------
one/webclient.py

@@ -34,6 +34,7 @@ from uuid import UUID
  import json
  import logging
  import math
+ import os
  import re
  import functools
  import urllib.request
@@ -61,6 +62,8 @@ from iblutil.io.params import set_hidden
  from iblutil.util import ensure_list
  import concurrent.futures
  _logger = logging.getLogger(__name__)
+ N_THREADS = int(os.environ.get('ONE_HTTP_DL_THREADS', 4))
+ """int: The number of download threads."""


  class _JSONEncoder(json.JSONEncoder):
@@ -355,7 +358,6 @@ def http_download_file_list(links_to_file_list, **kwargs):

  """
  links_to_file_list = list(links_to_file_list) # In case generator was passed
- n_threads = 4 # Max number of threads
  outputs = []
  target_dir = kwargs.pop('target_dir', None)
  # Ensure target dir the length of url list
@@ -364,7 +366,7 @@ def http_download_file_list(links_to_file_list, **kwargs):
  assert len(target_dir) == len(links_to_file_list)
  # using with statement to ensure threads are cleaned up promptly
  zipped = zip(links_to_file_list, target_dir)
- with concurrent.futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
+ with concurrent.futures.ThreadPoolExecutor(max_workers=N_THREADS) as executor:
  # Multithreading load operations
  futures = [executor.submit(
  http_download_file, link, target_dir=target, **kwargs) for link, target in zipped]
@@ -1153,7 +1155,8 @@ class AlyxClient:
  assert endpoint_scheme[action]['action'] == 'get'
  # add to url data if it is a string
  if id:
- # this is a special case of the list where we query a uuid. Usually read is better
+ # this is a special case of the list where we query a uuid
+ # usually read is better but list may return fewer data and therefore be faster
  if 'django' in kwargs.keys():
  kwargs['django'] = kwargs['django'] + ','
  else:
@@ -1161,6 +1164,9 @@ class AlyxClient:
  kwargs['django'] = f"{kwargs['django']}pk,{id}"
  # otherwise, look for a dictionary of filter terms
  if kwargs:
+ # if django arg is present but is None, server will return a cryptic 500 status
+ if 'django' in kwargs and kwargs['django'] is None:
+ del kwargs['django']
  # Convert all lists in query params to comma separated list
  query_params = {k: ','.join(map(str, ensure_list(v))) for k, v in kwargs.items()}
  url = update_url_params(url, query_params)
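The download thread count has moved out of one/api.py and is now a module-level constant in one/webclient.py, read from the ONE_HTTP_DL_THREADS environment variable (default 4, cast to int). A minimal sketch; the value 8 is arbitrary, and the variable must be set before `one.webclient` is first imported:

    # Illustrative sketch: the thread count value is arbitrary.
    import os

    os.environ['ONE_HTTP_DL_THREADS'] = '8'  # read once, at module import time

    import one.webclient as wc
    print(wc.N_THREADS)  # 8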