ONE-api 3.2.1__py3-none-any.whl → 3.4.0__py3-none-any.whl

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
one/__init__.py CHANGED
@@ -1,2 +1,2 @@
1
1
  """The Open Neurophysiology Environment (ONE) API."""
2
- __version__ = '3.2.1'
2
+ __version__ = '3.4.0'
one/alf/path.py CHANGED
@@ -27,6 +27,7 @@ ALFPath properties return empty str instead of None if ALF part isn't present..
27
27
  """
28
28
  import os
29
29
  import pathlib
30
+ from functools import cached_property
30
31
  from collections import OrderedDict
31
32
  from datetime import datetime
32
33
  from typing import Union, Optional, Iterable
@@ -844,17 +845,17 @@ class PureALFPath(pathlib.PurePath): # py3.12 supports direct subclassing
844
845
  """
845
846
  return filename_parts(self.name, assert_valid=False, as_dict=as_dict)
846
847
 
847
- @property
848
+ @cached_property
848
849
  def dataset_name_parts(self):
849
850
  """tuple of str: the dataset name parts, with empty strings for missing parts."""
850
851
  return tuple(p or '' for p in self.parse_alf_name(as_dict=False))
851
852
 
852
- @property
853
+ @cached_property
853
854
  def session_parts(self):
854
855
  """tuple of str: the session path parts, with empty strings for missing parts."""
855
856
  return tuple(p or '' for p in session_path_parts(self, assert_valid=False))
856
857
 
857
- @property
858
+ @cached_property
858
859
  def alf_parts(self):
859
860
  """tuple of str: the full ALF path parts, with empty strings for missing parts."""
860
861
  return tuple(p or '' for p in self.parse_alf_path(as_dict=False))
one/api.py CHANGED
@@ -6,6 +6,7 @@ import logging
6
6
  from weakref import WeakMethod
7
7
  from datetime import datetime, timedelta
8
8
  from functools import lru_cache, partial
9
+ from itertools import islice
9
10
  from inspect import unwrap
10
11
  from pathlib import Path, PurePosixPath
11
12
  from typing import Any, Union, Optional, List
@@ -28,7 +29,7 @@ import one.webclient as wc
28
29
  import one.alf.io as alfio
29
30
  import one.alf.path as alfiles
30
31
  import one.alf.exceptions as alferr
31
- from one.alf.path import ALFPath
32
+ from one.alf.path import ALFPath, ensure_alf_path
32
33
  from .alf.cache import (
33
34
  make_parquet_db, load_tables, remove_table_files, merge_tables,
34
35
  default_cache, cast_index_object)
@@ -41,6 +42,8 @@ _logger = logging.getLogger(__name__)
41
42
  __all__ = ['ONE', 'One', 'OneAlyx']
42
43
  SAVE_ON_DELETE = (os.environ.get('ONE_SAVE_ON_DELETE') or '0').casefold() in ('true', '1')
43
44
  """bool: Whether to save modified cache tables on delete."""
45
+ REVISION_LAST_BEFORE = os.environ.get('ONE_REVISION_LAST_BEFORE')
46
+ """str: If set, the revision string to use when loading data before a given date."""
44
47
 
45
48
  _logger.debug('ONE_SAVE_ON_DELETE: %s', SAVE_ON_DELETE)
46
49
 
@@ -1001,7 +1004,7 @@ class One(ConversionMixin):
1001
1004
  The dataset revision (typically an ISO date). If no exact match, the previous
1002
1005
  revision (ordered lexicographically) is returned. If None, the default revision is
1003
1006
  returned (usually the most recent revision). Regular expressions/wildcards not
1004
- permitted.
1007
+ permitted. May be set with `ONE_REVISION_LAST_BEFORE` environment variable.
1005
1008
  query_type : str
1006
1009
  Query cache ('local') or Alyx database ('remote').
1007
1010
  download_only : bool
@@ -1041,6 +1044,7 @@ class One(ConversionMixin):
1041
1044
  raise alferr.ALFObjectNotFound(obj)
1042
1045
 
1043
1046
  dataset = {'object': obj, **kwargs}
1047
+ revision = revision or REVISION_LAST_BEFORE # Use env var if set
1044
1048
  datasets = util.filter_datasets(datasets, dataset, collection, revision,
1045
1049
  assert_unique=False, wildcards=self.wildcards)
1046
1050
 
@@ -1094,7 +1098,7 @@ class One(ConversionMixin):
1094
1098
  The dataset revision (typically an ISO date). If no exact match, the previous
1095
1099
  revision (ordered lexicographically) is returned. If None, the default revision is
1096
1100
  returned (usually the most recent revision). Regular expressions/wildcards not
1097
- permitted.
1101
+ permitted. May be set with `ONE_REVISION_LAST_BEFORE` environment variable.
1098
1102
  query_type : str
1099
1103
  Query cache ('local') or Alyx database ('remote')
1100
1104
  download_only : bool
@@ -1150,6 +1154,7 @@ class One(ConversionMixin):
1150
1154
  dataset += '.*'
1151
1155
  _logger.debug('Appending extension wildcard: ' + dataset)
1152
1156
 
1157
+ revision = revision or REVISION_LAST_BEFORE # Use env var if set
1153
1158
  assert_unique = ('/' if isinstance(dataset, str) else 'collection') not in dataset
1154
1159
  # Check if wildcard was used (this is not an exhaustive check)
1155
1160
  if not assert_unique and isinstance(dataset, str) and '*' in dataset:
@@ -1204,7 +1209,7 @@ class One(ConversionMixin):
1204
1209
  The dataset revision (typically an ISO date). If no exact match, the previous
1205
1210
  revision (ordered lexicographically) is returned. If None, the default revision is
1206
1211
  returned (usually the most recent revision). Regular expressions/wildcards not
1207
- permitted.
1212
+ permitted. May be set with `ONE_REVISION_LAST_BEFORE` environment variable.
1208
1213
  query_type : str
1209
1214
  Query cache ('local') or Alyx database ('remote')
1210
1215
  assert_present : bool
@@ -1281,9 +1286,11 @@ class One(ConversionMixin):
1281
1286
  # Check if rel paths have been used (e.g. the output of list_datasets)
1282
1287
  is_frame = isinstance(datasets, pd.DataFrame)
1283
1288
  if is_rel_paths := (is_frame or any('/' in x for x in datasets)):
1284
- if not (collections, revisions) == (None, None):
1289
+ if not (collections, revisions) == (None, None) or REVISION_LAST_BEFORE:
1285
1290
  raise ValueError(
1286
- 'collection and revision kwargs must be None when dataset is a relative path')
1291
+ 'collection and revision kwargs must be None when dataset is a relative path '
1292
+ '(ONE_REVISION_LAST_BEFORE env var must also be unset)'
1293
+ )
1287
1294
  if is_frame:
1288
1295
  if 'eid' in datasets.index.names:
1289
1296
  assert set(datasets.index.get_level_values('eid')) == {eid}
@@ -1322,6 +1329,7 @@ class One(ConversionMixin):
1322
1329
  ]
1323
1330
 
1324
1331
  # Check input args
1332
+ revisions = revisions or REVISION_LAST_BEFORE
1325
1333
  collections, revisions = _verify_specifiers([collections, revisions])
1326
1334
 
1327
1335
  # If collections provided in datasets list, e.g. [collection/x.y.z], do not assert unique
@@ -1456,7 +1464,7 @@ class One(ConversionMixin):
1456
1464
  The dataset revision (typically an ISO date). If no exact match, the previous
1457
1465
  revision (ordered lexicographically) is returned. If None, the default revision is
1458
1466
  returned (usually the most recent revision). Regular expressions/wildcards not
1459
- permitted.
1467
+ permitted. May be set with `ONE_REVISION_LAST_BEFORE` environment variable.
1460
1468
  query_type : str
1461
1469
  Query cache ('local') or Alyx database ('remote')
1462
1470
  download_only : bool
@@ -1495,6 +1503,7 @@ class One(ConversionMixin):
1495
1503
  raise alferr.ALFError(f'{collection} not found for session {eid}')
1496
1504
 
1497
1505
  dataset = {'object': object, **kwargs}
1506
+ revision = revision or REVISION_LAST_BEFORE # use env var if set
1498
1507
  datasets = util.filter_datasets(datasets, dataset, revision,
1499
1508
  assert_unique=False, wildcards=self.wildcards)
1500
1509
 
@@ -1815,7 +1824,7 @@ class OneAlyx(One):
1815
1824
 
1816
1825
  endpoint = endpoint or self._search_endpoint
1817
1826
  # Return search terms from REST schema
1818
- fields = self.alyx.rest_schemes[endpoint]['list']['fields']
1827
+ fields = self.alyx.rest_schemes.fields(endpoint, action='list')
1819
1828
  excl = ('lab',) # 'laboratory' already in search terms
1820
1829
  if endpoint != 'sessions':
1821
1830
  return tuple(x['name'] for x in fields)
@@ -1981,6 +1990,9 @@ class OneAlyx(One):
1981
1990
 
1982
1991
  >>> trials = one.load_aggregate('subjects', 'SP026', '_ibl_subjectTraining.table')
1983
1992
 
1993
+ Notes
1994
+ -----
1995
+ Unlike other loading functions, this function loads datasets with a matching revision.
1984
1996
  """
1985
1997
  # If only two parts and wildcards are on, append ext wildcard
1986
1998
  if self.wildcards and isinstance(dataset, str) and len(dataset.split('.')) == 2:
@@ -2020,9 +2032,26 @@ class OneAlyx(One):
2020
2032
 
2021
2033
  """
2022
2034
  query_type = query_type or self.mode
2023
- if query_type == 'local' and 'insertions' not in self._cache.keys():
2024
- raise NotImplementedError('Converting probe IDs required remote connection')
2025
- rec = self.alyx.rest('insertions', 'read', id=str(pid))
2035
+ if query_type == 'local': # and 'insertions' not in self._cache.keys():
2036
+ if 'insertions' not in self._cache.keys():
2037
+ raise NotImplementedError('Converting probe IDs requires remote connection')
2038
+ else:
2039
+ # If local, use the cache table
2040
+ pid = UUID(pid) if isinstance(pid, str) else pid
2041
+ try:
2042
+ rec = self._cache['insertions'].loc[pd.IndexSlice[:, pid], 'name']
2043
+ (eid, _), name = next(rec.items())
2044
+ return eid, name
2045
+ except KeyError:
2046
+ return None, None
2047
+ try:
2048
+ rec = self.alyx.rest('insertions', 'read', id=pid)
2049
+ except requests.exceptions.HTTPError as ex:
2050
+ if ex.response.status_code == 404:
2051
+ _logger.error(f'Probe {pid} not found in Alyx')
2052
+ return None, None
2053
+ raise ex
2054
+ self._update_insertions_table([rec])
2026
2055
  return UUID(rec['session']), rec['name']
2027
2056
 
2028
2057
  def eid2pid(self, eid, query_type=None, details=False, **kwargs) -> (UUID, str, list):
@@ -2063,15 +2092,33 @@ class OneAlyx(One):
2063
2092
  """
2064
2093
  query_type = query_type or self.mode
2065
2094
  if query_type == 'local' and 'insertions' not in self._cache.keys():
2066
- raise NotImplementedError('Converting probe IDs required remote connection')
2095
+ raise NotImplementedError('Converting to probe ID requires remote connection')
2067
2096
  eid = self.to_eid(eid) # Ensure we have a UUID str
2068
2097
  if not eid:
2069
2098
  return (None,) * (3 if details else 2)
2070
- recs = self.alyx.rest('insertions', 'list', session=eid, **kwargs)
2071
- pids = [UUID(x['id']) for x in recs]
2072
- labels = [x['name'] for x in recs]
2099
+ if query_type == 'local':
2100
+ try: # If local, use the cache table
2101
+ rec = self._cache['insertions'].loc[(eid,), :]
2102
+ pids, names = map(list, zip(*rec.sort_values('name')['name'].items()))
2103
+ if details:
2104
+ rec['session'] = str(eid)
2105
+ session_info = self._cache['sessions'].loc[eid].to_dict()
2106
+ session_info['date'] = session_info['date'].isoformat()
2107
+ session_info['projects'] = session_info['projects'].split(',')
2108
+ rec['session_info'] = session_info
2109
+ # Convert to list of dicts after casting UUIDs to strings
2110
+ recs = cast_index_object(rec, str).reset_index().to_dict('records')
2111
+ return pids, names, recs
2112
+ return pids, names
2113
+ except KeyError:
2114
+ return (None,) * (3 if details else 2)
2115
+
2116
+ if recs := self.alyx.rest('insertions', 'list', session=eid, **kwargs):
2117
+ self._update_insertions_table(recs)
2118
+ pids = [UUID(x['id']) for x in recs] or None
2119
+ labels = [x['name'] for x in recs] or None
2073
2120
  if details:
2074
- return pids, labels, recs
2121
+ return pids, labels, recs or None
2075
2122
  else:
2076
2123
  return pids, labels
2077
2124
 
@@ -2325,12 +2372,6 @@ class OneAlyx(One):
2325
2372
  # check that the input matches one of the defined filters
2326
2373
  if field == 'date_range':
2327
2374
  params[field] = [x.date().isoformat() for x in util.validate_date_range(value)]
2328
- elif field == 'dataset':
2329
- if not isinstance(value, str):
2330
- raise TypeError(
2331
- '"dataset" parameter must be a string. For lists use "datasets"')
2332
- query = f'data_dataset_session_related__name__icontains,{value}'
2333
- params['django'] += (',' if params['django'] else '') + query
2334
2375
  elif field == 'laboratory':
2335
2376
  params['lab'] = value
2336
2377
  else:
@@ -2757,10 +2798,11 @@ class OneAlyx(One):
2757
2798
  return [unwrapped(self, e, query_type='remote') for e in eid]
2758
2799
 
2759
2800
  # if it wasn't successful, query Alyx
2760
- ses = self.alyx.rest('sessions', 'list', django=f'pk,{str(eid)}')
2801
+ ses = self.alyx.rest('sessions', 'list', id=eid)
2761
2802
  if len(ses) == 0:
2762
2803
  return None
2763
2804
  else:
2805
+ self._update_sessions_table(ses)
2764
2806
  return ALFPath(self.cache_dir).joinpath(
2765
2807
  ses[0]['lab'], 'Subjects', ses[0]['subject'], ses[0]['start_time'][:10],
2766
2808
  str(ses[0]['number']).zfill(3))
@@ -2788,7 +2830,7 @@ class OneAlyx(One):
2788
2830
  eid_list.append(self.path2eid(p))
2789
2831
  return eid_list
2790
2832
  # else ensure the path ends with mouse, date, number
2791
- path_obj = ALFPath(path_obj)
2833
+ path_obj = ensure_alf_path(path_obj)
2792
2834
 
2793
2835
  # try the cached info to possibly avoid hitting database
2794
2836
  mode = query_type or self.mode
@@ -2969,26 +3011,40 @@ class OneAlyx(One):
2969
3011
  [Errno 404] Remote session not found on Alyx.
2970
3012
 
2971
3013
  """
3014
+ def process(d, root=self.cache_dir):
3015
+ """Returns dict in similar format to One.search output."""
3016
+ det_fields = ['subject', 'start_time', 'number', 'lab', 'projects',
3017
+ 'url', 'task_protocol', 'local_path']
3018
+ out = {k: v for k, v in d.items() if k in det_fields}
3019
+ out['projects'] = ','.join(out['projects'])
3020
+ out['date'] = datetime.fromisoformat(out['start_time']).date()
3021
+ out['local_path'] = session_record2path(out, root)
3022
+ return out
3023
+
2972
3024
  if (query_type or self.mode) == 'local':
2973
3025
  return super().get_details(eid, full=full)
2974
3026
  # If eid is a list of eIDs recurse through list and return the results
2975
- if isinstance(eid, (list, util.LazyId)):
2976
- details_list = []
2977
- for p in eid:
2978
- details_list.append(self.get_details(p, full=full))
2979
- return details_list
2980
- # load all details
2981
- dets = self.alyx.rest('sessions', 'read', eid)
3027
+ eids = ensure_list(eid)
3028
+ details = dict.fromkeys(map(str, eids), None) # create map to skip duplicates
2982
3029
  if full:
2983
- return dets
2984
- # If it's not full return the normal output like from a one.search
2985
- det_fields = ['subject', 'start_time', 'number', 'lab', 'projects',
2986
- 'url', 'task_protocol', 'local_path']
2987
- out = {k: v for k, v in dets.items() if k in det_fields}
2988
- out['projects'] = ','.join(out['projects'])
2989
- out.update({'local_path': self.eid2path(eid),
2990
- 'date': datetime.fromisoformat(out['start_time']).date()})
2991
- return out
3030
+ for e in details:
3031
+ # check for duplicates
3032
+ details[e] = self.alyx.rest('sessions', 'read', id=e)
3033
+ session, datasets = ses2records(details[e])
3034
+ merge_tables(
3035
+ self._cache, sessions=session, datasets=datasets.copy(),
3036
+ origin=self.alyx.base_url)
3037
+ details = [details[str(e)].copy() for e in eids]
3038
+ else:
3039
+ # batch to ensure the list is not too long for the GET request
3040
+ iterator = iter(details.keys())
3041
+ while batch := tuple(islice(iterator, 50)):
3042
+ ret = self.alyx.rest('sessions', 'list', django=f'pk__in,{batch}')
3043
+ details.update({d['id']: d for d in ret})
3044
+ self._update_sessions_table(details.values())
3045
+ details = [process(details[str(e)]) for e in eids]
3046
+ # Return either a single dict or a list of dicts depending on the input type
3047
+ return (details if isinstance(eid, (list, util.LazyId)) else details[0])
2992
3048
 
2993
3049
 
2994
3050
  def _setup(**kwargs):
one/remote/globus.py CHANGED
@@ -642,7 +642,7 @@ class Globus(DownloadClient):
642
642
  recursive : bool
643
643
  If true, transfer the contents of nested directories (NB: all data_paths must be
644
644
  directories).
645
- **kwargs
645
+ kwargs
646
646
  See Globus.transfer_data.
647
647
 
648
648
  Returns
@@ -715,7 +715,7 @@ class Globus(DownloadClient):
715
715
  ----------
716
716
  client_name : str
717
717
  Parameter profile name to set up e.g. 'default', 'admin'.
718
- **kwargs
718
+ kwargs
719
719
  Optional Globus constructor arguments.
720
720
 
721
721
  Returns
@@ -937,7 +937,7 @@ class Globus(DownloadClient):
937
937
  recursive : bool
938
938
  If true, transfer the contents of nested directories (NB: all data_paths must be
939
939
  directories).
940
- **kwargs
940
+ kwargs
941
941
  See globus_sdk.TransferData.
942
942
 
943
943
  Returns
@@ -996,7 +996,7 @@ class Globus(DownloadClient):
996
996
  recursive : bool
997
997
  If true, delete the contents of nested directories (NB: all data_paths must be
998
998
  directories).
999
- **kwargs
999
+ kwargs
1000
1000
  See globus_sdk.DeleteData.
1001
1001
 
1002
1002
  Returns
@@ -1108,7 +1108,7 @@ class Globus(DownloadClient):
1108
1108
  is a UUID, the path must be absolute.
1109
1109
  timeout : int
1110
1110
  Maximum time in seconds to wait for the task to complete.
1111
- **kwargs
1111
+ kwargs
1112
1112
  Optional arguments for globus_sdk.TransferData.
1113
1113
 
1114
1114
  Returns