ONE-api 3.0b4__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {one_api-3.0b4 → one_api-3.1.0/ONE_api.egg-info}/PKG-INFO +3 -2
  2. {one_api-3.0b4/ONE_api.egg-info → one_api-3.1.0}/PKG-INFO +3 -2
  3. {one_api-3.0b4 → one_api-3.1.0}/one/__init__.py +1 -1
  4. {one_api-3.0b4 → one_api-3.1.0}/one/alf/cache.py +58 -16
  5. {one_api-3.0b4 → one_api-3.1.0}/one/api.py +110 -47
  6. {one_api-3.0b4 → one_api-3.1.0}/one/remote/globus.py +1 -1
  7. {one_api-3.0b4 → one_api-3.1.0}/one/webclient.py +9 -3
  8. {one_api-3.0b4 → one_api-3.1.0}/LICENSE +0 -0
  9. {one_api-3.0b4 → one_api-3.1.0}/MANIFEST.in +0 -0
  10. {one_api-3.0b4 → one_api-3.1.0}/ONE_api.egg-info/SOURCES.txt +0 -0
  11. {one_api-3.0b4 → one_api-3.1.0}/ONE_api.egg-info/dependency_links.txt +0 -0
  12. {one_api-3.0b4 → one_api-3.1.0}/ONE_api.egg-info/requires.txt +0 -0
  13. {one_api-3.0b4 → one_api-3.1.0}/ONE_api.egg-info/top_level.txt +0 -0
  14. {one_api-3.0b4 → one_api-3.1.0}/README.md +0 -0
  15. {one_api-3.0b4 → one_api-3.1.0}/one/alf/__init__.py +0 -0
  16. {one_api-3.0b4 → one_api-3.1.0}/one/alf/exceptions.py +0 -0
  17. {one_api-3.0b4 → one_api-3.1.0}/one/alf/io.py +0 -0
  18. {one_api-3.0b4 → one_api-3.1.0}/one/alf/path.py +0 -0
  19. {one_api-3.0b4 → one_api-3.1.0}/one/alf/spec.py +0 -0
  20. {one_api-3.0b4 → one_api-3.1.0}/one/converters.py +0 -0
  21. {one_api-3.0b4 → one_api-3.1.0}/one/params.py +0 -0
  22. {one_api-3.0b4 → one_api-3.1.0}/one/registration.py +0 -0
  23. {one_api-3.0b4 → one_api-3.1.0}/one/remote/__init__.py +0 -0
  24. {one_api-3.0b4 → one_api-3.1.0}/one/remote/aws.py +0 -0
  25. {one_api-3.0b4 → one_api-3.1.0}/one/remote/base.py +0 -0
  26. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/datasets.pqt +0 -0
  27. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/params/.caches +0 -0
  28. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/params/.test.alyx.internationalbrainlab.org +0 -0
  29. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/1f187d80fd59677b395fcdb18e68e4401bfa1cc9 +0 -0
  30. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/3f51aa2e0baa42438467906f56a457c91a221898 +0 -0
  31. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/47893cf67c985e6361cdee009334963f49fb0746 +0 -0
  32. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/535d0e9a1e2c1efbdeba0d673b131e00361a2edb +0 -0
  33. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/5618bea3484a52cd893616f07903f0e49e023ba1 +0 -0
  34. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/6dc96f7e9bcc6ac2e7581489b9580a6cd3f28293 +0 -0
  35. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/db1731fb8df0208944ae85f76718430813a8bf50 +0 -0
  36. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/dcce48259bb929661f60a02a48563f70aa6185b3 +0 -0
  37. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/rest_responses/f530d6022f61cdc9e38cc66beb3cb71f3003c9a1 +0 -0
  38. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/sessions.pqt +0 -0
  39. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/test_dbs.json +0 -0
  40. {one_api-3.0b4 → one_api-3.1.0}/one/tests/fixtures/test_img.png +0 -0
  41. {one_api-3.0b4 → one_api-3.1.0}/one/util.py +0 -0
  42. {one_api-3.0b4 → one_api-3.1.0}/pyproject.toml +0 -0
  43. {one_api-3.0b4 → one_api-3.1.0}/requirements.txt +0 -0
  44. {one_api-3.0b4 → one_api-3.1.0}/setup.cfg +0 -0
ONE_api.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: ONE-api
- Version: 3.0b4
+ Version: 3.1.0
  Summary: Open Neurophysiology Environment
  Author: IBL Staff
  License: MIT
@@ -21,6 +21,7 @@ Requires-Dist: iblutil>=1.14.0
  Requires-Dist: packaging
  Requires-Dist: boto3
  Requires-Dist: pyyaml
+ Dynamic: license-file

  # Open Neurophysiology Environment
  [![Coverage Status](https://coveralls.io/repos/github/int-brain-lab/ONE/badge.svg?branch=main)](https://coveralls.io/github/int-brain-lab/ONE?branch=main)
PKG-INFO

@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: ONE-api
- Version: 3.0b4
+ Version: 3.1.0
  Summary: Open Neurophysiology Environment
  Author: IBL Staff
  License: MIT
@@ -21,6 +21,7 @@ Requires-Dist: iblutil>=1.14.0
  Requires-Dist: packaging
  Requires-Dist: boto3
  Requires-Dist: pyyaml
+ Dynamic: license-file

  # Open Neurophysiology Environment
  [![Coverage Status](https://coveralls.io/repos/github/int-brain-lab/ONE/badge.svg?branch=main)](https://coveralls.io/github/int-brain-lab/ONE?branch=main)
one/__init__.py

@@ -1,2 +1,2 @@
  """The Open Neurophysiology Environment (ONE) API."""
- __version__ = '3.0b4'
+ __version__ = '3.1.0'
one/alf/cache.py

@@ -26,7 +26,7 @@ import logging
  import pandas as pd
  import numpy as np
  from packaging import version
- from iblutil.util import Bunch
+ from iblutil.util import Bunch, ensure_list
  from iblutil.io import parquet
  from iblutil.io.hashfile import md5

@@ -35,8 +35,9 @@ from one.alf.io import iter_sessions
  from one.alf.path import session_path_parts, get_alf_path

  __all__ = [
- 'make_parquet_db', 'patch_tables', 'merge_tables', 'QC_TYPE', 'remove_table_files',
- 'remove_missing_datasets', 'load_tables', 'EMPTY_DATASETS_FRAME', 'EMPTY_SESSIONS_FRAME']
+ 'make_parquet_db', 'load_tables', 'patch_tables', 'merge_tables',
+ 'remove_table_files', 'remove_missing_datasets', 'default_cache',
+ 'QC_TYPE', 'EMPTY_DATASETS_FRAME', 'EMPTY_SESSIONS_FRAME']
  _logger = logging.getLogger(__name__)

  # -------------------------------------------------------------------------------------------------
@@ -259,6 +260,33 @@ def _make_datasets_df(root_dir, hash_files=False) -> pd.DataFrame:
  return pd.DataFrame(rows, columns=DATASETS_COLUMNS).astype(DATASETS_COLUMNS)


+ def default_cache(origin=''):
+ """Returns an empty cache dictionary with the default tables.
+
+ Parameters
+ ----------
+ origin : str, optional
+ The origin of the cache (e.g. a computer name or database name).
+
+ Returns
+ -------
+ Bunch
+ A Bunch object containing the loaded cache tables and associated metadata.
+
+ """
+ table_meta = _metadata(origin)
+ return Bunch({
+ 'datasets': EMPTY_DATASETS_FRAME.copy(),
+ 'sessions': EMPTY_SESSIONS_FRAME.copy(),
+ '_meta': {
+ 'created_time': None,
+ 'loaded_time': None,
+ 'modified_time': None,
+ 'saved_time': None,
+ 'raw': {k: table_meta.copy() for k in ('datasets', 'sessions')}}
+ })
+
+
  def make_parquet_db(root_dir, out_dir=None, hash_ids=True, hash_files=False, lab=None):
  """Given a data directory, index the ALF datasets and save the generated cache tables.

@@ -375,17 +403,8 @@ def load_tables(tables_dir, glob_pattern='*.pqt'):
  A Bunch object containing the loaded cache tables and associated metadata.

  """
- meta = {
- 'created_time': None,
- 'loaded_time': None,
- 'modified_time': None,
- 'saved_time': None,
- 'raw': {}
- }
- caches = Bunch({
- 'datasets': EMPTY_DATASETS_FRAME.copy(),
- 'sessions': EMPTY_SESSIONS_FRAME.copy(),
- '_meta': meta})
+ caches = default_cache()
+ meta = caches['_meta']
  INDEX_KEY = '.?id'
  for cache_file in Path(tables_dir).glob(glob_pattern):
  table = cache_file.stem
@@ -425,9 +444,12 @@ def load_tables(tables_dir, glob_pattern='*.pqt'):
  return caches


- def merge_tables(cache, strict=False, **kwargs):
+ def merge_tables(cache, strict=False, origin=None, **kwargs):
  """Update the cache tables with new records.

+ Note: A copy of the tables in cache may be returned if the original tables are immutable.
+ This can happen when tables are loaded from a parquet file.
+
  Parameters
  ----------
  dict
@@ -435,6 +457,8 @@ def merge_tables(cache, strict=False, **kwargs):
  strict : bool
  If not True, the columns don't need to match. Extra columns in input tables are
  dropped and missing columns are added and filled with np.nan.
+ origin : str
+ The origin of the cache (e.g. a computer name or database name).
  kwargs
  pandas.DataFrame or pandas.Series to insert/update for each table.

@@ -488,13 +512,31 @@ def merge_tables(cache, strict=False, **kwargs):
  records = records.astype(cache[table].dtypes)
  # Update existing rows
  to_update = records.index.isin(cache[table].index)
- cache[table].loc[records.index[to_update], :] = records[to_update]
+ try:
+ cache[table].loc[records.index[to_update], :] = records[to_update]
+ except ValueError as e:
+ if 'assignment destination is read-only' in str(e):
+ # NB: nullable integer and categorical dtypes may be backed by immutable arrays
+ # after loading from parquet and therefore must be copied before assignment
+ cache[table] = cache[table].copy()
+ cache[table].loc[records.index[to_update], :] = records[to_update]
+ else:
+ raise e # pragma: no cover
+
  # Assign new rows
  to_assign = records[~to_update]
  frames = [cache[table], to_assign]
  # Concatenate and sort
  cache[table] = pd.concat(frames).sort_index()
  updated = datetime.datetime.now()
+ # Update the table metadata with the origin
+ if origin is not None:
+ table_meta = cache['_meta']['raw'].get(table, {})
+ if not table_meta.get('origin'):
+ table_meta['origin'] = origin
+ else:
+ table_meta['origin'] = set((*ensure_list(table_meta['origin']), origin))
+ cache['_meta']['raw'][table] = table_meta
  cache['_meta']['modified_time'] = updated
  return updated
one/api.py

@@ -6,6 +6,7 @@ import logging
  from weakref import WeakMethod
  from datetime import datetime, timedelta
  from functools import lru_cache, partial
+ from itertools import chain
  from inspect import unwrap
  from pathlib import Path, PurePosixPath
  from typing import Any, Union, Optional, List
@@ -31,7 +32,7 @@ import one.alf.exceptions as alferr
  from one.alf.path import ALFPath
  from .alf.cache import (
  make_parquet_db, load_tables, remove_table_files, merge_tables,
- EMPTY_DATASETS_FRAME, EMPTY_SESSIONS_FRAME, cast_index_object)
+ default_cache, cast_index_object)
  from .alf.spec import is_uuid, is_uuid_string, QC, to_alf
  from . import __version__
  from one.converters import ConversionMixin, session_record2path, ses2records, datasets2records
@@ -39,15 +40,17 @@ from one import util

  _logger = logging.getLogger(__name__)
  __all__ = ['ONE', 'One', 'OneAlyx']
- N_THREADS = os.environ.get('ONE_HTTP_DL_THREADS', 4)
- """int: The number of download threads."""
+ SAVE_ON_DELETE = (os.environ.get('ONE_SAVE_ON_DELETE') or '1').casefold() in ('true', '1')
+ """bool: Whether to save modified cache tables on delete."""
+
+ _logger.debug('ONE_SAVE_ON_DELETE: %s', SAVE_ON_DELETE)


  class One(ConversionMixin):
  """An API for searching and loading data on a local filesystem."""

  _search_terms = (
- 'dataset', 'date_range', 'laboratory', 'number',
+ 'datasets', 'date_range', 'laboratory', 'number',
  'projects', 'subject', 'task_protocol', 'dataset_qc_lte'
  )

@@ -97,7 +100,8 @@ class One(ConversionMixin):

  def __del__(self):
  """Save cache tables to disk before deleting the object."""
- self.save_cache()
+ if SAVE_ON_DELETE:
+ self.save_cache()

  @property
  def offline(self):
@@ -110,16 +114,7 @@ class One(ConversionMixin):

  def _reset_cache(self):
  """Replace the cache object with a Bunch that contains the right fields."""
- self._cache = Bunch({
- 'datasets': EMPTY_DATASETS_FRAME.copy(),
- 'sessions': EMPTY_SESSIONS_FRAME.copy(),
- '_meta': {
- 'created_time': None,
- 'loaded_time': None,
- 'modified_time': None,
- 'saved_time': None,
- 'raw': {}} # map of original table metadata
- })
+ self._cache = default_cache()

  def _remove_table_files(self, tables=None):
  """Delete cache tables on disk.
@@ -162,6 +157,25 @@ class One(ConversionMixin):
  if self.offline: # In online mode, the cache tables should be downloaded later
  warnings.warn(f'No cache tables found in {self._tables_dir}')

+ # If in remote mode and loading old tables generated on Alyx,
+ # prompt the user to delete them to improve load times
+ raw_meta = self._cache['_meta'].get('raw', {}).values() or [{}]
+ tagged = any(filter(None, flatten(x.get('database_tags') for x in raw_meta)))
+ origin = set(x['origin'] for x in raw_meta if 'origin' in x)
+ older = (self._cache['_meta']['created_time'] or datetime.now()) < datetime(2025, 2, 13)
+ remote = not self.offline and self.mode == 'remote'
+ if remote and origin == {'alyx'} and older and not self._web_client.silent and not tagged:
+ message = ('Old Alyx cache tables detected on disk. '
+ 'It\'s recomended to remove these tables as they '
+ 'negatively affect performance.\nDelete these tables? [Y/n]: ')
+ if (input(message).casefold().strip() or 'y')[0] == 'y':
+ self._remove_table_files()
+ self._reset_cache()
+ elif len(self._cache.datasets) > 1e6:
+ warnings.warn(
+ 'Large cache tables affect performance. '
+ 'Consider removing them by calling the `_remove_table_files` method.')
+
  return self._cache['_meta']['loaded_time']

  def save_cache(self, save_dir=None, clobber=False):
@@ -194,6 +208,14 @@ class One(ConversionMixin):
  caches = load_tables(save_dir)
  merge_tables(
  caches, **{k: v for k, v in self._cache.items() if not k.startswith('_')})
+ # Ensure we use the minimum created date for each table
+ for table in caches['_meta']['raw']:
+ raw_meta = [x['_meta']['raw'].get(table, {}) for x in (caches, self._cache)]
+ created = filter(None, (x.get('date_created') for x in raw_meta))
+ if any(created := list(created)):
+ created = min(map(datetime.fromisoformat, created))
+ created = created.isoformat(sep=' ', timespec='minutes')
+ meta['raw'][table]['date_created'] = created

  with FileLock(save_dir, log=_logger, timeout=TIMEOUT, timeout_action='delete'):
  _logger.info('Saving cache tables...')
@@ -297,10 +319,8 @@ class One(ConversionMixin):

  Parameters
  ----------
- dataset : str, list
- One or more dataset names. Returns sessions containing all these datasets.
- A dataset matches if it contains the search string e.g. 'wheel.position' matches
- '_ibl_wheel.position.npy'.
+ datasets : str, list
+ One or more (exact) dataset names. Returns sessions containing all of these datasets.
  dataset_qc_lte : str, int, one.alf.spec.QC
  A dataset QC value, returns sessions with datasets at or below this QC value, including
  those with no QC set. If `dataset` not passed, sessions with any passing QC datasets
@@ -348,7 +368,9 @@ class One(ConversionMixin):

  Search for sessions on a given date, in a given lab, containing trials and spike data.

- >>> eids = one.search(date='2023-01-01', lab='churchlandlab', dataset=['trials', 'spikes'])
+ >>> eids = one.search(
+ ... date='2023-01-01', lab='churchlandlab',
+ ... datasets=['trials.table.pqt', 'spikes.times.npy'])

  Search for sessions containing trials and spike data where QC for both are WARNING or less.

@@ -375,13 +397,14 @@ class One(ConversionMixin):

  def all_present(x, dsets, exists=True):
  """Returns true if all datasets present in Series."""
- return all(any(x.str.contains(y, regex=self.wildcards) & exists) for y in dsets)
+ name = x.str.rsplit('/', n=1, expand=True).iloc[:, -1]
+ return all(any(name.str.fullmatch(y) & exists) for y in dsets)

  # Iterate over search filters, reducing the sessions table
  sessions = self._cache['sessions']

  # Ensure sessions filtered in a particular order, with datasets last
- search_order = ('date_range', 'number', 'dataset')
+ search_order = ('date_range', 'number', 'datasets')

  def sort_fcn(itm):
  return -1 if itm[0] not in search_order else search_order.index(itm[0])
@@ -408,12 +431,15 @@ class One(ConversionMixin):
  query = ensure_list(value)
  sessions = sessions[sessions[key].isin(map(int, query))]
  # Dataset/QC check is biggest so this should be done last
- elif key == 'dataset' or (key == 'dataset_qc_lte' and 'dataset' not in queries):
+ elif key == 'datasets' or (key == 'dataset_qc_lte' and 'datasets' not in queries):
  datasets = self._cache['datasets']
  qc = QC.validate(queries.get('dataset_qc_lte', 'FAIL')).name # validate value
  has_dset = sessions.index.isin(datasets.index.get_level_values('eid'))
+ if not has_dset.any():
+ sessions = sessions.iloc[0:0] # No datasets for any sessions
+ continue
  datasets = datasets.loc[(sessions.index.values[has_dset], ), :]
- query = ensure_list(value if key == 'dataset' else '')
+ query = ensure_list(value if key == 'datasets' else '')
  # For each session check any dataset both contains query and exists
  mask = (
  (datasets
@@ -1003,7 +1029,7 @@ class One(ConversionMixin):
  """
  query_type = query_type or self.mode
  datasets = self.list_datasets(
- eid, details=True, query_type=query_type, keep_eid_index=True, revision=revision)
+ eid, details=True, query_type=query_type, keep_eid_index=True)

  if len(datasets) == 0:
  raise alferr.ALFObjectNotFound(obj)
@@ -1643,6 +1669,11 @@ class OneAlyx(One):
  tag : str
  An optional Alyx dataset tag for loading cache tables containing a subset of datasets.

+ Returns
+ -------
+ datetime.datetime
+ A timestamp of when the cache was loaded.
+
  Examples
  --------
  To load the cache tables for a given release tag
@@ -1666,6 +1697,8 @@ class OneAlyx(One):
  different_tag = any(x != tag for x in current_tags)
  if not (clobber or different_tag):
  super(OneAlyx, self).load_cache(tables_dir) # Load any present cache
+ cache_meta = self._cache.get('_meta', {})
+ raw_meta = cache_meta.get('raw', {}).values() or [{}]

  try:
  # Determine whether a newer cache is available
@@ -1676,15 +1709,21 @@ class OneAlyx(One):
  min_version = packaging.version.parse(cache_info.get('min_api_version', '0.0.0'))
  if packaging.version.parse(one.__version__) < min_version:
  warnings.warn(f'Newer cache tables require ONE version {min_version} or greater')
- return
+ return cache_meta['loaded_time']

  # Check whether remote cache more recent
  remote_created = datetime.fromisoformat(cache_info['date_created'])
  local_created = cache_meta.get('created_time', None)
  fresh = local_created and (remote_created - local_created) < timedelta(minutes=1)
- if fresh and not different_tag:
+ # The local cache may have been created locally more recently, but if it doesn't
+ # contain the same tag or origin, we need to download the remote one.
+ origin = cache_info.get('origin', 'unknown')
+ local_origin = (x.get('origin', []) for x in raw_meta)
+ local_origin = set(chain.from_iterable(map(ensure_list, local_origin)))
+ different_origin = origin not in local_origin
+ if fresh and not (different_tag or different_origin):
  _logger.info('No newer cache available')
- return
+ return cache_meta['loaded_time']

  # Set the cache table directory location
  if tables_dir: # If tables directory specified, use that
@@ -1696,19 +1735,27 @@ class OneAlyx(One):
  self._tables_dir = self._tables_dir or self.cache_dir

  # Check if the origin has changed. This is to warn users if downloading from a
- # different database to the one currently loaded.
- prev_origin = list(set(filter(None, (x.get('origin') for x in raw_meta))))
- origin = cache_info.get('origin', 'unknown')
- if prev_origin and origin not in prev_origin:
+ # different database to the one currently loaded. When building the cache from
+ # remote queries the origin is set to the Alyx database URL. If the cache info
+ # origin name and URL are different, warn the user.
+ if different_origin and local_origin and self.alyx.base_url not in local_origin:
  warnings.warn(
  'Downloading cache tables from another origin '
- f'("{origin}" instead of "{", ".join(prev_origin)}")')
+ f'("{origin}" instead of "{", ".join(local_origin)}")')

  # Download the remote cache files
  _logger.info('Downloading remote caches...')
  files = self.alyx.download_cache_tables(cache_info.get('location'), self._tables_dir)
  assert any(files)
- super(OneAlyx, self).load_cache(self._tables_dir) # Reload cache after download
+ # Reload cache after download
+ loaded_time = super(OneAlyx, self).load_cache(self._tables_dir)
+ # Add db URL to origin set so we know where the cache came from
+ for raw_meta in self._cache['_meta']['raw'].values():
+ table_origin = set(filter(None, ensure_list(raw_meta.get('origin', []))))
+ if origin in table_origin:
+ table_origin.add(self.alyx.base_url)
+ raw_meta['origin'] = table_origin
+ return loaded_time
  except (requests.exceptions.HTTPError, wc.HTTPError, requests.exceptions.SSLError) as ex:
  _logger.debug(ex)
  _logger.error(f'{type(ex).__name__}: Failed to load the remote cache file')
@@ -1725,6 +1772,7 @@ class OneAlyx(One):
  'Please provide valid tables_dir / cache_dir kwargs '
  'or run ONE.setup to update the default directory.'
  )
+ return cache_meta['loaded_time']

  @property
  def alyx(self):
@@ -1817,7 +1865,8 @@ class OneAlyx(One):
  return self._cache['datasets'].iloc[0:0] if details else [] # Return empty
  session, datasets = ses2records(self.alyx.rest('sessions', 'read', id=eid))
  # Add to cache tables
- merge_tables(self._cache, sessions=session, datasets=datasets.copy())
+ merge_tables(
+ self._cache, sessions=session, datasets=datasets.copy(), origin=self.alyx.base_url)
  if datasets is None or datasets.empty:
  return self._cache['datasets'].iloc[0:0] if details else [] # Return empty
  assert set(datasets.index.unique('eid')) == {eid}
@@ -1969,7 +2018,7 @@ class OneAlyx(One):
  rec = self.alyx.rest('insertions', 'read', id=str(pid))
  return UUID(rec['session']), rec['name']

- def eid2pid(self, eid, query_type=None, details=False):
+ def eid2pid(self, eid, query_type=None, details=False, **kwargs) -> (UUID, str, list):
  """Given an experiment UUID (eID), return the probe IDs and labels (i.e. ALF collection).

  NB: Requires a connection to the Alyx database.
@@ -1983,6 +2032,8 @@ class OneAlyx(One):
  Query mode - options include 'remote', and 'refresh'.
  details : bool
  Additionally return the complete Alyx records from insertions endpoint.
+ kwargs
+ Additional parameters to filter insertions Alyx endpoint.

  Returns
  -------
@@ -1993,6 +2044,15 @@ class OneAlyx(One):
  list of dict (optional)
  If details is true, returns the Alyx records from insertions endpoint.

+ Examples
+ --------
+ Get the probe IDs and details for a given session ID
+
+ >>> pids, labels, recs = one.eid2pid(eid, details=True)
+
+ Get the probe ID for a given session ID and label
+
+ >>> (pid,), _ = one.eid2pid(eid, details=False, name='probe00')
  """
  query_type = query_type or self.mode
  if query_type == 'local' and 'insertions' not in self._cache.keys():
@@ -2000,7 +2060,7 @@ class OneAlyx(One):
  eid = self.to_eid(eid) # Ensure we have a UUID str
  if not eid:
  return (None,) * (3 if details else 2)
- recs = self.alyx.rest('insertions', 'list', session=eid)
+ recs = self.alyx.rest('insertions', 'list', session=eid, **kwargs)
  pids = [UUID(x['id']) for x in recs]
  labels = [x['name'] for x in recs]
  if details:
@@ -2143,7 +2203,8 @@ class OneAlyx(One):
  # Build sessions table
  session_records = (x['session_info'] for x in insertions_records)
  sessions_df = pd.DataFrame(next(zip(*map(ses2records, session_records))))
- return merge_tables(self._cache, insertions=df, sessions=sessions_df)
+ return merge_tables(
+ self._cache, insertions=df, sessions=sessions_df, origin=self.alyx.base_url)

  def search(self, details=False, query_type=None, **kwargs):
  """Searches sessions matching the given criteria and returns a list of matching eids.
@@ -2162,10 +2223,8 @@ class OneAlyx(One):

  Parameters
  ----------
- dataset : str
- A (partial) dataset name. Returns sessions containing matching datasets.
- A dataset matches if it contains the search string e.g. 'wheel.position' matches
- '_ibl_wheel.position.npy'. C.f. `datasets` argument.
+ datasets : str, list
+ One or more (exact) dataset names. Returns sessions containing all of these datasets.
  date_range : str, list, datetime.datetime, datetime.date, pandas.timestamp
  A single date to search or a list of 2 dates that define the range (inclusive). To
  define only the upper or lower date bound, set the other element to None.
@@ -2192,11 +2251,12 @@ class OneAlyx(One):
  A str or list of lab location (as per Alyx definition) name.
  Note: this corresponds to the specific rig, not the lab geographical location per se.
  dataset_types : str, list
- One or more of dataset_types.
- datasets : str, list
- One or more (exact) dataset names. Returns sessions containing all of these datasets.
+ One or more of dataset_types. Unlike with `datasets`, the dataset types for the
+ sessions returned may not be reachable (i.e. for recent sessions the datasets may not
+ yet be available).
  dataset_qc_lte : int, str, one.alf.spec.QC
- The maximum QC value for associated datasets.
+ The maximum QC value for associated datasets. NB: Without `datasets`, not all
+ associated datasets with the matching QC values are guarenteed to be reachable.
  details : bool
  If true also returns a dict of dataset details.
  query_type : str, None
@@ -2241,6 +2301,9 @@ class OneAlyx(One):
  - In default and local mode, when the one.wildcards flag is True (default), queries are
  interpreted as regular expressions. To turn this off set one.wildcards to False.
  - In remote mode regular expressions are only supported using the `django` argument.
+ - In remote mode, only the `datasets` argument returns sessions where datasets are
+ registered *and* exist. Using `dataset_types` or `dataset_qc_lte` without `datasets`
+ will not check that the datasets are reachable.

  """
  query_type = query_type or self.mode
@@ -2310,7 +2373,7 @@ class OneAlyx(One):

  """
  df = pd.DataFrame(next(zip(*map(ses2records, session_records))))
- return merge_tables(self._cache, sessions=df)
+ return merge_tables(self._cache, sessions=df, origin=self.alyx.base_url)

  def _download_datasets(self, dsets, **kwargs) -> List[ALFPath]:
  """Download a single or multitude of datasets if stored on AWS.
one/remote/globus.py

@@ -1211,7 +1211,7 @@ class Globus(DownloadClient):
  async def task_wait_async(self, task_id, polling_interval=10, timeout=10):
  """Asynchronously wait until a Task is complete or fails, with a time limit.

- If the task status is ACTIVE after timout, returns False, otherwise returns True.
+ If the task status is ACTIVE after timeout, returns False, otherwise returns True.

  Parameters
  ----------
one/webclient.py

@@ -34,6 +34,7 @@ from uuid import UUID
  import json
  import logging
  import math
+ import os
  import re
  import functools
  import urllib.request
@@ -61,6 +62,8 @@ from iblutil.io.params import set_hidden
  from iblutil.util import ensure_list
  import concurrent.futures
  _logger = logging.getLogger(__name__)
+ N_THREADS = int(os.environ.get('ONE_HTTP_DL_THREADS', 4))
+ """int: The number of download threads."""


  class _JSONEncoder(json.JSONEncoder):
@@ -355,7 +358,6 @@ def http_download_file_list(links_to_file_list, **kwargs):

  """
  links_to_file_list = list(links_to_file_list) # In case generator was passed
- n_threads = 4 # Max number of threads
  outputs = []
  target_dir = kwargs.pop('target_dir', None)
  # Ensure target dir the length of url list
@@ -364,7 +366,7 @@ def http_download_file_list(links_to_file_list, **kwargs):
  assert len(target_dir) == len(links_to_file_list)
  # using with statement to ensure threads are cleaned up promptly
  zipped = zip(links_to_file_list, target_dir)
- with concurrent.futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
+ with concurrent.futures.ThreadPoolExecutor(max_workers=N_THREADS) as executor:
  # Multithreading load operations
  futures = [executor.submit(
  http_download_file, link, target_dir=target, **kwargs) for link, target in zipped]
@@ -1153,7 +1155,8 @@ class AlyxClient:
  assert endpoint_scheme[action]['action'] == 'get'
  # add to url data if it is a string
  if id:
- # this is a special case of the list where we query a uuid. Usually read is better
+ # this is a special case of the list where we query a uuid
+ # usually read is better but list may return fewer data and therefore be faster
  if 'django' in kwargs.keys():
  kwargs['django'] = kwargs['django'] + ','
  else:
@@ -1161,6 +1164,9 @@ class AlyxClient:
  kwargs['django'] = f"{kwargs['django']}pk,{id}"
  # otherwise, look for a dictionary of filter terms
  if kwargs:
+ # if django arg is present but is None, server will return a cryptic 500 status
+ if 'django' in kwargs and kwargs['django'] is None:
+ del kwargs['django']
  # Convert all lists in query params to comma separated list
  query_params = {k: ','.join(map(str, ensure_list(v))) for k, v in kwargs.items()}
  url = update_url_params(url, query_params)
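The download thread count has moved out of one/api.py and is now a module-level constant in one/webclient.py, read from the ONE_HTTP_DL_THREADS environment variable (default 4, cast to int). A minimal sketch; the value 8 is arbitrary, and the variable must be set before `one.webclient` is first imported:

    # Illustrative sketch: the thread count value is arbitrary.
    import os

    os.environ['ONE_HTTP_DL_THREADS'] = '8'  # read once, at module import time

    import one.webclient as wc
    print(wc.N_THREADS)  # 8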