eegdash 0.0.9__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eegdash/__init__.py +8 -1
- eegdash/api.py +690 -0
- eegdash/data_config.py +33 -27
- eegdash/data_utils.py +365 -222
- eegdash/dataset.py +60 -0
- eegdash/features/__init__.py +46 -18
- eegdash/features/datasets.py +62 -23
- eegdash/features/decorators.py +14 -6
- eegdash/features/extractors.py +22 -22
- eegdash/features/feature_bank/__init__.py +3 -3
- eegdash/features/feature_bank/complexity.py +6 -3
- eegdash/features/feature_bank/connectivity.py +16 -56
- eegdash/features/feature_bank/csp.py +3 -4
- eegdash/features/feature_bank/dimensionality.py +8 -5
- eegdash/features/feature_bank/signal.py +30 -4
- eegdash/features/feature_bank/spectral.py +10 -28
- eegdash/features/feature_bank/utils.py +48 -0
- eegdash/features/inspect.py +48 -0
- eegdash/features/serialization.py +4 -5
- eegdash/features/utils.py +9 -7
- eegdash/preprocessing.py +65 -0
- eegdash/utils.py +11 -0
- {eegdash-0.0.9.dist-info → eegdash-0.2.0.dist-info}/METADATA +67 -20
- eegdash-0.2.0.dist-info/RECORD +27 -0
- {eegdash-0.0.9.dist-info → eegdash-0.2.0.dist-info}/WHEEL +1 -1
- {eegdash-0.0.9.dist-info → eegdash-0.2.0.dist-info}/licenses/LICENSE +1 -0
- eegdash/main.py +0 -359
- eegdash-0.0.9.dist-info/RECORD +0 -22
- {eegdash-0.0.9.dist-info → eegdash-0.2.0.dist-info}/top_level.txt +0 -0
eegdash/dataset.py
ADDED
@@ -0,0 +1,60 @@
+from .api import EEGDashDataset
+
+
+class EEGChallengeDataset(EEGDashDataset):
+    def __init__(
+        self,
+        release: str = "R5",
+        cache_dir: str = ".eegdash_cache",
+        s3_bucket: str | None = "s3://nmdatasets/NeurIPS25/R5_L100",
+        **kwargs,
+    ):
+        """Create a new EEGDashDataset from a given query or local BIDS dataset directory
+        and dataset name. An EEGDashDataset is a pooled collection of EEGDashBaseDataset
+        instances (individual recordings) and is a subclass of braindecode's BaseConcatDataset.
+
+        Parameters
+        ----------
+        query : dict | None
+            Optionally a dictionary that specifies the query to be executed; see
+            EEGDash.find() for details on the query format.
+        data_dir : str | list[str] | None
+            Optionally a string or a list of strings specifying one or more local
+            BIDS dataset directories from which to load the EEG data files. Exactly one
+            of query or data_dir must be provided.
+        dataset : str | list[str] | None
+            If data_dir is given, a name or list of names for the dataset(s) to be loaded.
+        description_fields : list[str]
+            A list of fields to be extracted from the dataset records
+            and included in the returned data description(s). Examples are typical
+            subject metadata fields such as "subject", "session", "run", "task", etc.;
+            see also data_config.description_fields for the default set of fields.
+        cache_dir : str
+            A directory where the dataset will be cached locally.
+        s3_bucket : str | None
+            An optional S3 bucket URI to use instead of the
+            default OpenNeuro bucket for loading data files.
+        kwargs : dict
+            Additional keyword arguments to be passed to the EEGDashBaseDataset
+            constructor.
+
+        """
+        dsnumber_release_map = {
+            "R11": "ds005516",
+            "R10": "ds005515",
+            "R9": "ds005514",
+            "R8": "ds005512",
+            "R7": "ds005511",
+            "R6": "ds005510",
+            "R4": "ds005508",
+            "R5": "ds005509",
+            "R3": "ds005507",
+            "R2": "ds005506",
+            "R1": "ds005505",
+        }
+        super().__init__(
+            query={"dataset": dsnumber_release_map[release]},
+            cache_dir=cache_dir,
+            s3_bucket=s3_bucket,
+            **kwargs,
+        )
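A minimal usage sketch of the new class (the release-to-dataset mapping is the one in the code above; note that the default s3_bucket points at the R5 bucket, so selecting another release presumably requires passing a matching bucket explicitly):

    from eegdash.dataset import EEGChallengeDataset

    # Release "R5" resolves to OpenNeuro dataset ds005509 via
    # dsnumber_release_map; recordings are cached under cache_dir.
    ds = EEGChallengeDataset(release="R5", cache_dir=".eegdash_cache")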
eegdash/features/__init__.py
CHANGED
@@ -1,25 +1,53 @@
-
-from .
-
-
-
+from .datasets import FeaturesConcatDataset, FeaturesDataset
+from .decorators import (
+    FeatureKind,
+    FeaturePredecessor,
+    bivariate_feature,
+    multivariate_feature,
+    univariate_feature,
+)
 from .extractors import (
-    FeatureExtractor,
-    FitableFeature,
-    UnivariateFeature,
     BivariateFeature,
     DirectedBivariateFeature,
+    FeatureExtractor,
     MultivariateFeature,
+    TrainableFeature,
+    UnivariateFeature,
 )
-from .
-
-
-
-
-
+from .feature_bank import *  # noqa: F401
+from .inspect import (
+    get_all_feature_extractors,
+    get_all_feature_kinds,
+    get_all_features,
+    get_feature_kind,
+    get_feature_predecessors,
+)
+from .serialization import load_features_concat_dataset
+from .utils import (
+    extract_features,
+    fit_feature_extractors,
 )
-from .utils import extract_features, fit_feature_extractors
 
-
-
+__all__ = [
+    "FeaturesConcatDataset",
+    "FeaturesDataset",
+    "FeatureKind",
+    "FeaturePredecessor",
+    "bivariate_feature",
+    "multivariate_feature",
+    "univariate_feature",
+    "BivariateFeature",
+    "DirectedBivariateFeature",
+    "FeatureExtractor",
+    "MultivariateFeature",
+    "TrainableFeature",
+    "UnivariateFeature",
+    "get_all_feature_extractors",
+    "get_all_feature_kinds",
+    "get_all_features",
+    "get_feature_kind",
+    "get_feature_predecessors",
+    "load_features_concat_dataset",
+    "extract_features",
+    "fit_feature_extractors",
+]
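The module now pins its public surface with __all__, so everything above can be imported from eegdash.features directly. A quick sanity sketch using only names from that list:

    from eegdash.features import (
        FeatureExtractor,
        TrainableFeature,
        extract_features,
        fit_feature_extractors,
        get_all_features,
    )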
eegdash/features/datasets.py
CHANGED
@@ -1,16 +1,19 @@
 from __future__ import annotations
-
+
 import json
+import os
 import shutil
 import warnings
-from
-from
+from collections.abc import Callable
+from typing import Dict, List
+
 import numpy as np
 import pandas as pd
 from joblib import Parallel, delayed
+
 from braindecode.datasets.base import (
-    EEGWindowsDataset,
     BaseConcatDataset,
+    EEGWindowsDataset,
     _create_description,
 )
@@ -30,6 +33,7 @@ class FeaturesDataset(EEGWindowsDataset):
         Holds additional description about the continuous signal / subject.
     transform : callable | None
         On-the-fly transform applied to the example before it is returned.
+
     """

     def __init__(
@@ -92,10 +96,12 @@ def _compute_stats(
     return tuple(res)


-def _pooled_var(counts, means, variances, ddof):
+def _pooled_var(counts, means, variances, ddof, ddof_in=None):
+    if ddof_in is None:
+        ddof_in = ddof
     count = counts.sum(axis=0)
     mean = np.sum((counts / count) * means, axis=0)
-    var = np.sum(((counts -
+    var = np.sum(((counts - ddof_in) / (count - ddof)) * variances, axis=0)
     var[:] += np.sum((counts / (count - ddof)) * (means**2), axis=0)
     var[:] -= (count / (count - ddof)) * (mean**2)
     var[:] = var.clip(min=0)
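The reworked _pooled_var combines per-dataset running statistics into one global variance: ddof_in is the correction the per-dataset variances were computed with, while ddof applies to the pooled result. A self-contained check of that identity (a sketch; in the real code these are arrays with one entry per feature column):

    import numpy as np

    rng = np.random.default_rng(0)
    parts = [rng.normal(size=n) for n in (50, 80, 120)]
    counts = np.array([len(p) for p in parts], dtype=float)
    means = np.array([p.mean() for p in parts])
    variances = np.array([p.var(ddof=0) for p in parts])  # ddof_in = 0

    # Mirror the arithmetic in _pooled_var with ddof=1, ddof_in=0:
    count = counts.sum()
    mean = np.sum((counts / count) * means)
    var = np.sum(((counts - 0) / (count - 1)) * variances)
    var += np.sum((counts / (count - 1)) * means**2)
    var -= (count / (count - 1)) * mean**2

    assert np.isclose(var, np.concatenate(parts).var(ddof=1))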
@@ -153,6 +159,7 @@ class FeaturesConcatDataset(BaseConcatDataset):
         splits : dict
             A dictionary with the name of the split (a string) as key and the
             dataset as value.
+
         """
         if isinstance(by, str):
             split_ids = {
@@ -184,6 +191,7 @@ class FeaturesConcatDataset(BaseConcatDataset):
             DataFrame containing as many rows as there are windows in the
             BaseConcatDataset, with the metadata and description information
             for each window.
+
         """
         if not all([isinstance(ds, FeaturesDataset) for ds in self.datasets]):
             raise TypeError(
@@ -235,6 +243,7 @@ class FeaturesConcatDataset(BaseConcatDataset):
         concat. This is useful in the setting of very large datasets, where
         one dataset has to be processed and saved at a time to account for
         its original position.
+
         """
         if len(self.datasets) == 0:
             raise ValueError("Expect at least one dataset")
@@ -320,25 +329,53 @@ class FeaturesConcatDataset(BaseConcatDataset):
             json.dump(kwargs, f)

     def to_dataframe(
-        self,
+        self,
+        include_metadata: bool | str | List[str] = False,
+        include_target: bool = False,
+        include_crop_inds: bool = False,
     ):
-        if
+        if (
+            not isinstance(include_metadata, bool)
+            or include_metadata
+            or include_crop_inds
+        ):
+            include_dataset = False
+            if isinstance(include_metadata, bool) and include_metadata:
+                include_dataset = True
+                cols = self.datasets[0].metadata.columns
+            else:
+                cols = include_metadata
+                if isinstance(cols, bool) and not cols:
+                    cols = []
+                elif isinstance(cols, str):
+                    cols = [cols]
+            cols = set(cols)
+            if include_crop_inds:
+                cols = {
+                    "i_dataset",
+                    "i_window_in_trial",
+                    "i_start_in_trial",
+                    "i_stop_in_trial",
+                    *cols,
+                }
+            if include_target:
+                cols.add("target")
+            cols = list(cols)
+            include_dataset = "i_dataset" in cols
+            if include_dataset:
+                cols.remove("i_dataset")
             dataframes = [
-                ds.metadata.join(ds.features, how="right", lsuffix="_metadata")
+                ds.metadata[cols].join(ds.features, how="right", lsuffix="_metadata")
                 for ds in self.datasets
             ]
+            if include_dataset:
+                for i, df in enumerate(dataframes):
+                    df.insert(loc=0, column="i_dataset", value=i)
         elif include_target:
             dataframes = [
                 ds.features.join(ds.metadata["target"], how="left", rsuffix="_metadata")
                 for ds in self.datasets
             ]
-        elif include_crop_inds:
-            dataframes = [
-                ds.metadata.drop("target", axis="columns").join(
-                    ds.features, how="right", lsuffix="_metadata"
-                )
-                for ds in self.datasets
-            ]
         else:
             dataframes = [ds.features for ds in self.datasets]
         return pd.concat(dataframes, axis=0, ignore_index=True)
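A hypothetical call pattern for the extended method (concat_ds stands in for any FeaturesConcatDataset instance; the flag names come from the signature above):

    # One row per window; crop indices pull i_dataset, i_window_in_trial,
    # i_start_in_trial and i_stop_in_trial into the frame next to the features.
    df = concat_ds.to_dataframe(include_crop_inds=True, include_target=True)

    # Or select specific metadata columns by name:
    df = concat_ds.to_dataframe(include_metadata=["subject", "session"])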
@@ -374,7 +411,7 @@ class FeaturesConcatDataset(BaseConcatDataset):
                 return_count=True,
                 return_mean=True,
                 return_var=True,
-                ddof=
+                ddof=0,
                 numeric_only=numeric_only,
             )
             for ds in self.datasets
@@ -384,11 +421,13 @@ class FeaturesConcatDataset(BaseConcatDataset):
             np.array([s[1] for s in stats]),
             np.array([s[2] for s in stats]),
         )
-        _, _, var = _pooled_var(counts, means, variances, ddof)
+        _, _, var = _pooled_var(counts, means, variances, ddof, ddof_in=0)
         return pd.Series(var, index=self._numeric_columns())

-    def std(self, ddof=1, numeric_only=False, n_jobs=1):
-        return np.sqrt(
+    def std(self, ddof=1, numeric_only=False, eps=0, n_jobs=1):
+        return np.sqrt(
+            self.var(ddof=ddof, numeric_only=numeric_only, n_jobs=n_jobs) + eps
+        )

     def zscore(self, ddof=1, numeric_only=False, eps=0, n_jobs=1):
         stats = Parallel(n_jobs)(
@@ -397,7 +436,7 @@ class FeaturesConcatDataset(BaseConcatDataset):
                 return_count=True,
                 return_mean=True,
                 return_var=True,
-                ddof=
+                ddof=0,
                 numeric_only=numeric_only,
             )
             for ds in self.datasets
@@ -407,8 +446,8 @@ class FeaturesConcatDataset(BaseConcatDataset):
             np.array([s[1] for s in stats]),
             np.array([s[2] for s in stats]),
         )
-        _, mean, var = _pooled_var(counts, means, variances, ddof)
-        std = np.sqrt(var
+        _, mean, var = _pooled_var(counts, means, variances, ddof, ddof_in=0)
+        std = np.sqrt(var + eps)
         for ds in self.datasets:
             ds.features = (ds.features - mean) / std

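The new eps argument guards against division by zero when normalizing features whose pooled variance is (numerically) zero; note that zscore rewrites every dataset's feature table in place. A hedged usage sketch with the same assumed concat_ds as above:

    # Pooled per-feature standard deviation across all datasets:
    sigma = concat_ds.std(ddof=1, eps=1e-12)

    # In-place z-scoring using the pooled mean/std over all datasets:
    concat_ds.zscore(ddof=1, eps=1e-12)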
eegdash/features/decorators.py
CHANGED
@@ -1,14 +1,14 @@
-from typing import List, Type
 from collections.abc import Callable
+from typing import List, Type

 from .extractors import (
-    FeatureExtractor,
-    UnivariateFeature,
     BivariateFeature,
     DirectedBivariateFeature,
+    FeatureExtractor,
     MultivariateFeature,
+    UnivariateFeature,
+    _get_underlying_func,
 )
-from .extractors import _get_underlying_func


 class FeaturePredecessor:
@@ -38,6 +38,14 @@ class FeatureKind:

 # Syntax sugar
 univariate_feature = FeatureKind(UnivariateFeature())
-
-
+
+
+def bivariate_feature(func, directed=False):
+    if directed:
+        kind = DirectedBivariateFeature()
+    else:
+        kind = BivariateFeature()
+    return FeatureKind(kind)(func)
+
+
 multivariate_feature = FeatureKind(MultivariateFeature())
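bivariate_feature is now a small factory around FeatureKind rather than a bare instance, so a directed kind can be requested per feature via directed=True. A hypothetical coherence-derived feature, mirroring the decorator stack used on the connectivity features below:

    import numpy as np

    from eegdash.features import FeaturePredecessor, bivariate_feature
    from eegdash.features.feature_bank.connectivity import CoherenceFeatureExtractor

    # Hypothetical example: mean coherency magnitude per channel pair.
    @FeaturePredecessor(CoherenceFeatureExtractor)
    @bivariate_feature
    def connectivity_mean_abs_coherency(f, c):
        return np.abs(c).mean(axis=-1)

    # A directed measure would instead be wrapped as
    # bivariate_feature(my_func, directed=True).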
eegdash/features/extractors.py
CHANGED
@@ -1,7 +1,8 @@
 from abc import ABC, abstractmethod
-from typing import Dict
 from collections.abc import Callable
 from functools import partial
+from typing import Dict
+
 import numpy as np
 from numba.core.dispatcher import Dispatcher

@@ -15,9 +16,9 @@ def _get_underlying_func(func):
     return f


-class FitableFeature(ABC):
+class TrainableFeature(ABC):
     def __init__(self):
-        self.
+        self._is_trained = False
         self.clear()

     @abstractmethod
@@ -38,12 +39,12 @@ class FitableFeature(ABC):
         )


-class FeatureExtractor(FitableFeature):
+class FeatureExtractor(TrainableFeature):
     def __init__(
         self, feature_extractors: Dict[str, Callable], **preprocess_kwargs: Dict
     ):
         self.feature_extractors_dict = self._validate_execution_tree(feature_extractors)
-        self.
+        self._is_trainable = self._check_is_trainable(feature_extractors)
         super().__init__()

         # bypassing FeaturePredecessor to avoid circular import
@@ -69,32 +70,31 @@ class FeatureExtractor(FitableFeature):
         assert type(self) in pe_type
         return feature_extractors

-    def
-
+    def _check_is_trainable(self, feature_extractors):
+        is_trainable = False
         for fname, f in feature_extractors.items():
             if isinstance(f, FeatureExtractor):
-
+                is_trainable = f._is_trainable
             else:
                 f = _get_underlying_func(f)
-                if isinstance(f,
-
-            if
+                if isinstance(f, TrainableFeature):
+                    is_trainable = True
+            if is_trainable:
                 break
-        return
+        return is_trainable

     def preprocess(self, *x, **kwargs):
         return (*x,)

-    def feature_channel_names(self, ch_names):
-        return [""]
-
     def __call__(self, *x, _batch_size=None, _ch_names=None):
         assert _batch_size is not None
         assert _ch_names is not None
-        if self.
+        if self._is_trainable:
             super().__call__()
         results_dict = dict()
         z = self.preprocess(*x, **self.preprocess_kwargs)
+        if not isinstance(z, tuple):
+            z = (z,)
         for fname, f in self.feature_extractors_dict.items():
             if isinstance(f, FeatureExtractor):
                 r = f(*z, _batch_size=_batch_size, _ch_names=_ch_names)
@@ -125,28 +125,28 @@ class FeatureExtractor(FitableFeature):
             results_dict[name] = value

     def clear(self):
-        if not self.
+        if not self._is_trainable:
             return
         for fname, f in self.feature_extractors_dict.items():
             f = _get_underlying_func(f)
-            if isinstance(f,
+            if isinstance(f, TrainableFeature):
                 f.clear()

     def partial_fit(self, *x, y=None):
-        if not self.
+        if not self._is_trainable:
             return
         z = self.preprocess(*x, **self.preprocess_kwargs)
         for fname, f in self.feature_extractors_dict.items():
             f = _get_underlying_func(f)
-            if isinstance(f,
+            if isinstance(f, TrainableFeature):
                 f.partial_fit(*z, y=y)

     def fit(self):
-        if not self.
+        if not self._is_trainable:
             return
         for fname, f in self.feature_extractors_dict.items():
             f = _get_underlying_func(f)
-            if isinstance(f,
+            if isinstance(f, TrainableFeature):
                 f.fit()
         super().fit()

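The FitableFeature to TrainableFeature rename comes with the clear()/partial_fit()/fit() lifecycle that FeatureExtractor forwards to nested features above. A minimal sketch of a custom trainable feature, under the assumption that those three methods are the required protocol (the full abstract method set is not visible in this diff):

    import numpy as np

    from eegdash.features import TrainableFeature

    class RunningMeanOffset(TrainableFeature):
        # Hypothetical feature: subtracts the mean observed during fitting.

        def clear(self):
            self._sum = 0.0
            self._n = 0

        def partial_fit(self, x, y=None):
            # Accumulate streaming statistics one batch at a time.
            self._sum += float(np.sum(x))
            self._n += x.size

        def fit(self):
            self._mean = self._sum / max(self._n, 1)
            super().fit()  # presumably marks the feature as trained

        def __call__(self, x):
            return x - self._mean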
eegdash/features/feature_bank/complexity.py
CHANGED
@@ -1,10 +1,10 @@
-import numpy as np
 import numba as nb
+import numpy as np
 from sklearn.neighbors import KDTree

-from ..extractors import FeatureExtractor
 from ..decorators import FeaturePredecessor, univariate_feature
-
+from ..extractors import FeatureExtractor
+from .signal import SIGNAL_PREDECESSORS

 __all__ = [
     "EntropyFeatureExtractor",
@@ -29,6 +29,7 @@ def _channel_app_samp_entropy_counts(x, m, r, l):
     return kdtree.query_radius(x_emb, r, count_only=True)


+@FeaturePredecessor(*SIGNAL_PREDECESSORS)
 class EntropyFeatureExtractor(FeatureExtractor):
     def preprocess(self, x, m=2, r=0.2, l=1):
         rr = r * x.std(axis=-1)
@@ -56,6 +57,7 @@ def complexity_sample_entropy(counts_m, counts_mp1):
     return -np.log(A / B)


+@FeaturePredecessor(*SIGNAL_PREDECESSORS)
 @univariate_feature
 def complexity_svd_entropy(x, m=10, tau=1):
     x_emb = np.empty((*x.shape[:-1], (x.shape[-1] - m + 1) // tau, m))
@@ -66,6 +68,7 @@ def complexity_svd_entropy(x, m=10, tau=1):
     return -np.sum(s * np.log(s), axis=-1)


+@FeaturePredecessor(*SIGNAL_PREDECESSORS)
 @univariate_feature
 @nb.njit(cache=True, fastmath=True)
 def complexity_lempel_ziv(x, threshold=None):
eegdash/features/feature_bank/connectivity.py
CHANGED
@@ -1,10 +1,11 @@
 from itertools import chain
+
 import numpy as np
 from scipy.signal import csd

-from ..extractors import FeatureExtractor, BivariateFeature
 from ..decorators import FeaturePredecessor, bivariate_feature
-
+from ..extractors import BivariateFeature, FeatureExtractor
+from . import utils

 __all__ = [
     "CoherenceFeatureExtractor",
@@ -18,82 +19,41 @@ class CoherenceFeatureExtractor(FeatureExtractor):
     def preprocess(self, x, **kwargs):
         f_min = kwargs.pop("f_min") if "f_min" in kwargs else None
         f_max = kwargs.pop("f_max") if "f_max" in kwargs else None
+        assert "fs" in kwargs and "nperseg" in kwargs
         kwargs["axis"] = -1
         n = x.shape[1]
         idx_x, idx_y = BivariateFeature.get_pair_iterators(n)
         ix, iy = list(chain(range(n), idx_x)), list(chain(range(n), idx_y))
         f, s = csd(x[:, ix], x[:, iy], **kwargs)
-
-
-
-
-
-
-        sx, sxy = np.split(s, [n], axis=1)
-        sxx, syy = sx[:, idx_x].real, sx[:, idx_y].real
+        f_min, f_max = utils.get_valid_freq_band(
+            kwargs["fs"], x.shape[-1], f_min, f_max
+        )
+        f, s = utils.slice_freq_band(f, s, f_min=f_min, f_max=f_max)
+        p, sxy = np.split(s, [n], axis=1)
+        sxx, syy = p[:, idx_x].real, p[:, idx_y].real
         c = sxy / np.sqrt(sxx * syy)
         return f, c


-def _avg_over_bands(f, x, bands):
-    bands_avg = dict()
-    for k, v in bands.items():
-        assert isinstance(k, str)
-        assert isinstance(v, tuple)
-        assert len(v) == 2
-        assert v[0] < v[1]
-        mask = np.logical_and(f > v[0], f < v[1])
-        avg = x[..., mask].mean(axis=-1)
-        bands_avg[k] = avg
-    return bands_avg
-
-
 @FeaturePredecessor(CoherenceFeatureExtractor)
 @bivariate_feature
-def connectivity_magnitude_square_coherence(
-    f,
-    c,
-    bands={
-        "delta": (1, 4.5),
-        "theta": (4.5, 8),
-        "alpha": (8, 12),
-        "beta": (12, 30),
-    },
-):
+def connectivity_magnitude_square_coherence(f, c, bands=utils.DEFAULT_FREQ_BANDS):
     # https://neuroimage.usc.edu/brainstorm/Tutorials/Connectivity
     coher = c.real**2 + c.imag**2
-    return
+    return utils.reduce_freq_bands(f, coher, bands, np.mean)


 @FeaturePredecessor(CoherenceFeatureExtractor)
 @bivariate_feature
-def connectivity_imaginary_coherence(
-    f,
-    c,
-    bands={
-        "delta": (1, 4.5),
-        "theta": (4.5, 8),
-        "alpha": (8, 12),
-        "beta": (12, 30),
-    },
-):
+def connectivity_imaginary_coherence(f, c, bands=utils.DEFAULT_FREQ_BANDS):
     # https://neuroimage.usc.edu/brainstorm/Tutorials/Connectivity
     coher = c.imag
-    return
+    return utils.reduce_freq_bands(f, coher, bands, np.mean)


 @FeaturePredecessor(CoherenceFeatureExtractor)
 @bivariate_feature
-def connectivity_lagged_coherence(
-    f,
-    c,
-    bands={
-        "delta": (1, 4.5),
-        "theta": (4.5, 8),
-        "alpha": (8, 12),
-        "beta": (12, 30),
-    },
-):
+def connectivity_lagged_coherence(f, c, bands=utils.DEFAULT_FREQ_BANDS):
     # https://neuroimage.usc.edu/brainstorm/Tutorials/Connectivity
     coher = c.imag / np.sqrt(1 - c.real)
-    return
+    return utils.reduce_freq_bands(f, coher, bands, np.mean)
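utils.py itself does not appear in this diff, but the removed _avg_over_bands shows what the new helpers replace: reduce_freq_bands generalizes it by taking the reduction function as an argument, and DEFAULT_FREQ_BANDS presumably carries the band dict previously inlined in every signature. A sketch of the equivalent logic under those assumptions:

    import numpy as np

    DEFAULT_FREQ_BANDS = {  # the defaults previously inlined above
        "delta": (1, 4.5),
        "theta": (4.5, 8),
        "alpha": (8, 12),
        "beta": (12, 30),
    }

    def reduce_freq_bands(f, x, bands, reduce):
        # Reduce x over the frequency bins falling inside each named band,
        # e.g. with reduce=np.mean, exactly as _avg_over_bands used to do.
        out = {}
        for name, (lo, hi) in bands.items():
            assert lo < hi
            mask = np.logical_and(f > lo, f < hi)
            out[name] = reduce(x[..., mask], axis=-1)
        return out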
eegdash/features/feature_bank/csp.py
CHANGED
@@ -1,11 +1,10 @@
-import numpy as np
 import numba as nb
+import numpy as np
 import scipy
 import scipy.linalg

-from ..extractors import FitableFeature
 from ..decorators import multivariate_feature
-
+from ..extractors import TrainableFeature

 __all__ = [
     "CommonSpatialPattern",
@@ -23,7 +22,7 @@ def _update_mean_cov(count, mean, cov, x_count, x_mean, x_cov):


 @multivariate_feature
-class CommonSpatialPattern(FitableFeature):
+class CommonSpatialPattern(TrainableFeature):
     def __init__(self):
         super().__init__()
