PyPI - eegdash - Versions diffs - 0.4.0.dev173498563__py3-none-any.whl → 0.4.1.dev185__py3-none-any.whl - Mend

eegdash 0.4.0.dev173498563__py3-none-any.whl → 0.4.1.dev185__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of eegdash might be problematic. Click here for more details.

Files changed (34)

eegdash/__init__.py +3 -3
eegdash/api.py +143 -526
eegdash/bids_eeg_metadata.py +139 -39
eegdash/const.py +25 -0
eegdash/dataset/__init__.py +8 -2
eegdash/dataset/base.py +311 -0
eegdash/dataset/bids_dataset.py +443 -0
eegdash/dataset/dataset.py +542 -17
eegdash/dataset/dataset_summary.csv +255 -255
eegdash/dataset/registry.py +69 -4
eegdash/downloader.py +95 -9
eegdash/features/datasets.py +326 -136
eegdash/features/decorators.py +96 -3
eegdash/features/extractors.py +212 -55
eegdash/features/feature_bank/complexity.py +7 -3
eegdash/features/feature_bank/dimensionality.py +1 -1
eegdash/features/feature_bank/signal.py +11 -10
eegdash/features/feature_bank/utils.py +8 -0
eegdash/features/inspect.py +97 -11
eegdash/features/serialization.py +56 -19
eegdash/features/utils.py +90 -16
eegdash/hbn/preprocessing.py +50 -17
eegdash/hbn/windows.py +145 -32
eegdash/logging.py +19 -0
eegdash/mongodb.py +44 -27
eegdash/paths.py +15 -5
eegdash/utils.py +16 -1
{eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dev185.dist-info}/METADATA +7 -8
eegdash-0.4.1.dev185.dist-info/RECORD +38 -0
eegdash/data_utils.py +0 -677
eegdash-0.4.0.dev173498563.dist-info/RECORD +0 -37
{eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dev185.dist-info}/WHEEL +0 -0
{eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dev185.dist-info}/licenses/LICENSE +0 -0
{eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dev185.dist-info}/top_level.txt +0 -0

eegdash/features/decorators.py CHANGED Viewed

@@ -10,8 +10,31 @@ from .extractors import (
     _get_underlying_func,
 )
+__all__ = [
+    "bivariate_feature",
+    "FeatureKind",
+    "FeaturePredecessor",
+    "multivariate_feature",
+    "univariate_feature",
+]
 class FeaturePredecessor:
+    """A decorator to specify parent extractors for a feature function.
+    This decorator attaches a list of parent extractor types to a feature
+    extraction function. This information can be used to build a dependency
+    graph of features.
+    Parameters
+    ----------
+    *parent_extractor_type : list of Type
+        A list of feature extractor classes (subclasses of
+        :class:`~eegdash.features.extractors.FeatureExtractor`) that this
+        feature depends on.
+    """
     def __init__(self, *parent_extractor_type: List[Type]):
         parent_cls = parent_extractor_type
         if not parent_cls:
@@ -20,17 +43,58 @@ class FeaturePredecessor:
             assert issubclass(p_cls, FeatureExtractor)
         self.parent_extractor_type = parent_cls
-    def __call__(self, func: Callable):
+    def __call__(self, func: Callable) -> Callable:
+        """Apply the decorator to a function.
+        Parameters
+        ----------
+        func : callable
+            The feature extraction function to decorate.
+        Returns
+        -------
+        callable
+            The decorated function with the `parent_extractor_type` attribute
+            set.
+        """
         f = _get_underlying_func(func)
         f.parent_extractor_type = self.parent_extractor_type
         return func
 class FeatureKind:
+    """A decorator to specify the kind of a feature.
+    This decorator attaches a "feature kind" (e.g., univariate, bivariate)
+    to a feature extraction function.
+    Parameters
+    ----------
+    feature_kind : ~eegdash.features.extractors.MultivariateFeature
+        An instance of a feature kind class, such as
+        :class:`~eegdash.features.extractors.UnivariateFeature` or
+        :class:`~eegdash.features.extractors.BivariateFeature`.
+    """
     def __init__(self, feature_kind: MultivariateFeature):
         self.feature_kind = feature_kind
-    def __call__(self, func):
+    def __call__(self, func: Callable) -> Callable:
+        """Apply the decorator to a function.
+        Parameters
+        ----------
+        func : callable
+            The feature extraction function to decorate.
+        Returns
+        -------
+        callable
+            The decorated function with the `feature_kind` attribute set.
+        """
         f = _get_underlying_func(func)
         f.feature_kind = self.feature_kind
         return func
@@ -38,9 +102,33 @@ class FeatureKind:
 # Syntax sugar
 univariate_feature = FeatureKind(UnivariateFeature())
+"""Decorator to mark a feature as univariate.
+This is a convenience instance of :class:`~eegdash.features.decorators.FeatureKind` pre-configured for
+univariate features.
+"""
-def bivariate_feature(func, directed=False):
+def bivariate_feature(func: Callable, directed: bool = False) -> Callable:
+    """Decorator to mark a feature as bivariate.
+    This decorator specifies that the feature operates on pairs of channels.
+    Parameters
+    ----------
+    func : callable
+        The feature extraction function to decorate.
+    directed : bool, default False
+        If True, the feature is directed (e.g., connectivity from channel A
+        to B is different from B to A). If False, the feature is undirected.
+    Returns
+    -------
+    callable
+        The decorated function with the appropriate bivariate feature kind
+        attached.
+    """
     if directed:
         kind = DirectedBivariateFeature()
     else:
@@ -49,3 +137,8 @@ def bivariate_feature(func, directed=False):
 multivariate_feature = FeatureKind(MultivariateFeature())
+"""Decorator to mark a feature as multivariate.
+This is a convenience instance of :class:`~eegdash.features.decorators.FeatureKind` pre-configured for
+multivariate features, which operate on all channels simultaneously.
+"""

eegdash/features/extractors.py CHANGED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 from abc import ABC, abstractmethod
 from collections.abc import Callable
 from functools import partial
@@ -6,8 +8,33 @@ from typing import Dict
 import numpy as np
 from numba.core.dispatcher import Dispatcher
+__all__ = [
+    "BivariateFeature",
+    "DirectedBivariateFeature",
+    "FeatureExtractor",
+    "MultivariateFeature",
+    "TrainableFeature",
+    "UnivariateFeature",
+]
+def _get_underlying_func(func: Callable) -> Callable:
+    """Get the underlying function from a potential wrapper.
+    This helper unwraps functions that might be wrapped by `functools.partial`
+    or `numba.dispatcher.Dispatcher`.
+    Parameters
+    ----------
+    func : callable
+        The function to unwrap.
-def _get_underlying_func(func):
+    Returns
+    -------
+    callable
+        The underlying Python function.
+    """
     f = func
     if isinstance(f, partial):
         f = f.func
@@ -17,22 +44,46 @@ def _get_underlying_func(func):
 class TrainableFeature(ABC):
+    """Abstract base class for features that require training.
+    This ABC defines the interface for feature extractors that need to be
+    fitted on data before they can be used. It includes methods for fitting
+    the feature extractor and for resetting its state.
+    """
     def __init__(self):
         self._is_trained = False
         self.clear()
     @abstractmethod
     def clear(self):
+        """Reset the internal state of the feature extractor."""
         pass
     @abstractmethod
     def partial_fit(self, *x, y=None):
+        """Update the feature extractor's state with a batch of data.
+        Parameters
+        ----------
+        *x : tuple
+            The input data for fitting.
+        y : any, optional
+            The target data, if required for supervised training.
+        """
         pass
     def fit(self):
+        """Finalize the training of the feature extractor.
+        This method should be called after all data has been seen via
+        `partial_fit`. It marks the feature as fitted.
+        """
         self._is_fitted = True
     def __call__(self, *args, **kwargs):
+        """Check if the feature is fitted before execution."""
         if not self._is_fitted:
             raise RuntimeError(
                 f"{self.__class__} cannot be called, it has to be fitted first."
@@ -40,6 +91,22 @@ class TrainableFeature(ABC):
 class FeatureExtractor(TrainableFeature):
+    """A composite feature extractor that applies multiple feature functions.
+    This class orchestrates the application of a dictionary of feature
+    extraction functions to input data. It can handle nested extractors,
+    pre-processing, and trainable features.
+    Parameters
+    ----------
+    feature_extractors : dict[str, callable]
+        A dictionary where keys are feature names and values are the feature
+        extraction functions or other `FeatureExtractor` instances.
+    **preprocess_kwargs
+        Keyword arguments to be passed to the `preprocess` method.
+    """
     def __init__(
         self, feature_extractors: Dict[str, Callable], **preprocess_kwargs: Dict
     ):
@@ -63,30 +130,64 @@ class FeatureExtractor(TrainableFeature):
             if isinstance(fe, partial):
                 self.features_kwargs[fn] = fe.keywords
-    def _validate_execution_tree(self, feature_extractors):
+    def _validate_execution_tree(self, feature_extractors: dict) -> dict:
+        """Validate the feature dependency graph."""
         for fname, f in feature_extractors.items():
             f = _get_underlying_func(f)
             pe_type = getattr(f, "parent_extractor_type", [FeatureExtractor])
-            assert type(self) in pe_type
+            if type(self) not in pe_type:
+                raise TypeError(
+                    f"Feature '{fname}' cannot be a child of {type(self).__name__}"
+                )
         return feature_extractors
-    def _check_is_trainable(self, feature_extractors):
-        is_trainable = False
+    def _check_is_trainable(self, feature_extractors: dict) -> bool:
+        """Check if any of the contained features are trainable."""
         for fname, f in feature_extractors.items():
             if isinstance(f, FeatureExtractor):
-                is_trainable = f._is_trainable
-            else:
-                f = _get_underlying_func(f)
-                if isinstance(f, TrainableFeature):
-                    is_trainable = True
-            if is_trainable:
-                break
-        return is_trainable
+                if f._is_trainable:
+                    return True
+            elif isinstance(_get_underlying_func(f), TrainableFeature):
+                return True
+        return False
     def preprocess(self, *x, **kwargs):
+        """Apply pre-processing to the input data.
+        Parameters
+        ----------
+        *x : tuple
+            Input data.
+        **kwargs
+            Additional keyword arguments.
+        Returns
+        -------
+        tuple
+            The pre-processed data.
+        """
         return (*x,)
-    def __call__(self, *x, _batch_size=None, _ch_names=None):
+    def __call__(self, *x, _batch_size=None, _ch_names=None) -> dict:
+        """Apply all feature extractors to the input data.
+        Parameters
+        ----------
+        *x : tuple
+            Input data.
+        _batch_size : int, optional
+            The number of samples in the batch.
+        _ch_names : list of str, optional
+            The names of the channels in the input data.
+        Returns
+        -------
+        dict
+            A dictionary where keys are feature names and values are the
+            computed feature values.
+        """
         assert _batch_size is not None
         assert _ch_names is not None
         if self._is_trainable:
@@ -100,59 +201,83 @@ class FeatureExtractor(TrainableFeature):
                 r = f(*z, _batch_size=_batch_size, _ch_names=_ch_names)
             else:
                 r = f(*z)
-            f = _get_underlying_func(f)
-            if hasattr(f, "feature_kind"):
-                r = f.feature_kind(r, _ch_names=_ch_names)
+            f_und = _get_underlying_func(f)
+            if hasattr(f_und, "feature_kind"):
+                r = f_und.feature_kind(r, _ch_names=_ch_names)
             if not isinstance(fname, str) or not fname:
-                if isinstance(f, FeatureExtractor) or not hasattr(f, "__name__"):
-                    fname = ""
-                else:
-                    fname = f.__name__
+                fname = getattr(f_und, "__name__", "")
             if isinstance(r, dict):
-                if fname:
-                    fname += "_"
+                prefix = f"{fname}_" if fname else ""
                 for k, v in r.items():
-                    self._add_feature_to_dict(results_dict, fname + k, v, _batch_size)
+                    self._add_feature_to_dict(results_dict, prefix + k, v, _batch_size)
             else:
                 self._add_feature_to_dict(results_dict, fname, r, _batch_size)
         return results_dict
-    def _add_feature_to_dict(self, results_dict, name, value, batch_size):
-        if not isinstance(value, np.ndarray):
-            results_dict[name] = value
-        else:
+    def _add_feature_to_dict(
+        self, results_dict: dict, name: str, value: any, batch_size: int
+    ):
+        """Add a computed feature to the results dictionary."""
+        if isinstance(value, np.ndarray):
             assert value.shape[0] == batch_size
-            results_dict[name] = value
+        results_dict[name] = value
     def clear(self):
+        """Clear the state of all trainable sub-features."""
         if not self._is_trainable:
             return
-        for fname, f in self.feature_extractors_dict.items():
-            f = _get_underlying_func(f)
-            if isinstance(f, TrainableFeature):
-                f.clear()
+        for f in self.feature_extractors_dict.values():
+            if isinstance(_get_underlying_func(f), TrainableFeature):
+                _get_underlying_func(f).clear()
     def partial_fit(self, *x, y=None):
+        """Partially fit all trainable sub-features."""
         if not self._is_trainable:
             return
         z = self.preprocess(*x, **self.preprocess_kwargs)
-        for fname, f in self.feature_extractors_dict.items():
-            f = _get_underlying_func(f)
-            if isinstance(f, TrainableFeature):
-                f.partial_fit(*z, y=y)
+        if not isinstance(z, tuple):
+            z = (z,)
+        for f in self.feature_extractors_dict.values():
+            if isinstance(_get_underlying_func(f), TrainableFeature):
+                _get_underlying_func(f).partial_fit(*z, y=y)
     def fit(self):
+        """Fit all trainable sub-features."""
         if not self._is_trainable:
             return
-        for fname, f in self.feature_extractors_dict.items():
-            f = _get_underlying_func(f)
-            if isinstance(f, TrainableFeature):
+        for f in self.feature_extractors_dict.values():
+            if isinstance(_get_underlying_func(f), TrainableFeature):
                 f.fit()
         super().fit()
 class MultivariateFeature:
-    def __call__(self, x, _ch_names=None):
+    """A mixin for features that operate on multiple channels.
+    This class provides a `__call__` method that converts a feature array into
+    a dictionary with named features, where names are derived from channel
+    names.
+    """
+    def __call__(
+        self, x: np.ndarray, _ch_names: list[str] | None = None
+    ) -> dict | np.ndarray:
+        """Convert a feature array to a named dictionary.
+        Parameters
+        ----------
+        x : numpy.ndarray
+            The computed feature array.
+        _ch_names : list of str, optional
+            The list of channel names.
+        Returns
+        -------
+        dict or numpy.ndarray
+            A dictionary of named features, or the original array if feature
+            channel names cannot be generated.
+        """
         assert _ch_names is not None
         f_channels = self.feature_channel_names(_ch_names)
         if isinstance(x, dict):
@@ -163,37 +288,66 @@ class MultivariateFeature:
         return self._array_to_dict(x, f_channels)
     @staticmethod
-    def _array_to_dict(x, f_channels, name=""):
+    def _array_to_dict(
+        x: np.ndarray, f_channels: list[str], name: str = ""
+    ) -> dict | np.ndarray:
+        """Convert a numpy array to a dictionary with named keys."""
         assert isinstance(x, np.ndarray)
-        if len(f_channels) == 0:
-            assert x.ndim == 1
-            if name:
-                return {name: x}
-            return x
-        assert x.shape[1] == len(f_channels)
+        if not f_channels:
+            return {name: x} if name else x
+        assert x.shape[1] == len(f_channels), f"{x.shape[1]} != {len(f_channels)}"
         x = x.swapaxes(0, 1)
-        names = [f"{name}_{ch}" for ch in f_channels] if name else f_channels
+        prefix = f"{name}_" if name else ""
+        names = [f"{prefix}{ch}" for ch in f_channels]
         return dict(zip(names, x))
-    def feature_channel_names(self, ch_names):
+    def feature_channel_names(self, ch_names: list[str]) -> list[str]:
+        """Generate feature names based on channel names.
+        Parameters
+        ----------
+        ch_names : list of str
+            The names of the input channels.
+        Returns
+        -------
+        list of str
+            The names for the output features.
+        """
         return []
 class UnivariateFeature(MultivariateFeature):
-    def feature_channel_names(self, ch_names):
+    """A feature kind for operations applied to each channel independently."""
+    def feature_channel_names(self, ch_names: list[str]) -> list[str]:
+        """Return the channel names themselves as feature names."""
         return ch_names
 class BivariateFeature(MultivariateFeature):
-    def __init__(self, *args, channel_pair_format="{}<>{}"):
+    """A feature kind for operations on pairs of channels.
+    Parameters
+    ----------
+    channel_pair_format : str, default="{}<>{}"
+        A format string used to create feature names from pairs of
+        channel names.
+    """
+    def __init__(self, *args, channel_pair_format: str = "{}<>{}"):
         super().__init__(*args)
         self.channel_pair_format = channel_pair_format
     @staticmethod
-    def get_pair_iterators(n):
+    def get_pair_iterators(n: int) -> tuple[np.ndarray, np.ndarray]:
+        """Get indices for unique, unordered pairs of channels."""
         return np.triu_indices(n, 1)
-    def feature_channel_names(self, ch_names):
+    def feature_channel_names(self, ch_names: list[str]) -> list[str]:
+        """Generate feature names for each pair of channels."""
         return [
             self.channel_pair_format.format(ch_names[i], ch_names[j])
             for i, j in zip(*self.get_pair_iterators(len(ch_names)))
@@ -201,8 +355,11 @@ class BivariateFeature(MultivariateFeature):
 class DirectedBivariateFeature(BivariateFeature):
+    """A feature kind for directed operations on pairs of channels."""
     @staticmethod
-    def get_pair_iterators(n):
+    def get_pair_iterators(n: int) -> list[np.ndarray]:
+        """Get indices for all ordered pairs of channels (excluding self-pairs)."""
         return [
             np.append(a, b)
             for a, b in zip(np.tril_indices(n, -1), np.triu_indices(n, 1))

eegdash/features/feature_bank/complexity.py CHANGED Viewed

@@ -36,8 +36,12 @@ class EntropyFeatureExtractor(FeatureExtractor):
         counts_m = np.empty((*x.shape[:-1], (x.shape[-1] - m + 1) // l))
         counts_mp1 = np.empty((*x.shape[:-1], (x.shape[-1] - m) // l))
         for i in np.ndindex(x.shape[:-1]):
-            counts_m[*i, :] = _channel_app_samp_entropy_counts(x[i], m, rr[i], l)
-            counts_mp1[*i, :] = _channel_app_samp_entropy_counts(x[i], m + 1, rr[i], l)
+            counts_m[i + (slice(None),)] = _channel_app_samp_entropy_counts(
+                x[i], m, rr[i], l
+            )
+            counts_mp1[i + (slice(None),)] = _channel_app_samp_entropy_counts(
+                x[i], m + 1, rr[i], l
+            )
         return counts_m, counts_mp1
@@ -62,7 +66,7 @@ def complexity_sample_entropy(counts_m, counts_mp1):
 def complexity_svd_entropy(x, m=10, tau=1):
     x_emb = np.empty((*x.shape[:-1], (x.shape[-1] - m + 1) // tau, m))
     for i in np.ndindex(x.shape[:-1]):
-        x_emb[*i, :, :] = _create_embedding(x[i], m, tau)
+        x_emb[i + (slice(None), slice(None))] = _create_embedding(x[i], m, tau)
     s = np.linalg.svdvals(x_emb)
     s /= s.sum(axis=-1, keepdims=True)
     return -np.sum(s * np.log(s), axis=-1)

eegdash/features/feature_bank/dimensionality.py CHANGED Viewed

@@ -26,7 +26,7 @@ def dimensionality_higuchi_fractal_dim(x, k_max=10, eps=1e-7):
     for i in np.ndindex(x.shape[:-1]):
         for k in range(1, k_max + 1):
             for m in range(k):
-                L_km[m] = np.mean(np.abs(np.diff(x[*i, m:], n=k)))
+                L_km[m] = np.mean(np.abs(np.diff(x[i + (slice(m, None),)], n=k)))
             L_k[k - 1] = (N - 1) * np.sum(L_km[:k]) / (k**3)
         L_k = np.maximum(L_k, eps)
         hfd[i] = np.linalg.lstsq(log_k, np.log(L_k))[0][0]

eegdash/features/feature_bank/signal.py CHANGED Viewed

@@ -8,20 +8,21 @@ from ..extractors import FeatureExtractor
 __all__ = [
     "HilbertFeatureExtractor",
-    "signal_mean",
-    "signal_variance",
-    "signal_skewness",
+    "SIGNAL_PREDECESSORS",
+    "signal_decorrelation_time",
+    "signal_hjorth_activity",
+    "signal_hjorth_complexity",
+    "signal_hjorth_mobility",
     "signal_kurtosis",
-    "signal_std",
-    "signal_root_mean_square",
+    "signal_line_length",
+    "signal_mean",
     "signal_peak_to_peak",
     "signal_quantile",
+    "signal_root_mean_square",
+    "signal_skewness",
+    "signal_std",
+    "signal_variance",
     "signal_zero_crossings",
-    "signal_line_length",
-    "signal_hjorth_activity",
-    "signal_hjorth_mobility",
-    "signal_hjorth_complexity",
-    "signal_decorrelation_time",
 ]

eegdash/features/feature_bank/utils.py CHANGED Viewed

@@ -1,5 +1,13 @@
 import numpy as np
+__all__ = [
+    "DEFAULT_FREQ_BANDS",
+    "get_valid_freq_band",
+    "reduce_freq_bands",
+    "slice_freq_band",
+]
 DEFAULT_FREQ_BANDS = {
     "delta": (1, 4.5),
     "theta": (4.5, 8),

eegdash 0.4.0.dev173498563__py3-none-any.whl → 0.4.1.dev185__py3-none-any.whl

Potentially problematic release.

eegdash 0.4.0.dev173498563__py3-none-any.whl → 0.4.1.dev185__py3-none-any.whl