PyPI - eegdash - Versions diffs - 0.4.0.dev173498563__py3-none-any.whl → 0.4.1.dev185__py3-none-any.whl - Mend

eegdash 0.4.0.dev173498563__py3-none-any.whl → 0.4.1.dev185__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of eegdash might be problematic. Click here for more details.

Files changed (34) hide show

eegdash/__init__.py +3 -3
eegdash/api.py +143 -526
eegdash/bids_eeg_metadata.py +139 -39
eegdash/const.py +25 -0
eegdash/dataset/__init__.py +8 -2
eegdash/dataset/base.py +311 -0
eegdash/dataset/bids_dataset.py +443 -0
eegdash/dataset/dataset.py +542 -17
eegdash/dataset/dataset_summary.csv +255 -255
eegdash/dataset/registry.py +69 -4
eegdash/downloader.py +95 -9
eegdash/features/datasets.py +326 -136
eegdash/features/decorators.py +96 -3
eegdash/features/extractors.py +212 -55
eegdash/features/feature_bank/complexity.py +7 -3
eegdash/features/feature_bank/dimensionality.py +1 -1
eegdash/features/feature_bank/signal.py +11 -10
eegdash/features/feature_bank/utils.py +8 -0
eegdash/features/inspect.py +97 -11
eegdash/features/serialization.py +56 -19
eegdash/features/utils.py +90 -16
eegdash/hbn/preprocessing.py +50 -17
eegdash/hbn/windows.py +145 -32
eegdash/logging.py +19 -0
eegdash/mongodb.py +44 -27
eegdash/paths.py +15 -5
eegdash/utils.py +16 -1
{eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dev185.dist-info}/METADATA +7 -8
eegdash-0.4.1.dev185.dist-info/RECORD +38 -0
eegdash/data_utils.py +0 -677
eegdash-0.4.0.dev173498563.dist-info/RECORD +0 -37
{eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dev185.dist-info}/WHEEL +0 -0
{eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dev185.dist-info}/licenses/LICENSE +0 -0
{eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dev185.dist-info}/top_level.txt +0 -0

eegdash/features/inspect.py CHANGED Viewed

@@ -1,15 +1,48 @@
+from __future__ import annotations
 import inspect
 from collections.abc import Callable
 from . import extractors, feature_bank
-from .extractors import FeatureExtractor, MultivariateFeature, _get_underlying_func
+from .extractors import _get_underlying_func
+__all__ = [
+    "get_all_feature_extractors",
+    "get_all_feature_kinds",
+    "get_all_features",
+    "get_feature_kind",
+    "get_feature_predecessors",
+]
+def get_feature_predecessors(feature_or_extractor: Callable) -> list:
+    """Get the dependency hierarchy for a feature or feature extractor.
+    This function recursively traverses the `parent_extractor_type` attribute
+    of a feature or extractor to build a list representing its dependency
+    lineage.
+    Parameters
+    ----------
+    feature_or_extractor : callable
+        The feature function or :class:`~eegdash.features.extractors.FeatureExtractor`
+        class to inspect.
+    Returns
+    -------
+    list
+        A nested list representing the dependency tree. For a simple linear
+        chain, this will be a flat list from the specific feature up to the
+        base :class:`~eegdash.features.extractors.FeatureExtractor`. For
+        multiple dependencies, it will contain tuples of sub-dependencies.
-def get_feature_predecessors(feature_or_extractor: Callable):
+    """
     current = _get_underlying_func(feature_or_extractor)
-    if current is FeatureExtractor:
+    if current is extractors.FeatureExtractor:
         return [current]
-    predecessor = getattr(current, "parent_extractor_type", [FeatureExtractor])
+    predecessor = getattr(
+        current, "parent_extractor_type", [extractors.FeatureExtractor]
+    )
     if len(predecessor) == 1:
         return [current, *get_feature_predecessors(predecessor[0])]
     else:
@@ -20,29 +53,82 @@ def get_feature_predecessors(feature_or_extractor: Callable):
         return [current, tuple(predecessors)]
-def get_feature_kind(feature: Callable):
+def get_feature_kind(feature: Callable) -> extractors.MultivariateFeature:
+    """Get the 'kind' of a feature function.
+    The feature kind (e.g., univariate, bivariate) is typically attached by a
+    decorator.
+    Parameters
+    ----------
+    feature : callable
+        The feature function to inspect.
+    Returns
+    -------
+    :class:`~eegdash.features.extractors.MultivariateFeature`
+        An instance of the feature kind (e.g., ``UnivariateFeature()``).
+    """
     return _get_underlying_func(feature).feature_kind
-def get_all_features():
+def get_all_features() -> list[tuple[str, Callable]]:
+    """Get a list of all available feature functions.
+    Scans the `eegdash.features.feature_bank` module for functions that have
+    been decorated to have a `feature_kind` attribute.
+    Returns
+    -------
+    list[tuple[str, callable]]
+        A list of (name, function) tuples for all discovered features.
+    """
     def isfeature(x):
         return hasattr(_get_underlying_func(x), "feature_kind")
     return inspect.getmembers(feature_bank, isfeature)
-def get_all_feature_extractors():
+def get_all_feature_extractors() -> list[tuple[str, type[extractors.FeatureExtractor]]]:
+    """Get a list of all available :class:`~eegdash.features.extractors.FeatureExtractor` classes.
+    Scans the `eegdash.features.feature_bank` module for all classes that
+    subclass :class:`~eegdash.features.extractors.FeatureExtractor`.
+    Returns
+    -------
+    list[tuple[str, type[eegdash.features.extractors.FeatureExtractor]]]
+        A list of (name, class) tuples for all discovered feature extractors,
+        including the base :class:`~eegdash.features.extractors.FeatureExtractor` itself.
+    """
     def isfeatureextractor(x):
-        return inspect.isclass(x) and issubclass(x, FeatureExtractor)
+        return inspect.isclass(x) and issubclass(x, extractors.FeatureExtractor)
     return [
-        ("FeatureExtractor", FeatureExtractor),
+        ("FeatureExtractor", extractors.FeatureExtractor),
         *inspect.getmembers(feature_bank, isfeatureextractor),
     ]
-def get_all_feature_kinds():
+def get_all_feature_kinds() -> list[tuple[str, type[extractors.MultivariateFeature]]]:
+    """Get a list of all available feature 'kind' classes.
+    Scans the `eegdash.features.extractors` module for all classes that
+    subclass :class:`~eegdash.features.extractors.MultivariateFeature`.
+    Returns
+    -------
+    list[tuple[str, type[eegdash.features.extractors.MultivariateFeature]]]
+        A list of (name, class) tuples for all discovered feature kinds.
+    """
     def isfeaturekind(x):
-        return inspect.isclass(x) and issubclass(x, MultivariateFeature)
+        return inspect.isclass(x) and issubclass(x, extractors.MultivariateFeature)
     return inspect.getmembers(extractors, isfeaturekind)

eegdash/features/serialization.py CHANGED Viewed

@@ -1,10 +1,13 @@
 """Convenience functions for storing and loading features datasets.
-See Also:
-    https://github.com/braindecode/braindecode//blob/master/braindecode/datautil/serialization.py#L165-L229
+See Also
+--------
+https://github.com/braindecode/braindecode/blob/master/braindecode/datautil/serialization.py#L165-L229
 """
+from __future__ import annotations
 from pathlib import Path
 import pandas as pd
@@ -15,35 +18,48 @@ from braindecode.datautil.serialization import _load_kwargs_json
 from .datasets import FeaturesConcatDataset, FeaturesDataset
+__all__ = [
+    "load_features_concat_dataset",
+]
+def load_features_concat_dataset(
+    path: str | Path, ids_to_load: list[int] | None = None, n_jobs: int = 1
+) -> FeaturesConcatDataset:
+    """Load a stored :class:`~eegdash.features.datasets.FeaturesConcatDataset` from a directory.
-def load_features_concat_dataset(path, ids_to_load=None, n_jobs=1):
-    """Load a stored features dataset from files.
+    This function reconstructs a
+    :class:`~eegdash.features.datasets.FeaturesConcatDataset` by loading
+    individual :class:`~eegdash.features.datasets.FeaturesDataset` instances
+    from subdirectories within the given path. It uses joblib for parallel
+    loading.
     Parameters
     ----------
-    path: str | pathlib.Path
-        Path to the directory of the .fif / -epo.fif and .json files.
-    ids_to_load: list of int | None
-        Ids of specific files to load.
-    n_jobs: int
-        Number of jobs to be used to read files in parallel.
+    path : str or pathlib.Path
+        The path to the directory where the dataset was saved. This directory
+        should contain subdirectories (e.g., "0", "1", "2", ...) for each
+        individual dataset.
+    ids_to_load : list of int, optional
+        A list of specific dataset IDs (subdirectory names) to load. If None,
+        all subdirectories in the path will be loaded.
+    n_jobs : int, default 1
+        The number of jobs to use for parallel loading. -1 means using all
+        processors.
     Returns
     -------
-    concat_dataset: eegdash.features.datasets.FeaturesConcatDataset
-        A concatenation of multiple eegdash.features.datasets.FeaturesDataset
-        instances loaded from the given directory.
+    eegdash.features.datasets.FeaturesConcatDataset
+        A concatenated dataset containing the loaded
+        :class:`~eegdash.features.datasets.FeaturesDataset` instances.
     """
     # Make sure we always work with a pathlib.Path
     path = Path(path)
-    # else we have a dataset saved in the new way with subdirectories in path
-    # for every dataset with description.json and -feat.parquet,
-    # target_name.json, raw_preproc_kwargs.json, window_kwargs.json,
-    # window_preproc_kwargs.json, features_kwargs.json
     if ids_to_load is None:
-        ids_to_load = [p.name for p in path.iterdir()]
+        # Get all subdirectories and sort them numerically
+        ids_to_load = [p.name for p in path.iterdir() if p.is_dir()]
         ids_to_load = sorted(ids_to_load, key=lambda i: int(i))
     ids_to_load = [str(i) for i in ids_to_load]
@@ -51,7 +67,28 @@ def load_features_concat_dataset(path, ids_to_load=None, n_jobs=1):
     return FeaturesConcatDataset(datasets)
-def _load_parallel(path, i):
+def _load_parallel(path: Path, i: str) -> FeaturesDataset:
+    """Load a single :class:`~eegdash.features.datasets.FeaturesDataset` from its subdirectory.
+    This is a helper function for
+    :func:`~eegdash.features.serialization.load_features_concat_dataset` that
+    handles the loading of one dataset's files (features, metadata, descriptions, etc.).
+    Parameters
+    ----------
+    path : pathlib.Path
+        The root directory of the saved
+        :class:`~eegdash.features.datasets.FeaturesConcatDataset`.
+    i : str
+        The identifier of the dataset to load, corresponding to its
+        subdirectory name.
+    Returns
+    -------
+    eegdash.features.datasets.FeaturesDataset
+        The loaded dataset instance.
+    """
     sub_dir = path / i
     parquet_name_pattern = "{}-feat.parquet"

eegdash/features/utils.py CHANGED Viewed

@@ -14,15 +14,41 @@ from braindecode.datasets.base import (
     WindowsDataset,
 )
+from . import extractors
 from .datasets import FeaturesConcatDataset, FeaturesDataset
-from .extractors import FeatureExtractor
+__all__ = [
+    "extract_features",
+    "fit_feature_extractors",
+]
 def _extract_features_from_windowsdataset(
     win_ds: EEGWindowsDataset | WindowsDataset,
-    feature_extractor: FeatureExtractor,
+    feature_extractor: extractors.FeatureExtractor,
     batch_size: int = 512,
-):
+) -> FeaturesDataset:
+    """Extract features from a single `WindowsDataset`.
+    This is a helper function that iterates through a `WindowsDataset` in
+    batches, applies a `FeatureExtractor`, and returns the results as a
+    `FeaturesDataset`.
+    Parameters
+    ----------
+    win_ds : EEGWindowsDataset or WindowsDataset
+        The windowed dataset to extract features from.
+    feature_extractor : ~eegdash.features.extractors.FeatureExtractor
+        The feature extractor instance to apply.
+    batch_size : int, default 512
+        The number of windows to process in each batch.
+    Returns
+    -------
+    ~eegdash.features.datasets.FeaturesDataset
+        A new dataset containing the extracted features and associated metadata.
+    """
     metadata = win_ds.metadata
     if not win_ds.targets_from == "metadata":
         metadata = copy.deepcopy(metadata)
@@ -51,33 +77,59 @@ def _extract_features_from_windowsdataset(
             features_dict[k].extend(v)
     features_df = pd.DataFrame(features_dict)
     if not win_ds.targets_from == "metadata":
-        metadata.set_index("orig_index", drop=False, inplace=True)
         metadata.reset_index(drop=True, inplace=True)
-        metadata.drop("orig_index", axis=1, inplace=True)
+        metadata.drop("orig_index", axis=1, inplace=True, errors="ignore")
-    # FUTURE: truly support WindowsDataset objects
     return FeaturesDataset(
         features_df,
         metadata=metadata,
         description=win_ds.description,
         raw_info=win_ds.raw.info,
-        raw_preproc_kwargs=win_ds.raw_preproc_kwargs,
-        window_kwargs=win_ds.window_kwargs,
+        raw_preproc_kwargs=getattr(win_ds, "raw_preproc_kwargs", None),
+        window_kwargs=getattr(win_ds, "window_kwargs", None),
         features_kwargs=feature_extractor.features_kwargs,
     )
 def extract_features(
     concat_dataset: BaseConcatDataset,
-    features: FeatureExtractor | Dict[str, Callable] | List[Callable],
+    features: extractors.FeatureExtractor | Dict[str, Callable] | List[Callable],
     *,
     batch_size: int = 512,
     n_jobs: int = 1,
-):
+) -> FeaturesConcatDataset:
+    """Extract features from a concatenated dataset of windows.
+    This function applies a feature extractor to each `WindowsDataset` within a
+    `BaseConcatDataset` in parallel and returns a `FeaturesConcatDataset`
+    with the results.
+    Parameters
+    ----------
+    concat_dataset : BaseConcatDataset
+        A concatenated dataset of `WindowsDataset` or `EEGWindowsDataset`
+        instances.
+    features : ~eegdash.features.extractors.FeatureExtractor or dict or list
+        The feature extractor(s) to apply. Can be a
+        :class:`~eegdash.features.extractors.FeatureExtractor`
+        instance, a dictionary of named feature functions, or a list of
+        feature functions.
+    batch_size : int, default 512
+        The size of batches to use for feature extraction.
+    n_jobs : int, default 1
+        The number of parallel jobs to use for extracting features from the
+        datasets.
+    Returns
+    -------
+    ~eegdash.features.datasets.FeaturesConcatDataset
+        A new concatenated dataset containing the extracted features.
+    """
     if isinstance(features, list):
         features = dict(enumerate(features))
-    if not isinstance(features, FeatureExtractor):
-        features = FeatureExtractor(features)
+    if not isinstance(features, extractors.FeatureExtractor):
+        features = extractors.FeatureExtractor(features)
     feature_ds_list = list(
         tqdm(
             Parallel(n_jobs=n_jobs, return_as="generator")(
@@ -95,13 +147,35 @@ def extract_features(
 def fit_feature_extractors(
     concat_dataset: BaseConcatDataset,
-    features: FeatureExtractor | Dict[str, Callable] | List[Callable],
+    features: extractors.FeatureExtractor | Dict[str, Callable] | List[Callable],
     batch_size: int = 8192,
-):
+) -> extractors.FeatureExtractor:
+    """Fit trainable feature extractors on a dataset.
+    If the provided feature extractor (or any of its sub-extractors) is
+    trainable (i.e., subclasses
+    :class:`~eegdash.features.extractors.TrainableFeature`), this function
+    iterates through the dataset to fit it.
+    Parameters
+    ----------
+    concat_dataset : BaseConcatDataset
+        The dataset to use for fitting the feature extractors.
+    features : ~eegdash.features.extractors.FeatureExtractor or dict or list
+        The feature extractor(s) to fit.
+    batch_size : int, default 8192
+        The batch size to use when iterating through the dataset for fitting.
+    Returns
+    -------
+    ~eegdash.features.extractors.FeatureExtractor
+        The fitted feature extractor.
+    """
     if isinstance(features, list):
         features = dict(enumerate(features))
-    if not isinstance(features, FeatureExtractor):
-        features = FeatureExtractor(features)
+    if not isinstance(features, extractors.FeatureExtractor):
+        features = extractors.FeatureExtractor(features)
     if not features._is_trainable:
         return features
     features.clear()

eegdash/hbn/preprocessing.py CHANGED Viewed

@@ -18,27 +18,47 @@ from ..logging import logger
 class hbn_ec_ec_reannotation(Preprocessor):
-    """Preprocessor to reannotate the raw data for eyes open and eyes closed events.
+    """Preprocessor to reannotate HBN data for eyes-open/eyes-closed events.
-    This processor is designed for HBN datasets.
+    This preprocessor is specifically designed for Healthy Brain Network (HBN)
+    datasets. It identifies existing annotations for "instructed_toCloseEyes"
+    and "instructed_toOpenEyes" and creates new, regularly spaced annotations
+    for "eyes_closed" and "eyes_open" segments, respectively.
+    This is useful for creating windowed datasets based on these new, more
+    precise event markers.
+    Notes
+    -----
+    This class inherits from :class:`braindecode.preprocessing.Preprocessor`
+    and is intended to be used within a braindecode preprocessing pipeline.
     """
     def __init__(self):
         super().__init__(fn=self.transform, apply_on_array=False)
-    def transform(self, raw):
-        """Reannotate the raw data to create new events for eyes open and eyes closed
+    def transform(self, raw: mne.io.Raw) -> mne.io.Raw:
+        """Create new annotations for eyes-open and eyes-closed periods.
-        This function modifies the raw MNE object by creating new events based on
-        the existing annotations for "instructed_toCloseEyes" and "instructed_toOpenEyes".
-        It generates new events every 2 seconds within specified time ranges after
-        the original events, and replaces the existing annotations with these new events.
+        This function finds the original "instructed_to..." annotations and
+        generates new annotations every 2 seconds within specific time ranges
+        relative to the original markers:
+        - "eyes_closed": 15s to 29s after "instructed_toCloseEyes"
+        - "eyes_open": 5s to 19s after "instructed_toOpenEyes"
+        The original annotations in the `mne.io.Raw` object are replaced by
+        this new set of annotations.
         Parameters
         ----------
         raw : mne.io.Raw
-            The raw MNE object containing EEG data and annotations.
+            The raw MNE object containing the HBN data and original annotations.
+        Returns
+        -------
+        mne.io.Raw
+            The raw MNE object with the modified annotations.
         """
         events, event_id = mne.events_from_annotations(raw)
@@ -48,15 +68,27 @@ class hbn_ec_ec_reannotation(Preprocessor):
         # Create new events array for 2-second segments
         new_events = []
         sfreq = raw.info["sfreq"]
-        for event in events[events[:, 2] == event_id["instructed_toCloseEyes"]]:
-            # For each original event, create events every 2 seconds from 15s to 29s after
-            start_times = event[0] + np.arange(15, 29, 2) * sfreq
-            new_events.extend([[int(t), 0, 1] for t in start_times])
-        for event in events[events[:, 2] == event_id["instructed_toOpenEyes"]]:
-            # For each original event, create events every 2 seconds from 5s to 19s after
-            start_times = event[0] + np.arange(5, 19, 2) * sfreq
-            new_events.extend([[int(t), 0, 2] for t in start_times])
+        close_event_id = event_id.get("instructed_toCloseEyes")
+        if close_event_id:
+            for event in events[events[:, 2] == close_event_id]:
+                # For each original event, create events every 2s from 15s to 29s after
+                start_times = event[0] + np.arange(15, 29, 2) * sfreq
+                new_events.extend([[int(t), 0, 1] for t in start_times])
+        open_event_id = event_id.get("instructed_toOpenEyes")
+        if open_event_id:
+            for event in events[events[:, 2] == open_event_id]:
+                # For each original event, create events every 2s from 5s to 19s after
+                start_times = event[0] + np.arange(5, 19, 2) * sfreq
+                new_events.extend([[int(t), 0, 2] for t in start_times])
+        if not new_events:
+            logger.warning(
+                "Could not find 'instructed_toCloseEyes' or 'instructed_toOpenEyes' "
+                "annotations. No new events created."
+            )
+            return raw
         # replace events in raw
         new_events = np.array(new_events)
@@ -65,6 +97,7 @@ class hbn_ec_ec_reannotation(Preprocessor):
             events=new_events,
             event_desc={1: "eyes_closed", 2: "eyes_open"},
             sfreq=raw.info["sfreq"],
+            orig_time=raw.info.get("meas_date"),
         )
         raw.set_annotations(annot_from_events)

eegdash 0.4.0.dev173498563__py3-none-any.whl → 0.4.1.dev185__py3-none-any.whl

Potentially problematic release.

eegdash 0.4.0.dev173498563__py3-none-any.whl → 0.4.1.dev185__py3-none-any.whl