eegdash 0.3.3.dev61__py3-none-any.whl → 0.5.0.dev180784713__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eegdash/__init__.py +19 -6
- eegdash/api.py +336 -539
- eegdash/bids_eeg_metadata.py +495 -0
- eegdash/const.py +349 -0
- eegdash/dataset/__init__.py +28 -0
- eegdash/dataset/base.py +311 -0
- eegdash/dataset/bids_dataset.py +641 -0
- eegdash/dataset/dataset.py +692 -0
- eegdash/dataset/dataset_summary.csv +255 -0
- eegdash/dataset/registry.py +287 -0
- eegdash/downloader.py +197 -0
- eegdash/features/__init__.py +15 -13
- eegdash/features/datasets.py +329 -138
- eegdash/features/decorators.py +105 -13
- eegdash/features/extractors.py +233 -63
- eegdash/features/feature_bank/__init__.py +12 -12
- eegdash/features/feature_bank/complexity.py +22 -20
- eegdash/features/feature_bank/connectivity.py +27 -28
- eegdash/features/feature_bank/csp.py +3 -1
- eegdash/features/feature_bank/dimensionality.py +6 -6
- eegdash/features/feature_bank/signal.py +29 -30
- eegdash/features/feature_bank/spectral.py +40 -44
- eegdash/features/feature_bank/utils.py +8 -0
- eegdash/features/inspect.py +126 -15
- eegdash/features/serialization.py +58 -17
- eegdash/features/utils.py +90 -16
- eegdash/hbn/__init__.py +28 -0
- eegdash/hbn/preprocessing.py +105 -0
- eegdash/hbn/windows.py +428 -0
- eegdash/logging.py +54 -0
- eegdash/mongodb.py +55 -24
- eegdash/paths.py +52 -0
- eegdash/utils.py +29 -1
- eegdash-0.5.0.dev180784713.dist-info/METADATA +121 -0
- eegdash-0.5.0.dev180784713.dist-info/RECORD +38 -0
- eegdash-0.5.0.dev180784713.dist-info/licenses/LICENSE +29 -0
- eegdash/data_config.py +0 -34
- eegdash/data_utils.py +0 -687
- eegdash/dataset.py +0 -69
- eegdash/preprocessing.py +0 -63
- eegdash-0.3.3.dev61.dist-info/METADATA +0 -192
- eegdash-0.3.3.dev61.dist-info/RECORD +0 -28
- eegdash-0.3.3.dev61.dist-info/licenses/LICENSE +0 -23
- {eegdash-0.3.3.dev61.dist-info → eegdash-0.5.0.dev180784713.dist-info}/WHEEL +0 -0
- {eegdash-0.3.3.dev61.dist-info → eegdash-0.5.0.dev180784713.dist-info}/top_level.txt +0 -0
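The headline change is a restructuring: the monolithic `data_utils.py`, `dataset.py`, and `preprocessing.py` are removed in favor of the new `dataset/` and `hbn/` subpackages plus dedicated `downloader`, `paths`, and `logging` modules. A hedged sketch of imports under the new layout, derived only from the file paths above and the `__all__` added in the diff below:

```python
# Confirmed by the diff shown below:
from eegdash.features.datasets import FeaturesDataset, FeaturesConcatDataset
from eegdash.logging import logger

# Plausible from the file list above, but not shown in this diff (assumptions):
# from eegdash.dataset import ...
# from eegdash.hbn import ...
```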
eegdash/features/datasets.py
CHANGED
@@ -3,7 +3,6 @@ from __future__ import annotations
 import json
 import os
 import shutil
-import warnings
 from collections.abc import Callable
 from typing import Dict, List
 
@@ -17,22 +16,43 @@ from braindecode.datasets.base import (
     _create_description,
 )
 
+from ..logging import logger
+
+__all__ = [
+    "FeaturesDataset",
+    "FeaturesConcatDataset",
+]
+
 
 class FeaturesDataset(EEGWindowsDataset):
-    """
+    """A dataset of features extracted from EEG windows.
 
-
-
-
+    This class holds features in a pandas DataFrame and provides an interface
+    compatible with braindecode's dataset structure. Each row in the feature
+    DataFrame corresponds to a single sample (e.g., an EEG window).
 
     Parameters
     ----------
-    features :
-
-
-
-
-
+    features : pandas.DataFrame
+        A DataFrame where each row is a sample and each column is a feature.
+    metadata : pandas.DataFrame, optional
+        A DataFrame containing metadata for each sample, indexed consistently
+        with `features`. Must include columns 'i_window_in_trial',
+        'i_start_in_trial', 'i_stop_in_trial', and 'target'.
+    description : dict or pandas.Series, optional
+        Additional high-level information about the dataset (e.g., subject ID).
+    transform : callable, optional
+        A function or transform to apply to the feature data on-the-fly.
+    raw_info : dict, optional
+        Information about the original raw recording, for provenance.
+    raw_preproc_kwargs : dict, optional
+        Keyword arguments used for preprocessing the raw data.
+    window_kwargs : dict, optional
+        Keyword arguments used for windowing the data.
+    window_preproc_kwargs : dict, optional
+        Keyword arguments used for preprocessing the windowed data.
+    features_kwargs : dict, optional
+        Keyword arguments used for feature extraction.
 
     """
 
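The new docstring pins down the constructor contract. A minimal sketch of building a `FeaturesDataset` by hand, assuming the `__init__` parameters match the documented ones (the diff shows the docstring, not the constructor body):

```python
import pandas as pd
from eegdash.features.datasets import FeaturesDataset

features = pd.DataFrame({"alpha_power": [1.2, 0.8], "beta_power": [0.4, 0.5]})
metadata = pd.DataFrame(
    {
        # columns required by the docstring above
        "i_window_in_trial": [0, 1],
        "i_start_in_trial": [0, 256],
        "i_stop_in_trial": [256, 512],
        "target": [0, 1],
    }
)
ds = FeaturesDataset(features, metadata=metadata, description={"subject": 1})
```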
@@ -64,7 +84,21 @@ class FeaturesDataset(EEGWindowsDataset):
         ].to_numpy()
         self.y = metadata.loc[:, "target"].to_list()
 
-    def __getitem__(self, index):
+    def __getitem__(self, index: int) -> tuple[np.ndarray, int, list]:
+        """Get a single sample from the dataset.
+
+        Parameters
+        ----------
+        index : int
+            The index of the sample to retrieve.
+
+        Returns
+        -------
+        tuple
+            A tuple containing the feature vector (X), the target (y), and the
+            cropping indices.
+
+        """
         crop_inds = self.crop_inds[index].tolist()
         X = self.features.iloc[index].to_numpy()
         X = X.copy()
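Continuing the sketch above, indexing mirrors braindecode's windowed-dataset interface:

```python
# Continuing the FeaturesDataset sketch from earlier.
X, y, crop_inds = ds[0]
# X is one row of `features` as a NumPy array, y its target, and crop_inds
# the [i_window_in_trial, i_start_in_trial, i_stop_in_trial] triple.
print(len(ds))  # number of feature rows
```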
@@ -74,18 +108,27 @@ class FeaturesDataset(EEGWindowsDataset):
         y = self.y[index]
         return X, y, crop_inds
 
-    def __len__(self):
+    def __len__(self) -> int:
+        """Return the number of samples in the dataset.
+
+        Returns
+        -------
+        int
+            The total number of feature samples.
+
+        """
         return len(self.features.index)
 
 
 def _compute_stats(
     ds: FeaturesDataset,
-    return_count=False,
-    return_mean=False,
-    return_var=False,
-    ddof=1,
-    numeric_only=False,
-):
+    return_count: bool = False,
+    return_mean: bool = False,
+    return_var: bool = False,
+    ddof: int = 1,
+    numeric_only: bool = False,
+) -> tuple:
+    """Compute statistics for a single :class:`~eegdash.features.datasets.FeaturesDataset`."""
     res = []
     if return_count:
         res.append(ds.features.count(numeric_only=numeric_only))
@@ -96,7 +139,14 @@ def _compute_stats(
     return tuple(res)
 
 
-def _pooled_var(counts, means, variances, ddof, ddof_in=None):
+def _pooled_var(
+    counts: np.ndarray,
+    means: np.ndarray,
+    variances: np.ndarray,
+    ddof: int,
+    ddof_in: int | None = None,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Compute pooled variance across multiple datasets."""
     if ddof_in is None:
         ddof_in = ddof
     count = counts.sum(axis=0)
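`_pooled_var` merges per-dataset summary statistics without ever touching the raw rows. Its body is only partially visible in this diff, so the following is a reference sketch of the standard pooling identity the signature implies (function name `pooled_stats` is ours, not the package's):

```python
import numpy as np

def pooled_stats(counts, means, variances, ddof=1, ddof_in=1):
    # counts/means/variances: shape (n_datasets, n_features) summary arrays.
    count = counts.sum(axis=0)
    mean = ((counts / count) * means).sum(axis=0)
    # Within-dataset sums of squares plus the between-dataset correction.
    ss_within = ((counts - ddof_in) * variances).sum(axis=0)
    ss_between = (counts * (means - mean) ** 2).sum(axis=0)
    var = (ss_within + ss_between) / (count - ddof)
    return count, mean, var

# Sanity check against a direct computation on concatenated data.
x = [np.random.randn(100, 3), np.random.randn(50, 3) + 1.0]
c = np.array([[len(a)] * 3 for a in x])
m = np.array([a.mean(axis=0) for a in x])
v = np.array([a.var(axis=0, ddof=1) for a in x])
_, mu, var = pooled_stats(c, m, v)
assert np.allclose(var, np.concatenate(x).var(axis=0, ddof=1))
```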
@@ -109,17 +159,20 @@ def _pooled_var(counts, means, variances, ddof, ddof_in=None):
 
 
 class FeaturesConcatDataset(BaseConcatDataset):
-    """A
+    """A concatenated dataset of :class:`~eegdash.features.datasets.FeaturesDataset` objects.
 
-
-    a
+    This class holds a list of :class:`~eegdash.features.datasets.FeaturesDataset` instances and allows
+    them to be treated as a single, larger dataset. It provides methods for
+    splitting, saving, and performing DataFrame-like operations (e.g., `mean`,
+    `var`, `fillna`) across all contained datasets.
+
 
     Parameters
     ----------
-    list_of_ds : list
-        list of
-    target_transform : callable
-
+    list_of_ds : list of ~eegdash.features.datasets.FeaturesDataset
+        A list of :class:`~eegdash.features.datasets.FeaturesDataset` objects to concatenate.
+    target_transform : callable, optional
+        A function to apply to the target values before they are returned.
 
     """
 
@@ -139,26 +192,28 @@ class FeaturesConcatDataset(BaseConcatDataset):
         self,
         by: str | list[int] | list[list[int]] | dict[str, list[int]],
     ) -> dict[str, FeaturesConcatDataset]:
-        """Split the dataset
+        """Split the dataset into subsets.
 
-        The
+        The splitting can be done based on a column in the description
+        DataFrame or by providing explicit indices for each split.
 
         Parameters
         ----------
-        by : str
-            If
-
-            If
-
-
-
-
+        by : str or list or dict
+            - If a string, splits are created for each unique value in the
+              description column `by`.
+            - If a list of integers, a single split is created containing the
+              datasets at the specified indices.
+            - If a list of lists of integers, multiple splits are created, one
+              for each sublist of indices.
+            - If a dictionary, keys are used as split names and values are
+              lists of dataset indices.
 
         Returns
         -------
-
-            A dictionary
-
+        dict[str, ~eegdash.features.datasets.FeaturesConcatDataset]
+            A dictionary where keys are split names and values are the new
+            :class:`~eegdash.features.datasets.FeaturesConcatDataset` subsets.
 
         """
         if isinstance(by, str):
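A usage sketch of the four documented `by` modes (the dataset variables and the `"subject"` description column are hypothetical):

```python
from eegdash.features.datasets import FeaturesConcatDataset

concat = FeaturesConcatDataset([ds_a, ds_b, ds_c])  # hypothetical FeaturesDatasets
by_subject = concat.split("subject")                # one split per unique value
subset = concat.split([0, 2])                       # single split of datasets 0 and 2
folds = concat.split([[0, 1], [2]])                 # one split per sublist
named = concat.split({"train": [0, 1], "valid": [2]})  # caller-chosen names
```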
@@ -183,14 +238,22 @@ class FeaturesConcatDataset(BaseConcatDataset):
         }
 
     def get_metadata(self) -> pd.DataFrame:
-        """
+        """Get the metadata of all datasets as a single DataFrame.
+
+        Concatenates the metadata from all contained datasets and adds columns
+        from their `description` attributes.
 
         Returns
         -------
-
-            DataFrame containing
-
-
+        pandas.DataFrame
+            A DataFrame containing the metadata for every sample in the
+            concatenated dataset.
+
+        Raises
+        ------
+        TypeError
+            If any of the contained datasets is not a
+            :class:`~eegdash.features.datasets.FeaturesDataset`.
 
         """
         if not all([isinstance(ds, FeaturesDataset) for ds in self.datasets]):
@@ -201,60 +264,59 @@ class FeaturesConcatDataset(BaseConcatDataset):
 
         all_dfs = list()
         for ds in self.datasets:
-            df = ds.metadata
+            df = ds.metadata.copy()
             for k, v in ds.description.items():
                 df[k] = v
             all_dfs.append(df)
 
         return pd.concat(all_dfs)
 
-    def save(self, path: str, overwrite: bool = False, offset: int = 0):
-        """Save
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            raw_preproc_kwargs.json (if raws were preprocessed)
-            window_kwargs.json (if this is a windowed dataset)
-            window_preproc_kwargs.json (if windows were preprocessed)
-            features_kwargs.json
+    def save(self, path: str, overwrite: bool = False, offset: int = 0) -> None:
+        """Save the concatenated dataset to a directory.
+
+        Creates a directory structure where each contained dataset is saved in
+        its own numbered subdirectory.
+
+        .. code-block::
+
+            path/
+                0/
+                    0-feat.parquet
+                    metadata_df.pkl
+                    description.json
+                    ...
+                1/
+                    1-feat.parquet
+                    ...
 
         Parameters
         ----------
         path : str
-
-
-
-
-
-
-
-
-
-
+            The directory where the dataset will be saved.
+        overwrite : bool, default False
+            If True, any existing subdirectories that conflict with the new
+            ones will be removed.
+        offset : int, default 0
+            An integer to add to the subdirectory names. Useful for saving
+            datasets in chunks.
+
+        Raises
+        ------
+        ValueError
+            If the dataset is empty.
+        FileExistsError
+            If a subdirectory already exists and `overwrite` is False.
 
         """
         if len(self.datasets) == 0:
             raise ValueError("Expect at least one dataset")
         path_contents = os.listdir(path)
-        n_sub_dirs = len([os.path.isdir(e) for e in path_contents])
+        n_sub_dirs = len([os.path.isdir(os.path.join(path, e)) for e in path_contents])
         for i_ds, ds in enumerate(self.datasets):
-
-            if
-                path_contents.remove(
-
-            sub_dir = os.path.join(path, str(i_ds + offset))
+            sub_dir_name = str(i_ds + offset)
+            if sub_dir_name in path_contents:
+                path_contents.remove(sub_dir_name)
+            sub_dir = os.path.join(path, sub_dir_name)
             if os.path.exists(sub_dir):
                 if overwrite:
                     shutil.rmtree(sub_dir)
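As the rebuilt docstring spells out, `save` writes one numbered subdirectory per contained dataset and (as the next hunk shows) now warns through `logger` instead of `warnings`. A usage sketch; note that `path` must already exist, because the method starts with `os.listdir(path)`:

```python
import os

os.makedirs("features_out", exist_ok=True)
concat.save("features_out", overwrite=True)             # writes 0/, 1/, 2/
concat.save("features_out", overwrite=True, offset=3)   # chunked: writes 3/, 4/, 5/
```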
@@ -264,56 +326,59 @@ class FeaturesConcatDataset(BaseConcatDataset):
                     f" a different directory, set overwrite=True, or "
                     f"resolve manually."
                 )
-            # save_dir/{i_ds+offset}/
             os.makedirs(sub_dir)
-            # save_dir/{i_ds+offset}/{i_ds+offset}-feat.parquet
             self._save_features(sub_dir, ds, i_ds, offset)
-            # save_dir/{i_ds+offset}/metadata_df.pkl
             self._save_metadata(sub_dir, ds)
-            # save_dir/{i_ds+offset}/description.json
             self._save_description(sub_dir, ds.description)
-            # save_dir/{i_ds+offset}/raw-info.fif
             self._save_raw_info(sub_dir, ds)
-            # save_dir/{i_ds+offset}/raw_preproc_kwargs.json
-            # save_dir/{i_ds+offset}/window_kwargs.json
-            # save_dir/{i_ds+offset}/window_preproc_kwargs.json
-            # save_dir/{i_ds+offset}/features_kwargs.json
             self._save_kwargs(sub_dir, ds)
-            if overwrite:
-
-
-
-
-
-
-
-
-                    f"datasets!",
-                    UserWarning,
-                )
-        # if path contains files or directories that were not touched, raise
-        # warning
+            if overwrite and i_ds + 1 + offset < n_sub_dirs:
+                logger.warning(
+                    f"The number of saved datasets ({i_ds + 1 + offset}) "
+                    f"does not match the number of existing "
+                    f"subdirectories ({n_sub_dirs}). You may now "
+                    f"encounter a mix of differently preprocessed "
+                    f"datasets!",
+                    UserWarning,
+                )
         if path_contents:
-
+            logger.warning(
                 f"Chosen directory {path} contains other "
                 f"subdirectories or files {path_contents}."
             )
 
     @staticmethod
-    def _save_features(sub_dir, ds, i_ds, offset):
+    def _save_features(sub_dir: str, ds: FeaturesDataset, i_ds: int, offset: int):
+        """Save the feature DataFrame to a Parquet file."""
         parquet_file_name = f"{i_ds + offset}-feat.parquet"
         parquet_file_path = os.path.join(sub_dir, parquet_file_name)
         ds.features.to_parquet(parquet_file_path)
 
     @staticmethod
-    def
-
+    def _save_metadata(sub_dir: str, ds: FeaturesDataset):
+        """Save the metadata DataFrame to a pickle file."""
+        metadata_file_name = "metadata_df.pkl"
+        metadata_file_path = os.path.join(sub_dir, metadata_file_name)
+        ds.metadata.to_pickle(metadata_file_path)
+
+    @staticmethod
+    def _save_description(sub_dir: str, description: pd.Series):
+        """Save the description Series to a JSON file."""
+        desc_file_name = "description.json"
+        desc_file_path = os.path.join(sub_dir, desc_file_name)
+        description.to_json(desc_file_path)
+
+    @staticmethod
+    def _save_raw_info(sub_dir: str, ds: FeaturesDataset):
+        """Save the raw info dictionary to a FIF file if it exists."""
+        if hasattr(ds, "raw_info") and ds.raw_info is not None:
             fif_file_name = "raw-info.fif"
             fif_file_path = os.path.join(sub_dir, fif_file_name)
-            ds.raw_info.save(fif_file_path)
+            ds.raw_info.save(fif_file_path, overwrite=True)
 
     @staticmethod
-    def _save_kwargs(sub_dir, ds):
+    def _save_kwargs(sub_dir: str, ds: FeaturesDataset):
+        """Save various keyword argument dictionaries to JSON files."""
         for kwargs_name in [
             "raw_preproc_kwargs",
             "window_kwargs",
@@ -321,10 +386,10 @@ class FeaturesConcatDataset(BaseConcatDataset):
             "features_kwargs",
         ]:
             if hasattr(ds, kwargs_name):
-                kwargs_file_name = ".".join([kwargs_name, "json"])
-                kwargs_file_path = os.path.join(sub_dir, kwargs_file_name)
                 kwargs = getattr(ds, kwargs_name)
                 if kwargs is not None:
+                    kwargs_file_name = ".".join([kwargs_name, "json"])
+                    kwargs_file_path = os.path.join(sub_dir, kwargs_file_name)
                     with open(kwargs_file_path, "w") as f:
                         json.dump(kwargs, f)
 
@@ -333,7 +398,25 @@ class FeaturesConcatDataset(BaseConcatDataset):
         include_metadata: bool | str | List[str] = False,
         include_target: bool = False,
         include_crop_inds: bool = False,
-    ):
+    ) -> pd.DataFrame:
+        """Convert the dataset to a single pandas DataFrame.
+
+        Parameters
+        ----------
+        include_metadata : bool or str or list of str, default False
+            If True, include all metadata columns. If a string or list of
+            strings, include only the specified metadata columns.
+        include_target : bool, default False
+            If True, include the 'target' column.
+        include_crop_inds : bool, default False
+            If True, include window cropping index columns.
+
+        Returns
+        -------
+        pandas.DataFrame
+            A DataFrame containing the features and requested metadata.
+
+        """
         if (
             not isinstance(include_metadata, bool)
             or include_metadata
@@ -342,7 +425,7 @@ class FeaturesConcatDataset(BaseConcatDataset):
             include_dataset = False
             if isinstance(include_metadata, bool) and include_metadata:
                 include_dataset = True
-                cols = self.datasets[0].metadata.columns
+                cols = self.datasets[0].metadata.columns.tolist()
             else:
                 cols = include_metadata
                 if isinstance(cols, bool) and not cols:
@@ -351,13 +434,14 @@ class FeaturesConcatDataset(BaseConcatDataset):
                    cols = [cols]
                cols = set(cols)
            if include_crop_inds:
-                cols
-
-
-
-
-
-
+                cols.update(
+                    {
+                        "i_dataset",
+                        "i_window_in_trial",
+                        "i_start_in_trial",
+                        "i_stop_in_trial",
+                    }
+                )
            if include_target:
                cols.add("target")
            cols = list(cols)
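A sketch of the documented `to_dataframe` options, continuing the `concat` example from earlier:

```python
df = concat.to_dataframe()                            # features only
df_meta = concat.to_dataframe(include_metadata=True)  # plus all metadata columns
df_tgt = concat.to_dataframe(include_target=True)     # plus the 'target' column
df_inds = concat.to_dataframe(include_crop_inds=True)
# include_crop_inds adds i_dataset, i_window_in_trial, i_start_in_trial and
# i_stop_in_trial, per the rebuilt cols.update() above.
```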
@@ -380,10 +464,26 @@ class FeaturesConcatDataset(BaseConcatDataset):
         dataframes = [ds.features for ds in self.datasets]
         return pd.concat(dataframes, axis=0, ignore_index=True)
 
-    def _numeric_columns(self):
+    def _numeric_columns(self) -> pd.Index:
+        """Get the names of numeric columns from the feature DataFrames."""
         return self.datasets[0].features.select_dtypes(include=np.number).columns
 
-    def count(self, numeric_only=False, n_jobs=1):
+    def count(self, numeric_only: bool = False, n_jobs: int = 1) -> pd.Series:
+        """Count non-NA cells for each feature column.
+
+        Parameters
+        ----------
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+        n_jobs : int, default 1
+            Number of jobs to run in parallel.
+
+        Returns
+        -------
+        pandas.Series
+            The count of non-NA cells for each column.
+
+        """
         stats = Parallel(n_jobs)(
             delayed(_compute_stats)(ds, return_count=True, numeric_only=numeric_only)
             for ds in self.datasets
@@ -392,7 +492,22 @@ class FeaturesConcatDataset(BaseConcatDataset):
         count = counts.sum(axis=0)
         return pd.Series(count, index=self._numeric_columns())
 
-    def mean(self, numeric_only=False, n_jobs=1):
+    def mean(self, numeric_only: bool = False, n_jobs: int = 1) -> pd.Series:
+        """Compute the mean for each feature column.
+
+        Parameters
+        ----------
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+        n_jobs : int, default 1
+            Number of jobs to run in parallel.
+
+        Returns
+        -------
+        pandas.Series
+            The mean of each column.
+
+        """
         stats = Parallel(n_jobs)(
             delayed(_compute_stats)(
                 ds, return_count=True, return_mean=True, numeric_only=numeric_only
@@ -404,7 +519,26 @@ class FeaturesConcatDataset(BaseConcatDataset):
         mean = np.sum((counts / count) * means, axis=0)
         return pd.Series(mean, index=self._numeric_columns())
 
-    def var(self, ddof=1, numeric_only=False, n_jobs=1):
+    def var(
+        self, ddof: int = 1, numeric_only: bool = False, n_jobs: int = 1
+    ) -> pd.Series:
+        """Compute the variance for each feature column.
+
+        Parameters
+        ----------
+        ddof : int, default 1
+            Delta Degrees of Freedom. The divisor used in calculations is N - ddof.
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+        n_jobs : int, default 1
+            Number of jobs to run in parallel.
+
+        Returns
+        -------
+        pandas.Series
+            The variance of each column.
+
+        """
         stats = Parallel(n_jobs)(
             delayed(_compute_stats)(
                 ds,
@@ -424,12 +558,50 @@ class FeaturesConcatDataset(BaseConcatDataset):
         _, _, var = _pooled_var(counts, means, variances, ddof, ddof_in=0)
         return pd.Series(var, index=self._numeric_columns())
 
-    def std(self, ddof=1, numeric_only=False, eps=0, n_jobs=1):
+    def std(
+        self, ddof: int = 1, numeric_only: bool = False, eps: float = 0, n_jobs: int = 1
+    ) -> pd.Series:
+        """Compute the standard deviation for each feature column.
+
+        Parameters
+        ----------
+        ddof : int, default 1
+            Delta Degrees of Freedom.
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+        eps : float, default 0
+            A small epsilon value to add to the variance before taking the
+            square root to avoid numerical instability.
+        n_jobs : int, default 1
+            Number of jobs to run in parallel.
+
+        Returns
+        -------
+        pandas.Series
+            The standard deviation of each column.
+
+        """
         return np.sqrt(
             self.var(ddof=ddof, numeric_only=numeric_only, n_jobs=n_jobs) + eps
         )
 
-    def zscore(self, ddof=1, numeric_only=False, eps=0, n_jobs=1):
+    def zscore(
+        self, ddof: int = 1, numeric_only: bool = False, eps: float = 0, n_jobs: int = 1
+    ) -> None:
+        """Apply z-score normalization to numeric columns in-place.
+
+        Parameters
+        ----------
+        ddof : int, default 1
+            Delta Degrees of Freedom for variance calculation.
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+        eps : float, default 0
+            Epsilon for numerical stability.
+        n_jobs : int, default 1
+            Number of jobs to run in parallel for statistics computation.
+
+        """
         stats = Parallel(n_jobs)(
             delayed(_compute_stats)(
                 ds,
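These statistics are computed per dataset in parallel via `_compute_stats` and merged with `_pooled_var`, so the full feature table is never concatenated in memory. A usage sketch, continuing the `concat` example:

```python
n = concat.count(numeric_only=True)            # pandas.Series of non-NA counts
mu = concat.mean(numeric_only=True, n_jobs=4)  # pooled means across datasets
sd = concat.std(ddof=1, eps=1e-12)             # sqrt(pooled variance + eps)
concat.zscore(eps=1e-12)  # in-place: (features - mu) / sd on every dataset
```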
@@ -449,10 +621,13 @@ class FeaturesConcatDataset(BaseConcatDataset):
         _, mean, var = _pooled_var(counts, means, variances, ddof, ddof_in=0)
         std = np.sqrt(var + eps)
         for ds in self.datasets:
-            ds.features
+            ds.features.loc[:, self._numeric_columns()] = (
+                ds.features.loc[:, self._numeric_columns()] - mean
+            ) / std
 
     @staticmethod
-    def _enforce_inplace_operations(func_name, kwargs):
+    def _enforce_inplace_operations(func_name: str, kwargs: dict):
+        """Raise an error if 'inplace=False' is passed to a method."""
         if "inplace" in kwargs and kwargs["inplace"] is False:
             raise ValueError(
                 f"{func_name} only works inplace, please change "
@@ -460,33 +635,49 @@ class FeaturesConcatDataset(BaseConcatDataset):
             )
         kwargs["inplace"] = True
 
-    def fillna(self, *args, **kwargs):
+    def fillna(self, *args, **kwargs) -> None:
+        """Fill NA/NaN values in-place. See :meth:`pandas.DataFrame.fillna`."""
         FeaturesConcatDataset._enforce_inplace_operations("fillna", kwargs)
         for ds in self.datasets:
             ds.features.fillna(*args, **kwargs)
 
-    def replace(self, *args, **kwargs):
+    def replace(self, *args, **kwargs) -> None:
+        """Replace values in-place. See :meth:`pandas.DataFrame.replace`."""
         FeaturesConcatDataset._enforce_inplace_operations("replace", kwargs)
         for ds in self.datasets:
             ds.features.replace(*args, **kwargs)
 
-    def interpolate(self, *args, **kwargs):
+    def interpolate(self, *args, **kwargs) -> None:
+        """Interpolate values in-place. See :meth:`pandas.DataFrame.interpolate`."""
         FeaturesConcatDataset._enforce_inplace_operations("interpolate", kwargs)
         for ds in self.datasets:
             ds.features.interpolate(*args, **kwargs)
 
-    def dropna(self, *args, **kwargs):
+    def dropna(self, *args, **kwargs) -> None:
+        """Remove missing values in-place. See :meth:`pandas.DataFrame.dropna`."""
         FeaturesConcatDataset._enforce_inplace_operations("dropna", kwargs)
         for ds in self.datasets:
             ds.features.dropna(*args, **kwargs)
 
-    def drop(self, *args, **kwargs):
+    def drop(self, *args, **kwargs) -> None:
+        """Drop specified labels from rows or columns in-place. See :meth:`pandas.DataFrame.drop`."""
         FeaturesConcatDataset._enforce_inplace_operations("drop", kwargs)
         for ds in self.datasets:
             ds.features.drop(*args, **kwargs)
 
-    def join(self, concat_dataset: FeaturesConcatDataset, **kwargs):
+    def join(self, concat_dataset: FeaturesConcatDataset, **kwargs) -> None:
+        """Join columns with other FeaturesConcatDataset in-place.
+
+        Parameters
+        ----------
+        concat_dataset : FeaturesConcatDataset
+            The dataset to join with. Must have the same number of datasets,
+            and each corresponding dataset must have the same length.
+        **kwargs
+            Keyword arguments to pass to :meth:`pandas.DataFrame.join`.
+
+        """
         assert len(self.datasets) == len(concat_dataset.datasets)
         for ds1, ds2 in zip(self.datasets, concat_dataset.datasets):
             assert len(ds1) == len(ds2)
-            ds1.features.join(ds2, **kwargs)
+            ds1.features = ds1.features.join(ds2.features, **kwargs)