eegdash 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic.
- eegdash/data_config.py +28 -0
- eegdash/data_utils.py +55 -56
- eegdash/features/__init__.py +25 -0
- eegdash/features/datasets.py +453 -0
- eegdash/features/decorators.py +43 -0
- eegdash/features/extractors.py +209 -0
- eegdash/features/feature_bank/__init__.py +6 -0
- eegdash/features/feature_bank/complexity.py +97 -0
- eegdash/features/feature_bank/connectivity.py +99 -0
- eegdash/features/feature_bank/csp.py +102 -0
- eegdash/features/feature_bank/dimensionality.py +108 -0
- eegdash/features/feature_bank/signal.py +103 -0
- eegdash/features/feature_bank/spectral.py +134 -0
- eegdash/features/serialization.py +87 -0
- eegdash/features/utils.py +114 -0
- eegdash/main.py +98 -50
- {eegdash-0.0.8.dist-info → eegdash-0.0.9.dist-info}/METADATA +13 -47
- eegdash-0.0.9.dist-info/RECORD +22 -0
- {eegdash-0.0.8.dist-info → eegdash-0.0.9.dist-info}/WHEEL +1 -1
- eegdash-0.0.8.dist-info/RECORD +0 -8
- {eegdash-0.0.8.dist-info → eegdash-0.0.9.dist-info}/licenses/LICENSE +0 -0
- {eegdash-0.0.8.dist-info → eegdash-0.0.9.dist-info}/top_level.txt +0 -0
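The headline changes are a new eegdash.features subpackage (feature extractors, a feature bank, and parquet-based serialization) and a reworked eegdash/main.py in which EEGDashDataset gains a cache_dir argument in place of the old CACHE_DIR class constant (see the main.py diff below). A minimal usage sketch, assuming EEGDashDataset is importable from the package root and using a hypothetical OpenNeuro dataset id:

from eegdash import EEGDashDataset  # assumption: re-exported at package level; otherwise import from eegdash.main

ds = EEGDashDataset(
    query={"dataset": "ds002718"},   # hypothetical dataset id; 'dataset' and 'data_name' are the query fields accepted by EEGDash.exist()
    cache_dir=".eegdash_cache",      # new in 0.0.9; replaces the hard-coded CACHE_DIR class attribute
)
print(len(ds.datasets))              # one dataset object per matching recording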
eegdash/features/feature_bank/signal.py
ADDED

@@ -0,0 +1,103 @@
+import numbers
+import numpy as np
+from scipy import stats
+
+from ..decorators import univariate_feature
+
+
+__all__ = [
+    "signal_mean",
+    "signal_variance",
+    "signal_skewness",
+    "signal_kurtosis",
+    "signal_std",
+    "signal_root_mean_square",
+    "signal_peak_to_peak",
+    "signal_quantile",
+    "signal_zero_crossings",
+    "signal_line_length",
+    "signal_hjorth_activity",
+    "signal_hjorth_mobility",
+    "signal_hjorth_complexity",
+    "signal_decorrelation_time",
+]
+
+
+@univariate_feature
+def signal_mean(x):
+    return x.mean(axis=-1)
+
+
+@univariate_feature
+def signal_variance(x, **kwargs):
+    return x.var(axis=-1, **kwargs)
+
+
+@univariate_feature
+def signal_std(x, **kwargs):
+    return x.std(axis=-1, **kwargs)
+
+
+@univariate_feature
+def signal_skewness(x, **kwargs):
+    return stats.skew(x, axis=x.ndim - 1, **kwargs)
+
+
+@univariate_feature
+def signal_kurtosis(x, **kwargs):
+    return stats.kurtosis(x, axis=x.ndim - 1, **kwargs)
+
+
+@univariate_feature
+def signal_root_mean_square(x):
+    return np.sqrt(np.power(x, 2).mean(axis=-1))
+
+
+@univariate_feature
+def signal_peak_to_peak(x, **kwargs):
+    return np.ptp(x, axis=-1, **kwargs)
+
+
+@univariate_feature
+def signal_quantile(x, q: numbers.Number = 0.5, **kwargs):
+    return np.quantile(x, q=q, axis=-1, **kwargs)
+
+
+@univariate_feature
+def signal_line_length(x):
+    return np.abs(np.diff(x, axis=-1)).mean(axis=-1)
+
+
+@univariate_feature
+def signal_zero_crossings(x, threshold=1e-15):
+    zero_ind = np.logical_and(x > -threshold, x < threshold)
+    zero_cross = np.diff(zero_ind, axis=-1).astype(int).sum(axis=-1)
+    y = x.copy()
+    y[zero_ind] = 0
+    zero_cross += np.sum(np.signbit(y[..., :-1]) != np.signbit(y[..., 1:]), axis=-1)
+    return zero_cross
+
+
+@univariate_feature
+def signal_hjorth_mobility(x):
+    return np.diff(x, axis=-1).std(axis=-1) / x.std(axis=-1)
+
+
+@univariate_feature
+def signal_hjorth_complexity(x):
+    return np.diff(x, 2, axis=-1).std(axis=-1) / x.std(axis=-1)
+
+
+@univariate_feature
+def signal_decorrelation_time(x, fs=1):
+    f = np.fft.fft(x - x.mean(axis=-1, keepdims=True), axis=-1)
+    ac = np.fft.ifft(f.real**2 + f.imag**2, axis=-1)[..., : x.shape[-1] // 2]
+    dct = np.empty(x.shape[:-1])
+    for i in np.ndindex(x.shape[:-1]):
+        dct[i] = np.searchsorted(ac[i] <= 0, True)
+    return dct / fs
+
+
+# ================================= Aliases =================================
+
+signal_hjorth_activity = signal_variance
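The feature functions above reduce the last (time) axis, so each returns one value per channel for a (n_channels, n_times) window. A plain-NumPy sketch of a few of the definitions, bypassing the univariate_feature decorator (whose wrapping behaviour lives in eegdash/features/decorators.py and is not shown in this diff):

import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((32, 1000))          # one window, (n_channels, n_times)

activity = x.var(axis=-1)                                           # signal_hjorth_activity (= variance)
mobility = np.diff(x, axis=-1).std(axis=-1) / x.std(axis=-1)        # signal_hjorth_mobility
complexity = np.diff(x, 2, axis=-1).std(axis=-1) / x.std(axis=-1)   # signal_hjorth_complexity, as defined above
line_length = np.abs(np.diff(x, axis=-1)).mean(axis=-1)             # signal_line_length

print(activity.shape, mobility.shape)        # one value per channel: (32,) (32,)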
eegdash/features/feature_bank/spectral.py
ADDED

@@ -0,0 +1,134 @@
+import numpy as np
+import numba as nb
+from scipy.signal import welch
+
+from ..extractors import FeatureExtractor
+from ..decorators import FeaturePredecessor, univariate_feature
+
+
+__all__ = [
+    "SpectralFeatureExtractor",
+    "NormalizedSpectralFeatureExtractor",
+    "DBSpectralFeatureExtractor",
+    "spectral_root_total_power",
+    "spectral_moment",
+    "spectral_entropy",
+    "spectral_edge",
+    "spectral_slope",
+    "spectral_bands_power",
+    "spectral_hjorth_activity",
+    "spectral_hjorth_mobility",
+    "spectral_hjorth_complexity",
+]
+
+
+class SpectralFeatureExtractor(FeatureExtractor):
+    def preprocess(self, x, **kwargs):
+        f_min = kwargs.pop("f_min") if "f_min" in kwargs else None
+        f_max = kwargs.pop("f_max") if "f_max" in kwargs else None
+        kwargs["axis"] = -1
+        f, p = welch(x, **kwargs)
+        if f_min is not None or f_max is not None:
+            f_min_idx = f > f_min if f_min is not None else True
+            f_max_idx = f < f_max if f_max is not None else True
+            idx = np.logical_and(f_min_idx, f_max_idx)
+            f = f[idx]
+            p = p[..., idx]
+        return f, p
+
+
+@FeaturePredecessor(SpectralFeatureExtractor)
+class NormalizedSpectralFeatureExtractor(FeatureExtractor):
+    def preprocess(self, *x):
+        return (*x[:-1], x[-1] / x[-1].sum(axis=-1, keepdims=True))
+
+
+@FeaturePredecessor(SpectralFeatureExtractor)
+class DBSpectralFeatureExtractor(FeatureExtractor):
+    def preprocess(self, *x, eps=1e-15):
+        return (*x[:-1], 10 * np.log10(x[-1] + eps))
+
+
+@FeaturePredecessor(SpectralFeatureExtractor)
+@univariate_feature
+def spectral_root_total_power(f, p):
+    return np.sqrt(p.sum(axis=-1))
+
+
+@FeaturePredecessor(NormalizedSpectralFeatureExtractor)
+@univariate_feature
+def spectral_moment(f, p):
+    return np.sum(f * p, axis=-1)
+
+
+@FeaturePredecessor(SpectralFeatureExtractor)
+@univariate_feature
+def spectral_hjorth_activity(f, p):
+    return np.sum(p, axis=-1)
+
+
+@FeaturePredecessor(NormalizedSpectralFeatureExtractor)
+@univariate_feature
+def spectral_hjorth_mobility(f, p):
+    return np.sqrt(np.sum(np.power(f, 2) * p, axis=-1))
+
+
+@FeaturePredecessor(NormalizedSpectralFeatureExtractor)
+@univariate_feature
+def spectral_hjorth_complexity(f, p):
+    return np.sqrt(np.sum(np.power(f, 4) * p, axis=-1))
+
+
+@FeaturePredecessor(NormalizedSpectralFeatureExtractor)
+@univariate_feature
+def spectral_entropy(f, p):
+    idx = p > 0
+    plogp = np.zeros_like(p)
+    plogp[idx] = p[idx] * np.log(p[idx])
+    return -np.sum(plogp, axis=-1)
+
+
+@FeaturePredecessor(NormalizedSpectralFeatureExtractor)
+@univariate_feature
+@nb.njit(cache=True, fastmath=True)
+def spectral_edge(f, p, edge=0.9):
+    se = np.empty(p.shape[:-1])
+    for i in np.ndindex(p.shape[:-1]):
+        se[i] = f[np.searchsorted(np.cumsum(p[i]), edge)]
+    return se
+
+
+@FeaturePredecessor(DBSpectralFeatureExtractor)
+@univariate_feature
+def spectral_slope(f, p):
+    log_f = np.vstack((np.log(f), np.ones(f.shape[0]))).T
+    r = np.linalg.lstsq(log_f, p.reshape(-1, p.shape[-1]).T)[0]
+    r = r.reshape(2, *p.shape[:-1])
+    return {"exp": r[0], "int": r[1]}
+
+
+@FeaturePredecessor(
+    SpectralFeatureExtractor,
+    NormalizedSpectralFeatureExtractor,
+    DBSpectralFeatureExtractor,
+)
+@univariate_feature
+def spectral_bands_power(
+    f,
+    p,
+    bands={
+        "delta": (1, 4.5),
+        "theta": (4.5, 8),
+        "alpha": (8, 12),
+        "beta": (12, 30),
+    },
+):
+    bands_power = dict()
+    for k, v in bands.items():
+        assert isinstance(k, str)
+        assert isinstance(v, tuple)
+        assert len(v) == 2
+        mask = np.logical_and(f > v[0], f < v[1])
+        power = p[..., mask].sum(axis=-1)
+        bands_power[k] = power
+    return bands_power
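The pipeline above is a Welch PSD (SpectralFeatureExtractor.preprocess), optionally normalized to a distribution over frequencies (NormalizedSpectralFeatureExtractor), followed by per-band summation in spectral_bands_power. A standalone sketch of the same computation with SciPy, without the extractor classes:

import numpy as np
from scipy.signal import welch

rng = np.random.default_rng(0)
fs = 250
x = rng.standard_normal((32, 10 * fs))        # (n_channels, n_times)

f, p = welch(x, fs=fs, axis=-1)                # what SpectralFeatureExtractor.preprocess wraps
p_rel = p / p.sum(axis=-1, keepdims=True)      # NormalizedSpectralFeatureExtractor step

bands = {"delta": (1, 4.5), "theta": (4.5, 8), "alpha": (8, 12), "beta": (12, 30)}
band_power = {
    name: p_rel[..., np.logical_and(f > lo, f < hi)].sum(axis=-1)
    for name, (lo, hi) in bands.items()
}
print({k: v.shape for k, v in band_power.items()})   # each band: one value per channel, (32,)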
eegdash/features/serialization.py
ADDED

@@ -0,0 +1,87 @@
+"""
+Convenience functions for storing and loading of features datasets.
+
+see also: https://github.com/braindecode/braindecode//blob/master/braindecode/datautil/serialization.py#L165-L229
+"""
+
+import json
+from pathlib import Path
+
+import pandas as pd
+from joblib import Parallel, delayed
+
+from mne.io import read_info
+from braindecode.datautil.serialization import _load_kwargs_json
+
+from .datasets import (
+    FeaturesDataset,
+    FeaturesConcatDataset,
+)
+
+
+def load_features_concat_dataset(path, ids_to_load=None, n_jobs=1):
+    """Load a stored FeaturesConcatDataset of FeaturesDatasets from files.
+
+    Parameters
+    ----------
+    path: str | pathlib.Path
+        Path to the directory of the .fif / -epo.fif and .json files.
+    ids_to_load: list of int | None
+        Ids of specific files to load.
+    n_jobs: int
+        Number of jobs to be used to read files in parallel.
+
+    Returns
+    -------
+    concat_dataset: FeaturesConcatDataset of FeaturesDatasets
+    """
+    # Make sure we always work with a pathlib.Path
+    path = Path(path)
+
+    # else we have a dataset saved in the new way with subdirectories in path
+    # for every dataset with description.json and -feat.parquet,
+    # target_name.json, raw_preproc_kwargs.json, window_kwargs.json,
+    # window_preproc_kwargs.json, features_kwargs.json
+    if ids_to_load is None:
+        ids_to_load = [p.name for p in path.iterdir()]
+        ids_to_load = sorted(ids_to_load, key=lambda i: int(i))
+    ids_to_load = [str(i) for i in ids_to_load]
+
+    datasets = Parallel(n_jobs)(delayed(_load_parallel)(path, i) for i in ids_to_load)
+    return FeaturesConcatDataset(datasets)
+
+
+def _load_parallel(path, i):
+    sub_dir = path / i
+
+    parquet_name_pattern = "{}-feat.parquet"
+    parquet_file_name = parquet_name_pattern.format(i)
+    parquet_file_path = sub_dir / parquet_file_name
+
+    features = pd.read_parquet(parquet_file_path)
+
+    description_file_path = sub_dir / "description.json"
+    description = pd.read_json(description_file_path, typ="series")
+
+    raw_info_file_path = sub_dir / "raw-info.fif"
+    raw_info = None
+    if raw_info_file_path.exists():
+        raw_info = read_info(raw_info_file_path)
+
+    raw_preproc_kwargs = _load_kwargs_json("raw_preproc_kwargs", sub_dir)
+    window_kwargs = _load_kwargs_json("window_kwargs", sub_dir)
+    window_preproc_kwargs = _load_kwargs_json("window_preproc_kwargs", sub_dir)
+    features_kwargs = _load_kwargs_json("features_kwargs", sub_dir)
+    metadata = pd.read_pickle(path / i / "metadata_df.pkl")
+
+    dataset = FeaturesDataset(
+        features,
+        metadata=metadata,
+        description=description,
+        raw_info=raw_info,
+        raw_preproc_kwargs=raw_preproc_kwargs,
+        window_kwargs=window_kwargs,
+        window_preproc_kwargs=window_preproc_kwargs,
+        features_kwargs=features_kwargs,
+    )
+    return dataset
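load_features_concat_dataset expects one numbered subdirectory per recording, each holding <i>-feat.parquet, description.json, metadata_df.pkl and the optional *_kwargs.json / raw-info.fif files read by _load_parallel. A usage sketch, importing from the module added above and assuming such a directory was previously written out (presumably by the saving counterpart in eegdash/features/datasets.py, which is not shown in this diff); "features_out" is a hypothetical path:

from eegdash.features.serialization import load_features_concat_dataset

# features_out/ is assumed to contain subdirectories 0/, 1/, ... as described above
concat_ds = load_features_concat_dataset("features_out", ids_to_load=[0, 1], n_jobs=2)
print(len(concat_ds.datasets))   # number of FeaturesDataset objects loaded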
eegdash/features/utils.py
ADDED

@@ -0,0 +1,114 @@
+from typing import Dict, List
+from collections.abc import Callable
+import copy
+import numpy as np
+import pandas as pd
+from joblib import Parallel, delayed
+from tqdm import tqdm
+from torch.utils.data import DataLoader
+from braindecode.datasets.base import (
+    EEGWindowsDataset,
+    WindowsDataset,
+    BaseConcatDataset,
+)
+
+from .datasets import FeaturesDataset, FeaturesConcatDataset
+from .extractors import FeatureExtractor
+
+
+def _extract_features_from_windowsdataset(
+    win_ds: EEGWindowsDataset | WindowsDataset,
+    feature_extractor: FeatureExtractor,
+    batch_size: int = 512,
+):
+    metadata = win_ds.metadata
+    if not win_ds.targets_from == "metadata":
+        metadata = copy.deepcopy(metadata)
+        metadata["orig_index"] = metadata.index
+        metadata.set_index(
+            ["i_window_in_trial", "i_start_in_trial", "i_stop_in_trial"],
+            drop=False,
+            inplace=True,
+        )
+    win_dl = DataLoader(win_ds, batch_size=batch_size, shuffle=False, drop_last=False)
+    features_dict = dict()
+    ch_names = win_ds.raw.ch_names
+    for X, y, crop_inds in win_dl:
+        X = X.numpy()
+        if hasattr(y, "tolist"):
+            y = y.tolist()
+        win_dict = dict()
+        win_dict.update(
+            feature_extractor(X, _batch_size=X.shape[0], _ch_names=ch_names)
+        )
+        if not win_ds.targets_from == "metadata":
+            metadata.loc[crop_inds, "target"] = y
+        for k, v in win_dict.items():
+            if k not in features_dict:
+                features_dict[k] = []
+            features_dict[k].extend(v)
+    features_df = pd.DataFrame(features_dict)
+    if not win_ds.targets_from == "metadata":
+        metadata.set_index("orig_index", drop=False, inplace=True)
+        metadata.reset_index(drop=True, inplace=True)
+        metadata.drop("orig_index", axis=1, inplace=True)
+
+    # FUTURE: truely support WindowsDataset objects
+    return FeaturesDataset(
+        features_df,
+        metadata=metadata,
+        description=win_ds.description,
+        raw_info=win_ds.raw.info,
+        raw_preproc_kwargs=win_ds.raw_preproc_kwargs,
+        window_kwargs=win_ds.window_kwargs,
+        features_kwargs=feature_extractor.features_kwargs,
+    )
+
+
+def extract_features(
+    concat_dataset: BaseConcatDataset,
+    features: FeatureExtractor | Dict[str, Callable] | List[Callable],
+    *,
+    batch_size: int = 512,
+    n_jobs: int = 1,
+):
+    if isinstance(features, list):
+        features = dict(enumerate(features))
+    if not isinstance(features, FeatureExtractor):
+        features = FeatureExtractor(features)
+    feature_ds_list = list(
+        tqdm(
+            Parallel(n_jobs=n_jobs, return_as="generator")(
+                delayed(_extract_features_from_windowsdataset)(
+                    win_ds, features, batch_size
+                )
+                for win_ds in concat_dataset.datasets
+            ),
+            total=len(concat_dataset.datasets),
+            desc="Extracting features",
+        )
+    )
+    return FeaturesConcatDataset(feature_ds_list)
+
+
+def fit_feature_extractors(
+    concat_dataset: BaseConcatDataset,
+    features: FeatureExtractor | Dict[str, Callable] | List[Callable],
+    batch_size: int = 8192,
+):
+    if isinstance(features, list):
+        features = dict(enumerate(features))
+    if not isinstance(features, FeatureExtractor):
+        features = FeatureExtractor(features)
+    if not features._is_fitable:
+        return features
+    features.clear()
+    concat_dl = DataLoader(
+        concat_dataset, batch_size=batch_size, shuffle=False, drop_last=False
+    )
+    for X, y, _ in tqdm(
+        concat_dl, total=len(concat_dl), desc="Fitting feature extractors"
+    ):
+        features.partial_fit(X.numpy(), y=np.array(y))
+    features.fit()
+    return features
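extract_features walks every windows dataset in a braindecode BaseConcatDataset, batches windows through a DataLoader, and collects one row of features per window into a FeaturesDataset; fit_feature_extractors only does real work for trainable extractors. A usage sketch, assuming windows_ds is an existing braindecode BaseConcatDataset of windowed recordings (placeholder, not built here) and importing the helpers from the modules added in this release (whether they are also re-exported from eegdash.features depends on features/__init__.py, which is not shown):

from eegdash.features.utils import extract_features, fit_feature_extractors
from eegdash.features.feature_bank.signal import (
    signal_mean,
    signal_variance,
    signal_line_length,
)

features = {
    "mean": signal_mean,
    "var": signal_variance,
    "line_length": signal_line_length,
}

# A no-op for plain callables (the resulting FeatureExtractor is not fitable),
# but required before extraction for trainable extractors such as the ones in
# eegdash/features/feature_bank/csp.py, which presumably need fitting.
features = fit_feature_extractors(windows_ds, features)

feat_ds = extract_features(windows_ds, features, batch_size=512, n_jobs=4)
print(len(feat_ds.datasets))   # one FeaturesDataset per input windows dataset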
eegdash/main.py
CHANGED

@@ -1,15 +1,16 @@
-from typing import List
 import pymongo
 from dotenv import load_dotenv
 import os
 from pathlib import Path
 import s3fs
 from joblib import Parallel, delayed
+import json
 import tempfile
 import mne
 import numpy as np
 import xarray as xr
-from .data_utils import
+from .data_utils import EEGBIDSDataset, EEGDashBaseRaw, EEGDashBaseDataset
+from .data_config import config as data_config
 from braindecode.datasets import BaseDataset, BaseConcatDataset
 from collections import defaultdict
 from pymongo import MongoClient, InsertOne, UpdateOne, DeleteOne

@@ -18,6 +19,12 @@ class EEGDash:
     AWS_BUCKET = 's3://openneuro.org'
     def __init__(self,
                  is_public=True):
+        # Load config file
+        # config_path = Path(__file__).parent / 'config.json'
+        # with open(config_path, 'r') as f:
+        #     self.config = json.load(f)
+
+        self.config = data_config
         if is_public:
             DB_CONNECTION_STRING="mongodb+srv://eegdash-user:mdzoMjQcHWTVnKDq@cluster0.vz35p.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
         else:

@@ -37,10 +44,9 @@ class EEGDash:
         # convert to list using get_item on each element
         return [result for result in results]

-    def exist(self,
-
-
-    }
+    def exist(self, query:dict):
+        accepted_query_fields = ['data_name', 'dataset']
+        assert all(field in accepted_query_fields for field in query.keys())
         sessions = self.find(query)
         return len(sessions) > 0

@@ -104,66 +110,111 @@ class EEGDash:
         )
         return eeg_xarray

-    def
+    def get_raw_extensions(self, bids_file, bids_dataset: EEGBIDSDataset):
+        bids_file = Path(bids_file)
+        extensions = {
+            '.set': ['.set', '.fdt'],  # eeglab
+            '.edf': ['.edf'],  # european
+            '.vhdr': ['.eeg', '.vhdr', '.vmrk', '.dat', '.raw'],  # brainvision
+            '.bdf': ['.bdf'],  # biosemi
+        }
+        return [str(bids_dataset.get_relative_bidspath(bids_file.with_suffix(suffix))) for suffix in extensions[bids_file.suffix] if bids_file.with_suffix(suffix).exists()]
+
+    def load_eeg_attrs_from_bids_file(self, bids_dataset: EEGBIDSDataset, bids_file):
         '''
         bids_file must be a file of the bids_dataset
         '''
         if bids_file not in bids_dataset.files:
             raise ValueError(f'{bids_file} not in {bids_dataset.dataset}')
+
+        # Initialize attrs with None values for all expected fields
+        attrs = {field: None for field in self.config['attributes'].keys()}
+
         f = os.path.basename(bids_file)
         dsnumber = bids_dataset.dataset
         # extract openneuro path by finding the first occurrence of the dataset name in the filename and remove the path before that
         openneuro_path = dsnumber + bids_file.split(dsnumber)[1]
-
+        # Update with actual values where available
+        try:
+            participants_tsv = bids_dataset.subject_participant_tsv(bids_file)
+        except Exception as e:
+            print(f"Error getting participants_tsv: {str(e)}")
+            participants_tsv = None
+
+        try:
+            eeg_json = bids_dataset.eeg_json(bids_file)
+        except Exception as e:
+            print(f"Error getting eeg_json: {str(e)}")
+            eeg_json = None
+
+        bids_dependencies_files = self.config['bids_dependencies_files']
         bidsdependencies = []
         for extension in bids_dependencies_files:
-
-
-
-
-
-
-
-
-
-
-
-'
-'
-'
-'
-'
-'
-'
-'
-'
-'
+            try:
+                dep_path = bids_dataset.get_bids_metadata_files(bids_file, extension)
+                dep_path = [str(bids_dataset.get_relative_bidspath(dep)) for dep in dep_path]
+                bidsdependencies.extend(dep_path)
+            except Exception as e:
+                pass
+
+        bidsdependencies.extend(self.get_raw_extensions(bids_file, bids_dataset))
+
+        # Define field extraction functions with error handling
+        field_extractors = {
+            'data_name': lambda: f'{bids_dataset.dataset}_{f}',
+            'dataset': lambda: bids_dataset.dataset,
+            'bidspath': lambda: openneuro_path,
+            'subject': lambda: bids_dataset.get_bids_file_attribute('subject', bids_file),
+            'task': lambda: bids_dataset.get_bids_file_attribute('task', bids_file),
+            'session': lambda: bids_dataset.get_bids_file_attribute('session', bids_file),
+            'run': lambda: bids_dataset.get_bids_file_attribute('run', bids_file),
+            'modality': lambda: bids_dataset.get_bids_file_attribute('modality', bids_file),
+            'sampling_frequency': lambda: bids_dataset.get_bids_file_attribute('sfreq', bids_file),
+            'nchans': lambda: bids_dataset.get_bids_file_attribute('nchans', bids_file),
+            'ntimes': lambda: bids_dataset.get_bids_file_attribute('ntimes', bids_file),
+            'participant_tsv': lambda: participants_tsv,
+            'eeg_json': lambda: eeg_json,
+            'bidsdependencies': lambda: bidsdependencies,
         }
+
+        # Dynamically populate attrs with error handling
+        for field, extractor in field_extractors.items():
+            try:
+                attrs[field] = extractor()
+            except Exception as e:
+                print(f"Error extracting {field}: {str(e)}")
+                attrs[field] = None

         return attrs

-    def add_bids_dataset(self, dataset, data_dir,
+    def add_bids_dataset(self, dataset, data_dir, overwrite=True):
         '''
         Create new records for the dataset in the MongoDB database if not found
         '''
         if self.is_public:
             raise ValueError('This operation is not allowed for public users')

-
-
-
-
-
+        if not overwrite and self.exist({'dataset': dataset}):
+            print(f'Dataset {dataset} already exists in the database')
+            return
+        try:
+            bids_dataset = EEGBIDSDataset(
+                data_dir=data_dir,
+                dataset=dataset,
+            )
+        except Exception as e:
+            print(f'Error creating bids dataset {dataset}: {str(e)}')
+            raise e
         requests = []
         for bids_file in bids_dataset.get_files():
             try:
                 data_id = f"{dataset}_{os.path.basename(bids_file)}"

-                if self.exist(data_name
+                if self.exist({'data_name':data_id}):
                     if overwrite:
                         eeg_attrs = self.load_eeg_attrs_from_bids_file(bids_dataset, bids_file)
-                        requests.append(
+                        requests.append(self.update_request(eeg_attrs))
                     else:
                         eeg_attrs = self.load_eeg_attrs_from_bids_file(bids_dataset, bids_file)
                         requests.append(self.add_request(eeg_attrs))

@@ -224,17 +275,22 @@ class EEGDash:
     def remove_field_from_db(self, field):
         self.__collection.update_many({}, {'$unset': {field: 1}})

+    @property
+    def collection(self):
+        return self.__collection

 class EEGDashDataset(BaseConcatDataset):
-    CACHE_DIR = '.eegdash_cache'
+    # CACHE_DIR = '.eegdash_cache'
     def __init__(
         self,
         query:dict=None,
         data_dir:str | list =None,
         dataset:str | list =None,
         description_fields: list[str]=['subject', 'session', 'run', 'task', 'age', 'gender', 'sex'],
+        cache_dir:str='.eegdash_cache',
         **kwargs
     ):
+        self.cache_dir = cache_dir
         if query:
             datasets = self.find_datasets(query, description_fields, **kwargs)
         elif data_dir:

@@ -247,6 +303,7 @@ class EEGDashDataset(BaseConcatDataset):
             datasets.extend(self.load_bids_dataset(dataset[i], data_dir[i], description_fields))
         # convert to list using get_item on each element
         super().__init__(datasets)
+

     def find_key_in_nested_dict(self, data, target_key):
         if isinstance(data, dict):

@@ -267,7 +324,7 @@ class EEGDashDataset(BaseConcatDataset):
                 value = self.find_key_in_nested_dict(record, field)
                 if value:
                     description[field] = value
-            datasets.append(EEGDashBaseDataset(record, self.
+            datasets.append(EEGDashBaseDataset(record, self.cache_dir, description=description, **kwargs))
         return datasets

     def load_bids_dataset(self, dataset, data_dir, description_fields: list[str],raw_format='eeglab', **kwargs):

@@ -280,9 +337,9 @@ class EEGDashDataset(BaseConcatDataset):
                 value = self.find_key_in_nested_dict(record, field)
                 if value:
                     description[field] = value
-            return EEGDashBaseDataset(record, self.
+            return EEGDashBaseDataset(record, self.cache_dir, description=description, **kwargs)

-        bids_dataset =
+        bids_dataset = EEGBIDSDataset(
             data_dir=data_dir,
             dataset=dataset,
             raw_format=raw_format,

@@ -291,15 +348,6 @@ class EEGDashDataset(BaseConcatDataset):
         datasets = Parallel(n_jobs=-1, prefer="threads", verbose=1)(
             delayed(get_base_dataset_from_bids_file)(bids_dataset, bids_file) for bids_file in bids_dataset.get_files()
         )
-        # datasets = []
-        # for bids_file in bids_dataset.get_files():
-        #     record = eegdashObj.load_eeg_attrs_from_bids_file(bids_dataset, bids_file)
-        #     description = {}
-        #     for field in description_fields:
-        #         value = self.find_key_in_nested_dict(record, field)
-        #         if value:
-        #             description[field] = value
-        #     datasets.append(EEGDashBaseDataset(record, self.CACHE_DIR, description=description, **kwargs))
         return datasets

 def main():