eegdash 0.0.7__tar.gz → 0.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash might be problematic. Click here for more details.

Files changed (34) hide show
  1. eegdash-0.0.9/PKG-INFO +123 -0
  2. eegdash-0.0.9/README.md +72 -0
  3. {eegdash-0.0.7 → eegdash-0.0.9}/pyproject.toml +10 -3
  4. eegdash-0.0.9/src/eegdash/__init__.py +1 -0
  5. eegdash-0.0.9/src/eegdash/data_config.py +28 -0
  6. {eegdash-0.0.7 → eegdash-0.0.9}/src/eegdash/data_utils.py +204 -63
  7. eegdash-0.0.9/src/eegdash/features/__init__.py +25 -0
  8. eegdash-0.0.9/src/eegdash/features/datasets.py +453 -0
  9. eegdash-0.0.9/src/eegdash/features/decorators.py +43 -0
  10. eegdash-0.0.9/src/eegdash/features/extractors.py +209 -0
  11. eegdash-0.0.9/src/eegdash/features/feature_bank/__init__.py +6 -0
  12. eegdash-0.0.9/src/eegdash/features/feature_bank/complexity.py +97 -0
  13. eegdash-0.0.9/src/eegdash/features/feature_bank/connectivity.py +99 -0
  14. eegdash-0.0.9/src/eegdash/features/feature_bank/csp.py +102 -0
  15. eegdash-0.0.9/src/eegdash/features/feature_bank/dimensionality.py +108 -0
  16. eegdash-0.0.9/src/eegdash/features/feature_bank/signal.py +103 -0
  17. eegdash-0.0.9/src/eegdash/features/feature_bank/spectral.py +134 -0
  18. eegdash-0.0.9/src/eegdash/features/serialization.py +87 -0
  19. eegdash-0.0.9/src/eegdash/features/utils.py +114 -0
  20. eegdash-0.0.9/src/eegdash/main.py +359 -0
  21. eegdash-0.0.9/src/eegdash.egg-info/PKG-INFO +123 -0
  22. eegdash-0.0.9/src/eegdash.egg-info/SOURCES.txt +25 -0
  23. eegdash-0.0.9/src/eegdash.egg-info/requires.txt +15 -0
  24. eegdash-0.0.7/PKG-INFO +0 -146
  25. eegdash-0.0.7/README.md +0 -103
  26. eegdash-0.0.7/src/eegdash/__init__.py +0 -1
  27. eegdash-0.0.7/src/eegdash/main.py +0 -199
  28. eegdash-0.0.7/src/eegdash.egg-info/PKG-INFO +0 -146
  29. eegdash-0.0.7/src/eegdash.egg-info/SOURCES.txt +0 -11
  30. eegdash-0.0.7/src/eegdash.egg-info/requires.txt +0 -8
  31. {eegdash-0.0.7 → eegdash-0.0.9}/LICENSE +0 -0
  32. {eegdash-0.0.7 → eegdash-0.0.9}/setup.cfg +0 -0
  33. {eegdash-0.0.7 → eegdash-0.0.9}/src/eegdash.egg-info/dependency_links.txt +0 -0
  34. {eegdash-0.0.7 → eegdash-0.0.9}/src/eegdash.egg-info/top_level.txt +0 -0
eegdash-0.0.9/PKG-INFO ADDED
@@ -0,0 +1,123 @@
1
+ Metadata-Version: 2.4
2
+ Name: eegdash
3
+ Version: 0.0.9
4
+ Summary: EEG data for machine learning
5
+ Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>
6
+ License: GNU General Public License
7
+
8
+ Copyright (C) 2024-2025
9
+
10
+ Young Truong, UCSD, dt.young112@gmail.com
11
+ Arnaud Delorme, UCSD, adelorme@ucsd.edu
12
+
13
+ This program is free software; you can redistribute it and/or modify
14
+ it under the terms of the GNU General Public License as published by
15
+ the Free Software Foundation; either version 2 of the License, or
16
+ (at your option) any later version.
17
+
18
+ This program is distributed in the hope that it will be useful,
19
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21
+ GNU General Public License for more details.
22
+
23
+ You should have received a copy of the GNU General Public License
24
+ along with this program; if not, write to the Free Software
25
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26
+
27
+ Project-URL: Homepage, https://eegdash.org
28
+ Project-URL: Issues, https://github.com/sccn/EEGDash/issues
29
+ Classifier: Programming Language :: Python :: 3
30
+ Classifier: License :: OSI Approved :: MIT License
31
+ Classifier: Operating System :: OS Independent
32
+ Requires-Python: >=3.8
33
+ Description-Content-Type: text/markdown
34
+ License-File: LICENSE
35
+ Requires-Dist: xarray
36
+ Requires-Dist: python-dotenv
37
+ Requires-Dist: s3fs
38
+ Requires-Dist: mne
39
+ Requires-Dist: pynwb
40
+ Requires-Dist: h5py
41
+ Requires-Dist: pymongo
42
+ Requires-Dist: joblib
43
+ Requires-Dist: braindecode
44
+ Requires-Dist: mne-bids
45
+ Requires-Dist: pybids
46
+ Requires-Dist: pymatreader
47
+ Requires-Dist: pyarrow
48
+ Requires-Dist: tqdm
49
+ Requires-Dist: numba
50
+ Dynamic: license-file
51
+
52
+ # EEG-Dash
53
+ To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
54
+
55
+ ## Data source
56
+ The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
57
+
58
+ ## Featured data
59
+
60
+ The following HBN datasets are currently featured on EEGDash. Documentation about these datasets is available [here](https://neuromechanist.github.io/data/hbn/).
61
+
62
+ | DatasetID | Participants | Files | Sessions | Population | Channels | Is 10-20? | Modality | Size |
63
+ |---|---|---|---|---|---|---|---|---|
64
+ | [ds005505](https://nemar.org/dataexplorer/detail?dataset_id=ds005505) | 136 | 5393 | 1 | Healthy | 129 | other | Visual | 103 GB |
65
+ | [ds005506](https://nemar.org/dataexplorer/detail?dataset_id=ds005506) | 150 | 5645 | 1 | Healthy | 129 | other | Visual | 112 GB |
66
+ | [ds005507](https://nemar.org/dataexplorer/detail?dataset_id=ds005507) | 184 | 7273 | 1 | Healthy | 129 | other | Visual | 140 GB |
67
+ | [ds005508](https://nemar.org/dataexplorer/detail?dataset_id=ds005508) | 324 | 13393 | 1 | Healthy | 129 | other | Visual | 230 GB |
68
+ | [ds005510](https://nemar.org/dataexplorer/detail?dataset_id=ds005510) | 135 | 4933 | 1 | Healthy | 129 | other | Visual | 91 GB |
69
+ | [ds005512](https://nemar.org/dataexplorer/detail?dataset_id=ds005512) | 257 | 9305 | 1 | Healthy | 129 | other | Visual | 157 GB |
70
+ | [ds005514](https://nemar.org/dataexplorer/detail?dataset_id=ds005514) | 295 | 11565 | 1 | Healthy | 129 | other | Visual | 185 GB |
71
+
72
+ A total of [246 other datasets](datasets.md) are also available through EEGDash.
73
+
74
+ ## Data format
75
+ EEGDash queries return a **PyTorch Dataset** formatted to facilitate machine learning (ML) and deep learning (DL) applications. PyTorch Datasets are the best format for EEGDash queries because they provide an efficient, scalable, and flexible structure for machine learning (ML) and deep learning (DL) applications. They allow seamless integration with PyTorch’s DataLoader, enabling efficient batching, shuffling, and parallel data loading, which is essential for training deep learning models on large EEG datasets.
76
+
77
+ ## Data preprocessing
78
+ EEGDash datasets are processed using the popular [BrainDecode](https://braindecode.org/stable/index.html) library. In fact, EEGDash datasets are BrainDecode datasets, which are themselves PyTorch datasets. This means that any preprocessing possible on BrainDecode datasets is also possible on EEGDash datasets. Refer to [BrainDecode](https://braindecode.org/stable/index.html) tutorials for guidance on preprocessing EEG data.
79
+
80
+ ## EEG-Dash usage
81
+
82
+ ### Install
83
+ Use your preferred Python environment manager with Python > 3.9 to install the package.
84
+ * To install the eegdash package, use the following command: `pip install eegdash`
85
+ * To verify the installation, start a Python session and type: `from eegdash import EEGDash`
86
+
87
+ ### Data access
88
+
89
+ To use the data from a single subject, enter:
90
+
91
+ ```python
92
+ from eegdash import EEGDashDataset
93
+ ds_NDARDB033FW5 = EEGDashDataset({'dataset': 'ds005514', 'task': 'RestingState', 'subject': 'NDARDB033FW5'})
94
+ ```
95
+
96
+ This will search and download the metadata for the task **RestingState** for subject **NDARDB033FW5** in BIDS dataset **ds005514**. The actual data will not be downloaded at this stage. Following standard practice, data is only downloaded once it is processed. The **ds_NDARDB033FW5** object is a fully functional BrainDecode dataset, which is itself a PyTorch dataset. This [tutorial](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_eoec.ipynb) shows how to preprocess the EEG data, extracting portions of the data containing eyes-open and eyes-closed segments, then perform eyes-open vs. eyes-closed classification using a (shallow) deep-learning model.
97
+
98
+ To use the data from multiple subjects, enter:
99
+
100
+ ```python
101
+ from eegdash import EEGDashDataset
102
+ ds_ds005505rest = EEGDashDataset({'dataset': 'ds005505', 'task': 'RestingState'}, target_name='sex')
103
+ ```
104
+
105
+ This will search and download the metadata for the task 'RestingState' for all subjects in BIDS dataset 'ds005505' (a total of 136). As above, the actual data will not be downloaded at this stage so this command is quick to execute. Also, the target class for each subject is assigned using the target_name parameter. This means that this object is ready to be directly fed to a deep learning model, although the [tutorial script](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_sex_classification.ipynb) performs minimal processing on it, prior to training a deep-learning model. Because 14 gigabytes of data are downloaded, this tutorial takes about 10 minutes to execute.
106
+
107
+ ### Automatic caching
108
+
109
+ EEGDash automatically caches the downloaded data in the .eegdash_cache folder of the current directory from which the script is called. This means that if you run the tutorial [scripts](https://github.com/sccn/EEGDash/tree/develop/notebooks), the data will only be downloaded the first time the script is executed.
110
+
111
+ ## Education -- Coming soon...
112
+
113
+ We organize workshops and educational events to foster cross-cultural education and student training, offering both online and in-person opportunities in collaboration with US and Israeli partners. Events for 2025 will be announced via the EEGLABNEWS mailing list. Be sure to [subscribe](https://sccn.ucsd.edu/mailman/listinfo/eeglabnews).
114
+
115
+ ## About EEG-DaSh
116
+
117
+ EEG-DaSh is a collaborative initiative between the United States and Israel, supported by the National Science Foundation (NSF). The partnership brings together experts from the Swartz Center for Computational Neuroscience (SCCN) at the University of California San Diego (UCSD) and Ben-Gurion University (BGU) in Israel.
118
+
119
+ ![Screenshot 2024-10-03 at 09 14 06](https://github.com/user-attachments/assets/327639d3-c3b4-46b1-9335-37803209b0d3)
120
+
121
+
122
+
123
+
@@ -0,0 +1,72 @@
1
+ # EEG-Dash
2
+ To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
3
+
4
+ ## Data source
5
+ The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
6
+
7
+ ## Featured data
8
+
9
+ The following HBN datasets are currently featured on EEGDash. Documentation about these datasets is available [here](https://neuromechanist.github.io/data/hbn/).
10
+
11
+ | DatasetID | Participants | Files | Sessions | Population | Channels | Is 10-20? | Modality | Size |
12
+ |---|---|---|---|---|---|---|---|---|
13
+ | [ds005505](https://nemar.org/dataexplorer/detail?dataset_id=ds005505) | 136 | 5393 | 1 | Healthy | 129 | other | Visual | 103 GB |
14
+ | [ds005506](https://nemar.org/dataexplorer/detail?dataset_id=ds005506) | 150 | 5645 | 1 | Healthy | 129 | other | Visual | 112 GB |
15
+ | [ds005507](https://nemar.org/dataexplorer/detail?dataset_id=ds005507) | 184 | 7273 | 1 | Healthy | 129 | other | Visual | 140 GB |
16
+ | [ds005508](https://nemar.org/dataexplorer/detail?dataset_id=ds005508) | 324 | 13393 | 1 | Healthy | 129 | other | Visual | 230 GB |
17
+ | [ds005510](https://nemar.org/dataexplorer/detail?dataset_id=ds005510) | 135 | 4933 | 1 | Healthy | 129 | other | Visual | 91 GB |
18
+ | [ds005512](https://nemar.org/dataexplorer/detail?dataset_id=ds005512) | 257 | 9305 | 1 | Healthy | 129 | other | Visual | 157 GB |
19
+ | [ds005514](https://nemar.org/dataexplorer/detail?dataset_id=ds005514) | 295 | 11565 | 1 | Healthy | 129 | other | Visual | 185 GB |
20
+
21
+ A total of [246 other datasets](datasets.md) are also available through EEGDash.
22
+
23
+ ## Data format
24
+ EEGDash queries return a **PyTorch Dataset** formatted to facilitate machine learning (ML) and deep learning (DL) applications. PyTorch Datasets are the best format for EEGDash queries because they provide an efficient, scalable, and flexible structure for machine learning (ML) and deep learning (DL) applications. They allow seamless integration with PyTorch’s DataLoader, enabling efficient batching, shuffling, and parallel data loading, which is essential for training deep learning models on large EEG datasets.
25
+
26
+ ## Data preprocessing
27
+ EEGDash datasets are processed using the popular [BrainDecode](https://braindecode.org/stable/index.html) library. In fact, EEGDash datasets are BrainDecode datasets, which are themselves PyTorch datasets. This means that any preprocessing possible on BrainDecode datasets is also possible on EEGDash datasets. Refer to [BrainDecode](https://braindecode.org/stable/index.html) tutorials for guidance on preprocessing EEG data.
28
+
29
+ ## EEG-Dash usage
30
+
31
+ ### Install
32
+ Use your preferred Python environment manager with Python > 3.9 to install the package.
33
+ * To install the eegdash package, use the following command: `pip install eegdash`
34
+ * To verify the installation, start a Python session and type: `from eegdash import EEGDash`
35
+
36
+ ### Data access
37
+
38
+ To use the data from a single subject, enter:
39
+
40
+ ```python
41
+ from eegdash import EEGDashDataset
42
+ ds_NDARDB033FW5 = EEGDashDataset({'dataset': 'ds005514', 'task': 'RestingState', 'subject': 'NDARDB033FW5'})
43
+ ```
44
+
45
+ This will search and download the metadata for the task **RestingState** for subject **NDARDB033FW5** in BIDS dataset **ds005514**. The actual data will not be downloaded at this stage. Following standard practice, data is only downloaded once it is processed. The **ds_NDARDB033FW5** object is a fully functional BrainDecode dataset, which is itself a PyTorch dataset. This [tutorial](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_eoec.ipynb) shows how to preprocess the EEG data, extracting portions of the data containing eyes-open and eyes-closed segments, then perform eyes-open vs. eyes-closed classification using a (shallow) deep-learning model.
46
+
47
+ To use the data from multiple subjects, enter:
48
+
49
+ ```python
50
+ from eegdash import EEGDashDataset
51
+ ds_ds005505rest = EEGDashDataset({'dataset': 'ds005505', 'task': 'RestingState'}, target_name='sex')
52
+ ```
53
+
54
+ This will search and download the metadata for the task 'RestingState' for all subjects in BIDS dataset 'ds005505' (a total of 136). As above, the actual data will not be downloaded at this stage so this command is quick to execute. Also, the target class for each subject is assigned using the target_name parameter. This means that this object is ready to be directly fed to a deep learning model, although the [tutorial script](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_sex_classification.ipynb) performs minimal processing on it, prior to training a deep-learning model. Because 14 gigabytes of data are downloaded, this tutorial takes about 10 minutes to execute.
55
+
56
+ ### Automatic caching
57
+
58
+ EEGDash automatically caches the downloaded data in the .eegdash_cache folder of the current directory from which the script is called. This means that if you run the tutorial [scripts](https://github.com/sccn/EEGDash/tree/develop/notebooks), the data will only be downloaded the first time the script is executed.
59
+
60
+ ## Education -- Coming soon...
61
+
62
+ We organize workshops and educational events to foster cross-cultural education and student training, offering both online and in-person opportunities in collaboration with US and Israeli partners. Events for 2025 will be announced via the EEGLABNEWS mailing list. Be sure to [subscribe](https://sccn.ucsd.edu/mailman/listinfo/eeglabnews).
63
+
64
+ ## About EEG-DaSh
65
+
66
+ EEG-DaSh is a collaborative initiative between the United States and Israel, supported by the National Science Foundation (NSF). The partnership brings together experts from the Swartz Center for Computational Neuroscience (SCCN) at the University of California San Diego (UCSD) and Ben-Gurion University (BGU) in Israel.
67
+
68
+ ![Screenshot 2024-10-03 at 09 14 06](https://github.com/user-attachments/assets/327639d3-c3b4-46b1-9335-37803209b0d3)
69
+
70
+
71
+
72
+
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "eegdash"
7
- version = "0.0.7"
7
+ version = "0.0.9"
8
8
  authors = [
9
9
  { name="Young Truong", email="dt.young112@gmail.com" },
10
10
  { name="Arnaud Delorme", email="adelorme@gmail.com" },
@@ -27,8 +27,15 @@ dependencies = [
27
27
  "h5py",
28
28
  "pymongo",
29
29
  "joblib",
30
+ "braindecode",
31
+ "mne-bids",
32
+ "pybids",
33
+ "pymatreader",
34
+ "pyarrow",
35
+ "tqdm",
36
+ "numba",
30
37
  ]
31
38
  [project.urls]
32
- Homepage = "https://github.com/sccn/EEG-Dash-Data"
33
- Issues = "https://github.com/sccn/EEG-Dash-Data/issues"
39
+ Homepage = "https://eegdash.org"
40
+ Issues = "https://github.com/sccn/EEGDash/issues"
34
41
 
@@ -0,0 +1 @@
1
+ from .main import EEGDash, EEGDashDataset
@@ -0,0 +1,28 @@
1
+ config = {
2
+ "required_fields": ["data_name"],
3
+ "attributes": {
4
+ "data_name": "str",
5
+ "dataset": "str",
6
+ "bidspath": "str",
7
+ "subject": "str",
8
+ "task": "str",
9
+ "session": "str",
10
+ "run": "str",
11
+ "sampling_frequency": "float",
12
+ "modality": "str",
13
+ "nchans": "int",
14
+ "ntimes": "int"
15
+ },
16
+ "description_fields": ["subject", "session", "run", "task", "age", "gender", "sex"],
17
+ "bids_dependencies_files": [
18
+ "dataset_description.json",
19
+ "participants.tsv",
20
+ "events.tsv",
21
+ "events.json",
22
+ "eeg.json",
23
+ "electrodes.tsv",
24
+ "channels.tsv",
25
+ "coordsystem.json"
26
+ ],
27
+ "accepted_query_fields": ["data_name", "dataset"]
28
+ }
@@ -12,9 +12,107 @@ from mne._fiff.utils import _find_channels, _read_segments_file
12
12
  import s3fs
13
13
  import tempfile
14
14
  from mne._fiff.utils import _read_segments_file
15
+ from braindecode.datasets import BaseDataset
16
+ import mne_bids
17
+ from mne_bids import (
18
+ BIDSPath,
19
+ )
20
+ from bids import BIDSLayout
15
21
 
16
- class RawEEGDash(BaseRaw):
17
- r"""Raw object from EEG-Dash connection with Openneuro S3 file.
22
+ class EEGDashBaseDataset(BaseDataset):
23
+ """Returns samples from an mne.io.Raw object along with a target.
24
+
25
+ Dataset which serves samples from an mne.io.Raw object along with a target.
26
+ The target is unique for the dataset, and is obtained through the
27
+ `description` attribute.
28
+
29
+ Parameters
30
+ ----------
31
+ raw : mne.io.Raw
32
+ Continuous data.
33
+ description : dict | pandas.Series | None
34
+ Holds additional description about the continuous signal / subject.
35
+ target_name : str | tuple | None
36
+ Name(s) of the index in `description` that should be used to provide the
37
+ target (e.g., to be used in a prediction task later on).
38
+ transform : callable | None
39
+ On-the-fly transform applied to the example before it is returned.
40
+ """
41
+ AWS_BUCKET = 's3://openneuro.org'
42
+ def __init__(self, record, cache_dir, **kwargs):
43
+ super().__init__(None, **kwargs)
44
+ self.record = record
45
+ self.cache_dir = Path(cache_dir)
46
+ bids_kwargs = self.get_raw_bids_args()
47
+ self.bidspath = BIDSPath(root=self.cache_dir / record['dataset'], datatype='eeg', suffix='eeg', **bids_kwargs)
48
+ self.s3file = self.get_s3path(record['bidspath'])
49
+ self.filecache = self.cache_dir / record['bidspath']
50
+ self.bids_dependencies = record['bidsdependencies']
51
+ self._raw = None
52
+ # if os.path.exists(self.filecache):
53
+ # self.raw = mne_bids.read_raw_bids(self.bidspath, verbose=False)
54
+
55
+ def get_s3path(self, filepath):
56
+ return f"{self.AWS_BUCKET}/{filepath}"
57
+
58
+ def _download_s3(self):
59
+ self.filecache.parent.mkdir(parents=True, exist_ok=True)
60
+ filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
61
+ filesystem.download(self.s3file, self.filecache)
62
+ self.filenames = [self.filecache]
63
+
64
+ def _download_dependencies(self):
65
+ filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
66
+ for dep in self.bids_dependencies:
67
+ s3path = self.get_s3path(dep)
68
+ filepath = self.cache_dir / dep
69
+ if not filepath.exists():
70
+ filepath.parent.mkdir(parents=True, exist_ok=True)
71
+ filesystem.download(s3path, filepath)
72
+
73
+ def get_raw_bids_args(self):
74
+ desired_fields = ['subject', 'session', 'task', 'run']
75
+ return {k: self.record[k] for k in desired_fields if self.record[k]}
76
+
77
+ def check_and_get_raw(self):
78
+ if not os.path.exists(self.filecache): # not preload
79
+ if self.bids_dependencies:
80
+ self._download_dependencies()
81
+ self._download_s3()
82
+ if self._raw is None:
83
+ self._raw = mne_bids.read_raw_bids(self.bidspath, verbose=False)
84
+
85
+ def __getitem__(self, index):
86
+ # self.check_and_get_raw()
87
+
88
+ X = self.raw[:, index][0]
89
+ y = None
90
+ if self.target_name is not None:
91
+ y = self.description[self.target_name]
92
+ if isinstance(y, pd.Series):
93
+ y = y.to_list()
94
+ if self.transform is not None:
95
+ X = self.transform(X)
96
+ return X, y
97
+
98
+ def __len__(self):
99
+ if self._raw is None:
100
+ return int(self.record['ntimes'] * self.record['sampling_frequency'])
101
+ else:
102
+ return len(self._raw)
103
+
104
+ @property
105
+ def raw(self):
106
+ if self._raw is None:
107
+ self.check_and_get_raw()
108
+ return self._raw
109
+
110
+ @raw.setter
111
+ def raw(self, raw):
112
+ self._raw = raw
113
+
114
+ class EEGDashBaseRaw(BaseRaw):
115
+ r"""MNE Raw object from EEG-Dash connection with Openneuro S3 file.
18
116
 
19
117
  Parameters
20
118
  ----------
@@ -40,6 +138,7 @@ class RawEEGDash(BaseRaw):
40
138
  .. versionadded:: 0.11.0
41
139
  """
42
140
 
141
+ AWS_BUCKET = 's3://openneuro.org'
43
142
  def __init__(
44
143
  self,
45
144
  input_fname,
@@ -48,6 +147,7 @@ class RawEEGDash(BaseRaw):
48
147
  preload=False,
49
148
  *,
50
149
  cache_dir='./.eegdash_cache',
150
+ bids_dependencies:list = [],
51
151
  uint16_codec=None,
52
152
  montage_units="auto",
53
153
  verbose=None,
@@ -66,9 +166,10 @@ class RawEEGDash(BaseRaw):
66
166
  chtype = 'eog'
67
167
  ch_types.append(chtype)
68
168
  info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
69
- self.s3file = input_fname
70
- os.makedirs(cache_dir, exist_ok=True)
71
- self.filecache = os.path.join(cache_dir, os.path.basename(self.s3file))
169
+ self.s3file = self.get_s3path(input_fname)
170
+ self.cache_dir = Path(cache_dir)
171
+ self.filecache = self.cache_dir / input_fname
172
+ self.bids_dependencies = bids_dependencies
72
173
 
73
174
  if preload and not os.path.exists(self.filecache):
74
175
  self._download_s3()
@@ -82,17 +183,30 @@ class RawEEGDash(BaseRaw):
82
183
  verbose=verbose,
83
184
  )
84
185
 
186
+ def get_s3path(self, filepath):
187
+ return f"{self.AWS_BUCKET}/{filepath}"
188
+
85
189
  def _download_s3(self):
190
+ self.filecache.parent.mkdir(parents=True, exist_ok=True)
86
191
  filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
87
- print('s3file', self.s3file)
88
- print('filecache', self.filecache)
89
192
  filesystem.download(self.s3file, self.filecache)
90
193
  self.filenames = [self.filecache]
91
194
 
195
+ def _download_dependencies(self):
196
+ filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
197
+ for dep in self.bids_dependencies:
198
+ s3path = self.get_s3path(dep)
199
+ filepath = self.cache_dir / dep
200
+ if not filepath.exists():
201
+ filepath.parent.mkdir(parents=True, exist_ok=True)
202
+ filesystem.download(s3path, filepath)
203
+
92
204
  def _read_segment(
93
205
  self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
94
206
  ):
95
207
  if not os.path.exists(self.filecache): # not preload
208
+ if self.bids_dependencies:
209
+ self._download_dependencies()
96
210
  self._download_s3()
97
211
  else: # not preload and file is not cached
98
212
  self.filenames = [self.filecache]
@@ -103,38 +217,53 @@ class RawEEGDash(BaseRaw):
103
217
  _read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
104
218
 
105
219
 
106
- class BIDSDataset():
220
+ class EEGBIDSDataset():
107
221
  ALLOWED_FILE_FORMAT = ['eeglab', 'brainvision', 'biosemi', 'european']
108
- RAW_EXTENSION = {
109
- 'eeglab': '.set',
110
- 'brainvision': '.vhdr',
111
- 'biosemi': '.bdf',
112
- 'european': '.edf'
113
- }
222
+ RAW_EXTENSIONS = {
223
+ '.set': ['.set', '.fdt'], # eeglab
224
+ '.edf': ['.edf'], # european
225
+ '.vhdr': ['.eeg', '.vhdr', '.vmrk', '.dat', '.raw'], # brainvision
226
+ '.bdf': ['.bdf'], # biosemi
227
+ }
114
228
  METADATA_FILE_EXTENSIONS = ['eeg.json', 'channels.tsv', 'electrodes.tsv', 'events.tsv', 'events.json']
115
229
  def __init__(self,
116
230
  data_dir=None, # location of bids dataset
117
231
  dataset='', # dataset name
118
- raw_format='eeglab', # format of raw data
119
232
  ):
120
233
  if data_dir is None or not os.path.exists(data_dir):
121
234
  raise ValueError('data_dir must be specified and must exist')
122
235
  self.bidsdir = Path(data_dir)
123
236
  self.dataset = dataset
124
-
125
- if raw_format.lower() not in self.ALLOWED_FILE_FORMAT:
126
- raise ValueError('raw_format must be one of {}'.format(self.ALLOWED_FILE_FORMAT))
127
- self.raw_format = raw_format.lower()
128
-
129
- # get all .set files in the bids directory
130
- temp_dir = (Path().resolve() / 'data')
131
- if not os.path.exists(temp_dir):
132
- os.mkdir(temp_dir)
133
- if not os.path.exists(temp_dir / f'{dataset}_files.npy'):
134
- self.files = self.get_files_with_extension_parallel(self.bidsdir, extension=self.RAW_EXTENSION[self.raw_format])
135
- np.save(temp_dir / f'{dataset}_files.npy', self.files)
136
- else:
137
- self.files = np.load(temp_dir / f'{dataset}_files.npy', allow_pickle=True)
237
+ assert str(self.bidsdir).endswith(self.dataset)
238
+ self.layout = BIDSLayout(data_dir)
239
+
240
+ # get all recording files in the bids directory
241
+ self.files = self.get_recordings(self.layout)
242
+ assert len(self.files) > 0, ValueError('Unable to construct EEG dataset. No EEG recordings found.')
243
+ assert self.check_eeg_dataset(), ValueError('Dataset is not an EEG dataset.')
244
+ # temp_dir = (Path().resolve() / 'data')
245
+ # if not os.path.exists(temp_dir):
246
+ # os.mkdir(temp_dir)
247
+ # if not os.path.exists(temp_dir / f'{dataset}_files.npy'):
248
+ # self.files = self.get_files_with_extension_parallel(self.bidsdir, extension=self.RAW_EXTENSION[self.raw_format])
249
+ # np.save(temp_dir / f'{dataset}_files.npy', self.files)
250
+ # else:
251
+ # self.files = np.load(temp_dir / f'{dataset}_files.npy', allow_pickle=True)
252
+
253
+ def check_eeg_dataset(self):
254
+ return self.get_bids_file_attribute('modality', self.files[0]).lower() == 'eeg'
255
+
256
+ def get_recordings(self, layout:BIDSLayout):
257
+ files = []
258
+ for ext, exts in self.RAW_EXTENSIONS.items():
259
+ files = layout.get(extension=ext, return_type='filename')
260
+ if files:
261
+ break
262
+ return files
263
+
264
+ def get_relative_bidspath(self, filename):
265
+ bids_parent_dir = self.bidsdir.parent
266
+ return str(Path(filename).relative_to(bids_parent_dir))
138
267
 
139
268
  def get_property_from_filename(self, property, filename):
140
269
  import platform
@@ -177,8 +306,9 @@ class BIDSDataset():
177
306
  for file in os.listdir(path):
178
307
  # target_file = path / f"{cur_file_basename}_{extension}"
179
308
  if os.path.isfile(path/file):
180
- cur_file_basename = file[:file.rfind('_')] # TODO: change to just search for any file with extension
181
- if file.endswith(extension) and cur_file_basename in basename:
309
+ # check if file has extension extension
310
+ # check if file basename has extension
311
+ if file.endswith(extension):
182
312
  filepath = path / file
183
313
  bids_files.append(filepath)
184
314
 
@@ -210,14 +340,11 @@ class BIDSDataset():
210
340
  basename = filename[:filename.rfind('_')]
211
341
  # metadata files
212
342
  meta_files = self.get_bids_file_inheritance(path, basename, metadata_file_extension)
213
- if not meta_files:
214
- raise ValueError('No metadata files found for filepath {filepath} and extension {metadata_file_extension}')
215
- else:
216
- return meta_files
343
+ return meta_files
217
344
 
218
345
  def scan_directory(self, directory, extension):
219
346
  result_files = []
220
- directory_to_ignore = ['.git']
347
+ directory_to_ignore = ['.git', '.datalad', 'derivatives', 'code']
221
348
  with os.scandir(directory) as entries:
222
349
  for entry in entries:
223
350
  if entry.is_file() and entry.name.endswith(extension):
@@ -298,32 +425,22 @@ class BIDSDataset():
298
425
  json_dict.update(json.load(f))
299
426
  return json_dict
300
427
 
301
- def sfreq(self, data_filepath):
302
- json_files = self.get_bids_metadata_files(data_filepath, 'eeg.json')
303
- if len(json_files) == 0:
304
- raise ValueError('No eeg.json found')
305
-
306
- metadata = self.resolve_bids_json(json_files)
307
- if 'SamplingFrequency' not in metadata:
308
- raise ValueError('SamplingFrequency not found in metadata')
309
- else:
310
- return metadata['SamplingFrequency']
311
-
312
- def task(self, data_filepath):
313
- return self.get_property_from_filename('task', data_filepath)
314
-
315
- def session(self, data_filepath):
316
- return self.get_property_from_filename('session', data_filepath)
317
-
318
- def run(self, data_filepath):
319
- return self.get_property_from_filename('run', data_filepath)
320
-
321
- def subject(self, data_filepath):
322
- return self.get_property_from_filename('sub', data_filepath)
323
-
324
- def num_channels(self, data_filepath):
325
- channels_tsv = pd.read_csv(self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0], sep='\t')
326
- return len(channels_tsv)
428
+ def get_bids_file_attribute(self, attribute, data_filepath):
429
+ entities = self.layout.parse_file_entities(data_filepath)
430
+ bidsfile = self.layout.get(**entities)[0]
431
+ attributes = bidsfile.get_entities(metadata='all')
432
+ attribute_mapping = {
433
+ 'sfreq': 'SamplingFrequency',
434
+ 'modality': 'datatype',
435
+ 'task': 'task',
436
+ 'session': 'session',
437
+ 'run': 'run',
438
+ 'subject': 'subject',
439
+ 'ntimes': 'RecordingDuration',
440
+ 'nchans': 'EEGChannelCount'
441
+ }
442
+ attribute_value = attributes.get(attribute_mapping.get(attribute), None)
443
+ return attribute_value
327
444
 
328
445
  def channel_labels(self, data_filepath):
329
446
  channels_tsv = pd.read_csv(self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0], sep='\t')
@@ -336,4 +453,28 @@ class BIDSDataset():
336
453
  def num_times(self, data_filepath):
337
454
  eeg_jsons = self.get_bids_metadata_files(data_filepath, 'eeg.json')
338
455
  eeg_json_dict = self.merge_json_inheritance(eeg_jsons)
339
- return int(eeg_json_dict['SamplingFrequency'] * eeg_json_dict['RecordingDuration'])
456
+ return int(eeg_json_dict['SamplingFrequency'] * eeg_json_dict['RecordingDuration'])
457
+
458
+ def subject_participant_tsv(self, data_filepath):
459
+ '''Get participants_tsv info of a subject based on filepath'''
460
+ participants_tsv = pd.read_csv(self.get_bids_metadata_files(data_filepath, 'participants.tsv')[0], sep='\t')
461
+ # if participants_tsv is not empty
462
+ if participants_tsv.empty:
463
+ return {}
464
+ # set 'participant_id' as index
465
+ participants_tsv.set_index('participant_id', inplace=True)
466
+ subject = f"sub-{self.get_bids_file_attribute('subject', data_filepath)}"
467
+ return participants_tsv.loc[subject].to_dict()
468
+
469
+ def eeg_json(self, data_filepath):
470
+ eeg_jsons = self.get_bids_metadata_files(data_filepath, 'eeg.json')
471
+ eeg_json_dict = self.merge_json_inheritance(eeg_jsons)
472
+ return eeg_json_dict
473
+
474
+ def channel_tsv(self, data_filepath):
475
+ channels_tsv = pd.read_csv(self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0], sep='\t')
476
+ channel_tsv = channels_tsv.to_dict()
477
+ # 'name' and 'type' now have a dictionary of index-value. Convert them to list
478
+ for list_field in ['name', 'type', 'units']:
479
+ channel_tsv[list_field] = list(channel_tsv[list_field].values())
480
+ return channel_tsv
@@ -0,0 +1,25 @@
1
+ # Features datasets
2
+ from .datasets import FeaturesDataset, FeaturesConcatDataset
3
+ from .serialization import load_features_concat_dataset
4
+
5
+ # Feature extraction
6
+ from .extractors import (
7
+ FeatureExtractor,
8
+ FitableFeature,
9
+ UnivariateFeature,
10
+ BivariateFeature,
11
+ DirectedBivariateFeature,
12
+ MultivariateFeature,
13
+ )
14
+ from .decorators import (
15
+ FeaturePredecessor,
16
+ FeatureKind,
17
+ univariate_feature,
18
+ bivariate_feature,
19
+ directed_bivariate_feature,
20
+ multivariate_feature,
21
+ )
22
+ from .utils import extract_features, fit_feature_extractors
23
+
24
+ # Features:
25
+ from .feature_bank import *