PyPI - eegdash - Versions diffs - 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl - Mend

eegdash 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of eegdash might be problematic. Click here for more details.

Files changed (24) hide show

eegdash/__init__.py +1 -1
eegdash/data_config.py +28 -0
eegdash/data_utils.py +204 -63
eegdash/features/__init__.py +25 -0
eegdash/features/datasets.py +453 -0
eegdash/features/decorators.py +43 -0
eegdash/features/extractors.py +209 -0
eegdash/features/feature_bank/__init__.py +6 -0
eegdash/features/feature_bank/complexity.py +97 -0
eegdash/features/feature_bank/connectivity.py +99 -0
eegdash/features/feature_bank/csp.py +102 -0
eegdash/features/feature_bank/dimensionality.py +108 -0
eegdash/features/feature_bank/signal.py +103 -0
eegdash/features/feature_bank/spectral.py +134 -0
eegdash/features/serialization.py +87 -0
eegdash/features/utils.py +114 -0
eegdash/main.py +216 -56
eegdash-0.0.9.dist-info/METADATA +123 -0
eegdash-0.0.9.dist-info/RECORD +22 -0
{eegdash-0.0.7.dist-info → eegdash-0.0.9.dist-info}/WHEEL +1 -1
eegdash-0.0.7.dist-info/METADATA +0 -146
eegdash-0.0.7.dist-info/RECORD +0 -8
{eegdash-0.0.7.dist-info → eegdash-0.0.9.dist-info/licenses}/LICENSE +0 -0
{eegdash-0.0.7.dist-info → eegdash-0.0.9.dist-info}/top_level.txt +0 -0

eegdash/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- from .main import EEGDash
1	+ from .main import EEGDash, EEGDashDataset

eegdash/data_config.py ADDED Viewed

@@ -0,0 +1,28 @@
+config = {
+  "required_fields": ["data_name"],
+  "attributes": {
+    "data_name": "str",
+    "dataset": "str",
+    "bidspath": "str",
+    "subject": "str",
+    "task": "str",
+    "session": "str",
+    "run": "str",
+    "sampling_frequency": "float",
+    "modality": "str",
+    "nchans": "int",
+    "ntimes": "int"
+  },
+  "description_fields": ["subject", "session", "run", "task", "age", "gender", "sex"],
+  "bids_dependencies_files": [
+    "dataset_description.json",
+    "participants.tsv",
+    "events.tsv",
+    "events.json",
+    "eeg.json",
+    "electrodes.tsv",
+    "channels.tsv",
+    "coordsystem.json"
+  ],
+  "accepted_query_fields": ["data_name", "dataset"]
+}

eegdash/data_utils.py CHANGED Viewed

@@ -12,9 +12,107 @@ from mne._fiff.utils import _find_channels, _read_segments_file
 import s3fs
 import tempfile
 from mne._fiff.utils import _read_segments_file
+from braindecode.datasets import BaseDataset
+import mne_bids
+from mne_bids import (
+    BIDSPath,
+)
+from bids import BIDSLayout
-class RawEEGDash(BaseRaw):
-    r"""Raw object from EEG-Dash connection with Openneuro S3 file.
+class EEGDashBaseDataset(BaseDataset):
+    """Returns samples from an mne.io.Raw object along with a target.
+    Dataset which serves samples from an mne.io.Raw object along with a target.
+    The target is unique for the dataset, and is obtained through the
+    `description` attribute.
+    Parameters
+    ----------
+    raw : mne.io.Raw
+        Continuous data.
+    description : dict | pandas.Series | None
+        Holds additional description about the continuous signal / subject.
+    target_name : str | tuple | None
+        Name(s) of the index in `description` that should be used to provide the
+        target (e.g., to be used in a prediction task later on).
+    transform : callable | None
+        On-the-fly transform applied to the example before it is returned.
+    """
+    AWS_BUCKET = 's3://openneuro.org'
+    def __init__(self, record, cache_dir, **kwargs):
+        super().__init__(None, **kwargs)
+        self.record = record
+        self.cache_dir = Path(cache_dir)
+        bids_kwargs = self.get_raw_bids_args()
+        self.bidspath = BIDSPath(root=self.cache_dir / record['dataset'], datatype='eeg', suffix='eeg', **bids_kwargs)
+        self.s3file = self.get_s3path(record['bidspath'])
+        self.filecache = self.cache_dir / record['bidspath']
+        self.bids_dependencies = record['bidsdependencies']
+        self._raw = None
+        # if os.path.exists(self.filecache):
+        #     self.raw = mne_bids.read_raw_bids(self.bidspath, verbose=False)
+    def get_s3path(self, filepath):
+        return f"{self.AWS_BUCKET}/{filepath}"
+    def _download_s3(self):
+        self.filecache.parent.mkdir(parents=True, exist_ok=True)
+        filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
+        filesystem.download(self.s3file, self.filecache)
+        self.filenames = [self.filecache]
+    def _download_dependencies(self):
+        filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
+        for dep in self.bids_dependencies:
+            s3path = self.get_s3path(dep)
+            filepath = self.cache_dir / dep
+            if not filepath.exists():
+                filepath.parent.mkdir(parents=True, exist_ok=True)
+                filesystem.download(s3path, filepath)
+    def get_raw_bids_args(self):
+        desired_fields = ['subject', 'session', 'task', 'run']
+        return {k: self.record[k] for k in desired_fields if self.record[k]}
+    def check_and_get_raw(self):
+        if not os.path.exists(self.filecache): # not preload
+            if self.bids_dependencies:
+                self._download_dependencies()
+            self._download_s3()
+        if self._raw is None:
+            self._raw = mne_bids.read_raw_bids(self.bidspath, verbose=False)
+    def __getitem__(self, index):
+        # self.check_and_get_raw()
+        X = self.raw[:, index][0]
+        y = None
+        if self.target_name is not None:
+            y = self.description[self.target_name]
+        if isinstance(y, pd.Series):
+            y = y.to_list()
+        if self.transform is not None:
+            X = self.transform(X)
+        return X, y
+    def __len__(self):
+        if self._raw is None:
+            return int(self.record['ntimes'] * self.record['sampling_frequency'])
+        else:
+            return len(self._raw)
+    @property
+    def raw(self):
+        if self._raw is None:
+            self.check_and_get_raw()
+        return self._raw
+    @raw.setter
+    def raw(self, raw):
+        self._raw = raw
+class EEGDashBaseRaw(BaseRaw):
+    r"""MNE Raw object from EEG-Dash connection with Openneuro S3 file.
     Parameters
     ----------
@@ -40,6 +138,7 @@ class RawEEGDash(BaseRaw):
     .. versionadded:: 0.11.0
     """
+    AWS_BUCKET = 's3://openneuro.org'
     def __init__(
         self,
         input_fname,
@@ -48,6 +147,7 @@ class RawEEGDash(BaseRaw):
         preload=False,
         *,
         cache_dir='./.eegdash_cache',
+        bids_dependencies:list = [],
         uint16_codec=None,
         montage_units="auto",
         verbose=None,
@@ -66,9 +166,10 @@ class RawEEGDash(BaseRaw):
                 chtype = 'eog'
             ch_types.append(chtype)
         info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
-        self.s3file = input_fname
-        os.makedirs(cache_dir, exist_ok=True)
-        self.filecache = os.path.join(cache_dir, os.path.basename(self.s3file))
+        self.s3file = self.get_s3path(input_fname)
+        self.cache_dir = Path(cache_dir)
+        self.filecache = self.cache_dir / input_fname
+        self.bids_dependencies = bids_dependencies
         if preload and not os.path.exists(self.filecache):
             self._download_s3()
@@ -82,17 +183,30 @@ class RawEEGDash(BaseRaw):
             verbose=verbose,
         )
+    def get_s3path(self, filepath):
+        return f"{self.AWS_BUCKET}/{filepath}"
     def _download_s3(self):
+        self.filecache.parent.mkdir(parents=True, exist_ok=True)
         filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
-        print('s3file', self.s3file)
-        print('filecache', self.filecache)
         filesystem.download(self.s3file, self.filecache)
         self.filenames = [self.filecache]
+    def _download_dependencies(self):
+        filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
+        for dep in self.bids_dependencies:
+            s3path = self.get_s3path(dep)
+            filepath = self.cache_dir / dep
+            if not filepath.exists():
+                filepath.parent.mkdir(parents=True, exist_ok=True)
+                filesystem.download(s3path, filepath)
     def _read_segment(
         self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
     ):
         if not os.path.exists(self.filecache): # not preload
+            if self.bids_dependencies:
+                self._download_dependencies()
             self._download_s3()
         else: # not preload and file is not cached
             self.filenames = [self.filecache]
@@ -103,38 +217,53 @@ class RawEEGDash(BaseRaw):
         _read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
-class BIDSDataset():
+class EEGBIDSDataset():
     ALLOWED_FILE_FORMAT = ['eeglab', 'brainvision', 'biosemi', 'european']
-    RAW_EXTENSION = {
-        'eeglab': '.set',
-        'brainvision': '.vhdr',
-        'biosemi': '.bdf',
-        'european': '.edf'
-    }
+    RAW_EXTENSIONS = {
+            '.set': ['.set', '.fdt'], # eeglab
+            '.edf': ['.edf'], # european
+            '.vhdr': ['.eeg', '.vhdr', '.vmrk', '.dat', '.raw'], # brainvision
+            '.bdf': ['.bdf'], # biosemi
+        }
     METADATA_FILE_EXTENSIONS = ['eeg.json', 'channels.tsv', 'electrodes.tsv', 'events.tsv', 'events.json']
     def __init__(self,
             data_dir=None,                            # location of bids dataset
             dataset='',                               # dataset name
-            raw_format='eeglab',                      # format of raw data
         ):
         if data_dir is None or not os.path.exists(data_dir):
             raise ValueError('data_dir must be specified and must exist')
         self.bidsdir = Path(data_dir)
         self.dataset = dataset
-        if raw_format.lower() not in self.ALLOWED_FILE_FORMAT:
-            raise ValueError('raw_format must be one of {}'.format(self.ALLOWED_FILE_FORMAT))
-        self.raw_format = raw_format.lower()
-        # get all .set files in the bids directory
-        temp_dir = (Path().resolve() / 'data')
-        if not os.path.exists(temp_dir):
-            os.mkdir(temp_dir)
-        if not os.path.exists(temp_dir / f'{dataset}_files.npy'):
-            self.files = self.get_files_with_extension_parallel(self.bidsdir, extension=self.RAW_EXTENSION[self.raw_format])
-            np.save(temp_dir / f'{dataset}_files.npy', self.files)
-        else:
-            self.files = np.load(temp_dir / f'{dataset}_files.npy', allow_pickle=True)
+        assert str(self.bidsdir).endswith(self.dataset)
+        self.layout = BIDSLayout(data_dir)
+        # get all recording files in the bids directory
+        self.files = self.get_recordings(self.layout)
+        assert len(self.files) > 0, ValueError('Unable to construct EEG dataset. No EEG recordings found.')
+        assert self.check_eeg_dataset(), ValueError('Dataset is not an EEG dataset.')
+        # temp_dir = (Path().resolve() / 'data')
+        # if not os.path.exists(temp_dir):
+        #     os.mkdir(temp_dir)
+        # if not os.path.exists(temp_dir / f'{dataset}_files.npy'):
+        #     self.files = self.get_files_with_extension_parallel(self.bidsdir, extension=self.RAW_EXTENSION[self.raw_format])
+        #     np.save(temp_dir / f'{dataset}_files.npy', self.files)
+        # else:
+        #     self.files = np.load(temp_dir / f'{dataset}_files.npy', allow_pickle=True)
+    def check_eeg_dataset(self):
+        return self.get_bids_file_attribute('modality', self.files[0]).lower() == 'eeg'
+    def get_recordings(self, layout:BIDSLayout):
+        files = []
+        for ext, exts in self.RAW_EXTENSIONS.items():
+            files = layout.get(extension=ext, return_type='filename')
+            if files:
+                break
+        return files
+    def get_relative_bidspath(self, filename):
+        bids_parent_dir = self.bidsdir.parent
+        return str(Path(filename).relative_to(bids_parent_dir))
     def get_property_from_filename(self, property, filename):
         import platform
@@ -177,8 +306,9 @@ class BIDSDataset():
         for file in os.listdir(path):
             # target_file = path / f"{cur_file_basename}_{extension}"
             if os.path.isfile(path/file):
-                cur_file_basename = file[:file.rfind('_')] # TODO: change to just search for any file with extension
-                if file.endswith(extension) and cur_file_basename in basename:
+                # check if file has extension extension
+                # check if file basename has extension
+                if file.endswith(extension):
                     filepath = path / file
                     bids_files.append(filepath)
@@ -210,14 +340,11 @@ class BIDSDataset():
         basename = filename[:filename.rfind('_')]
         # metadata files
         meta_files = self.get_bids_file_inheritance(path, basename, metadata_file_extension)
-        if not meta_files:
-            raise ValueError('No metadata files found for filepath {filepath} and extension {metadata_file_extension}')
-        else:
-            return meta_files
+        return meta_files
     def scan_directory(self, directory, extension):
         result_files = []
-        directory_to_ignore = ['.git']
+        directory_to_ignore = ['.git', '.datalad', 'derivatives', 'code']
         with os.scandir(directory) as entries:
             for entry in entries:
                 if entry.is_file() and entry.name.endswith(extension):
@@ -298,32 +425,22 @@ class BIDSDataset():
                 json_dict.update(json.load(f))
         return json_dict
-    def sfreq(self, data_filepath):
-        json_files = self.get_bids_metadata_files(data_filepath, 'eeg.json')
-        if len(json_files) == 0:
-            raise ValueError('No eeg.json found')
-        metadata = self.resolve_bids_json(json_files)
-        if 'SamplingFrequency' not in metadata:
-            raise ValueError('SamplingFrequency not found in metadata')
-        else:
-            return metadata['SamplingFrequency']
-    def task(self, data_filepath):
-        return self.get_property_from_filename('task', data_filepath)
-    def session(self, data_filepath):
-        return self.get_property_from_filename('session', data_filepath)
-    def run(self, data_filepath):
-        return self.get_property_from_filename('run', data_filepath)
-    def subject(self, data_filepath):
-        return self.get_property_from_filename('sub', data_filepath)
-    def num_channels(self, data_filepath):
-        channels_tsv = pd.read_csv(self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0], sep='\t')
-        return len(channels_tsv)
+    def get_bids_file_attribute(self, attribute, data_filepath):
+        entities = self.layout.parse_file_entities(data_filepath)
+        bidsfile = self.layout.get(**entities)[0]
+        attributes = bidsfile.get_entities(metadata='all')
+        attribute_mapping = {
+            'sfreq': 'SamplingFrequency',
+            'modality': 'datatype',
+            'task': 'task',
+            'session': 'session',
+            'run': 'run',
+            'subject': 'subject',
+            'ntimes': 'RecordingDuration',
+            'nchans': 'EEGChannelCount'
+        }
+        attribute_value = attributes.get(attribute_mapping.get(attribute), None)
+        return attribute_value
     def channel_labels(self, data_filepath):
         channels_tsv = pd.read_csv(self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0], sep='\t')
@@ -336,4 +453,28 @@ class BIDSDataset():
     def num_times(self, data_filepath):
         eeg_jsons = self.get_bids_metadata_files(data_filepath, 'eeg.json')
         eeg_json_dict = self.merge_json_inheritance(eeg_jsons)
-        return int(eeg_json_dict['SamplingFrequency'] * eeg_json_dict['RecordingDuration'])
+        return int(eeg_json_dict['SamplingFrequency'] * eeg_json_dict['RecordingDuration'])
+    def subject_participant_tsv(self, data_filepath):
+        '''Get participants_tsv info of a subject based on filepath'''
+        participants_tsv = pd.read_csv(self.get_bids_metadata_files(data_filepath, 'participants.tsv')[0], sep='\t')
+        # if participants_tsv is not empty
+        if participants_tsv.empty:
+            return {}
+        # set 'participant_id' as index
+        participants_tsv.set_index('participant_id', inplace=True)
+        subject = f"sub-{self.get_bids_file_attribute('subject', data_filepath)}"
+        return participants_tsv.loc[subject].to_dict()
+    def eeg_json(self, data_filepath):
+        eeg_jsons = self.get_bids_metadata_files(data_filepath, 'eeg.json')
+        eeg_json_dict = self.merge_json_inheritance(eeg_jsons)
+        return eeg_json_dict
+    def channel_tsv(self, data_filepath):
+        channels_tsv = pd.read_csv(self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0], sep='\t')
+        channel_tsv = channels_tsv.to_dict()
+        # 'name' and 'type' now have a dictionary of index-value. Convert them to list
+        for list_field in ['name', 'type', 'units']:
+            channel_tsv[list_field] = list(channel_tsv[list_field].values())
+        return channel_tsv

eegdash/features/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+# Features datasets
+from .datasets import FeaturesDataset, FeaturesConcatDataset
+from .serialization import load_features_concat_dataset
+# Feature extraction
+from .extractors import (
+    FeatureExtractor,
+    FitableFeature,
+    UnivariateFeature,
+    BivariateFeature,
+    DirectedBivariateFeature,
+    MultivariateFeature,
+)
+from .decorators import (
+    FeaturePredecessor,
+    FeatureKind,
+    univariate_feature,
+    bivariate_feature,
+    directed_bivariate_feature,
+    multivariate_feature,
+)
+from .utils import extract_features, fit_feature_extractors
+# Features:
+from .feature_bank import *

eegdash 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

Potentially problematic release.

eegdash 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl