eegdash 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash might be problematic. Click here for more details.

eegdash/__init__.py CHANGED
@@ -1 +1 @@
1
- from .main import EEGDash
1
+ from .main import EEGDash, EEGDashDataset
eegdash/data_config.py ADDED
@@ -0,0 +1,28 @@
1
# Field names listed as required.
_REQUIRED_FIELDS = ["data_name"]

# Attribute name -> type name, spelled as a string.
_ATTRIBUTE_TYPES = {
    "data_name": "str",
    "dataset": "str",
    "bidspath": "str",
    "subject": "str",
    "task": "str",
    "session": "str",
    "run": "str",
    "sampling_frequency": "float",
    "modality": "str",
    "nchans": "int",
    "ntimes": "int",
}

# Field names listed as description fields.
_DESCRIPTION_FIELDS = ["subject", "session", "run", "task", "age", "gender", "sex"]

# File-name suffixes listed as BIDS dependency files.
_BIDS_DEPENDENCY_FILES = [
    "dataset_description.json",
    "participants.tsv",
    "events.tsv",
    "events.json",
    "eeg.json",
    "electrodes.tsv",
    "channels.tsv",
    "coordsystem.json",
]

# Field names listed as accepted query fields.
_ACCEPTED_QUERY_FIELDS = ["data_name", "dataset"]

# Package-wide data configuration, assembled from the pieces above.
config = {
    "required_fields": _REQUIRED_FIELDS,
    "attributes": _ATTRIBUTE_TYPES,
    "description_fields": _DESCRIPTION_FIELDS,
    "bids_dependencies_files": _BIDS_DEPENDENCY_FILES,
    "accepted_query_fields": _ACCEPTED_QUERY_FIELDS,
}
eegdash/data_utils.py CHANGED
@@ -12,9 +12,107 @@ from mne._fiff.utils import _find_channels, _read_segments_file
12
12
  import s3fs
13
13
  import tempfile
14
14
  from mne._fiff.utils import _read_segments_file
15
+ from braindecode.datasets import BaseDataset
16
+ import mne_bids
17
+ from mne_bids import (
18
+ BIDSPath,
19
+ )
20
+ from bids import BIDSLayout
15
21
 
16
- class RawEEGDash(BaseRaw):
17
- r"""Raw object from EEG-Dash connection with Openneuro S3 file.
22
class EEGDashBaseDataset(BaseDataset):
    """Dataset for one EEG recording that is fetched from S3 on first use.

    Nothing is downloaded at construction time. The first access to
    ``raw`` downloads the recording (and the files listed as its BIDS
    dependencies) from ``AWS_BUCKET`` into ``cache_dir`` when the recording
    is not cached yet, then reads it with ``mne_bids.read_raw_bids``.

    Parameters
    ----------
    record : dict
        Metadata of one recording. ``'dataset'`` and ``'bidspath'`` are
        required. ``'subject'``, ``'session'``, ``'task'`` and ``'run'`` are
        used to build the BIDS path when present and non-empty.
        ``'bidsdependencies'`` is an optional list of bucket-relative paths.
        ``'ntimes'`` and ``'sampling_frequency'`` are read by ``__len__``
        while the recording has not been loaded.
    cache_dir : str | pathlib.Path
        Local directory under which downloaded files are stored, mirroring
        their bucket-relative paths.
    **kwargs
        Forwarded unchanged to ``BaseDataset.__init__``.
    """

    AWS_BUCKET = 's3://openneuro.org'

    def __init__(self, record, cache_dir, **kwargs):
        # The base class receives no raw object; it is loaded lazily.
        super().__init__(None, **kwargs)
        self.record = record
        self.cache_dir = Path(cache_dir)
        bids_kwargs = self.get_raw_bids_args()
        self.bidspath = BIDSPath(
            root=self.cache_dir / record['dataset'],
            datatype='eeg',
            suffix='eeg',
            **bids_kwargs,
        )
        self.s3file = self.get_s3path(record['bidspath'])
        self.filecache = self.cache_dir / record['bidspath']
        # A record without dependencies simply has nothing extra to fetch.
        self.bids_dependencies = record.get('bidsdependencies', [])
        self._raw = None

    @staticmethod
    def _s3_filesystem():
        """Return an anonymous S3 filesystem handle for region us-east-2."""
        return s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})

    def get_s3path(self, filepath):
        """Return ``filepath`` prefixed with ``AWS_BUCKET`` and a slash."""
        return f"{self.AWS_BUCKET}/{filepath}"

    def _download_s3(self):
        """Download the recording to ``self.filecache`` and record it in ``self.filenames``."""
        self.filecache.parent.mkdir(parents=True, exist_ok=True)
        self._s3_filesystem().download(self.s3file, self.filecache)
        self.filenames = [self.filecache]

    def _download_dependencies(self):
        """Download each BIDS dependency that is not already in the cache."""
        filesystem = self._s3_filesystem()
        for dep in self.bids_dependencies:
            filepath = self.cache_dir / dep
            if filepath.exists():
                continue
            filepath.parent.mkdir(parents=True, exist_ok=True)
            filesystem.download(self.get_s3path(dep), filepath)

    def get_raw_bids_args(self):
        """Return the BIDS entities of the record that have a truthy value.

        Entities that are missing from the record are skipped instead of
        raising ``KeyError``.
        """
        desired_fields = ['subject', 'session', 'task', 'run']
        return {k: self.record[k] for k in desired_fields if self.record.get(k)}

    def check_and_get_raw(self):
        """Make sure the recording is cached locally and loaded into ``self._raw``."""
        if not os.path.exists(self.filecache):  # recording not cached yet
            if self.bids_dependencies:
                self._download_dependencies()
            self._download_s3()
        if self._raw is None:
            self._raw = mne_bids.read_raw_bids(self.bidspath, verbose=False)

    def __getitem__(self, index):
        """Return ``(X, y)`` for time index ``index``.

        ``X`` is the data of all channels at ``index`` (after ``transform``
        when one is set); ``y`` is looked up in ``description`` through
        ``target_name``, or is ``None`` when no target name is set.
        """
        X = self.raw[:, index][0]
        y = None
        if self.target_name is not None:
            y = self.description[self.target_name]
        if isinstance(y, pd.Series):
            y = y.to_list()
        if self.transform is not None:
            X = self.transform(X)
        return X, y

    def __len__(self):
        """Return the length of the loaded recording, or a metadata-based value.

        While nothing is loaded the value is
        ``record['ntimes'] * record['sampling_frequency']`` truncated to int.
        """
        if self._raw is None:
            # NOTE(review): presumably 'ntimes' holds a duration in seconds
            # here -- confirm against the code that writes the records.
            return int(self.record['ntimes'] * self.record['sampling_frequency'])
        return len(self._raw)

    @property
    def raw(self):
        """The recording, downloaded and read on first access."""
        if self._raw is None:
            self.check_and_get_raw()
        return self._raw

    @raw.setter
    def raw(self, raw):
        self._raw = raw
113
+
114
+ class EEGDashBaseRaw(BaseRaw):
115
+ r"""MNE Raw object from EEG-Dash connection with Openneuro S3 file.
18
116
 
19
117
  Parameters
20
118
  ----------
@@ -40,6 +138,7 @@ class RawEEGDash(BaseRaw):
40
138
  .. versionadded:: 0.11.0
41
139
  """
42
140
 
141
+ AWS_BUCKET = 's3://openneuro.org'
43
142
  def __init__(
44
143
  self,
45
144
  input_fname,
@@ -48,6 +147,7 @@ class RawEEGDash(BaseRaw):
48
147
  preload=False,
49
148
  *,
50
149
  cache_dir='./.eegdash_cache',
150
+ bids_dependencies:list = [],
51
151
  uint16_codec=None,
52
152
  montage_units="auto",
53
153
  verbose=None,
@@ -66,9 +166,10 @@ class RawEEGDash(BaseRaw):
66
166
  chtype = 'eog'
67
167
  ch_types.append(chtype)
68
168
  info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
69
- self.s3file = input_fname
70
- os.makedirs(cache_dir, exist_ok=True)
71
- self.filecache = os.path.join(cache_dir, os.path.basename(self.s3file))
169
+ self.s3file = self.get_s3path(input_fname)
170
+ self.cache_dir = Path(cache_dir)
171
+ self.filecache = self.cache_dir / input_fname
172
+ self.bids_dependencies = bids_dependencies
72
173
 
73
174
  if preload and not os.path.exists(self.filecache):
74
175
  self._download_s3()
@@ -82,17 +183,30 @@ class RawEEGDash(BaseRaw):
82
183
  verbose=verbose,
83
184
  )
84
185
 
186
def get_s3path(self, filepath):
    """Return ``filepath`` prefixed with ``self.AWS_BUCKET`` and a slash."""
    bucket = self.AWS_BUCKET
    return f"{bucket}/{filepath}"
188
+
85
189
  def _download_s3(self):
190
+ self.filecache.parent.mkdir(parents=True, exist_ok=True)
86
191
  filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
87
- print('s3file', self.s3file)
88
- print('filecache', self.filecache)
89
192
  filesystem.download(self.s3file, self.filecache)
90
193
  self.filenames = [self.filecache]
91
194
 
195
def _download_dependencies(self):
    """Download every entry of ``self.bids_dependencies`` missing from ``self.cache_dir``.

    Each dependency is stored under ``self.cache_dir`` at its own relative
    path; entries whose local copy already exists are left untouched.
    """
    fs = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
    for dependency in self.bids_dependencies:
        remote = self.get_s3path(dependency)
        local_copy = self.cache_dir / dependency
        if local_copy.exists():
            continue
        local_copy.parent.mkdir(parents=True, exist_ok=True)
        fs.download(remote, local_copy)
203
+
92
204
  def _read_segment(
93
205
  self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
94
206
  ):
95
207
  if not os.path.exists(self.filecache): # not preload
208
+ if self.bids_dependencies:
209
+ self._download_dependencies()
96
210
  self._download_s3()
97
211
  else: # not preload and file is not cached
98
212
  self.filenames = [self.filecache]
@@ -103,38 +217,53 @@ class RawEEGDash(BaseRaw):
103
217
  _read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
104
218
 
105
219
 
106
- class BIDSDataset():
220
+ class EEGBIDSDataset():
107
221
  ALLOWED_FILE_FORMAT = ['eeglab', 'brainvision', 'biosemi', 'european']
108
- RAW_EXTENSION = {
109
- 'eeglab': '.set',
110
- 'brainvision': '.vhdr',
111
- 'biosemi': '.bdf',
112
- 'european': '.edf'
113
- }
222
+ RAW_EXTENSIONS = {
223
+ '.set': ['.set', '.fdt'], # eeglab
224
+ '.edf': ['.edf'], # european
225
+ '.vhdr': ['.eeg', '.vhdr', '.vmrk', '.dat', '.raw'], # brainvision
226
+ '.bdf': ['.bdf'], # biosemi
227
+ }
114
228
  METADATA_FILE_EXTENSIONS = ['eeg.json', 'channels.tsv', 'electrodes.tsv', 'events.tsv', 'events.json']
115
229
  def __init__(self,
116
230
  data_dir=None, # location of bids dataset
117
231
  dataset='', # dataset name
118
- raw_format='eeglab', # format of raw data
119
232
  ):
120
233
  if data_dir is None or not os.path.exists(data_dir):
121
234
  raise ValueError('data_dir must be specified and must exist')
122
235
  self.bidsdir = Path(data_dir)
123
236
  self.dataset = dataset
124
-
125
- if raw_format.lower() not in self.ALLOWED_FILE_FORMAT:
126
- raise ValueError('raw_format must be one of {}'.format(self.ALLOWED_FILE_FORMAT))
127
- self.raw_format = raw_format.lower()
128
-
129
- # get all .set files in the bids directory
130
- temp_dir = (Path().resolve() / 'data')
131
- if not os.path.exists(temp_dir):
132
- os.mkdir(temp_dir)
133
- if not os.path.exists(temp_dir / f'{dataset}_files.npy'):
134
- self.files = self.get_files_with_extension_parallel(self.bidsdir, extension=self.RAW_EXTENSION[self.raw_format])
135
- np.save(temp_dir / f'{dataset}_files.npy', self.files)
136
- else:
137
- self.files = np.load(temp_dir / f'{dataset}_files.npy', allow_pickle=True)
237
+ assert str(self.bidsdir).endswith(self.dataset)
238
+ self.layout = BIDSLayout(data_dir)
239
+
240
+ # get all recording files in the bids directory
241
+ self.files = self.get_recordings(self.layout)
242
+ assert len(self.files) > 0, ValueError('Unable to construct EEG dataset. No EEG recordings found.')
243
+ assert self.check_eeg_dataset(), ValueError('Dataset is not an EEG dataset.')
244
+ # temp_dir = (Path().resolve() / 'data')
245
+ # if not os.path.exists(temp_dir):
246
+ # os.mkdir(temp_dir)
247
+ # if not os.path.exists(temp_dir / f'{dataset}_files.npy'):
248
+ # self.files = self.get_files_with_extension_parallel(self.bidsdir, extension=self.RAW_EXTENSION[self.raw_format])
249
+ # np.save(temp_dir / f'{dataset}_files.npy', self.files)
250
+ # else:
251
+ # self.files = np.load(temp_dir / f'{dataset}_files.npy', allow_pickle=True)
252
+
253
def check_eeg_dataset(self):
    """Return True when the first recording's modality is 'eeg' (case-insensitive).

    The modality is read through ``get_bids_file_attribute``. When that
    lookup yields no string (for example ``None``), the dataset is reported
    as not being EEG instead of failing on ``.lower()``.
    """
    modality = self.get_bids_file_attribute('modality', self.files[0])
    return isinstance(modality, str) and modality.lower() == 'eeg'
255
+
256
def get_recordings(self, layout: "BIDSLayout"):
    """Return the recording files of the first raw format found in ``layout``.

    The keys of ``RAW_EXTENSIONS`` are tried in their declared order; the
    file names of the first extension that yields any match are returned.

    Parameters
    ----------
    layout : BIDSLayout
        Layout queried with ``get(extension=..., return_type='filename')``.

    Returns
    -------
    list
        Matching file names, or an empty list when no extension matches.
    """
    # Only the primary extension (the key) is queried; the companion
    # extensions stored as values are not needed here.
    for ext in self.RAW_EXTENSIONS:
        found = layout.get(extension=ext, return_type='filename')
        if found:
            return found
    return []
263
+
264
def get_relative_bidspath(self, filename):
    """Return ``filename`` as a string, relative to the parent of ``self.bidsdir``."""
    dataset_parent = self.bidsdir.parent
    relative = Path(filename).relative_to(dataset_parent)
    return str(relative)
138
267
 
139
268
  def get_property_from_filename(self, property, filename):
140
269
  import platform
@@ -177,8 +306,9 @@ class BIDSDataset():
177
306
  for file in os.listdir(path):
178
307
  # target_file = path / f"{cur_file_basename}_{extension}"
179
308
  if os.path.isfile(path/file):
180
- cur_file_basename = file[:file.rfind('_')] # TODO: change to just search for any file with extension
181
- if file.endswith(extension) and cur_file_basename in basename:
309
+ # check if file has extension extension
310
+ # check if file basename has extension
311
+ if file.endswith(extension):
182
312
  filepath = path / file
183
313
  bids_files.append(filepath)
184
314
 
@@ -210,14 +340,11 @@ class BIDSDataset():
210
340
  basename = filename[:filename.rfind('_')]
211
341
  # metadata files
212
342
  meta_files = self.get_bids_file_inheritance(path, basename, metadata_file_extension)
213
- if not meta_files:
214
- raise ValueError('No metadata files found for filepath {filepath} and extension {metadata_file_extension}')
215
- else:
216
- return meta_files
343
+ return meta_files
217
344
 
218
345
  def scan_directory(self, directory, extension):
219
346
  result_files = []
220
- directory_to_ignore = ['.git']
347
+ directory_to_ignore = ['.git', '.datalad', 'derivatives', 'code']
221
348
  with os.scandir(directory) as entries:
222
349
  for entry in entries:
223
350
  if entry.is_file() and entry.name.endswith(extension):
@@ -298,32 +425,22 @@ class BIDSDataset():
298
425
  json_dict.update(json.load(f))
299
426
  return json_dict
300
427
 
301
- def sfreq(self, data_filepath):
302
- json_files = self.get_bids_metadata_files(data_filepath, 'eeg.json')
303
- if len(json_files) == 0:
304
- raise ValueError('No eeg.json found')
305
-
306
- metadata = self.resolve_bids_json(json_files)
307
- if 'SamplingFrequency' not in metadata:
308
- raise ValueError('SamplingFrequency not found in metadata')
309
- else:
310
- return metadata['SamplingFrequency']
311
-
312
- def task(self, data_filepath):
313
- return self.get_property_from_filename('task', data_filepath)
314
-
315
- def session(self, data_filepath):
316
- return self.get_property_from_filename('session', data_filepath)
317
-
318
- def run(self, data_filepath):
319
- return self.get_property_from_filename('run', data_filepath)
320
-
321
- def subject(self, data_filepath):
322
- return self.get_property_from_filename('sub', data_filepath)
323
-
324
- def num_channels(self, data_filepath):
325
- channels_tsv = pd.read_csv(self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0], sep='\t')
326
- return len(channels_tsv)
428
def get_bids_file_attribute(self, attribute, data_filepath):
    """Look up one attribute of a recording through the BIDS layout.

    Parameters
    ----------
    attribute : str
        One of ``'sfreq'``, ``'modality'``, ``'task'``, ``'session'``,
        ``'run'``, ``'subject'``, ``'ntimes'`` or ``'nchans'``.
    data_filepath : str | pathlib.Path
        Path of the recording; its entities are parsed by the layout.

    Returns
    -------
    object | None
        The value stored under the mapped entity/metadata key, or ``None``
        when the attribute name is unknown, the layout has no file matching
        the parsed entities, or the key is absent.
    """
    # Public attribute name -> key in the layout's entity/metadata dict.
    attribute_mapping = {
        'sfreq': 'SamplingFrequency',
        'modality': 'datatype',
        'task': 'task',
        'session': 'session',
        'run': 'run',
        'subject': 'subject',
        'ntimes': 'RecordingDuration',
        'nchans': 'EEGChannelCount'
    }
    entity_key = attribute_mapping.get(attribute)
    if entity_key is None:
        # Unknown attribute: nothing to look up, skip the layout query.
        return None

    entities = self.layout.parse_file_entities(data_filepath)
    matches = self.layout.get(**entities)
    if not matches:
        # No indexed file for these entities; report "no value" instead of
        # failing on an empty result list.
        return None
    attributes = matches[0].get_entities(metadata='all')
    return attributes.get(entity_key, None)
327
444
 
328
445
  def channel_labels(self, data_filepath):
329
446
  channels_tsv = pd.read_csv(self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0], sep='\t')
@@ -336,4 +453,28 @@ class BIDSDataset():
336
453
  def num_times(self, data_filepath):
337
454
  eeg_jsons = self.get_bids_metadata_files(data_filepath, 'eeg.json')
338
455
  eeg_json_dict = self.merge_json_inheritance(eeg_jsons)
339
- return int(eeg_json_dict['SamplingFrequency'] * eeg_json_dict['RecordingDuration'])
456
+ return int(eeg_json_dict['SamplingFrequency'] * eeg_json_dict['RecordingDuration'])
457
+
458
def subject_participant_tsv(self, data_filepath):
    """Get the participants.tsv row of the subject a file belongs to.

    Parameters
    ----------
    data_filepath : str | pathlib.Path
        Path of a recording inside the dataset.

    Returns
    -------
    dict
        Column name -> value for the row whose ``participant_id`` is
        ``sub-<subject>``. An empty dict is returned when no
        participants.tsv is found, when the table has no rows, or when the
        subject is not listed.
    """
    participants_files = self.get_bids_metadata_files(data_filepath, 'participants.tsv')
    if not participants_files:
        # The metadata lookup may come back empty; treat it like an empty table.
        return {}
    participants_tsv = pd.read_csv(participants_files[0], sep='\t')
    if participants_tsv.empty:
        return {}
    # Index rows by participant id so the subject can be looked up directly.
    participants_tsv = participants_tsv.set_index('participant_id')
    subject = f"sub-{self.get_bids_file_attribute('subject', data_filepath)}"
    if subject not in participants_tsv.index:
        return {}
    return participants_tsv.loc[subject].to_dict()
468
+
469
def eeg_json(self, data_filepath):
    """Return the merged content of the eeg.json files found for ``data_filepath``."""
    sidecars = self.get_bids_metadata_files(data_filepath, 'eeg.json')
    return self.merge_json_inheritance(sidecars)
473
+
474
def channel_tsv(self, data_filepath):
    """Return the first channels.tsv found for ``data_filepath`` as a dict.

    The table is converted with ``DataFrame.to_dict()``, so each column maps
    row index -> value, except ``'name'``, ``'type'`` and ``'units'``, which
    are flattened to plain lists of values.
    """
    tsv_path = self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0]
    table = pd.read_csv(tsv_path, sep='\t').to_dict()
    # Replace the index->value mapping of the three list columns by their values.
    for column in ('name', 'type', 'units'):
        table[column] = [*table[column].values()]
    return table
@@ -0,0 +1,25 @@
1
+ # Features datasets
2
+ from .datasets import FeaturesDataset, FeaturesConcatDataset
3
+ from .serialization import load_features_concat_dataset
4
+
5
+ # Feature extraction
6
+ from .extractors import (
7
+ FeatureExtractor,
8
+ FitableFeature,
9
+ UnivariateFeature,
10
+ BivariateFeature,
11
+ DirectedBivariateFeature,
12
+ MultivariateFeature,
13
+ )
14
+ from .decorators import (
15
+ FeaturePredecessor,
16
+ FeatureKind,
17
+ univariate_feature,
18
+ bivariate_feature,
19
+ directed_bivariate_feature,
20
+ multivariate_feature,
21
+ )
22
+ from .utils import extract_features, fit_feature_extractors
23
+
24
+ # Features:
25
+ from .feature_bank import *