eegdash 0.0.9__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic. Click here for more details.
- eegdash/__init__.py +4 -1
- eegdash/data_config.py +27 -27
- eegdash/data_utils.py +164 -118
- eegdash/features/__init__.py +14 -14
- eegdash/features/datasets.py +6 -3
- eegdash/features/decorators.py +4 -4
- eegdash/features/extractors.py +2 -1
- eegdash/features/feature_bank/__init__.py +3 -3
- eegdash/features/feature_bank/complexity.py +2 -3
- eegdash/features/feature_bank/connectivity.py +16 -56
- eegdash/features/feature_bank/csp.py +2 -3
- eegdash/features/feature_bank/dimensionality.py +1 -2
- eegdash/features/feature_bank/signal.py +1 -1
- eegdash/features/feature_bank/spectral.py +10 -28
- eegdash/features/feature_bank/utils.py +48 -0
- eegdash/features/serialization.py +2 -2
- eegdash/features/utils.py +8 -6
- eegdash/main.py +189 -132
- {eegdash-0.0.9.dist-info → eegdash-0.1.0.dist-info}/METADATA +22 -18
- eegdash-0.1.0.dist-info/RECORD +23 -0
- {eegdash-0.0.9.dist-info → eegdash-0.1.0.dist-info}/WHEEL +1 -1
- eegdash-0.0.9.dist-info/RECORD +0 -22
- {eegdash-0.0.9.dist-info → eegdash-0.1.0.dist-info}/licenses/LICENSE +0 -0
- {eegdash-0.0.9.dist-info → eegdash-0.1.0.dist-info}/top_level.txt +0 -0
eegdash/__init__.py
CHANGED
eegdash/data_config.py
CHANGED
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
config = {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
}
|
|
2
|
+
"required_fields": ["data_name"],
|
|
3
|
+
"attributes": {
|
|
4
|
+
"data_name": "str",
|
|
5
|
+
"dataset": "str",
|
|
6
|
+
"bidspath": "str",
|
|
7
|
+
"subject": "str",
|
|
8
|
+
"task": "str",
|
|
9
|
+
"session": "str",
|
|
10
|
+
"run": "str",
|
|
11
|
+
"sampling_frequency": "float",
|
|
12
|
+
"modality": "str",
|
|
13
|
+
"nchans": "int",
|
|
14
|
+
"ntimes": "int",
|
|
15
|
+
},
|
|
16
|
+
"description_fields": ["subject", "session", "run", "task", "age", "gender", "sex"],
|
|
17
|
+
"bids_dependencies_files": [
|
|
18
|
+
"dataset_description.json",
|
|
19
|
+
"participants.tsv",
|
|
20
|
+
"events.tsv",
|
|
21
|
+
"events.json",
|
|
22
|
+
"eeg.json",
|
|
23
|
+
"electrodes.tsv",
|
|
24
|
+
"channels.tsv",
|
|
25
|
+
"coordsystem.json",
|
|
26
|
+
],
|
|
27
|
+
"accepted_query_fields": ["data_name", "dataset"],
|
|
28
|
+
}
|
eegdash/data_utils.py
CHANGED
|
@@ -1,23 +1,25 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import os
|
|
2
|
-
import
|
|
3
|
-
|
|
3
|
+
import re
|
|
4
|
+
import sys
|
|
5
|
+
import tempfile
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
4
8
|
import mne
|
|
9
|
+
import mne_bids
|
|
5
10
|
import numpy as np
|
|
6
11
|
import pandas as pd
|
|
7
|
-
from pathlib import Path
|
|
8
|
-
import re
|
|
9
|
-
import json
|
|
10
|
-
from mne.io import BaseRaw
|
|
11
|
-
from mne._fiff.utils import _find_channels, _read_segments_file
|
|
12
12
|
import s3fs
|
|
13
|
-
import
|
|
14
|
-
from
|
|
15
|
-
from
|
|
16
|
-
import
|
|
13
|
+
from bids import BIDSLayout
|
|
14
|
+
from joblib import Parallel, delayed
|
|
15
|
+
from mne._fiff.utils import _find_channels, _read_segments_file
|
|
16
|
+
from mne.io import BaseRaw
|
|
17
17
|
from mne_bids import (
|
|
18
18
|
BIDSPath,
|
|
19
19
|
)
|
|
20
|
-
|
|
20
|
+
|
|
21
|
+
from braindecode.datasets import BaseDataset
|
|
22
|
+
|
|
21
23
|
|
|
22
24
|
class EEGDashBaseDataset(BaseDataset):
|
|
23
25
|
"""Returns samples from an mne.io.Raw object along with a target.
|
|
@@ -38,16 +40,23 @@ class EEGDashBaseDataset(BaseDataset):
|
|
|
38
40
|
transform : callable | None
|
|
39
41
|
On-the-fly transform applied to the example before it is returned.
|
|
40
42
|
"""
|
|
41
|
-
|
|
43
|
+
|
|
44
|
+
AWS_BUCKET = "s3://openneuro.org"
|
|
45
|
+
|
|
42
46
|
def __init__(self, record, cache_dir, **kwargs):
|
|
43
47
|
super().__init__(None, **kwargs)
|
|
44
48
|
self.record = record
|
|
45
49
|
self.cache_dir = Path(cache_dir)
|
|
46
50
|
bids_kwargs = self.get_raw_bids_args()
|
|
47
|
-
self.bidspath = BIDSPath(
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
+
self.bidspath = BIDSPath(
|
|
52
|
+
root=self.cache_dir / record["dataset"],
|
|
53
|
+
datatype="eeg",
|
|
54
|
+
suffix="eeg",
|
|
55
|
+
**bids_kwargs,
|
|
56
|
+
)
|
|
57
|
+
self.s3file = self.get_s3path(record["bidspath"])
|
|
58
|
+
self.filecache = self.cache_dir / record["bidspath"]
|
|
59
|
+
self.bids_dependencies = record["bidsdependencies"]
|
|
51
60
|
self._raw = None
|
|
52
61
|
# if os.path.exists(self.filecache):
|
|
53
62
|
# self.raw = mne_bids.read_raw_bids(self.bidspath, verbose=False)
|
|
@@ -57,25 +66,29 @@ class EEGDashBaseDataset(BaseDataset):
|
|
|
57
66
|
|
|
58
67
|
def _download_s3(self):
|
|
59
68
|
self.filecache.parent.mkdir(parents=True, exist_ok=True)
|
|
60
|
-
filesystem = s3fs.S3FileSystem(
|
|
69
|
+
filesystem = s3fs.S3FileSystem(
|
|
70
|
+
anon=True, client_kwargs={"region_name": "us-east-2"}
|
|
71
|
+
)
|
|
61
72
|
filesystem.download(self.s3file, self.filecache)
|
|
62
73
|
self.filenames = [self.filecache]
|
|
63
74
|
|
|
64
75
|
def _download_dependencies(self):
|
|
65
|
-
filesystem = s3fs.S3FileSystem(
|
|
76
|
+
filesystem = s3fs.S3FileSystem(
|
|
77
|
+
anon=True, client_kwargs={"region_name": "us-east-2"}
|
|
78
|
+
)
|
|
66
79
|
for dep in self.bids_dependencies:
|
|
67
80
|
s3path = self.get_s3path(dep)
|
|
68
81
|
filepath = self.cache_dir / dep
|
|
69
82
|
if not filepath.exists():
|
|
70
83
|
filepath.parent.mkdir(parents=True, exist_ok=True)
|
|
71
|
-
filesystem.download(s3path, filepath)
|
|
84
|
+
filesystem.download(s3path, filepath)
|
|
72
85
|
|
|
73
86
|
def get_raw_bids_args(self):
|
|
74
|
-
desired_fields = [
|
|
87
|
+
desired_fields = ["subject", "session", "task", "run"]
|
|
75
88
|
return {k: self.record[k] for k in desired_fields if self.record[k]}
|
|
76
89
|
|
|
77
90
|
def check_and_get_raw(self):
|
|
78
|
-
if not os.path.exists(self.filecache):
|
|
91
|
+
if not os.path.exists(self.filecache): # not preload
|
|
79
92
|
if self.bids_dependencies:
|
|
80
93
|
self._download_dependencies()
|
|
81
94
|
self._download_s3()
|
|
@@ -94,10 +107,10 @@ class EEGDashBaseDataset(BaseDataset):
|
|
|
94
107
|
if self.transform is not None:
|
|
95
108
|
X = self.transform(X)
|
|
96
109
|
return X, y
|
|
97
|
-
|
|
110
|
+
|
|
98
111
|
def __len__(self):
|
|
99
112
|
if self._raw is None:
|
|
100
|
-
return int(self.record[
|
|
113
|
+
return int(self.record["ntimes"] * self.record["sampling_frequency"])
|
|
101
114
|
else:
|
|
102
115
|
return len(self._raw)
|
|
103
116
|
|
|
@@ -111,6 +124,7 @@ class EEGDashBaseDataset(BaseDataset):
|
|
|
111
124
|
def raw(self, raw):
|
|
112
125
|
self._raw = raw
|
|
113
126
|
|
|
127
|
+
|
|
114
128
|
class EEGDashBaseRaw(BaseRaw):
|
|
115
129
|
r"""MNE Raw object from EEG-Dash connection with Openneuro S3 file.
|
|
116
130
|
|
|
@@ -138,7 +152,8 @@ class EEGDashBaseRaw(BaseRaw):
|
|
|
138
152
|
.. versionadded:: 0.11.0
|
|
139
153
|
"""
|
|
140
154
|
|
|
141
|
-
AWS_BUCKET =
|
|
155
|
+
AWS_BUCKET = "s3://openneuro.org"
|
|
156
|
+
|
|
142
157
|
def __init__(
|
|
143
158
|
self,
|
|
144
159
|
input_fname,
|
|
@@ -146,24 +161,24 @@ class EEGDashBaseRaw(BaseRaw):
|
|
|
146
161
|
eog=(),
|
|
147
162
|
preload=False,
|
|
148
163
|
*,
|
|
149
|
-
cache_dir=
|
|
150
|
-
bids_dependencies:list = [],
|
|
164
|
+
cache_dir="./.eegdash_cache",
|
|
165
|
+
bids_dependencies: list = [],
|
|
151
166
|
uint16_codec=None,
|
|
152
167
|
montage_units="auto",
|
|
153
168
|
verbose=None,
|
|
154
169
|
):
|
|
155
|
-
|
|
170
|
+
"""
|
|
156
171
|
Get to work with S3 endpoint first, no caching
|
|
157
|
-
|
|
172
|
+
"""
|
|
158
173
|
# Create a simple RawArray
|
|
159
|
-
sfreq = metadata[
|
|
160
|
-
n_times = metadata[
|
|
161
|
-
ch_names = metadata[
|
|
174
|
+
sfreq = metadata["sfreq"] # Sampling frequency
|
|
175
|
+
n_times = metadata["n_times"]
|
|
176
|
+
ch_names = metadata["ch_names"]
|
|
162
177
|
ch_types = []
|
|
163
|
-
for ch in metadata[
|
|
178
|
+
for ch in metadata["ch_types"]:
|
|
164
179
|
chtype = ch.lower()
|
|
165
|
-
if chtype ==
|
|
166
|
-
chtype =
|
|
180
|
+
if chtype == "heog" or chtype == "veog":
|
|
181
|
+
chtype = "eog"
|
|
167
182
|
ch_types.append(chtype)
|
|
168
183
|
info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
|
|
169
184
|
self.s3file = self.get_s3path(input_fname)
|
|
@@ -178,7 +193,7 @@ class EEGDashBaseRaw(BaseRaw):
|
|
|
178
193
|
super().__init__(
|
|
179
194
|
info,
|
|
180
195
|
preload,
|
|
181
|
-
last_samps=[n_times-1],
|
|
196
|
+
last_samps=[n_times - 1],
|
|
182
197
|
orig_format="single",
|
|
183
198
|
verbose=verbose,
|
|
184
199
|
)
|
|
@@ -188,12 +203,16 @@ class EEGDashBaseRaw(BaseRaw):
|
|
|
188
203
|
|
|
189
204
|
def _download_s3(self):
|
|
190
205
|
self.filecache.parent.mkdir(parents=True, exist_ok=True)
|
|
191
|
-
filesystem = s3fs.S3FileSystem(
|
|
206
|
+
filesystem = s3fs.S3FileSystem(
|
|
207
|
+
anon=True, client_kwargs={"region_name": "us-east-2"}
|
|
208
|
+
)
|
|
192
209
|
filesystem.download(self.s3file, self.filecache)
|
|
193
210
|
self.filenames = [self.filecache]
|
|
194
211
|
|
|
195
212
|
def _download_dependencies(self):
|
|
196
|
-
filesystem = s3fs.S3FileSystem(
|
|
213
|
+
filesystem = s3fs.S3FileSystem(
|
|
214
|
+
anon=True, client_kwargs={"region_name": "us-east-2"}
|
|
215
|
+
)
|
|
197
216
|
for dep in self.bids_dependencies:
|
|
198
217
|
s3path = self.get_s3path(dep)
|
|
199
218
|
filepath = self.cache_dir / dep
|
|
@@ -204,34 +223,42 @@ class EEGDashBaseRaw(BaseRaw):
|
|
|
204
223
|
def _read_segment(
|
|
205
224
|
self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
|
|
206
225
|
):
|
|
207
|
-
if not os.path.exists(self.filecache):
|
|
226
|
+
if not os.path.exists(self.filecache): # not preload
|
|
208
227
|
if self.bids_dependencies:
|
|
209
228
|
self._download_dependencies()
|
|
210
229
|
self._download_s3()
|
|
211
|
-
else:
|
|
230
|
+
else: # not preload and file is not cached
|
|
212
231
|
self.filenames = [self.filecache]
|
|
213
232
|
return super()._read_segment(start, stop, sel, data_buffer, verbose=verbose)
|
|
214
|
-
|
|
233
|
+
|
|
215
234
|
def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
|
|
216
235
|
"""Read a chunk of data from the file."""
|
|
217
236
|
_read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
|
|
218
237
|
|
|
219
238
|
|
|
220
|
-
class EEGBIDSDataset
|
|
221
|
-
ALLOWED_FILE_FORMAT = [
|
|
239
|
+
class EEGBIDSDataset:
|
|
240
|
+
ALLOWED_FILE_FORMAT = ["eeglab", "brainvision", "biosemi", "european"]
|
|
222
241
|
RAW_EXTENSIONS = {
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
METADATA_FILE_EXTENSIONS = [
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
242
|
+
".set": [".set", ".fdt"], # eeglab
|
|
243
|
+
".edf": [".edf"], # european
|
|
244
|
+
".vhdr": [".eeg", ".vhdr", ".vmrk", ".dat", ".raw"], # brainvision
|
|
245
|
+
".bdf": [".bdf"], # biosemi
|
|
246
|
+
}
|
|
247
|
+
METADATA_FILE_EXTENSIONS = [
|
|
248
|
+
"eeg.json",
|
|
249
|
+
"channels.tsv",
|
|
250
|
+
"electrodes.tsv",
|
|
251
|
+
"events.tsv",
|
|
252
|
+
"events.json",
|
|
253
|
+
]
|
|
254
|
+
|
|
255
|
+
def __init__(
|
|
256
|
+
self,
|
|
257
|
+
data_dir=None, # location of bids dataset
|
|
258
|
+
dataset="", # dataset name
|
|
259
|
+
):
|
|
233
260
|
if data_dir is None or not os.path.exists(data_dir):
|
|
234
|
-
raise ValueError(
|
|
261
|
+
raise ValueError("data_dir must be specified and must exist")
|
|
235
262
|
self.bidsdir = Path(data_dir)
|
|
236
263
|
self.dataset = dataset
|
|
237
264
|
assert str(self.bidsdir).endswith(self.dataset)
|
|
@@ -239,8 +266,10 @@ class EEGBIDSDataset():
|
|
|
239
266
|
|
|
240
267
|
# get all recording files in the bids directory
|
|
241
268
|
self.files = self.get_recordings(self.layout)
|
|
242
|
-
assert len(self.files) > 0, ValueError(
|
|
243
|
-
|
|
269
|
+
assert len(self.files) > 0, ValueError(
|
|
270
|
+
"Unable to construct EEG dataset. No EEG recordings found."
|
|
271
|
+
)
|
|
272
|
+
assert self.check_eeg_dataset(), ValueError("Dataset is not an EEG dataset.")
|
|
244
273
|
# temp_dir = (Path().resolve() / 'data')
|
|
245
274
|
# if not os.path.exists(temp_dir):
|
|
246
275
|
# os.mkdir(temp_dir)
|
|
@@ -251,34 +280,35 @@ class EEGBIDSDataset():
|
|
|
251
280
|
# self.files = np.load(temp_dir / f'{dataset}_files.npy', allow_pickle=True)
|
|
252
281
|
|
|
253
282
|
def check_eeg_dataset(self):
|
|
254
|
-
return self.get_bids_file_attribute(
|
|
283
|
+
return self.get_bids_file_attribute("modality", self.files[0]).lower() == "eeg"
|
|
255
284
|
|
|
256
|
-
def get_recordings(self, layout:BIDSLayout):
|
|
285
|
+
def get_recordings(self, layout: BIDSLayout):
|
|
257
286
|
files = []
|
|
258
287
|
for ext, exts in self.RAW_EXTENSIONS.items():
|
|
259
|
-
files = layout.get(extension=ext, return_type=
|
|
288
|
+
files = layout.get(extension=ext, return_type="filename")
|
|
260
289
|
if files:
|
|
261
|
-
break
|
|
290
|
+
break
|
|
262
291
|
return files
|
|
263
292
|
|
|
264
293
|
def get_relative_bidspath(self, filename):
|
|
265
|
-
bids_parent_dir = self.bidsdir.parent
|
|
294
|
+
bids_parent_dir = self.bidsdir.parent.absolute()
|
|
266
295
|
return str(Path(filename).relative_to(bids_parent_dir))
|
|
267
296
|
|
|
268
297
|
def get_property_from_filename(self, property, filename):
|
|
269
298
|
import platform
|
|
299
|
+
|
|
270
300
|
if platform.system() == "Windows":
|
|
271
|
-
lookup = re.search(rf
|
|
301
|
+
lookup = re.search(rf"{property}-(.*?)[_\\]", filename)
|
|
272
302
|
else:
|
|
273
|
-
lookup = re.search(rf
|
|
274
|
-
return lookup.group(1) if lookup else
|
|
303
|
+
lookup = re.search(rf"{property}-(.*?)[_\/]", filename)
|
|
304
|
+
return lookup.group(1) if lookup else ""
|
|
275
305
|
|
|
276
306
|
def merge_json_inheritance(self, json_files):
|
|
277
|
-
|
|
307
|
+
"""
|
|
278
308
|
Merge list of json files found by get_bids_file_inheritance,
|
|
279
309
|
expecting the order (from left to right) is from lowest level to highest level,
|
|
280
310
|
and return a merged dictionary
|
|
281
|
-
|
|
311
|
+
"""
|
|
282
312
|
json_files.reverse()
|
|
283
313
|
json_dict = {}
|
|
284
314
|
for f in json_files:
|
|
@@ -286,26 +316,26 @@ class EEGBIDSDataset():
|
|
|
286
316
|
return json_dict
|
|
287
317
|
|
|
288
318
|
def get_bids_file_inheritance(self, path, basename, extension):
|
|
289
|
-
|
|
290
|
-
Get all files with given extension that applies to the basename file
|
|
319
|
+
"""
|
|
320
|
+
Get all files with given extension that applies to the basename file
|
|
291
321
|
following the BIDS inheritance principle in the order of lowest level first
|
|
292
322
|
@param
|
|
293
323
|
basename: bids file basename without _eeg.set extension for example
|
|
294
324
|
extension: e.g. channels.tsv
|
|
295
|
-
|
|
296
|
-
top_level_files = [
|
|
325
|
+
"""
|
|
326
|
+
top_level_files = ["README", "dataset_description.json", "participants.tsv"]
|
|
297
327
|
bids_files = []
|
|
298
328
|
|
|
299
329
|
# check if path is str object
|
|
300
330
|
if isinstance(path, str):
|
|
301
331
|
path = Path(path)
|
|
302
332
|
if not path.exists:
|
|
303
|
-
raise ValueError(
|
|
333
|
+
raise ValueError("path {path} does not exist")
|
|
304
334
|
|
|
305
335
|
# check if file is in current path
|
|
306
336
|
for file in os.listdir(path):
|
|
307
337
|
# target_file = path / f"{cur_file_basename}_{extension}"
|
|
308
|
-
if os.path.isfile(path/file):
|
|
338
|
+
if os.path.isfile(path / file):
|
|
309
339
|
# check if file has extension extension
|
|
310
340
|
# check if file basename has extension
|
|
311
341
|
if file.endswith(extension):
|
|
@@ -317,38 +347,42 @@ class EEGBIDSDataset():
|
|
|
317
347
|
return bids_files
|
|
318
348
|
else:
|
|
319
349
|
# call get_bids_file_inheritance recursively with parent directory
|
|
320
|
-
bids_files.extend(
|
|
350
|
+
bids_files.extend(
|
|
351
|
+
self.get_bids_file_inheritance(path.parent, basename, extension)
|
|
352
|
+
)
|
|
321
353
|
return bids_files
|
|
322
354
|
|
|
323
355
|
def get_bids_metadata_files(self, filepath, metadata_file_extension):
|
|
324
356
|
"""
|
|
325
357
|
(Wrapper for self.get_bids_file_inheritance)
|
|
326
358
|
Get all BIDS metadata files that are associated with the given filepath, following the BIDS inheritance principle.
|
|
327
|
-
|
|
359
|
+
|
|
328
360
|
Args:
|
|
329
361
|
filepath (str or Path): The filepath to get the associated metadata files for.
|
|
330
362
|
metadata_files_extensions (list): A list of file extensions to search for metadata files.
|
|
331
|
-
|
|
363
|
+
|
|
332
364
|
Returns:
|
|
333
365
|
list: A list of filepaths for all the associated metadata files
|
|
334
366
|
"""
|
|
335
367
|
if isinstance(filepath, str):
|
|
336
368
|
filepath = Path(filepath)
|
|
337
369
|
if not filepath.exists:
|
|
338
|
-
raise ValueError(
|
|
370
|
+
raise ValueError("filepath {filepath} does not exist")
|
|
339
371
|
path, filename = os.path.split(filepath)
|
|
340
|
-
basename = filename[:filename.rfind(
|
|
372
|
+
basename = filename[: filename.rfind("_")]
|
|
341
373
|
# metadata files
|
|
342
|
-
meta_files = self.get_bids_file_inheritance(
|
|
374
|
+
meta_files = self.get_bids_file_inheritance(
|
|
375
|
+
path, basename, metadata_file_extension
|
|
376
|
+
)
|
|
343
377
|
return meta_files
|
|
344
|
-
|
|
378
|
+
|
|
345
379
|
def scan_directory(self, directory, extension):
|
|
346
380
|
result_files = []
|
|
347
|
-
directory_to_ignore = [
|
|
381
|
+
directory_to_ignore = [".git", ".datalad", "derivatives", "code"]
|
|
348
382
|
with os.scandir(directory) as entries:
|
|
349
383
|
for entry in entries:
|
|
350
384
|
if entry.is_file() and entry.name.endswith(extension):
|
|
351
|
-
print(
|
|
385
|
+
print("Adding ", entry.path)
|
|
352
386
|
result_files.append(entry.path)
|
|
353
387
|
elif entry.is_dir():
|
|
354
388
|
# check that entry path doesn't contain any name in ignore list
|
|
@@ -356,7 +390,9 @@ class EEGBIDSDataset():
|
|
|
356
390
|
result_files.append(entry.path) # Add directory to scan later
|
|
357
391
|
return result_files
|
|
358
392
|
|
|
359
|
-
def get_files_with_extension_parallel(
|
|
393
|
+
def get_files_with_extension_parallel(
|
|
394
|
+
self, directory, extension=".set", max_workers=-1
|
|
395
|
+
):
|
|
360
396
|
result_files = []
|
|
361
397
|
dirs_to_scan = [directory]
|
|
362
398
|
|
|
@@ -367,7 +403,7 @@ class EEGBIDSDataset():
|
|
|
367
403
|
results = Parallel(n_jobs=max_workers, prefer="threads", verbose=1)(
|
|
368
404
|
delayed(self.scan_directory)(d, extension) for d in dirs_to_scan
|
|
369
405
|
)
|
|
370
|
-
|
|
406
|
+
|
|
371
407
|
# Reset the directories to scan and process the results
|
|
372
408
|
dirs_to_scan = []
|
|
373
409
|
for res in results:
|
|
@@ -382,8 +418,8 @@ class EEGBIDSDataset():
|
|
|
382
418
|
|
|
383
419
|
def load_and_preprocess_raw(self, raw_file, preprocess=False):
|
|
384
420
|
print(f"Loading {raw_file}")
|
|
385
|
-
EEG = mne.io.read_raw_eeglab(raw_file, preload=True, verbose=
|
|
386
|
-
|
|
421
|
+
EEG = mne.io.read_raw_eeglab(raw_file, preload=True, verbose="error")
|
|
422
|
+
|
|
387
423
|
if preprocess:
|
|
388
424
|
# highpass filter
|
|
389
425
|
EEG = EEG.filter(l_freq=0.25, h_freq=25, verbose=False)
|
|
@@ -391,7 +427,7 @@ class EEGBIDSDataset():
|
|
|
391
427
|
EEG = EEG.notch_filter(freqs=(60), verbose=False)
|
|
392
428
|
# bring to common sampling rate
|
|
393
429
|
sfreq = 128
|
|
394
|
-
if EEG.info[
|
|
430
|
+
if EEG.info["sfreq"] != sfreq:
|
|
395
431
|
EEG = EEG.resample(sfreq)
|
|
396
432
|
# # normalize data to zero mean and unit variance
|
|
397
433
|
# scalar = preprocessing.StandardScaler()
|
|
@@ -400,12 +436,12 @@ class EEGBIDSDataset():
|
|
|
400
436
|
mat_data = EEG.get_data()
|
|
401
437
|
|
|
402
438
|
if len(mat_data.shape) > 2:
|
|
403
|
-
raise ValueError(
|
|
439
|
+
raise ValueError("Expect raw data to be CxT dimension")
|
|
404
440
|
return mat_data
|
|
405
|
-
|
|
441
|
+
|
|
406
442
|
def get_files(self):
|
|
407
443
|
return self.files
|
|
408
|
-
|
|
444
|
+
|
|
409
445
|
def resolve_bids_json(self, json_files: list):
|
|
410
446
|
"""
|
|
411
447
|
Resolve the BIDS JSON files and return a dictionary of the resolved values.
|
|
@@ -416,8 +452,8 @@ class EEGBIDSDataset():
|
|
|
416
452
|
dict: A dictionary of the resolved values.
|
|
417
453
|
"""
|
|
418
454
|
if len(json_files) == 0:
|
|
419
|
-
raise ValueError(
|
|
420
|
-
json_files.reverse()
|
|
455
|
+
raise ValueError("No JSON files provided")
|
|
456
|
+
json_files.reverse() # TODO undeterministic
|
|
421
457
|
|
|
422
458
|
json_dict = {}
|
|
423
459
|
for json_file in json_files:
|
|
@@ -428,53 +464,63 @@ class EEGBIDSDataset():
|
|
|
428
464
|
def get_bids_file_attribute(self, attribute, data_filepath):
|
|
429
465
|
entities = self.layout.parse_file_entities(data_filepath)
|
|
430
466
|
bidsfile = self.layout.get(**entities)[0]
|
|
431
|
-
attributes = bidsfile.get_entities(metadata=
|
|
467
|
+
attributes = bidsfile.get_entities(metadata="all")
|
|
432
468
|
attribute_mapping = {
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
469
|
+
"sfreq": "SamplingFrequency",
|
|
470
|
+
"modality": "datatype",
|
|
471
|
+
"task": "task",
|
|
472
|
+
"session": "session",
|
|
473
|
+
"run": "run",
|
|
474
|
+
"subject": "subject",
|
|
475
|
+
"ntimes": "RecordingDuration",
|
|
476
|
+
"nchans": "EEGChannelCount",
|
|
441
477
|
}
|
|
442
478
|
attribute_value = attributes.get(attribute_mapping.get(attribute), None)
|
|
443
479
|
return attribute_value
|
|
444
480
|
|
|
445
481
|
def channel_labels(self, data_filepath):
|
|
446
|
-
channels_tsv = pd.read_csv(
|
|
447
|
-
|
|
448
|
-
|
|
482
|
+
channels_tsv = pd.read_csv(
|
|
483
|
+
self.get_bids_metadata_files(data_filepath, "channels.tsv")[0], sep="\t"
|
|
484
|
+
)
|
|
485
|
+
return channels_tsv["name"].tolist()
|
|
486
|
+
|
|
449
487
|
def channel_types(self, data_filepath):
|
|
450
|
-
channels_tsv = pd.read_csv(
|
|
451
|
-
|
|
452
|
-
|
|
488
|
+
channels_tsv = pd.read_csv(
|
|
489
|
+
self.get_bids_metadata_files(data_filepath, "channels.tsv")[0], sep="\t"
|
|
490
|
+
)
|
|
491
|
+
return channels_tsv["type"].tolist()
|
|
492
|
+
|
|
453
493
|
def num_times(self, data_filepath):
|
|
454
|
-
eeg_jsons = self.get_bids_metadata_files(data_filepath,
|
|
494
|
+
eeg_jsons = self.get_bids_metadata_files(data_filepath, "eeg.json")
|
|
455
495
|
eeg_json_dict = self.merge_json_inheritance(eeg_jsons)
|
|
456
|
-
return int(
|
|
457
|
-
|
|
496
|
+
return int(
|
|
497
|
+
eeg_json_dict["SamplingFrequency"] * eeg_json_dict["RecordingDuration"]
|
|
498
|
+
)
|
|
499
|
+
|
|
458
500
|
def subject_participant_tsv(self, data_filepath):
|
|
459
|
-
|
|
460
|
-
participants_tsv = pd.read_csv(
|
|
501
|
+
"""Get participants_tsv info of a subject based on filepath"""
|
|
502
|
+
participants_tsv = pd.read_csv(
|
|
503
|
+
self.get_bids_metadata_files(data_filepath, "participants.tsv")[0], sep="\t"
|
|
504
|
+
)
|
|
461
505
|
# if participants_tsv is not empty
|
|
462
506
|
if participants_tsv.empty:
|
|
463
507
|
return {}
|
|
464
508
|
# set 'participant_id' as index
|
|
465
|
-
participants_tsv.set_index(
|
|
509
|
+
participants_tsv.set_index("participant_id", inplace=True)
|
|
466
510
|
subject = f"sub-{self.get_bids_file_attribute('subject', data_filepath)}"
|
|
467
511
|
return participants_tsv.loc[subject].to_dict()
|
|
468
|
-
|
|
512
|
+
|
|
469
513
|
def eeg_json(self, data_filepath):
|
|
470
|
-
eeg_jsons = self.get_bids_metadata_files(data_filepath,
|
|
514
|
+
eeg_jsons = self.get_bids_metadata_files(data_filepath, "eeg.json")
|
|
471
515
|
eeg_json_dict = self.merge_json_inheritance(eeg_jsons)
|
|
472
516
|
return eeg_json_dict
|
|
473
|
-
|
|
517
|
+
|
|
474
518
|
def channel_tsv(self, data_filepath):
|
|
475
|
-
channels_tsv = pd.read_csv(
|
|
519
|
+
channels_tsv = pd.read_csv(
|
|
520
|
+
self.get_bids_metadata_files(data_filepath, "channels.tsv")[0], sep="\t"
|
|
521
|
+
)
|
|
476
522
|
channel_tsv = channels_tsv.to_dict()
|
|
477
523
|
# 'name' and 'type' now have a dictionary of index-value. Convert them to list
|
|
478
|
-
for list_field in [
|
|
524
|
+
for list_field in ["name", "type", "units"]:
|
|
479
525
|
channel_tsv[list_field] = list(channel_tsv[list_field].values())
|
|
480
|
-
return channel_tsv
|
|
526
|
+
return channel_tsv
|
eegdash/features/__init__.py
CHANGED
|
@@ -1,25 +1,25 @@
|
|
|
1
1
|
# Features datasets
|
|
2
|
-
from .datasets import
|
|
3
|
-
from .
|
|
2
|
+
from .datasets import FeaturesConcatDataset, FeaturesDataset
|
|
3
|
+
from .decorators import (
|
|
4
|
+
FeatureKind,
|
|
5
|
+
FeaturePredecessor,
|
|
6
|
+
bivariate_feature,
|
|
7
|
+
directed_bivariate_feature,
|
|
8
|
+
multivariate_feature,
|
|
9
|
+
univariate_feature,
|
|
10
|
+
)
|
|
4
11
|
|
|
5
12
|
# Feature extraction
|
|
6
13
|
from .extractors import (
|
|
7
|
-
FeatureExtractor,
|
|
8
|
-
FitableFeature,
|
|
9
|
-
UnivariateFeature,
|
|
10
14
|
BivariateFeature,
|
|
11
15
|
DirectedBivariateFeature,
|
|
16
|
+
FeatureExtractor,
|
|
17
|
+
FitableFeature,
|
|
12
18
|
MultivariateFeature,
|
|
19
|
+
UnivariateFeature,
|
|
13
20
|
)
|
|
14
|
-
from .decorators import (
|
|
15
|
-
FeaturePredecessor,
|
|
16
|
-
FeatureKind,
|
|
17
|
-
univariate_feature,
|
|
18
|
-
bivariate_feature,
|
|
19
|
-
directed_bivariate_feature,
|
|
20
|
-
multivariate_feature,
|
|
21
|
-
)
|
|
22
|
-
from .utils import extract_features, fit_feature_extractors
|
|
23
21
|
|
|
24
22
|
# Features:
|
|
25
23
|
from .feature_bank import *
|
|
24
|
+
from .serialization import load_features_concat_dataset
|
|
25
|
+
from .utils import extract_features, fit_feature_extractors
|