eegdash 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic. Click here for more details.
- eegdash/__init__.py +1 -1
- eegdash/data_utils.py +156 -14
- eegdash/main.py +149 -37
- eegdash-0.0.8.dist-info/METADATA +157 -0
- eegdash-0.0.8.dist-info/RECORD +8 -0
- {eegdash-0.0.6.dist-info → eegdash-0.0.8.dist-info}/WHEEL +1 -1
- eegdash-0.0.6.dist-info/METADATA +0 -147
- eegdash-0.0.6.dist-info/RECORD +0 -8
- {eegdash-0.0.6.dist-info → eegdash-0.0.8.dist-info/licenses}/LICENSE +0 -0
- {eegdash-0.0.6.dist-info → eegdash-0.0.8.dist-info}/top_level.txt +0 -0
eegdash/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
from .main import EEGDash
|
|
1
|
+
from .main import EEGDash, EEGDashDataset
|
eegdash/data_utils.py
CHANGED
|
@@ -12,9 +12,106 @@ from mne._fiff.utils import _find_channels, _read_segments_file
|
|
|
12
12
|
import s3fs
|
|
13
13
|
import tempfile
|
|
14
14
|
from mne._fiff.utils import _read_segments_file
|
|
15
|
+
from braindecode.datasets import BaseDataset
|
|
16
|
+
import mne_bids
|
|
17
|
+
from mne_bids import (
|
|
18
|
+
BIDSPath,
|
|
19
|
+
)
|
|
15
20
|
|
|
16
|
-
class
|
|
17
|
-
|
|
21
|
+
class EEGDashBaseDataset(BaseDataset):
    """Lazily loaded dataset backed by a single EEGDash record.

    Nothing is downloaded when the object is created. The recording and its
    BIDS dependency files are fetched from the OpenNeuro S3 bucket into
    ``cache_dir`` the first time the :attr:`raw` property is accessed, and
    the recording is then read with ``mne_bids.read_raw_bids``.

    Parameters
    ----------
    record : dict
        EEGDash record describing one recording. The keys ``dataset``,
        ``bidspath`` and ``bidsdependencies`` are required. ``subject``,
        ``session``, ``task`` and ``run`` are forwarded to ``BIDSPath`` when
        present and truthy.
    cache_dir : str | pathlib.Path
        Directory under which downloaded files are stored.
    **kwargs
        Forwarded unchanged to ``BaseDataset.__init__`` (the callers in this
        package pass ``description``; ``target_name`` and ``transform`` are
        read by ``__getitem__``).
    """
    AWS_BUCKET = 's3://openneuro.org'

    def __init__(self, record, cache_dir, **kwargs):
        # No raw object exists yet; it is loaded lazily via the `raw` property.
        super().__init__(None, **kwargs)
        self.record = record
        self.cache_dir = Path(cache_dir)
        bids_kwargs = self.get_raw_bids_args()
        self.bidspath = BIDSPath(root=self.cache_dir / record['dataset'], datatype='eeg', suffix='eeg', **bids_kwargs)
        self.s3file = self.get_s3path(record['bidspath'])
        self.filecache = self.cache_dir / record['bidspath']
        self.bids_dependencies = record['bidsdependencies']
        self._raw = None

    def get_s3path(self, filepath):
        """Return the S3 URI of ``filepath`` inside the OpenNeuro bucket."""
        return f"{self.AWS_BUCKET}/{filepath}"

    def _download_s3(self):
        """Download the recording itself into the local cache."""
        self.filecache.parent.mkdir(parents=True, exist_ok=True)
        filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
        filesystem.download(self.s3file, self.filecache)
        self.filenames = [self.filecache]

    def _download_dependencies(self):
        """Download every BIDS dependency file that is not cached yet."""
        filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
        for dep in self.bids_dependencies:
            s3path = self.get_s3path(dep)
            filepath = self.cache_dir / dep
            if not filepath.exists():
                filepath.parent.mkdir(parents=True, exist_ok=True)
                filesystem.download(s3path, filepath)

    def get_raw_bids_args(self):
        """Return the BIDS entities of the record that have a truthy value.

        Entities that are absent from the record are skipped instead of
        raising ``KeyError``.
        """
        desired_fields = ('subject', 'session', 'task', 'run')
        return {k: self.record[k] for k in desired_fields if self.record.get(k)}

    def check_and_get_raw(self):
        """Make sure the recording is cached locally and loaded in memory."""
        if not os.path.exists(self.filecache):  # not downloaded yet
            if self.bids_dependencies:
                self._download_dependencies()
            self._download_s3()
        if self._raw is None:
            self._raw = mne_bids.read_raw_bids(self.bidspath, verbose=False)

    def __getitem__(self, index):
        """Return ``(X, y)`` for sample ``index``; ``y`` is ``None`` without a target."""
        # Accessing `self.raw` triggers the download/load on first use.
        X = self.raw[:, index][0]
        y = None
        if self.target_name is not None:
            y = self.description[self.target_name]
        if isinstance(y, pd.Series):
            y = y.to_list()
        if self.transform is not None:
            X = self.transform(X)
        return X, y

    def __len__(self):
        """Return the number of time points without forcing a download.

        While the raw data is not loaded the length comes from the record:
        ``record['rawdatainfo']['ntimes']`` when available, otherwise the
        top-level ``record['ntimes']`` (the layout produced when records are
        built from a local BIDS file).
        """
        if self._raw is not None:
            return len(self._raw)
        raw_info = self.record.get('rawdatainfo') or {}
        if 'ntimes' in raw_info:
            return raw_info['ntimes']
        return self.record['ntimes']

    @property
    def raw(self):
        """The ``mne`` raw object, downloaded and read on first access."""
        if self._raw is None:
            self.check_and_get_raw()
        return self._raw

    @raw.setter
    def raw(self, raw):
        self._raw = raw
|
|
112
|
+
|
|
113
|
+
class EEGDashBaseRaw(BaseRaw):
|
|
114
|
+
r"""MNE Raw object from EEG-Dash connection with Openneuro S3 file.
|
|
18
115
|
|
|
19
116
|
Parameters
|
|
20
117
|
----------
|
|
@@ -40,6 +137,7 @@ class RawEEGDash(BaseRaw):
|
|
|
40
137
|
.. versionadded:: 0.11.0
|
|
41
138
|
"""
|
|
42
139
|
|
|
140
|
+
AWS_BUCKET = 's3://openneuro.org'
|
|
43
141
|
def __init__(
|
|
44
142
|
self,
|
|
45
143
|
input_fname,
|
|
@@ -48,6 +146,7 @@ class RawEEGDash(BaseRaw):
|
|
|
48
146
|
preload=False,
|
|
49
147
|
*,
|
|
50
148
|
cache_dir='./.eegdash_cache',
|
|
149
|
+
bids_dependencies:list = [],
|
|
51
150
|
uint16_codec=None,
|
|
52
151
|
montage_units="auto",
|
|
53
152
|
verbose=None,
|
|
@@ -66,9 +165,10 @@ class RawEEGDash(BaseRaw):
|
|
|
66
165
|
chtype = 'eog'
|
|
67
166
|
ch_types.append(chtype)
|
|
68
167
|
info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
|
|
69
|
-
self.s3file = input_fname
|
|
70
|
-
|
|
71
|
-
self.filecache =
|
|
168
|
+
self.s3file = self.get_s3path(input_fname)
|
|
169
|
+
self.cache_dir = Path(cache_dir)
|
|
170
|
+
self.filecache = self.cache_dir / input_fname
|
|
171
|
+
self.bids_dependencies = bids_dependencies
|
|
72
172
|
|
|
73
173
|
if preload and not os.path.exists(self.filecache):
|
|
74
174
|
self._download_s3()
|
|
@@ -82,17 +182,30 @@ class RawEEGDash(BaseRaw):
|
|
|
82
182
|
verbose=verbose,
|
|
83
183
|
)
|
|
84
184
|
|
|
185
|
+
def get_s3path(self, filepath):
    """Build the S3 URI for ``filepath`` by prefixing the bucket address."""
    return "{}/{}".format(self.AWS_BUCKET, filepath)
|
|
187
|
+
|
|
85
188
|
def _download_s3(self):
|
|
189
|
+
self.filecache.parent.mkdir(parents=True, exist_ok=True)
|
|
86
190
|
filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
|
|
87
|
-
print('s3file', self.s3file)
|
|
88
|
-
print('filecache', self.filecache)
|
|
89
191
|
filesystem.download(self.s3file, self.filecache)
|
|
90
192
|
self.filenames = [self.filecache]
|
|
91
193
|
|
|
194
|
+
def _download_dependencies(self):
    """Fetch each BIDS dependency file that is missing from the cache."""
    s3 = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
    for relative_path in self.bids_dependencies:
        local_copy = self.cache_dir / relative_path
        if local_copy.exists():
            # Already cached; nothing to download.
            continue
        local_copy.parent.mkdir(parents=True, exist_ok=True)
        s3.download(self.get_s3path(relative_path), local_copy)
|
|
202
|
+
|
|
92
203
|
def _read_segment(
|
|
93
204
|
self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
|
|
94
205
|
):
|
|
95
206
|
if not os.path.exists(self.filecache): # not preload
|
|
207
|
+
if self.bids_dependencies:
|
|
208
|
+
self._download_dependencies()
|
|
96
209
|
self._download_s3()
|
|
97
210
|
else: # not preload and file is not cached
|
|
98
211
|
self.filenames = [self.filecache]
|
|
@@ -121,6 +234,7 @@ class BIDSDataset():
|
|
|
121
234
|
raise ValueError('data_dir must be specified and must exist')
|
|
122
235
|
self.bidsdir = Path(data_dir)
|
|
123
236
|
self.dataset = dataset
|
|
237
|
+
assert str(self.bidsdir).endswith(self.dataset)
|
|
124
238
|
|
|
125
239
|
if raw_format.lower() not in self.ALLOWED_FILE_FORMAT:
|
|
126
240
|
raise ValueError('raw_format must be one of {}'.format(self.ALLOWED_FILE_FORMAT))
|
|
@@ -136,6 +250,10 @@ class BIDSDataset():
|
|
|
136
250
|
else:
|
|
137
251
|
self.files = np.load(temp_dir / f'{dataset}_files.npy', allow_pickle=True)
|
|
138
252
|
|
|
253
|
+
def get_relative_bidspath(self, filename):
    """Return ``filename`` as a string relative to the parent of the BIDS directory.

    The result therefore starts with the BIDS directory's own name.
    """
    return str(Path(filename).relative_to(self.bidsdir.parent))
|
|
256
|
+
|
|
139
257
|
def get_property_from_filename(self, property, filename):
|
|
140
258
|
import platform
|
|
141
259
|
if platform.system() == "Windows":
|
|
@@ -177,11 +295,17 @@ class BIDSDataset():
|
|
|
177
295
|
for file in os.listdir(path):
|
|
178
296
|
# target_file = path / f"{cur_file_basename}_{extension}"
|
|
179
297
|
if os.path.isfile(path/file):
|
|
180
|
-
|
|
181
|
-
if file
|
|
298
|
+
# check if file has extension extension
|
|
299
|
+
# check if file basename has extension
|
|
300
|
+
if file.endswith(extension):
|
|
182
301
|
filepath = path / file
|
|
183
302
|
bids_files.append(filepath)
|
|
184
303
|
|
|
304
|
+
# cur_file_basename = file[:file.rfind('_')] # TODO: change to just search for any file with extension
|
|
305
|
+
# if file.endswith(extension) and cur_file_basename in basename:
|
|
306
|
+
# filepath = path / file
|
|
307
|
+
# bids_files.append(filepath)
|
|
308
|
+
|
|
185
309
|
# check if file is in top level directory
|
|
186
310
|
if any(file in os.listdir(path) for file in top_level_files):
|
|
187
311
|
return bids_files
|
|
@@ -210,10 +334,7 @@ class BIDSDataset():
|
|
|
210
334
|
basename = filename[:filename.rfind('_')]
|
|
211
335
|
# metadata files
|
|
212
336
|
meta_files = self.get_bids_file_inheritance(path, basename, metadata_file_extension)
|
|
213
|
-
|
|
214
|
-
raise ValueError('No metadata files found for filepath {filepath} and extension {metadata_file_extension}')
|
|
215
|
-
else:
|
|
216
|
-
return meta_files
|
|
337
|
+
return meta_files
|
|
217
338
|
|
|
218
339
|
def scan_directory(self, directory, extension):
|
|
219
340
|
result_files = []
|
|
@@ -336,4 +457,25 @@ class BIDSDataset():
|
|
|
336
457
|
def num_times(self, data_filepath):
|
|
337
458
|
eeg_jsons = self.get_bids_metadata_files(data_filepath, 'eeg.json')
|
|
338
459
|
eeg_json_dict = self.merge_json_inheritance(eeg_jsons)
|
|
339
|
-
return int(eeg_json_dict['SamplingFrequency'] * eeg_json_dict['RecordingDuration'])
|
|
460
|
+
return int(eeg_json_dict['SamplingFrequency'] * eeg_json_dict['RecordingDuration'])
|
|
461
|
+
|
|
462
|
+
def subject_participant_tsv(self, data_filepath):
    """Return the participants.tsv row of the file's subject as a dict."""
    tsv_path = self.get_bids_metadata_files(data_filepath, 'participants.tsv')[0]
    # Index the table by participant id so the subject's row can be looked up by label.
    table = pd.read_csv(tsv_path, sep='\t').set_index('participant_id')
    row_label = f'sub-{self.subject(data_filepath)}'
    return table.loc[row_label].to_dict()
|
|
469
|
+
|
|
470
|
+
def eeg_json(self, data_filepath):
    """Return the merged contents of the eeg.json files found for ``data_filepath``."""
    sidecar_files = self.get_bids_metadata_files(data_filepath, 'eeg.json')
    return self.merge_json_inheritance(sidecar_files)
|
|
474
|
+
|
|
475
|
+
def channel_tsv(self, data_filepath):
    """Return the first channels.tsv found for ``data_filepath`` as a dict.

    The ``name``, ``type`` and ``units`` columns are returned as plain lists;
    every other column keeps the index-to-value mapping from ``DataFrame.to_dict``.
    """
    tsv_path = self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0]
    columns = pd.read_csv(tsv_path, sep='\t').to_dict()
    for column_name in ('name', 'type', 'units'):
        by_index = columns[column_name]
        columns[column_name] = list(by_index.values())
    return columns
|
eegdash/main.py
CHANGED
|
@@ -1,13 +1,19 @@
|
|
|
1
|
+
from typing import List
|
|
1
2
|
import pymongo
|
|
2
3
|
from dotenv import load_dotenv
|
|
3
4
|
import os
|
|
5
|
+
from pathlib import Path
|
|
4
6
|
import s3fs
|
|
5
7
|
from joblib import Parallel, delayed
|
|
6
8
|
import tempfile
|
|
7
9
|
import mne
|
|
8
10
|
import numpy as np
|
|
9
11
|
import xarray as xr
|
|
10
|
-
from .data_utils import BIDSDataset
|
|
12
|
+
from .data_utils import BIDSDataset, EEGDashBaseRaw, EEGDashBaseDataset
|
|
13
|
+
from braindecode.datasets import BaseDataset, BaseConcatDataset
|
|
14
|
+
from collections import defaultdict
|
|
15
|
+
from pymongo import MongoClient, InsertOne, UpdateOne, DeleteOne
|
|
16
|
+
|
|
11
17
|
class EEGDash:
|
|
12
18
|
AWS_BUCKET = 's3://openneuro.org'
|
|
13
19
|
def __init__(self,
|
|
@@ -31,22 +37,15 @@ class EEGDash:
|
|
|
31
37
|
# convert to list using get_item on each element
|
|
32
38
|
return [result for result in results]
|
|
33
39
|
|
|
34
|
-
def exist(self,
|
|
40
|
+
def exist(self, data_name=''):
|
|
35
41
|
query = {
|
|
36
|
-
"schema_ref": schema_ref,
|
|
37
42
|
"data_name": data_name
|
|
38
43
|
}
|
|
39
44
|
sessions = self.find(query)
|
|
40
45
|
return len(sessions) > 0
|
|
41
46
|
|
|
42
|
-
def add(self, record:dict):
|
|
43
|
-
input_record = self._validate_input(record)
|
|
44
|
-
print(input_record)
|
|
45
|
-
self.__collection.insert_one(input_record)
|
|
46
|
-
|
|
47
47
|
def _validate_input(self, record:dict):
|
|
48
48
|
input_types = {
|
|
49
|
-
'schema_ref': str,
|
|
50
49
|
'data_name': str,
|
|
51
50
|
'dataset': str,
|
|
52
51
|
'bidspath': str,
|
|
@@ -61,7 +60,6 @@ class EEGDash:
|
|
|
61
60
|
'channel_types': list,
|
|
62
61
|
'channel_names': list,
|
|
63
62
|
}
|
|
64
|
-
record['schema_ref'] = 'eeg_signal'
|
|
65
63
|
if 'data_name' not in record:
|
|
66
64
|
raise ValueError("Missing key: data_name")
|
|
67
65
|
# check if args are in the keys and has correct type
|
|
@@ -117,21 +115,30 @@ class EEGDash:
|
|
|
117
115
|
# extract openneuro path by finding the first occurrence of the dataset name in the filename and remove the path before that
|
|
118
116
|
openneuro_path = dsnumber + bids_file.split(dsnumber)[1]
|
|
119
117
|
|
|
118
|
+
bids_dependencies_files = ['dataset_description.json', 'participants.tsv', 'events.tsv', 'events.json', 'eeg.json', 'electrodes.tsv', 'channels.tsv', 'coordsystem.json']
|
|
119
|
+
bidsdependencies = []
|
|
120
|
+
for extension in bids_dependencies_files:
|
|
121
|
+
dep_path = bids_dataset.get_bids_metadata_files(bids_file, extension)
|
|
122
|
+
dep_path = [str(bids_dataset.get_relative_bidspath(dep)) for dep in dep_path]
|
|
123
|
+
|
|
124
|
+
bidsdependencies.extend(dep_path)
|
|
125
|
+
|
|
126
|
+
participants_tsv = bids_dataset.subject_participant_tsv(bids_file)
|
|
127
|
+
eeg_json = bids_dataset.eeg_json(bids_file)
|
|
120
128
|
attrs = {
|
|
121
|
-
'schema_ref': 'eeg_signal',
|
|
122
129
|
'data_name': f'{bids_dataset.dataset}_{f}',
|
|
123
130
|
'dataset': bids_dataset.dataset,
|
|
124
131
|
'bidspath': openneuro_path,
|
|
125
132
|
'subject': bids_dataset.subject(bids_file),
|
|
126
|
-
'nchans': bids_dataset.num_channels(bids_file),
|
|
127
|
-
'ntimes': bids_dataset.num_times(bids_file),
|
|
128
|
-
'channel_types': bids_dataset.channel_types(bids_file),
|
|
129
|
-
'channel_names': bids_dataset.channel_labels(bids_file),
|
|
130
133
|
'task': bids_dataset.task(bids_file),
|
|
131
134
|
'session': bids_dataset.session(bids_file),
|
|
132
135
|
'run': bids_dataset.run(bids_file),
|
|
133
|
-
'sampling_frequency': bids_dataset.sfreq(bids_file),
|
|
134
136
|
'modality': 'EEG',
|
|
137
|
+
'nchans': bids_dataset.num_channels(bids_file),
|
|
138
|
+
'ntimes': bids_dataset.num_times(bids_file),
|
|
139
|
+
'participant_tsv': participants_tsv,
|
|
140
|
+
'eeg_json': eeg_json,
|
|
141
|
+
'bidsdependencies': bidsdependencies,
|
|
135
142
|
}
|
|
136
143
|
|
|
137
144
|
return attrs
|
|
@@ -148,27 +155,30 @@ class EEGDash:
|
|
|
148
155
|
dataset=dataset,
|
|
149
156
|
raw_format=raw_format,
|
|
150
157
|
)
|
|
158
|
+
requests = []
|
|
151
159
|
for bids_file in bids_dataset.get_files():
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
signalstore_data_id = f"{dataset}_{os.path.basename(bids_file)}"
|
|
160
|
+
try:
|
|
161
|
+
data_id = f"{dataset}_{os.path.basename(bids_file)}"
|
|
155
162
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
self.update(eeg_attrs)
|
|
163
|
+
if self.exist(data_name=data_id):
|
|
164
|
+
if overwrite:
|
|
165
|
+
eeg_attrs = self.load_eeg_attrs_from_bids_file(bids_dataset, bids_file)
|
|
166
|
+
requests.append(UpdateOne(self.update_request(eeg_attrs)))
|
|
161
167
|
else:
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
# Assume raw data already exists on Openneuro, recreating record only
|
|
167
|
-
print('adding record', eeg_attrs['data_name'])
|
|
168
|
-
self.add(eeg_attrs)
|
|
168
|
+
eeg_attrs = self.load_eeg_attrs_from_bids_file(bids_dataset, bids_file)
|
|
169
|
+
requests.append(self.add_request(eeg_attrs))
|
|
170
|
+
except:
|
|
171
|
+
print('error adding record', bids_file)
|
|
169
172
|
|
|
170
|
-
|
|
171
|
-
|
|
173
|
+
print('Number of database requests', len(requests))
|
|
174
|
+
|
|
175
|
+
if requests:
|
|
176
|
+
result = self.__collection.bulk_write(requests, ordered=False)
|
|
177
|
+
print(f"Inserted: {result.inserted_count}")
|
|
178
|
+
print(f"Modified: {result.modified_count}")
|
|
179
|
+
print(f"Deleted: {result.deleted_count}")
|
|
180
|
+
print(f"Upserted: {result.upserted_count}")
|
|
181
|
+
print(f"Errors: {result.bulk_api_result.get('writeErrors', [])}")
|
|
172
182
|
|
|
173
183
|
def get(self, query:dict):
|
|
174
184
|
'''
|
|
@@ -185,11 +195,113 @@ class EEGDash:
|
|
|
185
195
|
)
|
|
186
196
|
return results
|
|
187
197
|
|
|
198
|
+
def add_request(self, record:dict):
    """Wrap ``record`` in an ``InsertOne`` request without sending it."""
    insert_request = InsertOne(record)
    return insert_request
|
|
200
|
+
|
|
201
|
+
def add(self, record:dict):
    """Insert ``record`` into the collection, reporting failures instead of raising.

    This is best effort by design: a failure is printed together with its
    cause and the caller continues. Only ``Exception`` subclasses are
    handled, so ``KeyboardInterrupt`` and ``SystemExit`` still propagate.
    """
    # Look the name up defensively so that reporting a failure cannot itself
    # raise when the record has no 'data_name' or is not a dict.
    data_name = record.get('data_name', '<unknown>') if isinstance(record, dict) else '<unknown>'
    try:
        self.__collection.insert_one(record)
    except ValueError as e:
        print(f"Failed to validate record: {data_name}")
        print(e)
    except Exception as e:
        print(f"Error adding record: {data_name}")
        print(e)
|
|
211
|
+
|
|
212
|
+
def update_request(self, record:dict):
    """Build an ``UpdateOne`` request that sets every field of ``record``.

    The target document is selected by the record's ``data_name``.
    """
    selector = {'data_name': record['data_name']}
    changes = {'$set': record}
    return UpdateOne(selector, changes)
|
|
214
|
+
|
|
188
215
|
def update(self, record:dict):
    """Set the fields of ``record`` on the document with the same ``data_name``.

    This is best effort by design: a failure is printed together with its
    cause and the caller continues. Only ``Exception`` subclasses are
    handled, so ``KeyboardInterrupt`` and ``SystemExit`` still propagate.
    """
    try:
        self.__collection.update_one({'data_name': record['data_name']}, {'$set': record})
    except Exception as e:
        # A missing 'data_name' is one of the possible failures, so the name
        # must not be indexed directly while reporting.
        data_name = record.get('data_name', '<unknown>') if isinstance(record, dict) else '<unknown>'
        print(f'Error updating record {data_name}')
        print(e)
|
|
220
|
+
|
|
221
|
+
def remove_field(self, record, field):
    """Unset ``field`` on the document whose ``data_name`` matches ``record``."""
    selector = {'data_name': record['data_name']}
    self.__collection.update_one(selector, {'$unset': {field: 1}})
|
|
223
|
+
|
|
224
|
+
def remove_field_from_db(self, field):
    """Unset ``field`` on every document of the collection."""
    match_all = {}
    self.__collection.update_many(match_all, {'$unset': {field: 1}})
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class EEGDashDataset(BaseConcatDataset):
    """Concatenation of ``EEGDashBaseDataset`` objects, one per recording.

    The recordings come either from a database query or from one or more
    local BIDS directories.

    Parameters
    ----------
    query : dict | None
        Query passed to ``EEGDash.find``. Takes precedence over ``data_dir``.
    data_dir : str | os.PathLike | list | None
        A local BIDS directory, or a list of them.
    dataset : str | list | None
        Dataset name(s) matching ``data_dir``; a list must have the same
        length as ``data_dir``.
    description_fields : list of str
        Record keys (searched through nested dicts) copied into each
        dataset's ``description`` when their value is truthy.
    **kwargs
        Forwarded to every ``EEGDashBaseDataset``. For local BIDS
        directories, ``raw_format`` is consumed by ``load_bids_dataset``.

    Raises
    ------
    ValueError
        If neither ``query`` nor ``data_dir`` is given, or if ``data_dir``
        and ``dataset`` are sequences of different lengths.
    """
    CACHE_DIR = '.eegdash_cache'

    # Annotations are quoted so that `str | list` and `list[str]` are never
    # evaluated: evaluating them fails at import time on older interpreters.
    def __init__(
        self,
        query: dict = None,
        data_dir: "str | list" = None,
        dataset: "str | list" = None,
        description_fields: "list[str]" = ['subject', 'session', 'run', 'task', 'age', 'gender', 'sex'],
        **kwargs
    ):
        # Work on a copy so the shared default list can never be mutated.
        description_fields = list(description_fields)
        if query:
            datasets = self.find_datasets(query, description_fields, **kwargs)
        elif data_dir:
            if isinstance(data_dir, (str, os.PathLike)):
                datasets = self.load_bids_dataset(dataset, data_dir, description_fields, **kwargs)
            else:
                if len(data_dir) != len(dataset):
                    raise ValueError('Number of datasets and their directories must match')
                datasets = []
                for dataset_name, directory in zip(dataset, data_dir):
                    datasets.extend(self.load_bids_dataset(dataset_name, directory, description_fields, **kwargs))
        else:
            raise ValueError('Either query or data_dir must be specified')
        super().__init__(datasets)

    def find_key_in_nested_dict(self, data, target_key):
        """Return the first value stored under ``target_key`` in nested dicts.

        The search is depth first and only descends into dict values. Returns
        ``None`` when the key is absent; a key whose value is ``None`` is
        treated as absent in nested levels.
        """
        if isinstance(data, dict):
            if target_key in data:
                return data[target_key]
            for value in data.values():
                result = self.find_key_in_nested_dict(value, target_key)
                if result is not None:
                    return result
        return None

    def _build_description(self, record, description_fields):
        """Collect the truthy ``description_fields`` values found in ``record``."""
        description = {}
        for field in description_fields:
            value = self.find_key_in_nested_dict(record, field)
            if value:
                description[field] = value
        return description

    def find_datasets(self, query: dict, description_fields: "list[str]", **kwargs):
        """Return one ``EEGDashBaseDataset`` per record matching ``query``."""
        eegdashObj = EEGDash()
        datasets = []
        for record in eegdashObj.find(query):
            description = self._build_description(record, description_fields)
            datasets.append(EEGDashBaseDataset(record, self.CACHE_DIR, description=description, **kwargs))
        return datasets

    def load_bids_dataset(self, dataset, data_dir, description_fields: "list[str]", raw_format='eeglab', **kwargs):
        """Return one ``EEGDashBaseDataset`` per file of a local BIDS dataset.

        Records are built from the BIDS files with a thread pool.
        """
        bids_dataset = BIDSDataset(
            data_dir=data_dir,
            dataset=dataset,
            raw_format=raw_format,
        )
        eegdashObj = EEGDash()

        def get_base_dataset_from_bids_file(bids_file):
            record = eegdashObj.load_eeg_attrs_from_bids_file(bids_dataset, bids_file)
            description = self._build_description(record, description_fields)
            return EEGDashBaseDataset(record, self.CACHE_DIR, description=description, **kwargs)

        datasets = Parallel(n_jobs=-1, prefer="threads", verbose=1)(
            delayed(get_base_dataset_from_bids_file)(bids_file) for bids_file in bids_dataset.get_files()
        )
        return datasets
|
|
304
|
+
|
|
193
305
|
def main():
|
|
194
306
|
eegdash = EEGDash()
|
|
195
307
|
record = eegdash.find({'dataset': 'ds005511', 'subject': 'NDARUF236HM7'})
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: eegdash
|
|
3
|
+
Version: 0.0.8
|
|
4
|
+
Summary: EEG data for machine learning
|
|
5
|
+
Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>
|
|
6
|
+
License: GNU General Public License
|
|
7
|
+
|
|
8
|
+
Copyright (C) 2024-2025
|
|
9
|
+
|
|
10
|
+
Young Truong, UCSD, dt.young112@gmail.com
|
|
11
|
+
Arnaud Delorme, UCSD, adelorme@ucsd.edu
|
|
12
|
+
|
|
13
|
+
This program is free software; you can redistribute it and/or modify
|
|
14
|
+
it under the terms of the GNU General Public License as published by
|
|
15
|
+
the Free Software Foundation; either version 2 of the License, or
|
|
16
|
+
(at your option) any later version.
|
|
17
|
+
|
|
18
|
+
This program is distributed in the hope that it will be useful,
|
|
19
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
20
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
21
|
+
GNU General Public License for more details.
|
|
22
|
+
|
|
23
|
+
You should have received a copy of the GNU General Public License
|
|
24
|
+
along with this program; if not, write to the Free Software
|
|
25
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
26
|
+
|
|
27
|
+
Project-URL: Homepage, https://github.com/sccn/EEG-Dash-Data
|
|
28
|
+
Project-URL: Issues, https://github.com/sccn/EEG-Dash-Data/issues
|
|
29
|
+
Classifier: Programming Language :: Python :: 3
|
|
30
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
31
|
+
Classifier: Operating System :: OS Independent
|
|
32
|
+
Requires-Python: >=3.8
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
License-File: LICENSE
|
|
35
|
+
Requires-Dist: xarray
|
|
36
|
+
Requires-Dist: python-dotenv
|
|
37
|
+
Requires-Dist: s3fs
|
|
38
|
+
Requires-Dist: mne
|
|
39
|
+
Requires-Dist: pynwb
|
|
40
|
+
Requires-Dist: h5py
|
|
41
|
+
Requires-Dist: pymongo
|
|
42
|
+
Requires-Dist: joblib
|
|
43
|
+
Requires-Dist: braindecode
|
|
44
|
+
Requires-Dist: mne-bids
|
|
45
|
+
Dynamic: license-file
|
|
46
|
+
|
|
47
|
+
# EEG-Dash
|
|
48
|
+
To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
|
|
49
|
+
|
|
50
|
+
## Data source
|
|
51
|
+
The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
|
|
52
|
+
|
|
53
|
+
## Available data
|
|
54
|
+
|
|
55
|
+
The following datasets are currently available on EEGDash.
|
|
56
|
+
|
|
57
|
+
| DatasetID | Participants | Files | Sessions | Population | Channels | Is 10-20? | Modality | Size |
|
|
58
|
+
|---|---|---|---|---|---|---|---|---|
|
|
59
|
+
| [ds002181](https://nemar.org/dataexplorer/detail?dataset_id=ds002181) | 20 | 949 | 1 | Healthy | 63 | 10-20 | Visual | 0.163 GB |
|
|
60
|
+
| [ds002578](https://nemar.org/dataexplorer/detail?dataset_id=ds002578) | 2 | 22 | 1 | Healthy | 256 | 10-20 | Visual | 0.001 TB |
|
|
61
|
+
| [ds002680](https://nemar.org/dataexplorer/detail?dataset_id=ds002680) | 14 | 4977 | 2 | Healthy | 0 | 10-20 | Visual | 0.01 TB |
|
|
62
|
+
| [ds002691](https://nemar.org/dataexplorer/detail?dataset_id=ds002691) | 20 | 146 | 1 | Healthy | 32 | other | Visual | 0.001 TB |
|
|
63
|
+
| [ds002718](https://nemar.org/dataexplorer/detail?dataset_id=ds002718) | 18 | 582 | 1 | Healthy | 70 | other | Visual | 0.005 TB |
|
|
64
|
+
| [ds003061](https://nemar.org/dataexplorer/detail?dataset_id=ds003061) | 13 | 282 | 1 | Not specified | 64 | 10-20 | Auditory | 0.002 TB |
|
|
65
|
+
| [ds003690](https://nemar.org/dataexplorer/detail?dataset_id=ds003690) | 75 | 2630 | 1 | Healthy | 61 | 10-20 | Auditory | 0.023 TB |
|
|
66
|
+
| [ds003805](https://nemar.org/dataexplorer/detail?dataset_id=ds003805) | 1 | 10 | 1 | Healthy | 19 | 10-20 | Multisensory | 0 TB |
|
|
67
|
+
| [ds003838](https://nemar.org/dataexplorer/detail?dataset_id=ds003838) | 65 | 947 | 1 | Healthy | 63 | 10-20 | Auditory | 100.2 GB |
|
|
68
|
+
| [ds004010](https://nemar.org/dataexplorer/detail?dataset_id=ds004010) | 24 | 102 | 1 | Healthy | 64 | other | Multisensory | 0.025 TB |
|
|
69
|
+
| [ds004040](https://nemar.org/dataexplorer/detail?dataset_id=ds004040) | 13 | 160 | 2 | Healthy | 64 | 10-20 | Auditory | 0.012 TB |
|
|
70
|
+
| [ds004350](https://nemar.org/dataexplorer/detail?dataset_id=ds004350) | 24 | 960 | 2 | Healthy | 64 | other | Visual | 0.023 TB |
|
|
71
|
+
| [ds004362](https://nemar.org/dataexplorer/detail?dataset_id=ds004362) | 109 | 9162 | 1 | Healthy | 64 | 10-20 | Visual | 0.008 TB |
|
|
72
|
+
| [ds004504](https://nemar.org/dataexplorer/detail?dataset_id=ds004504) | 88 | 269 | 1 | Dementia | 19 | 10-20 | Resting State | 2.6 GB |
|
|
73
|
+
| [ds004554](https://nemar.org/dataexplorer/detail?dataset_id=ds004554) | 16 | 101 | 1 | Healthy | 99 | 10-20 | Visual | 0.009 TB |
|
|
74
|
+
| [ds004635](https://nemar.org/dataexplorer/detail?dataset_id=ds004635) | 48 | 292 | 1 | Healthy | 129 | other | Multisensory | 26.1 GB |
|
|
75
|
+
| [ds004657](https://nemar.org/dataexplorer/detail?dataset_id=ds004657) | 24 | 838 | 6 | Not specified | 64 | 10-20 | Motor | 43.1 GB |
|
|
76
|
+
| [ds004660](https://nemar.org/dataexplorer/detail?dataset_id=ds004660) | 21 | 299 | 1 | Healthy | 32 | 10-20 | Multisensory | 7.2 GB |
|
|
77
|
+
| [ds004661](https://nemar.org/dataexplorer/detail?dataset_id=ds004661) | 17 | 90 | 1 | Not specified | 64 | 10-20 | Multisensory | 1.4 GB |
|
|
78
|
+
| [ds004745](https://nemar.org/dataexplorer/detail?dataset_id=ds004745) | 52 | 762 | 1 | Healthy | 64 | ? | Auditory | 0 TB |
|
|
79
|
+
| [ds004785](https://nemar.org/dataexplorer/detail?dataset_id=ds004785) | 17 | 74 | 1 | Healthy | 32 | ? | Motor | 0 TB |
|
|
80
|
+
| [ds004841](https://nemar.org/dataexplorer/detail?dataset_id=ds004841) | 20 | 1034 | 2 | Not specified | 64 | 10-20 | Multisensory | 7.3 GB |
|
|
81
|
+
| [ds004842](https://nemar.org/dataexplorer/detail?dataset_id=ds004842) | 14 | 719 | 2 | Not specified | 64 | ? | Multisensory | 5.2 GB |
|
|
82
|
+
| [ds004843](https://nemar.org/dataexplorer/detail?dataset_id=ds004843) | 14 | 649 | 1 | Not specified | 64 | ? | Visual | 7.7 GB |
|
|
83
|
+
| [ds004844](https://nemar.org/dataexplorer/detail?dataset_id=ds004844) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 22.3 GB |
|
|
84
|
+
| [ds004849](https://nemar.org/dataexplorer/detail?dataset_id=ds004849) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
|
|
85
|
+
| [ds004850](https://nemar.org/dataexplorer/detail?dataset_id=ds004850) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
|
|
86
|
+
| [ds004851](https://nemar.org/dataexplorer/detail?dataset_id=ds004851) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
|
|
87
|
+
| [ds004852](https://nemar.org/dataexplorer/detail?dataset_id=ds004852) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
|
|
88
|
+
| [ds004853](https://nemar.org/dataexplorer/detail?dataset_id=ds004853) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
|
|
89
|
+
| [ds004854](https://nemar.org/dataexplorer/detail?dataset_id=ds004854) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
|
|
90
|
+
| [ds004855](https://nemar.org/dataexplorer/detail?dataset_id=ds004855) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
|
|
91
|
+
| [ds005034](https://nemar.org/dataexplorer/detail?dataset_id=ds005034) | 25 | 406 | 2 | Healthy | 129 | ? | Visual | 61.4 GB |
|
|
92
|
+
| [ds005079](https://nemar.org/dataexplorer/detail?dataset_id=ds005079) | 1 | 210 | 12 | Healthy | 64 | ? | Multisensory | 1.7 GB |
|
|
93
|
+
| [ds005342](https://nemar.org/dataexplorer/detail?dataset_id=ds005342) | 32 | 134 | 1 | Healthy | 17 | ? | Visual | 2 GB |
|
|
94
|
+
| [ds005410](https://nemar.org/dataexplorer/detail?dataset_id=ds005410) | 81 | 492 | 1 | Healthy | 63 | ? | ? | 19.8 GB |
|
|
95
|
+
| [ds005505](https://nemar.org/dataexplorer/detail?dataset_id=ds005505) | 136 | 5393 | 1 | Healthy | 129 | other | Visual | 103 GB |
|
|
96
|
+
| [ds005506](https://nemar.org/dataexplorer/detail?dataset_id=ds005506) | 150 | 5645 | 1 | Healthy | 129 | other | Visual | 112 GB |
|
|
97
|
+
| [ds005507](https://nemar.org/dataexplorer/detail?dataset_id=ds005507) | 184 | 7273 | 1 | Healthy | 129 | other | Visual | 140 GB |
|
|
98
|
+
| [ds005508](https://nemar.org/dataexplorer/detail?dataset_id=ds005508) | 324 | 13393 | 1 | Healthy | 129 | other | Visual | 230 GB |
|
|
99
|
+
| [ds005509](https://nemar.org/dataexplorer/detail?dataset_id=ds005509) | 330 | 19980 | 1 | Healthy | 129 | other | Visual | 224 GB |
|
|
100
|
+
| [ds005510](https://nemar.org/dataexplorer/detail?dataset_id=ds005510) | 135 | 4933 | 1 | Healthy | 129 | other | Visual | 91 GB |
|
|
101
|
+
| [ds005511](https://nemar.org/dataexplorer/detail?dataset_id=ds005511) | 381 | 18604 | 1 | Healthy | 129 | other | Visual | 245 GB |
|
|
102
|
+
| [ds005512](https://nemar.org/dataexplorer/detail?dataset_id=ds005512) | 257 | 9305 | 1 | Healthy | 129 | other | Visual | 157 GB |
|
|
103
|
+
| [ds005514](https://nemar.org/dataexplorer/detail?dataset_id=ds005514) | 295 | 11565 | 1 | Healthy | 129 | other | Visual | 185 GB |
|
|
104
|
+
| [ds005672](https://nemar.org/dataexplorer/detail?dataset_id=ds005672) | 3 | 18 | 1 | Healthy | 64 | 10-20 | Visual | 4.2 GB |
|
|
105
|
+
| [ds005697](https://nemar.org/dataexplorer/detail?dataset_id=ds005697) | 52 | 210 | 1 | Healthy | 64 | 10-20 | Visual | 67 GB |
|
|
106
|
+
| [ds005787](https://nemar.org/dataexplorer/detail?dataset_id=ds005787) | 30 | ? | 4 | Healthy | 64 | 10-20 | Visual | 185 GB |
|
|
107
|
+
|
|
108
|
+
## Data format
|
|
109
|
+
EEGDash queries return a **PyTorch Dataset** formatted to facilitate machine learning (ML) and deep learning (DL) applications. PyTorch Datasets are well suited to EEGDash queries because they provide an efficient, scalable, and flexible structure and integrate seamlessly with PyTorch’s DataLoader, enabling efficient batching, shuffling, and parallel data loading, which is essential for training deep learning models on large EEG datasets.
|
|
110
|
+
|
|
111
|
+
## Data preprocessing
|
|
112
|
+
EEGDash datasets are processed using the popular [BrainDecode](https://braindecode.org/stable/index.html) library. In fact, EEGDash datasets are BrainDecode datasets, which are themselves PyTorch datasets. This means that any preprocessing possible on BrainDecode datasets is also possible on EEGDash datasets. Refer to [BrainDecode](https://braindecode.org/stable/index.html) tutorials for guidance on preprocessing EEG data.
|
|
113
|
+
|
|
114
|
+
## EEG-Dash usage
|
|
115
|
+
|
|
116
|
+
### Install
|
|
117
|
+
Use your preferred Python environment manager with Python > 3.9 to install the package.
|
|
118
|
+
* To install the eegdash package, use the following temporary command (a direct `pip install eegdash` option will be available soon): `pip install -i https://test.pypi.org/simple/ eegdash`
|
|
119
|
+
* To verify the installation, start a Python session and type: `from eegdash import EEGDash`
|
|
120
|
+
|
|
121
|
+
### Data access
|
|
122
|
+
|
|
123
|
+
To use the data from a single subject, enter:
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from eegdash import EEGDashDataset
|
|
127
|
+
ds_NDARDB033FW5 = EEGDashDataset({'dataset': 'ds005514', 'task': 'RestingState', 'subject': 'NDARDB033FW5'})
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
This will search and download the metadata for the task **RestingState** for subject **NDARDB033FW5** in BIDS dataset **ds005514**. The actual data will not be downloaded at this stage. Following standard practice, data is only downloaded once it is processed. The **ds_NDARDB033FW5** object is a fully functional BrainDecode dataset, which is itself a PyTorch dataset. This [tutorial](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_eoec.ipynb) shows how to preprocess the EEG data, extracting portions of the data containing eyes-open and eyes-closed segments, then perform eyes-open vs. eyes-closed classification using a (shallow) deep-learning model.
|
|
131
|
+
|
|
132
|
+
To use the data from multiple subjects, enter:
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from eegdash import EEGDashDataset
|
|
136
|
+
ds_ds005505rest = EEGDashDataset({'dataset': 'ds005505', 'task': 'RestingState'}, target_name='sex')
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
This will search and download the metadata for the task 'RestingState' for all subjects in BIDS dataset 'ds005505' (a total of 136). As above, the actual data will not be downloaded at this stage so this command is quick to execute. Also, the target class for each subject is assigned using the target_name parameter. This means that this object is ready to be directly fed to a deep learning model, although the [tutorial script](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_sex_classification.ipynb) performs minimal processing on it, prior to training a deep-learning model. Because 14 gigabytes of data are downloaded, this tutorial takes about 10 minutes to execute.
|
|
140
|
+
|
|
141
|
+
### Automatic caching
|
|
142
|
+
|
|
143
|
+
EEGDash automatically caches the downloaded data in the .eegdash_cache folder of the current directory from which the script is called. This means that if you run the tutorial [scripts](https://github.com/sccn/EEGDash/tree/develop/notebooks), the data will only be downloaded the first time the script is executed.
|
|
144
|
+
|
|
145
|
+
## Education -- Coming soon...
|
|
146
|
+
|
|
147
|
+
We organize workshops and educational events to foster cross-cultural education and student training, offering both online and in-person opportunities in collaboration with US and Israeli partners. Events for 2025 will be announced via the EEGLABNEWS mailing list. Be sure to [subscribe](https://sccn.ucsd.edu/mailman/listinfo/eeglabnews).
|
|
148
|
+
|
|
149
|
+
## About EEG-DaSh
|
|
150
|
+
|
|
151
|
+
EEG-DaSh is a collaborative initiative between the United States and Israel, supported by the National Science Foundation (NSF). The partnership brings together experts from the Swartz Center for Computational Neuroscience (SCCN) at the University of California San Diego (UCSD) and Ben-Gurion University (BGU) in Israel.
|
|
152
|
+
|
|
153
|
+

|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
eegdash/__init__.py,sha256=DrliW5AazWcHJBznrmrS_YF8n8K48csOzfWWIvB6Esw,41
|
|
2
|
+
eegdash/data_utils.py,sha256=1ETB2rW-6HvXdli75pHW9yz3xjEZN7IqApKHn9XKHv4,19205
|
|
3
|
+
eegdash/main.py,sha256=QfFLzbs8iXmaJj4x_ylhFuEAuOCEKIKV6h_a__XPZ6Y,12048
|
|
4
|
+
eegdash-0.0.8.dist-info/licenses/LICENSE,sha256=Xafu48R-h_kyaNj2tuhfgdEv9_ovciktjUEgRRwMZ6w,812
|
|
5
|
+
eegdash-0.0.8.dist-info/METADATA,sha256=X3AsZ23uPQY71alITDp4cOHOvgD7QmlGVagiKHj4cjE,13881
|
|
6
|
+
eegdash-0.0.8.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
|
|
7
|
+
eegdash-0.0.8.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
|
|
8
|
+
eegdash-0.0.8.dist-info/RECORD,,
|
eegdash-0.0.6.dist-info/METADATA
DELETED
|
@@ -1,147 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.2
|
|
2
|
-
Name: eegdash
|
|
3
|
-
Version: 0.0.6
|
|
4
|
-
Summary: EEG data for machine learning
|
|
5
|
-
Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>
|
|
6
|
-
License: GNU General Public License
|
|
7
|
-
|
|
8
|
-
Copyright (C) 2024-2025
|
|
9
|
-
|
|
10
|
-
Young Truong, UCSD, dt.young112@gmail.com
|
|
11
|
-
Arnaud Delorme, UCSD, adelorme@ucsd.edu
|
|
12
|
-
|
|
13
|
-
This program is free software; you can redistribute it and/or modify
|
|
14
|
-
it under the terms of the GNU General Public License as published by
|
|
15
|
-
the Free Software Foundation; either version 2 of the License, or
|
|
16
|
-
(at your option) any later version.
|
|
17
|
-
|
|
18
|
-
This program is distributed in the hope that it will be useful,
|
|
19
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
20
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
21
|
-
GNU General Public License for more details.
|
|
22
|
-
|
|
23
|
-
You should have received a copy of the GNU General Public License
|
|
24
|
-
along with this program; if not, write to the Free Software
|
|
25
|
-
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
26
|
-
|
|
27
|
-
Project-URL: Homepage, https://github.com/sccn/EEG-Dash-Data
|
|
28
|
-
Project-URL: Issues, https://github.com/sccn/EEG-Dash-Data/issues
|
|
29
|
-
Classifier: Programming Language :: Python :: 3
|
|
30
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
31
|
-
Classifier: Operating System :: OS Independent
|
|
32
|
-
Requires-Python: >=3.8
|
|
33
|
-
Description-Content-Type: text/markdown
|
|
34
|
-
License-File: LICENSE
|
|
35
|
-
Requires-Dist: zarr==2.18.3
|
|
36
|
-
Requires-Dist: xarray
|
|
37
|
-
Requires-Dist: python-dotenv
|
|
38
|
-
Requires-Dist: s3fs
|
|
39
|
-
Requires-Dist: mne
|
|
40
|
-
Requires-Dist: pynwb
|
|
41
|
-
Requires-Dist: h5py
|
|
42
|
-
Requires-Dist: pymongo
|
|
43
|
-
Requires-Dist: joblib
|
|
44
|
-
|
|
45
|
-
# EEG-Dash
|
|
46
|
-
To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
|
|
47
|
-
|
|
48
|
-
## Data source
|
|
49
|
-
The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will also incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
|
|
50
|
-
|
|
51
|
-
## Datasets available
|
|
52
|
-
|
|
53
|
-
There are currently only two datasets made available for testing purposes.
|
|
54
|
-
|
|
55
|
-
| Dataset ID | Description | Participants | Channels | Task | NEMAR Link |
|
|
56
|
-
|------------|---------------------------------------------------------------------------------------------|--------------|-----------------|-----------------|------------------------------------------------------------------------------------------------|
|
|
57
|
-
| ds002718 | EEG dataset focused on face processing with MRI for source localization | 18 | 70 EEG, 2 EOG | FaceRecognition | [NEMAR ds002718](https://nemar.org/dataexplorer/detail?dataset_id=ds002718) |
|
|
58
|
-
| ds004745 | 8-Channel SSVEP EEG dataset with trials including voluntary movements to introduce artifacts | 6 | 8 EEG | SSVEP tasks | [NEMAR ds004745](https://nemar.org/dataexplorer/detail?dataset_id=ds004745) |
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
## Data formatting
|
|
63
|
-
The data in EEG-DaSh is formatted to facilitate machine learning (ML) and deep learning (DL) applications by using a simplified structure commonly adopted by these communities. This will involve converting raw MEEG data into a matrix format, where samples (e.g., individual EEG or MEG recordings) are represented by rows, and values (such as time or channel data) are represented by columns. The data is also divided into training and testing sets, with 80% of the data allocated for training and 20% for testing, ensuring a balanced representation of relevant labels across sets. Hierarchical Event Descriptor (HED) tags will be used to annotate labels, which will be stored in a text table, and detailed metadata, including dataset origins and methods. This formatting process will ensure that data is ready for ML/DL models, allowing for efficient training and testing of algorithms while preserving data integrity and reusability.
|
|
64
|
-
|
|
65
|
-

|
|
66
|
-
|
|
67
|
-
## Data access
|
|
68
|
-
The data in EEG-DaSh is formatted to facilitate machine learning (ML) and deep learning (DL) applications by using a simplified structure commonly adopted by these communities. This will involve converting raw MEEG data into a matrix format, where samples (e.g., individual EEG or MEG recordings) are represented by rows, and values (such as time or channel data) are represented by columns. The data is also divided into training and testing sets, with 80% of the data allocated for training and 20% for testing, ensuring a balanced representation of relevant labels across sets. Hierarchical Event Descriptor (HED) tags will be used to annotate labels, which will be stored in a text table, and detailed metadata, including dataset origins and methods. This formatting process will ensure that data is ready for ML/DL models, allowing for efficient training and testing of algorithms while preserving data integrity and reusability.
|
|
69
|
-
|
|
70
|
-
The data in EEG-DaSh is accessed through Python and MATLAB libraries specifically designed for this platform. These libraries will use objects compatible with deep learning data storage formats in each language, such as <i>Torchvision.dataset</i> in Python and <i>DataStore</i> in MATLAB. Users can dynamically fetch data from the EEG-DaSh server which is then cached locally.
|
|
71
|
-
|
|
72
|
-
### Install
|
|
73
|
-
Use your preferred Python environment manager with Python > 3.9 to install the package. Here we show an example using a Conda environment with Python 3.11.5:
|
|
74
|
-
* Create a new environment Python 3.11.5 -> `conda create --name eegdash python=3.11.5`
|
|
75
|
-
* Switch to the right environment -> `conda activate eegdash`
|
|
76
|
-
* Install dependencies (this is a temporary link that will be updated soon) -> `pip install -r https://raw.githubusercontent.com/sccn/EEG-Dash-Data/refs/heads/develop/requirements.txt`
|
|
77
|
-
* Install _eegdash_ package (this is a temporary link that will be updated soon) -> `pip install -i https://test.pypi.org/simple/ eegdash`
|
|
78
|
-
* Check installation. Start a Python session and type `from eegdash import EEGDash`
|
|
79
|
-
|
|
80
|
-
### Python data access
|
|
81
|
-
|
|
82
|
-
To create a local object for accessing the database, use the following code:
|
|
83
|
-
|
|
84
|
-
```python
|
|
85
|
-
from eegdash import EEGDash
|
|
86
|
-
EEGDashInstance = EEGDash()
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
Once the object is instantiated, it can be utilized to search datasets. Providing an empty parameter will search the entire database and return all available datasets.
|
|
90
|
-
|
|
91
|
-
```python
|
|
92
|
-
EEGDashInstance.find({})
|
|
93
|
-
```
|
|
94
|
-
A list of datasets is returned.
|
|
95
|
-
|
|
96
|
-
```python
|
|
97
|
-
[{'schema_ref': 'eeg_signal',
|
|
98
|
-
'data_name': 'ds004745_sub-001_task-unnamed_eeg.set',
|
|
99
|
-
'dataset': 'ds004745',
|
|
100
|
-
'subject': '001',
|
|
101
|
-
'task': 'unnamed',
|
|
102
|
-
'session': '',
|
|
103
|
-
'run': '',
|
|
104
|
-
'modality': 'EEG',
|
|
105
|
-
'sampling_frequency': 1000,
|
|
106
|
-
'version_timestamp': 0,
|
|
107
|
-
'has_file': True,
|
|
108
|
-
'time_of_save': datetime.datetime(2024, 10, 25, 14, 11, 48, 843593, tzinfo=datetime.timezone.utc),
|
|
109
|
-
'time_of_removal': None}, ...
|
|
110
|
-
|
|
111
|
-
```
|
|
112
|
-
|
|
113
|
-
Additionally, users can search for a specific dataset by specifying criteria.
|
|
114
|
-
|
|
115
|
-
```python
|
|
116
|
-
EEGDashInstance.find({'task': 'FaceRecognition'})
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
After locating the desired dataset or data record, users can download it locally by executing the following command. This will return an xArray Python object.
|
|
120
|
-
|
|
121
|
-
```python
|
|
122
|
-
XArrayData = EEGDashInstance.get({'task': 'FaceRecognition', 'subject': '019'})
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
Optionally, this is how you may access the raw data for the first record. This will return a NumPy array.
|
|
126
|
-
|
|
127
|
-
```python
|
|
128
|
-
npData = EEGDashInstance.get({'task': 'FaceRecognition', 'subject': '019'})[0].values
|
|
129
|
-
```
|
|
130
|
-
|
|
131
|
-
## Example use
|
|
132
|
-
|
|
133
|
-
This [example](tests/eegdash.ipynb) demonstrates the full workflow from data retrieval with `EEGDash` to model definition, data handling, and training in PyTorch.
|
|
134
|
-
|
|
135
|
-
## Education - Coming soon...
|
|
136
|
-
|
|
137
|
-
We organize workshops and educational events to foster cross-cultural education and student training, offering both online and in-person opportunities in collaboration with US and Israeli partners. There is no event planned for 2024. Events for 2025 will be advertised on the EEGLABNEWS mailing list so make sure to [subscribe](https://sccn.ucsd.edu/mailman/listinfo/eeglabnews).
|
|
138
|
-
|
|
139
|
-
## About EEG-DaSh
|
|
140
|
-
|
|
141
|
-
EEG-DaSh is a collaborative initiative between the United States and Israel, supported by the National Science Foundation (NSF). The partnership brings together experts from the Swartz Center for Computational Neuroscience (SCCN) at the University of California San Diego (UCSD) and Ben-Gurion University (BGU) in Israel.
|
|
142
|
-
|
|
143
|
-

|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
eegdash-0.0.6.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
eegdash/__init__.py,sha256=PjBBYCX47NLQxybOvz0WjcfFKGI8F3m1BBJxFhMJ6eA,25
|
|
2
|
-
eegdash/data_utils.py,sha256=vzMGVp4PBWyRF8tbYNqkJs0QnUd5CzvmJUkpPfxdJh8,13491
|
|
3
|
-
eegdash/main.py,sha256=fFZHHdVYNLqKr2X_NDB0XXla7A2QlHexgI9AD79_niY,7217
|
|
4
|
-
eegdash-0.0.6.dist-info/LICENSE,sha256=Xafu48R-h_kyaNj2tuhfgdEv9_ovciktjUEgRRwMZ6w,812
|
|
5
|
-
eegdash-0.0.6.dist-info/METADATA,sha256=halHsdTDj4by5BMtg4FXsCPMKnjoKPmrOz3FOXO_RPw,9609
|
|
6
|
-
eegdash-0.0.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
7
|
-
eegdash-0.0.6.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
|
|
8
|
-
eegdash-0.0.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|