eegdash 0.0.9__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic. Click here for more details.
- eegdash/__init__.py +4 -1
- eegdash/data_config.py +27 -27
- eegdash/data_utils.py +164 -118
- eegdash/features/__init__.py +14 -14
- eegdash/features/datasets.py +6 -3
- eegdash/features/decorators.py +4 -4
- eegdash/features/extractors.py +2 -1
- eegdash/features/feature_bank/__init__.py +3 -3
- eegdash/features/feature_bank/complexity.py +2 -3
- eegdash/features/feature_bank/connectivity.py +16 -56
- eegdash/features/feature_bank/csp.py +2 -3
- eegdash/features/feature_bank/dimensionality.py +1 -2
- eegdash/features/feature_bank/signal.py +1 -1
- eegdash/features/feature_bank/spectral.py +10 -28
- eegdash/features/feature_bank/utils.py +48 -0
- eegdash/features/serialization.py +2 -2
- eegdash/features/utils.py +8 -6
- eegdash/main.py +189 -132
- {eegdash-0.0.9.dist-info → eegdash-0.1.0.dist-info}/METADATA +22 -18
- eegdash-0.1.0.dist-info/RECORD +23 -0
- {eegdash-0.0.9.dist-info → eegdash-0.1.0.dist-info}/WHEEL +1 -1
- eegdash-0.0.9.dist-info/RECORD +0 -22
- {eegdash-0.0.9.dist-info → eegdash-0.1.0.dist-info}/licenses/LICENSE +0 -0
- {eegdash-0.0.9.dist-info → eegdash-0.1.0.dist-info}/top_level.txt +0 -0
eegdash/main.py
CHANGED
|
@@ -1,24 +1,28 @@
|
|
|
1
|
-
import pymongo
|
|
2
|
-
from dotenv import load_dotenv
|
|
3
|
-
import os
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
import s3fs
|
|
6
|
-
from joblib import Parallel, delayed
|
|
7
1
|
import json
|
|
2
|
+
import os
|
|
8
3
|
import tempfile
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
9
7
|
import mne
|
|
10
8
|
import numpy as np
|
|
9
|
+
import pymongo
|
|
10
|
+
import s3fs
|
|
11
11
|
import xarray as xr
|
|
12
|
-
from
|
|
12
|
+
from dotenv import load_dotenv
|
|
13
|
+
from joblib import Parallel, delayed
|
|
14
|
+
from pymongo import DeleteOne, InsertOne, MongoClient, UpdateOne
|
|
15
|
+
|
|
16
|
+
from braindecode.datasets import BaseConcatDataset, BaseDataset
|
|
17
|
+
|
|
13
18
|
from .data_config import config as data_config
|
|
14
|
-
from
|
|
15
|
-
|
|
16
|
-
from pymongo import MongoClient, InsertOne, UpdateOne, DeleteOne
|
|
19
|
+
from .data_utils import EEGBIDSDataset, EEGDashBaseDataset, EEGDashBaseRaw
|
|
20
|
+
|
|
17
21
|
|
|
18
22
|
class EEGDash:
|
|
19
|
-
AWS_BUCKET =
|
|
20
|
-
|
|
21
|
-
|
|
23
|
+
AWS_BUCKET = "s3://openneuro.org"
|
|
24
|
+
|
|
25
|
+
def __init__(self, is_public=True):
|
|
22
26
|
# Load config file
|
|
23
27
|
# config_path = Path(__file__).parent / 'config.json'
|
|
24
28
|
# with open(config_path, 'r') as f:
|
|
@@ -26,50 +30,52 @@ class EEGDash:
|
|
|
26
30
|
|
|
27
31
|
self.config = data_config
|
|
28
32
|
if is_public:
|
|
29
|
-
DB_CONNECTION_STRING="mongodb+srv://eegdash-user:mdzoMjQcHWTVnKDq@cluster0.vz35p.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
|
|
33
|
+
DB_CONNECTION_STRING = "mongodb+srv://eegdash-user:mdzoMjQcHWTVnKDq@cluster0.vz35p.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
|
|
30
34
|
else:
|
|
31
35
|
load_dotenv()
|
|
32
|
-
DB_CONNECTION_STRING = os.getenv(
|
|
36
|
+
DB_CONNECTION_STRING = os.getenv("DB_CONNECTION_STRING")
|
|
33
37
|
|
|
34
38
|
self.__client = pymongo.MongoClient(DB_CONNECTION_STRING)
|
|
35
|
-
self.__db = self.__client[
|
|
36
|
-
self.__collection = self.__db[
|
|
39
|
+
self.__db = self.__client["eegdash"]
|
|
40
|
+
self.__collection = self.__db["records"]
|
|
37
41
|
|
|
38
42
|
self.is_public = is_public
|
|
39
|
-
self.filesystem = s3fs.S3FileSystem(
|
|
40
|
-
|
|
43
|
+
self.filesystem = s3fs.S3FileSystem(
|
|
44
|
+
anon=True, client_kwargs={"region_name": "us-east-2"}
|
|
45
|
+
)
|
|
46
|
+
|
|
41
47
|
def find(self, *args):
|
|
42
48
|
results = self.__collection.find(*args)
|
|
43
|
-
|
|
49
|
+
|
|
44
50
|
# convert to list using get_item on each element
|
|
45
51
|
return [result for result in results]
|
|
46
52
|
|
|
47
|
-
def exist(self, query:dict):
|
|
48
|
-
accepted_query_fields = [
|
|
53
|
+
def exist(self, query: dict):
|
|
54
|
+
accepted_query_fields = ["data_name", "dataset"]
|
|
49
55
|
assert all(field in accepted_query_fields for field in query.keys())
|
|
50
56
|
sessions = self.find(query)
|
|
51
57
|
return len(sessions) > 0
|
|
52
58
|
|
|
53
|
-
def _validate_input(self, record:dict):
|
|
59
|
+
def _validate_input(self, record: dict):
|
|
54
60
|
input_types = {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
61
|
+
"data_name": str,
|
|
62
|
+
"dataset": str,
|
|
63
|
+
"bidspath": str,
|
|
64
|
+
"subject": str,
|
|
65
|
+
"task": str,
|
|
66
|
+
"session": str,
|
|
67
|
+
"run": str,
|
|
68
|
+
"sampling_frequency": float,
|
|
69
|
+
"modality": str,
|
|
70
|
+
"nchans": int,
|
|
71
|
+
"ntimes": int,
|
|
72
|
+
"channel_types": list,
|
|
73
|
+
"channel_names": list,
|
|
68
74
|
}
|
|
69
|
-
if
|
|
75
|
+
if "data_name" not in record:
|
|
70
76
|
raise ValueError("Missing key: data_name")
|
|
71
77
|
# check if args are in the keys and has correct type
|
|
72
|
-
for key,value in record.items():
|
|
78
|
+
for key, value in record.items():
|
|
73
79
|
if key not in input_types:
|
|
74
80
|
raise ValueError(f"Invalid input: {key}")
|
|
75
81
|
if not isinstance(value, input_types[key]):
|
|
@@ -78,7 +84,7 @@ class EEGDash:
|
|
|
78
84
|
return record
|
|
79
85
|
|
|
80
86
|
def load_eeg_data_from_s3(self, s3path):
|
|
81
|
-
with tempfile.NamedTemporaryFile(delete=False, suffix=
|
|
87
|
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".set") as tmp:
|
|
82
88
|
with self.filesystem.open(s3path) as s3_file:
|
|
83
89
|
tmp.write(s3_file.read())
|
|
84
90
|
tmp_path = tmp.name
|
|
@@ -86,26 +92,23 @@ class EEGDash:
|
|
|
86
92
|
os.unlink(tmp_path)
|
|
87
93
|
return eeg_data
|
|
88
94
|
|
|
89
|
-
def load_eeg_data_from_bids_file(self,
|
|
90
|
-
|
|
95
|
+
def load_eeg_data_from_bids_file(self, bids_file, eeg_attrs=None):
|
|
96
|
+
"""
|
|
91
97
|
bids_file must be a file of the bids_dataset
|
|
92
|
-
|
|
98
|
+
"""
|
|
93
99
|
EEG = mne.io.read_raw_eeglab(bids_file)
|
|
94
100
|
eeg_data = EEG.get_data()
|
|
95
|
-
|
|
96
|
-
fs = EEG.info[
|
|
101
|
+
|
|
102
|
+
fs = EEG.info["sfreq"]
|
|
97
103
|
max_time = eeg_data.shape[1] / fs
|
|
98
|
-
time_steps = np.linspace(0, max_time, eeg_data.shape[1]).squeeze()
|
|
104
|
+
time_steps = np.linspace(0, max_time, eeg_data.shape[1]).squeeze() # in seconds
|
|
99
105
|
|
|
100
106
|
channel_names = EEG.ch_names
|
|
101
107
|
|
|
102
108
|
eeg_xarray = xr.DataArray(
|
|
103
109
|
data=eeg_data,
|
|
104
|
-
dims=[
|
|
105
|
-
coords={
|
|
106
|
-
'time': time_steps,
|
|
107
|
-
'channel': channel_names
|
|
108
|
-
},
|
|
110
|
+
dims=["channel", "time"],
|
|
111
|
+
coords={"time": time_steps, "channel": channel_names},
|
|
109
112
|
# attrs=attrs
|
|
110
113
|
)
|
|
111
114
|
return eeg_xarray
|
|
@@ -113,22 +116,26 @@ class EEGDash:
|
|
|
113
116
|
def get_raw_extensions(self, bids_file, bids_dataset: EEGBIDSDataset):
|
|
114
117
|
bids_file = Path(bids_file)
|
|
115
118
|
extensions = {
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
119
|
+
".set": [".set", ".fdt"], # eeglab
|
|
120
|
+
".edf": [".edf"], # european
|
|
121
|
+
".vhdr": [".eeg", ".vhdr", ".vmrk", ".dat", ".raw"], # brainvision
|
|
122
|
+
".bdf": [".bdf"], # biosemi
|
|
120
123
|
}
|
|
121
|
-
return [
|
|
124
|
+
return [
|
|
125
|
+
str(bids_dataset.get_relative_bidspath(bids_file.with_suffix(suffix)))
|
|
126
|
+
for suffix in extensions[bids_file.suffix]
|
|
127
|
+
if bids_file.with_suffix(suffix).exists()
|
|
128
|
+
]
|
|
122
129
|
|
|
123
130
|
def load_eeg_attrs_from_bids_file(self, bids_dataset: EEGBIDSDataset, bids_file):
|
|
124
|
-
|
|
131
|
+
"""
|
|
125
132
|
bids_file must be a file of the bids_dataset
|
|
126
|
-
|
|
133
|
+
"""
|
|
127
134
|
if bids_file not in bids_dataset.files:
|
|
128
|
-
raise ValueError(f
|
|
135
|
+
raise ValueError(f"{bids_file} not in {bids_dataset.dataset}")
|
|
129
136
|
|
|
130
137
|
# Initialize attrs with None values for all expected fields
|
|
131
|
-
attrs = {field: None for field in self.config[
|
|
138
|
+
attrs = {field: None for field in self.config["attributes"].keys()}
|
|
132
139
|
|
|
133
140
|
f = os.path.basename(bids_file)
|
|
134
141
|
dsnumber = bids_dataset.dataset
|
|
@@ -141,43 +148,53 @@ class EEGDash:
|
|
|
141
148
|
except Exception as e:
|
|
142
149
|
print(f"Error getting participants_tsv: {str(e)}")
|
|
143
150
|
participants_tsv = None
|
|
144
|
-
|
|
151
|
+
|
|
145
152
|
try:
|
|
146
153
|
eeg_json = bids_dataset.eeg_json(bids_file)
|
|
147
154
|
except Exception as e:
|
|
148
155
|
print(f"Error getting eeg_json: {str(e)}")
|
|
149
156
|
eeg_json = None
|
|
150
|
-
|
|
151
|
-
bids_dependencies_files = self.config[
|
|
157
|
+
|
|
158
|
+
bids_dependencies_files = self.config["bids_dependencies_files"]
|
|
152
159
|
bidsdependencies = []
|
|
153
160
|
for extension in bids_dependencies_files:
|
|
154
161
|
try:
|
|
155
162
|
dep_path = bids_dataset.get_bids_metadata_files(bids_file, extension)
|
|
156
|
-
dep_path = [
|
|
163
|
+
dep_path = [
|
|
164
|
+
str(bids_dataset.get_relative_bidspath(dep)) for dep in dep_path
|
|
165
|
+
]
|
|
157
166
|
bidsdependencies.extend(dep_path)
|
|
158
167
|
except Exception as e:
|
|
159
168
|
pass
|
|
160
|
-
|
|
169
|
+
|
|
161
170
|
bidsdependencies.extend(self.get_raw_extensions(bids_file, bids_dataset))
|
|
162
171
|
|
|
163
172
|
# Define field extraction functions with error handling
|
|
164
173
|
field_extractors = {
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
174
|
+
"data_name": lambda: f"{bids_dataset.dataset}_{f}",
|
|
175
|
+
"dataset": lambda: bids_dataset.dataset,
|
|
176
|
+
"bidspath": lambda: openneuro_path,
|
|
177
|
+
"subject": lambda: bids_dataset.get_bids_file_attribute(
|
|
178
|
+
"subject", bids_file
|
|
179
|
+
),
|
|
180
|
+
"task": lambda: bids_dataset.get_bids_file_attribute("task", bids_file),
|
|
181
|
+
"session": lambda: bids_dataset.get_bids_file_attribute(
|
|
182
|
+
"session", bids_file
|
|
183
|
+
),
|
|
184
|
+
"run": lambda: bids_dataset.get_bids_file_attribute("run", bids_file),
|
|
185
|
+
"modality": lambda: bids_dataset.get_bids_file_attribute(
|
|
186
|
+
"modality", bids_file
|
|
187
|
+
),
|
|
188
|
+
"sampling_frequency": lambda: bids_dataset.get_bids_file_attribute(
|
|
189
|
+
"sfreq", bids_file
|
|
190
|
+
),
|
|
191
|
+
"nchans": lambda: bids_dataset.get_bids_file_attribute("nchans", bids_file),
|
|
192
|
+
"ntimes": lambda: bids_dataset.get_bids_file_attribute("ntimes", bids_file),
|
|
193
|
+
"participant_tsv": lambda: participants_tsv,
|
|
194
|
+
"eeg_json": lambda: eeg_json,
|
|
195
|
+
"bidsdependencies": lambda: bidsdependencies,
|
|
179
196
|
}
|
|
180
|
-
|
|
197
|
+
|
|
181
198
|
# Dynamically populate attrs with error handling
|
|
182
199
|
for field, extractor in field_extractors.items():
|
|
183
200
|
try:
|
|
@@ -189,14 +206,14 @@ class EEGDash:
|
|
|
189
206
|
return attrs
|
|
190
207
|
|
|
191
208
|
def add_bids_dataset(self, dataset, data_dir, overwrite=True):
|
|
192
|
-
|
|
209
|
+
"""
|
|
193
210
|
Create new records for the dataset in the MongoDB database if not found
|
|
194
|
-
|
|
211
|
+
"""
|
|
195
212
|
if self.is_public:
|
|
196
|
-
raise ValueError(
|
|
213
|
+
raise ValueError("This operation is not allowed for public users")
|
|
197
214
|
|
|
198
|
-
if not overwrite and self.exist({
|
|
199
|
-
print(f
|
|
215
|
+
if not overwrite and self.exist({"dataset": dataset}):
|
|
216
|
+
print(f"Dataset {dataset} already exists in the database")
|
|
200
217
|
return
|
|
201
218
|
try:
|
|
202
219
|
bids_dataset = EEGBIDSDataset(
|
|
@@ -204,24 +221,28 @@ class EEGDash:
|
|
|
204
221
|
dataset=dataset,
|
|
205
222
|
)
|
|
206
223
|
except Exception as e:
|
|
207
|
-
print(f
|
|
224
|
+
print(f"Error creating bids dataset {dataset}: {str(e)}")
|
|
208
225
|
raise e
|
|
209
226
|
requests = []
|
|
210
227
|
for bids_file in bids_dataset.get_files():
|
|
211
228
|
try:
|
|
212
229
|
data_id = f"{dataset}_{os.path.basename(bids_file)}"
|
|
213
230
|
|
|
214
|
-
if self.exist({
|
|
231
|
+
if self.exist({"data_name": data_id}):
|
|
215
232
|
if overwrite:
|
|
216
|
-
eeg_attrs = self.load_eeg_attrs_from_bids_file(
|
|
233
|
+
eeg_attrs = self.load_eeg_attrs_from_bids_file(
|
|
234
|
+
bids_dataset, bids_file
|
|
235
|
+
)
|
|
217
236
|
requests.append(self.update_request(eeg_attrs))
|
|
218
237
|
else:
|
|
219
|
-
eeg_attrs = self.load_eeg_attrs_from_bids_file(
|
|
238
|
+
eeg_attrs = self.load_eeg_attrs_from_bids_file(
|
|
239
|
+
bids_dataset, bids_file
|
|
240
|
+
)
|
|
220
241
|
requests.append(self.add_request(eeg_attrs))
|
|
221
242
|
except:
|
|
222
|
-
print(
|
|
243
|
+
print("error adding record", bids_file)
|
|
223
244
|
|
|
224
|
-
print(
|
|
245
|
+
print("Number of database requests", len(requests))
|
|
225
246
|
|
|
226
247
|
if requests:
|
|
227
248
|
result = self.__collection.bulk_write(requests, ordered=False)
|
|
@@ -231,25 +252,28 @@ class EEGDash:
|
|
|
231
252
|
print(f"Upserted: {result.upserted_count}")
|
|
232
253
|
print(f"Errors: {result.bulk_api_result.get('writeErrors', [])}")
|
|
233
254
|
|
|
234
|
-
def get(self, query:dict):
|
|
235
|
-
|
|
255
|
+
def get(self, query: dict):
|
|
256
|
+
"""
|
|
236
257
|
query: {
|
|
237
258
|
'dataset': 'dsxxxx',
|
|
238
259
|
|
|
239
|
-
}
|
|
260
|
+
}"""
|
|
240
261
|
sessions = self.find(query)
|
|
241
262
|
results = []
|
|
242
263
|
if sessions:
|
|
243
|
-
print(f
|
|
244
|
-
results = Parallel(
|
|
245
|
-
|
|
264
|
+
print(f"Found {len(sessions)} records")
|
|
265
|
+
results = Parallel(
|
|
266
|
+
n_jobs=-1 if len(sessions) > 1 else 1, prefer="threads", verbose=1
|
|
267
|
+
)(
|
|
268
|
+
delayed(self.load_eeg_data_from_s3)(self.get_s3path(session))
|
|
269
|
+
for session in sessions
|
|
246
270
|
)
|
|
247
271
|
return results
|
|
248
272
|
|
|
249
|
-
def add_request(self, record:dict):
|
|
273
|
+
def add_request(self, record: dict):
|
|
250
274
|
return InsertOne(record)
|
|
251
275
|
|
|
252
|
-
def add(self, record:dict):
|
|
276
|
+
def add(self, record: dict):
|
|
253
277
|
try:
|
|
254
278
|
# input_record = self._validate_input(record)
|
|
255
279
|
self.__collection.insert_one(record)
|
|
@@ -257,38 +281,51 @@ class EEGDash:
|
|
|
257
281
|
except ValueError as e:
|
|
258
282
|
print(f"Failed to validate record: {record['data_name']}")
|
|
259
283
|
print(e)
|
|
260
|
-
except:
|
|
284
|
+
except:
|
|
261
285
|
print(f"Error adding record: {record['data_name']}")
|
|
262
286
|
|
|
263
|
-
def update_request(self, record:dict):
|
|
264
|
-
return UpdateOne({
|
|
287
|
+
def update_request(self, record: dict):
|
|
288
|
+
return UpdateOne({"data_name": record["data_name"]}, {"$set": record})
|
|
265
289
|
|
|
266
|
-
def update(self, record:dict):
|
|
290
|
+
def update(self, record: dict):
|
|
267
291
|
try:
|
|
268
|
-
self.__collection.update_one(
|
|
269
|
-
|
|
270
|
-
|
|
292
|
+
self.__collection.update_one(
|
|
293
|
+
{"data_name": record["data_name"]}, {"$set": record}
|
|
294
|
+
)
|
|
295
|
+
except: # silent failure
|
|
296
|
+
print(f"Error updating record {record['data_name']}")
|
|
271
297
|
|
|
272
298
|
def remove_field(self, record, field):
|
|
273
|
-
self.__collection.update_one(
|
|
274
|
-
|
|
299
|
+
self.__collection.update_one(
|
|
300
|
+
{"data_name": record["data_name"]}, {"$unset": {field: 1}}
|
|
301
|
+
)
|
|
302
|
+
|
|
275
303
|
def remove_field_from_db(self, field):
|
|
276
|
-
self.__collection.update_many({}, {
|
|
277
|
-
|
|
304
|
+
self.__collection.update_many({}, {"$unset": {field: 1}})
|
|
305
|
+
|
|
278
306
|
@property
|
|
279
307
|
def collection(self):
|
|
280
308
|
return self.__collection
|
|
281
309
|
|
|
310
|
+
|
|
282
311
|
class EEGDashDataset(BaseConcatDataset):
|
|
283
312
|
# CACHE_DIR = '.eegdash_cache'
|
|
284
313
|
def __init__(
|
|
285
314
|
self,
|
|
286
|
-
query:dict=None,
|
|
287
|
-
data_dir:str | list =None,
|
|
288
|
-
dataset:str | list =None,
|
|
289
|
-
description_fields: list[str]=[
|
|
290
|
-
|
|
291
|
-
|
|
315
|
+
query: dict = None,
|
|
316
|
+
data_dir: str | list = None,
|
|
317
|
+
dataset: str | list = None,
|
|
318
|
+
description_fields: list[str] = [
|
|
319
|
+
"subject",
|
|
320
|
+
"session",
|
|
321
|
+
"run",
|
|
322
|
+
"task",
|
|
323
|
+
"age",
|
|
324
|
+
"gender",
|
|
325
|
+
"sex",
|
|
326
|
+
],
|
|
327
|
+
cache_dir: str = ".eegdash_cache",
|
|
328
|
+
**kwargs,
|
|
292
329
|
):
|
|
293
330
|
self.cache_dir = cache_dir
|
|
294
331
|
if query:
|
|
@@ -297,14 +334,19 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
297
334
|
if type(data_dir) == str:
|
|
298
335
|
datasets = self.load_bids_dataset(dataset, data_dir, description_fields)
|
|
299
336
|
else:
|
|
300
|
-
assert len(data_dir) == len(dataset),
|
|
337
|
+
assert len(data_dir) == len(dataset), (
|
|
338
|
+
"Number of datasets and their directories must match"
|
|
339
|
+
)
|
|
301
340
|
datasets = []
|
|
302
341
|
for i in range(len(data_dir)):
|
|
303
|
-
datasets.extend(
|
|
342
|
+
datasets.extend(
|
|
343
|
+
self.load_bids_dataset(
|
|
344
|
+
dataset[i], data_dir[i], description_fields
|
|
345
|
+
)
|
|
346
|
+
)
|
|
304
347
|
# convert to list using get_item on each element
|
|
305
348
|
super().__init__(datasets)
|
|
306
349
|
|
|
307
|
-
|
|
308
350
|
def find_key_in_nested_dict(self, data, target_key):
|
|
309
351
|
if isinstance(data, dict):
|
|
310
352
|
if target_key in data:
|
|
@@ -315,45 +357,60 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
315
357
|
return result
|
|
316
358
|
return None
|
|
317
359
|
|
|
318
|
-
def find_datasets(self, query:dict, description_fields:list[str], **kwargs):
|
|
360
|
+
def find_datasets(self, query: dict, description_fields: list[str], **kwargs):
|
|
319
361
|
eegdashObj = EEGDash()
|
|
320
362
|
datasets = []
|
|
321
363
|
for record in eegdashObj.find(query):
|
|
322
364
|
description = {}
|
|
323
365
|
for field in description_fields:
|
|
324
366
|
value = self.find_key_in_nested_dict(record, field)
|
|
325
|
-
if value:
|
|
367
|
+
if value is not None:
|
|
326
368
|
description[field] = value
|
|
327
|
-
datasets.append(
|
|
369
|
+
datasets.append(
|
|
370
|
+
EEGDashBaseDataset(
|
|
371
|
+
record, self.cache_dir, description=description, **kwargs
|
|
372
|
+
)
|
|
373
|
+
)
|
|
328
374
|
return datasets
|
|
329
375
|
|
|
330
|
-
def load_bids_dataset(
|
|
331
|
-
|
|
332
|
-
|
|
376
|
+
def load_bids_dataset(
|
|
377
|
+
self,
|
|
378
|
+
dataset,
|
|
379
|
+
data_dir,
|
|
380
|
+
description_fields: list[str],
|
|
381
|
+
raw_format="eeglab",
|
|
382
|
+
**kwargs,
|
|
383
|
+
):
|
|
384
|
+
""" """
|
|
385
|
+
|
|
333
386
|
def get_base_dataset_from_bids_file(bids_dataset, bids_file):
|
|
334
387
|
record = eegdashObj.load_eeg_attrs_from_bids_file(bids_dataset, bids_file)
|
|
335
388
|
description = {}
|
|
336
389
|
for field in description_fields:
|
|
337
390
|
value = self.find_key_in_nested_dict(record, field)
|
|
338
|
-
if value:
|
|
391
|
+
if value is not None:
|
|
339
392
|
description[field] = value
|
|
340
|
-
return EEGDashBaseDataset(
|
|
393
|
+
return EEGDashBaseDataset(
|
|
394
|
+
record, self.cache_dir, description=description, **kwargs
|
|
395
|
+
)
|
|
341
396
|
|
|
342
397
|
bids_dataset = EEGBIDSDataset(
|
|
343
398
|
data_dir=data_dir,
|
|
344
399
|
dataset=dataset,
|
|
345
|
-
raw_format=raw_format,
|
|
346
400
|
)
|
|
347
401
|
eegdashObj = EEGDash()
|
|
348
402
|
datasets = Parallel(n_jobs=-1, prefer="threads", verbose=1)(
|
|
349
|
-
|
|
350
|
-
)
|
|
403
|
+
delayed(get_base_dataset_from_bids_file)(bids_dataset, bids_file)
|
|
404
|
+
for bids_file in bids_dataset.get_files()
|
|
405
|
+
)
|
|
351
406
|
return datasets
|
|
352
407
|
|
|
408
|
+
|
|
353
409
|
def main():
|
|
354
410
|
eegdash = EEGDash()
|
|
355
|
-
record = eegdash.find({
|
|
411
|
+
record = eegdash.find({"dataset": "ds005511", "subject": "NDARUF236HM7"})
|
|
356
412
|
print(record)
|
|
357
413
|
|
|
358
|
-
|
|
359
|
-
|
|
414
|
+
|
|
415
|
+
if __name__ == "__main__":
|
|
416
|
+
main()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eegdash
|
|
3
|
-
Version: 0.0
|
|
3
|
+
Version: 0.1.0
|
|
4
4
|
Summary: EEG data for machine learning
|
|
5
5
|
Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>
|
|
6
6
|
License: GNU General Public License
|
|
@@ -24,29 +24,27 @@ License: GNU General Public License
|
|
|
24
24
|
along with this program; if not, write to the Free Software
|
|
25
25
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
26
26
|
|
|
27
|
-
Project-URL: Homepage, https://
|
|
28
|
-
Project-URL: Issues, https://github.com/sccn/
|
|
27
|
+
Project-URL: Homepage, https://github.com/sccn/EEG-Dash-Data
|
|
28
|
+
Project-URL: Issues, https://github.com/sccn/EEG-Dash-Data/issues
|
|
29
29
|
Classifier: Programming Language :: Python :: 3
|
|
30
30
|
Classifier: License :: OSI Approved :: MIT License
|
|
31
31
|
Classifier: Operating System :: OS Independent
|
|
32
|
-
Requires-Python:
|
|
32
|
+
Requires-Python: >3.10
|
|
33
33
|
Description-Content-Type: text/markdown
|
|
34
34
|
License-File: LICENSE
|
|
35
|
-
Requires-Dist: xarray
|
|
36
|
-
Requires-Dist: python-dotenv
|
|
37
|
-
Requires-Dist: s3fs
|
|
38
|
-
Requires-Dist: mne
|
|
39
|
-
Requires-Dist: pynwb
|
|
40
|
-
Requires-Dist: h5py
|
|
41
|
-
Requires-Dist: pymongo
|
|
42
|
-
Requires-Dist: joblib
|
|
43
35
|
Requires-Dist: braindecode
|
|
44
|
-
Requires-Dist:
|
|
36
|
+
Requires-Dist: mne_bids
|
|
37
|
+
Requires-Dist: numba
|
|
38
|
+
Requires-Dist: numpy
|
|
39
|
+
Requires-Dist: pandas
|
|
45
40
|
Requires-Dist: pybids
|
|
46
|
-
Requires-Dist:
|
|
47
|
-
Requires-Dist:
|
|
41
|
+
Requires-Dist: pymongo
|
|
42
|
+
Requires-Dist: python-dotenv
|
|
43
|
+
Requires-Dist: s3fs
|
|
44
|
+
Requires-Dist: scipy
|
|
48
45
|
Requires-Dist: tqdm
|
|
49
|
-
Requires-Dist:
|
|
46
|
+
Requires-Dist: xarray
|
|
47
|
+
Requires-Dist: pre-commit
|
|
50
48
|
Dynamic: license-file
|
|
51
49
|
|
|
52
50
|
# EEG-Dash
|
|
@@ -90,7 +88,10 @@ To use the data from a single subject, enter:
|
|
|
90
88
|
|
|
91
89
|
```python
|
|
92
90
|
from eegdash import EEGDashDataset
|
|
93
|
-
|
|
91
|
+
|
|
92
|
+
ds_NDARDB033FW5 = EEGDashDataset(
|
|
93
|
+
{"dataset": "ds005514", "task": "RestingState", "subject": "NDARDB033FW5"}
|
|
94
|
+
)
|
|
94
95
|
```
|
|
95
96
|
|
|
96
97
|
This will search and download the metadata for the task **RestingState** for subject **NDARDB033FW5** in BIDS dataset **ds005514**. The actual data will not be downloaded at this stage. Following standard practice, data is downloaded only when it is processed. The **ds_NDARDB033FW5** object is a fully functional Braindecode dataset, which is itself a PyTorch dataset. This [tutorial](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_eoec.ipynb) shows how to preprocess the EEG data, extract the portions of the data containing eyes-open and eyes-closed segments, and then perform eyes-open vs. eyes-closed classification using a (shallow) deep-learning model.
|
|
@@ -99,7 +100,10 @@ To use the data from multiple subjects, enter:
|
|
|
99
100
|
|
|
100
101
|
```python
|
|
101
102
|
from eegdash import EEGDashDataset
|
|
102
|
-
|
|
103
|
+
|
|
104
|
+
ds_ds005505rest = EEGDashDataset(
|
|
105
|
+
{"dataset": "ds005505", "task": "RestingState"}, target_name="sex"
|
|
106
|
+
)
|
|
103
107
|
```
|
|
104
108
|
|
|
105
109
|
This will search and download the metadata for the task 'RestingState' for all subjects in BIDS dataset 'ds005505' (a total of 136). As above, the actual data will not be downloaded at this stage, so this command is quick to execute. Also, the target class for each subject is assigned using the target_name parameter. This means that this object is ready to be fed directly to a deep-learning model, although the [tutorial script](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_sex_classification.ipynb) performs minimal processing on it prior to training a deep-learning model. Because 14 gigabytes of data are downloaded, this tutorial takes about 10 minutes to execute.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
eegdash/__init__.py,sha256=dyNvSv7ORVDYDz0P-XBNj_SApMlOqwt8LHQqfeuPKCg,105
|
|
2
|
+
eegdash/data_config.py,sha256=sIwj7lnZ1hCjeFs-0CXeHn93btm9fX7mwgVTZVeVh-w,763
|
|
3
|
+
eegdash/data_utils.py,sha256=LqAJygSpPpYEIerAnWHuHP0OMjd7jQtzXIodbvb0568,19436
|
|
4
|
+
eegdash/main.py,sha256=CFI-Bro_oru5iRJdNQZ8IqeRPhrZKXj8wKoMdcrhFt8,14865
|
|
5
|
+
eegdash/features/__init__.py,sha256=Ijhc-bLwysyF_HTmdJwbYoTHbxj2wxArs1xSUzhm7Hc,604
|
|
6
|
+
eegdash/features/datasets.py,sha256=JB-VTfXTwfbxpgF9wq34gKK69YNCZPQwsnaKEXQisWk,17180
|
|
7
|
+
eegdash/features/decorators.py,sha256=iVsbdQXGoLi-V6M9BgP6P8i_UzUtIAWQlf8Qq_LdRqY,1247
|
|
8
|
+
eegdash/features/extractors.py,sha256=bITM4DXbW1Dq8Nm8hS3OrSGfRFV6-IwzkTzjiy_yg9k,6816
|
|
9
|
+
eegdash/features/serialization.py,sha256=ceGcEvKCg4OsWyLpdAyJsvU1-6UXcvVx2q6nq58vt8Y,2873
|
|
10
|
+
eegdash/features/utils.py,sha256=jjVNVLFSXFj3j7NWgEbUlt5faTrWKLLQY9ZYy0xLp_M,3782
|
|
11
|
+
eegdash/features/feature_bank/__init__.py,sha256=BKrM3aaggXrfey1yEjEBYaxOV5e3UK-o8oGeB30epOg,149
|
|
12
|
+
eegdash/features/feature_bank/complexity.py,sha256=WkLin-f1WTPUtcpkLDObY8nQYRsvpa08Xy9ly1k0hik,3017
|
|
13
|
+
eegdash/features/feature_bank/connectivity.py,sha256=bQ6KlxWm5GNpCS9ypLqBUr2L171Yq7wpBQT2tRQKTZ4,2159
|
|
14
|
+
eegdash/features/feature_bank/csp.py,sha256=O-kUijM47cOH7yfe7sYL9wT41w1dGaq6sOieh-h82pw,3300
|
|
15
|
+
eegdash/features/feature_bank/dimensionality.py,sha256=e8rKpAT_xtZRsBDuVbznFx_daWdQj89Z3Zkt61Hs5qk,3734
|
|
16
|
+
eegdash/features/feature_bank/signal.py,sha256=4jgIXRVS274puKfOnDNnqLoBP_yXRyP38iMnXRvobYo,2437
|
|
17
|
+
eegdash/features/feature_bank/spectral.py,sha256=bNB7skusePs1gX7NOU6yRlw_Gr4UOCkO_ylkCgybzug,3319
|
|
18
|
+
eegdash/features/feature_bank/utils.py,sha256=DGh-Q7-XFIittP7iBBxvsJaZrlVvuY5mw-G7q6C-PCI,1237
|
|
19
|
+
eegdash-0.1.0.dist-info/licenses/LICENSE,sha256=Xafu48R-h_kyaNj2tuhfgdEv9_ovciktjUEgRRwMZ6w,812
|
|
20
|
+
eegdash-0.1.0.dist-info/METADATA,sha256=RixWQ9dqP1IQzz_HCAZL2Sp-at190rx4ocpvy2DVaio,8551
|
|
21
|
+
eegdash-0.1.0.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
|
22
|
+
eegdash-0.1.0.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
|
|
23
|
+
eegdash-0.1.0.dist-info/RECORD,,
|