eegdash 0.3.3.dev61__py3-none-any.whl → 0.5.0.dev180784713__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eegdash/__init__.py +19 -6
- eegdash/api.py +336 -539
- eegdash/bids_eeg_metadata.py +495 -0
- eegdash/const.py +349 -0
- eegdash/dataset/__init__.py +28 -0
- eegdash/dataset/base.py +311 -0
- eegdash/dataset/bids_dataset.py +641 -0
- eegdash/dataset/dataset.py +692 -0
- eegdash/dataset/dataset_summary.csv +255 -0
- eegdash/dataset/registry.py +287 -0
- eegdash/downloader.py +197 -0
- eegdash/features/__init__.py +15 -13
- eegdash/features/datasets.py +329 -138
- eegdash/features/decorators.py +105 -13
- eegdash/features/extractors.py +233 -63
- eegdash/features/feature_bank/__init__.py +12 -12
- eegdash/features/feature_bank/complexity.py +22 -20
- eegdash/features/feature_bank/connectivity.py +27 -28
- eegdash/features/feature_bank/csp.py +3 -1
- eegdash/features/feature_bank/dimensionality.py +6 -6
- eegdash/features/feature_bank/signal.py +29 -30
- eegdash/features/feature_bank/spectral.py +40 -44
- eegdash/features/feature_bank/utils.py +8 -0
- eegdash/features/inspect.py +126 -15
- eegdash/features/serialization.py +58 -17
- eegdash/features/utils.py +90 -16
- eegdash/hbn/__init__.py +28 -0
- eegdash/hbn/preprocessing.py +105 -0
- eegdash/hbn/windows.py +428 -0
- eegdash/logging.py +54 -0
- eegdash/mongodb.py +55 -24
- eegdash/paths.py +52 -0
- eegdash/utils.py +29 -1
- eegdash-0.5.0.dev180784713.dist-info/METADATA +121 -0
- eegdash-0.5.0.dev180784713.dist-info/RECORD +38 -0
- eegdash-0.5.0.dev180784713.dist-info/licenses/LICENSE +29 -0
- eegdash/data_config.py +0 -34
- eegdash/data_utils.py +0 -687
- eegdash/dataset.py +0 -69
- eegdash/preprocessing.py +0 -63
- eegdash-0.3.3.dev61.dist-info/METADATA +0 -192
- eegdash-0.3.3.dev61.dist-info/RECORD +0 -28
- eegdash-0.3.3.dev61.dist-info/licenses/LICENSE +0 -23
- {eegdash-0.3.3.dev61.dist-info → eegdash-0.5.0.dev180784713.dist-info}/WHEEL +0 -0
- {eegdash-0.3.3.dev61.dist-info → eegdash-0.5.0.dev180784713.dist-info}/top_level.txt +0 -0
eegdash/const.py
ADDED
@@ -0,0 +1,349 @@
+# Authors: The EEGDash contributors.
+# License: BSD-3-Clause
+# Copyright the EEGDash contributors.
+
+"""Configuration constants and mappings for EEGDash.
+
+This module contains global configuration settings, allowed query fields, and mapping
+constants used throughout the EEGDash package. It defines the interface between EEGDash
+releases and OpenNeuro dataset identifiers, as well as validation rules for database queries.
+"""
+
+__all__ = [
+    "config",
+    "ALLOWED_QUERY_FIELDS",
+    "RELEASE_TO_OPENNEURO_DATASET_MAP",
+    "SUBJECT_MINI_RELEASE_MAP",
+]
+
+ALLOWED_QUERY_FIELDS = {
+    "data_name",
+    "dataset",
+    "subject",
+    "task",
+    "session",
+    "run",
+    "modality",
+    "sampling_frequency",
+    "nchans",
+    "ntimes",
+}
+"""set: A set of field names that are permitted in database queries constructed
+via :func:`~eegdash.api.EEGDash.find` with keyword arguments."""
+
+RELEASE_TO_OPENNEURO_DATASET_MAP = {
+    "R11": "ds005516",
+    "R10": "ds005515",
+    "R9": "ds005514",
+    "R8": "ds005512",
+    "R7": "ds005511",
+    "R6": "ds005510",
+    "R4": "ds005508",
+    "R5": "ds005509",
+    "R3": "ds005507",
+    "R2": "ds005506",
+    "R1": "ds005505",
+}
+"""dict: A mapping from Healthy Brain Network (HBN) release identifiers (e.g., "R11")
+to their corresponding OpenNeuro dataset identifiers (e.g., "ds005516")."""
+
+SUBJECT_MINI_RELEASE_MAP = {
+    "R11": [
+        "NDARAB678VYW",
+        "NDARAG788YV9",
+        "NDARAM946HJE",
+        "NDARAY977BZT",
+        "NDARAZ532KK0",
+        "NDARCE912ZXW",
+        "NDARCM214WFE",
+        "NDARDL033XRG",
+        "NDARDT889RT9",
+        "NDARDZ794ZVP",
+        "NDAREV869CPW",
+        "NDARFN221WW5",
+        "NDARFV289RKB",
+        "NDARFY623ZTE",
+        "NDARGA890MKA",
+        "NDARHN206XY3",
+        "NDARHP518FUR",
+        "NDARJL292RYV",
+        "NDARKM199DXW",
+        "NDARKW236TN7",
+    ],
+    "R10": [
+        "NDARAR935TGZ",
+        "NDARAV474ADJ",
+        "NDARCB869VM8",
+        "NDARCJ667UPL",
+        "NDARCM677TC1",
+        "NDARET671FTC",
+        "NDARKM061NHZ",
+        "NDARLD501HDK",
+        "NDARLL176DJR",
+        "NDARMT791WDH",
+        "NDARMW299ZAB",
+        "NDARNC405WJA",
+        "NDARNP962TJK",
+        "NDARPB967KU7",
+        "NDARRU560AGK",
+        "NDARTB173LY2",
+        "NDARUW377KAE",
+        "NDARVH565FX9",
+        "NDARVP799KGY",
+        "NDARVY962GB5",
+    ],
+    "R9": [
+        "NDARAC589YMB",
+        "NDARAC853CR6",
+        "NDARAH239PGG",
+        "NDARAL897CYV",
+        "NDARAN160GUF",
+        "NDARAP049KXJ",
+        "NDARAP457WB5",
+        "NDARAW216PM7",
+        "NDARBA004KBT",
+        "NDARBD328NUQ",
+        "NDARBF042LDM",
+        "NDARBH019KPD",
+        "NDARBH728DFK",
+        "NDARBM370JCB",
+        "NDARBU183TDJ",
+        "NDARBW971DCW",
+        "NDARBZ444ZHK",
+        "NDARCC620ZFT",
+        "NDARCD182XT1",
+        "NDARCK113CJM",
+    ],
+    "R8": [
+        "NDARAB514MAJ",
+        "NDARAD571FLB",
+        "NDARAF003VCL",
+        "NDARAG191AE8",
+        "NDARAJ977PRJ",
+        "NDARAP912JK3",
+        "NDARAV454VF0",
+        "NDARAY298THW",
+        "NDARBJ375VP4",
+        "NDARBT436PMT",
+        "NDARBV630BK6",
+        "NDARCB627KDN",
+        "NDARCC059WTH",
+        "NDARCM953HKD",
+        "NDARCN681CXW",
+        "NDARCT889DMB",
+        "NDARDJ204EPU",
+        "NDARDJ544BU5",
+        "NDARDP292DVC",
+        "NDARDW178AC6",
+    ],
+    "R7": [
+        "NDARAY475AKD",
+        "NDARBW026UGE",
+        "NDARCK162REX",
+        "NDARCK481KRH",
+        "NDARCV378MMX",
+        "NDARCX462NVA",
+        "NDARDJ970ELG",
+        "NDARDU617ZW1",
+        "NDAREM609ZXW",
+        "NDAREW074ZM2",
+        "NDARFE555KXB",
+        "NDARFT176NJP",
+        "NDARGK442YHH",
+        "NDARGM439FZD",
+        "NDARGT634DUJ",
+        "NDARHE283KZN",
+        "NDARHG260BM9",
+        "NDARHL684WYU",
+        "NDARHN224TPA",
+        "NDARHP841RMR",
+    ],
+    "R6": [
+        "NDARAD224CRB",
+        "NDARAE301XTM",
+        "NDARAT680GJA",
+        "NDARCA578CEB",
+        "NDARDZ147ETZ",
+        "NDARFL793LDE",
+        "NDARFX710UZA",
+        "NDARGE994BMX",
+        "NDARGP191YHN",
+        "NDARGV436PFT",
+        "NDARHF545HFW",
+        "NDARHP039DBU",
+        "NDARHT774ZK1",
+        "NDARJA830BYV",
+        "NDARKB614KGY",
+        "NDARKM250ET5",
+        "NDARKZ085UKQ",
+        "NDARLB581AXF",
+        "NDARNJ899HW7",
+        "NDARRZ606EDP",
+    ],
+    "R4": [
+        "NDARAC350BZ0",
+        "NDARAD615WLJ",
+        "NDARAG584XLU",
+        "NDARAH503YG1",
+        "NDARAX272ZJL",
+        "NDARAY461TZZ",
+        "NDARBC734UVY",
+        "NDARBL444FBA",
+        "NDARBT640EBN",
+        "NDARBU098PJT",
+        "NDARBU928LV0",
+        "NDARBV059CGE",
+        "NDARCG037CX4",
+        "NDARCG947ZC0",
+        "NDARCH001CN2",
+        "NDARCU001ZN7",
+        "NDARCW497XW2",
+        "NDARCX053GU5",
+        "NDARDF568GL5",
+        "NDARDJ092YKH",
+    ],
+    "R5": [
+        "NDARAH793FBF",
+        "NDARAJ689BVN",
+        "NDARAP785CTE",
+        "NDARAU708TL8",
+        "NDARBE091BGD",
+        "NDARBE103DHM",
+        "NDARBF851NH6",
+        "NDARBH228RDW",
+        "NDARBJ674TVU",
+        "NDARBM433VER",
+        "NDARCA740UC8",
+        "NDARCU633GCZ",
+        "NDARCU736GZ1",
+        "NDARCU744XWL",
+        "NDARDC843HHM",
+        "NDARDH086ZKK",
+        "NDARDL305BT8",
+        "NDARDU853XZ6",
+        "NDARDV245WJG",
+        "NDAREC480KFA",
+    ],
+    "R3": [
+        "NDARAA948VFH",
+        "NDARAD774HAZ",
+        "NDARAE828CML",
+        "NDARAG340ERT",
+        "NDARBA839HLG",
+        "NDARBE641DGZ",
+        "NDARBG574KF4",
+        "NDARBM642JFT",
+        "NDARCL016NHB",
+        "NDARCV944JA6",
+        "NDARCY178KJP",
+        "NDARDY150ZP9",
+        "NDAREC542MH3",
+        "NDAREK549XUQ",
+        "NDAREM887YY8",
+        "NDARFA815FXE",
+        "NDARFF644ZGD",
+        "NDARFV557XAA",
+        "NDARFV780ABD",
+        "NDARGB102NWJ",
+    ],
+    "R2": [
+        "NDARAB793GL3",
+        "NDARAM675UR8",
+        "NDARBM839WR5",
+        "NDARBU730PN8",
+        "NDARCT974NAJ",
+        "NDARCW933FD5",
+        "NDARCZ770BRG",
+        "NDARDW741HCF",
+        "NDARDZ058NZN",
+        "NDAREC377AU2",
+        "NDAREM500WWH",
+        "NDAREV527ZRF",
+        "NDAREV601CE7",
+        "NDARFF070XHV",
+        "NDARFR108JNB",
+        "NDARFT305CG1",
+        "NDARGA056TMW",
+        "NDARGH775KF5",
+        "NDARGJ878ZP4",
+        "NDARHA387FPM",
+    ],
+    "R1": [
+        "NDARAC904DMU",
+        "NDARAM704GKZ",
+        "NDARAP359UM6",
+        "NDARBD879MBX",
+        "NDARBH024NH2",
+        "NDARBK082PDD",
+        "NDARCA153NKE",
+        "NDARCE721YB5",
+        "NDARCJ594BWQ",
+        "NDARCN669XPR",
+        "NDARCW094JCG",
+        "NDARCZ947WU5",
+        "NDARDH670PXH",
+        "NDARDL511UND",
+        "NDARDU986RBM",
+        "NDAREM731BYM",
+        "NDAREN519BLJ",
+        "NDARFK610GY5",
+        "NDARFT581ZW5",
+        "NDARFW972KFQ",
+    ],
+}
+"""dict: A mapping from HBN release identifiers to a list of subject IDs.
+This is used to select a small, representative subset of subjects for creating
+"mini" datasets for testing and demonstration purposes."""
+
+config = {
+    "required_fields": ["data_name"],
+    # Default set of user-facing primary record attributes expected in the database. Records
+    # where any of these are missing will be loaded with the respective attribute set to None.
+    # Additional fields may be returned if they are present in the database, notably bidsdependencies.
+    "attributes": {
+        "data_name": "str",
+        "dataset": "str",
+        "bidspath": "str",
+        "subject": "str",
+        "task": "str",
+        "session": "str",
+        "run": "str",
+        "sampling_frequency": "float",
+        "modality": "str",
+        "nchans": "int",
+        "ntimes": "int",  # note: this is really the number of seconds in the data, rounded down
+    },
+    # queryable descriptive fields for a given recording
+    "description_fields": ["subject", "session", "run", "task", "age", "gender", "sex"],
+    # list of filenames that may be present in the BIDS dataset directory that are used
+    # to load and interpret a given BIDS recording.
+    "bids_dependencies_files": [
+        "dataset_description.json",
+        "participants.tsv",
+        "events.tsv",
+        "events.json",
+        "eeg.json",
+        "electrodes.tsv",
+        "channels.tsv",
+        "coordsystem.json",
+    ],
+    "accepted_query_fields": ["data_name", "dataset"],
+}
+"""dict: A global configuration dictionary for the EEGDash package.
+
+Keys
+----
+required_fields : list
+    Fields that must be present in every database record.
+attributes : dict
+    A schema defining the expected primary attributes and their types for a
+    database record.
+description_fields : list
+    A list of fields considered to be descriptive metadata for a recording,
+    which can be used for filtering and display.
+bids_dependencies_files : list
+    A list of BIDS metadata filenames that are relevant for interpreting an
+    EEG recording.
+accepted_query_fields : list
+    Fields that are accepted for lightweight existence checks in the database.
+"""
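Since these constants are plain data, the validation rule described in the module docstring can be illustrated directly. The sketch below is hedged: `build_query` is a hypothetical helper, not part of the package; it checks keyword arguments against ALLOWED_QUERY_FIELDS and resolves an HBN release to its OpenNeuro dataset and "mini" subject subset.

    # Hedged sketch: validate query kwargs against eegdash.const and resolve
    # an HBN release. `build_query` is a hypothetical helper, not package API.
    from eegdash.const import (
        ALLOWED_QUERY_FIELDS,
        RELEASE_TO_OPENNEURO_DATASET_MAP,
        SUBJECT_MINI_RELEASE_MAP,
    )

    def build_query(**kwargs) -> dict:
        unknown = set(kwargs) - ALLOWED_QUERY_FIELDS
        if unknown:
            raise ValueError(f"unsupported query fields: {sorted(unknown)}")
        return kwargs

    release = "R5"
    query = build_query(
        dataset=RELEASE_TO_OPENNEURO_DATASET_MAP[release],  # "ds005509"
        task="RestingState",  # illustrative task name
    )
    mini_subjects = SUBJECT_MINI_RELEASE_MAP[release][:3]
    print(query, mini_subjects)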
eegdash/dataset/__init__.py
ADDED
@@ -0,0 +1,28 @@
+"""Public API for dataset helpers and dynamically generated datasets."""
+
+from . import dataset as _dataset_mod  # triggers dynamic class registration
+from .bids_dataset import EEGBIDSDataset
+from .dataset import EEGChallengeDataset, EEGDashDataset
+from .registry import register_openneuro_datasets
+
+# Re-export dynamically generated dataset classes at the package level so that
+# ``eegdash.dataset`` shows them in the API docs and users can import as
+# ``from eegdash.dataset import DSXXXXX``.
+_dyn_names = []
+for _name in getattr(_dataset_mod, "__all__", []):
+    if _name == "EEGChallengeDataset":
+        # Already imported explicitly above
+        continue
+    _obj = getattr(_dataset_mod, _name, None)
+    if _obj is not None:
+        globals()[_name] = _obj
+        _dyn_names.append(_name)
+
+__all__ = [
+    "EEGBIDSDataset",
+    "EEGDashDataset",
+    "EEGChallengeDataset",
+    "register_openneuro_datasets",
+] + _dyn_names
+
+del _dataset_mod, _name, _obj, _dyn_names
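The re-export loop above lifts whatever classes `eegdash.dataset.dataset` registered at import time up to the package level. For readers unfamiliar with the idiom, here is a minimal, self-contained sketch of the same pattern; the module and class names are made up and this is not the registry.py implementation.

    # Minimal sketch of the register-then-re-export idiom used above. The
    # fake module and DSxxxxxx class names are illustrative only.
    import sys
    import types

    mod = types.ModuleType("fake_datasets")
    for ds_id in ("ds005505", "ds005506"):
        cls = type(ds_id.upper(), (object,), {"dataset_id": ds_id})
        setattr(mod, cls.__name__, cls)
    mod.__all__ = ["DS005505", "DS005506"]
    sys.modules["fake_datasets"] = mod

    # A package __init__ can then lift the generated classes into its own
    # namespace, exactly like the loop over _dataset_mod.__all__ above.
    ns = {}
    for name in getattr(mod, "__all__", []):
        obj = getattr(mod, name, None)
        if obj is not None:
            ns[name] = obj
    print(sorted(ns))  # ['DS005505', 'DS005506']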
eegdash/dataset/base.py
ADDED
@@ -0,0 +1,311 @@
+# Authors: The EEGDash contributors.
+# License: BSD-3-Clause
+# Copyright the EEGDash contributors.
+
+"""Data utilities and dataset classes for EEG data handling.
+
+This module provides core dataset classes for working with EEG data in the EEGDash ecosystem,
+including classes for individual recordings and collections of datasets. It integrates with
+braindecode for machine learning workflows and handles data loading from both local and remote sources.
+"""
+
+import io
+import os
+import traceback
+from contextlib import redirect_stderr
+from pathlib import Path
+from typing import Any
+
+import mne
+import mne_bids
+from mne._fiff.utils import _read_segments_file
+from mne.io import BaseRaw
+from mne_bids import BIDSPath
+
+from braindecode.datasets.base import BaseDataset
+
+from .. import downloader
+from ..bids_eeg_metadata import enrich_from_participants
+from ..logging import logger
+from ..paths import get_default_cache_dir
+
+
+class EEGDashBaseDataset(BaseDataset):
+    """A single EEG recording dataset.
+
+    Represents a single EEG recording, typically hosted on a remote server (like AWS S3)
+    and cached locally upon first access. This class is a subclass of
+    :class:`braindecode.datasets.BaseDataset` and can be used with braindecode's
+    preprocessing and training pipelines.
+
+    Parameters
+    ----------
+    record : dict
+        A fully resolved metadata record for the data to load.
+    cache_dir : str
+        The local directory where the data will be cached.
+    s3_bucket : str, optional
+        The S3 bucket to download data from. If not provided, defaults to the
+        OpenNeuro bucket.
+    **kwargs
+        Additional keyword arguments passed to the
+        :class:`braindecode.datasets.BaseDataset` constructor.
+
+    """
+
+    _AWS_BUCKET = "s3://openneuro.org"
+
+    def __init__(
+        self,
+        record: dict[str, Any],
+        cache_dir: str,
+        s3_bucket: str | None = None,
+        **kwargs,
+    ):
+        super().__init__(None, **kwargs)
+        self.record = record
+        self.cache_dir = Path(cache_dir)
+        self.bids_kwargs = self._get_raw_bids_args()
+
+        if s3_bucket:
+            self.s3_bucket = s3_bucket
+            self.s3_open_neuro = False
+        else:
+            self.s3_bucket = self._AWS_BUCKET
+            self.s3_open_neuro = True
+
+        # Compute a dataset folder name under cache_dir that encodes preprocessing
+        # (e.g., bdf, mini) to avoid overlapping with the original dataset cache.
+        self.dataset_folder = record.get("dataset", "")
+        # TODO: remove this hack when the competition is over
+        if s3_bucket:
+            suffixes: list[str] = []
+            bucket_lower = str(s3_bucket).lower()
+            if "bdf" in bucket_lower:
+                suffixes.append("bdf")
+            if "mini" in bucket_lower:
+                suffixes.append("mini")
+            if suffixes:
+                self.dataset_folder = f"{self.dataset_folder}-{'-'.join(suffixes)}"
+
+        # Place files under the dataset-specific folder (with suffix if any)
+        rel = Path(record["bidspath"])  # usually starts with dataset id
+        if rel.parts and rel.parts[0] == record.get("dataset"):
+            rel = Path(self.dataset_folder, *rel.parts[1:])
+        else:
+            rel = Path(self.dataset_folder) / rel
+        self.filecache = self.cache_dir / rel
+        self.bids_root = self.cache_dir / self.dataset_folder
+
+        self.bidspath = BIDSPath(
+            root=self.bids_root,
+            datatype="eeg",
+            suffix="eeg",
+            **self.bids_kwargs,
+        )
+
+        self.s3file = downloader.get_s3path(self.s3_bucket, record["bidspath"])
+        self.bids_dependencies = record["bidsdependencies"]
+        self.bids_dependencies_original = record["bidsdependencies"]
+        # TODO: remove this temporary fix for BIDS dependencies paths
+        # when the competition is over and the dataset is ingested properly
+        if not self.s3_open_neuro:
+            self.bids_dependencies = [
+                dep.split("/", 1)[1] for dep in self.bids_dependencies
+            ]
+
+        self._raw = None
+
+    def _get_raw_bids_args(self) -> dict[str, Any]:
+        """Extract BIDS-related arguments from the metadata record."""
+        desired_fields = ["subject", "session", "task", "run"]
+        return {k: self.record[k] for k in desired_fields if self.record[k]}
+
+    def _ensure_raw(self) -> None:
+        """Ensure the raw data file and its dependencies are cached locally."""
+        # TODO: remove this once the data is fixed on our side
+        # for the competition
+        if not self.s3_open_neuro:
+            self.bidspath = self.bidspath.update(extension=".bdf")
+            self.filecache = self.filecache.with_suffix(".bdf")
+
+        if not os.path.exists(self.filecache):  # not preload
+            if self.bids_dependencies:
+                downloader.download_dependencies(
+                    s3_bucket=self.s3_bucket,
+                    bids_dependencies=self.bids_dependencies,
+                    bids_dependencies_original=self.bids_dependencies_original,
+                    cache_dir=self.cache_dir,
+                    dataset_folder=self.dataset_folder,
+                    record=self.record,
+                    s3_open_neuro=self.s3_open_neuro,
+                )
+            self.filecache = downloader.download_s3_file(
+                self.s3file, self.filecache, self.s3_open_neuro
+            )
+            self.filenames = [self.filecache]
+        if self._raw is None:
+            try:
+                # mne-bids can emit noisy warnings to stderr; keep user logs clean
+                _stderr_buffer = io.StringIO()
+                with redirect_stderr(_stderr_buffer):
+                    self._raw = mne_bids.read_raw_bids(
+                        bids_path=self.bidspath, verbose="ERROR"
+                    )
+                # Enrich Raw.info and description with participants.tsv extras
+                enrich_from_participants(
+                    self.bids_root, self.bidspath, self._raw, self.description
+                )
+
+            except Exception as e:
+                logger.error(
+                    f"Error while reading BIDS file: {self.bidspath}\n"
+                    "This may be due to a missing or corrupted file.\n"
+                    "Please check the file and try again.\n"
+                    "Usually erasing the local cache and re-downloading helps.\n"
+                    f"`rm {self.bidspath}`"
+                )
+                logger.error(f"Exception: {e}")
+                logger.error(traceback.format_exc())
+                raise e
+
+    def __len__(self) -> int:
+        """Return the number of samples in the dataset."""
+        if self._raw is None:
+            if (
+                self.record["ntimes"] is None
+                or self.record["sampling_frequency"] is None
+            ):
+                self._ensure_raw()
+            else:
+                # FIXME: this is a bit strange and should definitely not change as a side effect
+                # of accessing the data (which it will, since ntimes is the actual length but rounded down)
+                return int(self.record["ntimes"] * self.record["sampling_frequency"])
+        return len(self._raw)
+
+    @property
+    def raw(self) -> BaseRaw:
+        """The MNE Raw object for this recording.
+
+        Accessing this property triggers the download and caching of the data
+        if it has not been accessed before.
+
+        Returns
+        -------
+        mne.io.BaseRaw
+            The loaded MNE Raw object.
+
+        """
+        if self._raw is None:
+            self._ensure_raw()
+        return self._raw
+
+    @raw.setter
+    def raw(self, raw: BaseRaw):
+        self._raw = raw
+
+
+class EEGDashBaseRaw(BaseRaw):
+    """MNE BaseRaw wrapper for automatic S3 data fetching.
+
+    This class extends :class:`mne.io.BaseRaw` to automatically fetch data
+    from an S3 bucket and cache it locally when data is first accessed.
+    It is intended for internal use within the EEGDash ecosystem.
+
+    Parameters
+    ----------
+    input_fname : str
+        The path to the file on the S3 bucket (relative to the bucket root).
+    metadata : dict
+        The metadata record for the recording, containing information like
+        sampling frequency, channel names, etc.
+    preload : bool, default False
+        If True, preload the data into memory.
+    cache_dir : str, optional
+        Local directory for caching data. If None, a default directory is used.
+    bids_dependencies : list of str, default []
+        A list of BIDS metadata files to download alongside the main recording.
+    verbose : str, int, or None, default None
+        The MNE verbosity level.
+
+    See Also
+    --------
+    mne.io.Raw : The base class for Raw objects in MNE.
+
+    """
+
+    _AWS_BUCKET = "s3://openneuro.org"
+
+    def __init__(
+        self,
+        input_fname: str,
+        metadata: dict[str, Any],
+        preload: bool = False,
+        *,
+        cache_dir: str | None = None,
+        bids_dependencies: list[str] | None = None,
+        verbose: Any = None,
+    ):
+        # Create a simple RawArray
+        sfreq = metadata["sfreq"]  # Sampling frequency
+        n_times = metadata["n_times"]
+        ch_names = metadata["ch_names"]
+        ch_types = []
+        for ch in metadata["ch_types"]:
+            chtype = ch.lower()
+            if chtype == "heog" or chtype == "veog":
+                chtype = "eog"
+            ch_types.append(chtype)
+        info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
+
+        self.s3file = downloader.get_s3path(self._AWS_BUCKET, input_fname)
+        self.cache_dir = Path(cache_dir) if cache_dir else get_default_cache_dir()
+        self.filecache = self.cache_dir / input_fname
+        if bids_dependencies is None:
+            bids_dependencies = []
+        self.bids_dependencies = bids_dependencies
+        self.s3_open_neuro = True  # this class always reads from the OpenNeuro bucket
+
+        if preload and not os.path.exists(self.filecache):
+            self.filecache = downloader.download_s3_file(
+                self.s3file, self.filecache, self.s3_open_neuro
+            )
+            self.filenames = [self.filecache]
+            preload = self.filecache
+
+        super().__init__(
+            info,
+            preload,
+            last_samps=[n_times - 1],
+            orig_format="single",
+            verbose=verbose,
+        )
+
+    def _read_segment(
+        self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
+    ):
+        """Read a segment of data, downloading if necessary."""
+        if not os.path.exists(self.filecache):  # not preload
+            if self.bids_dependencies:  # currently used only for sidecar files
+                downloader.download_dependencies(
+                    s3_bucket=self._AWS_BUCKET,
+                    bids_dependencies=self.bids_dependencies,
+                    bids_dependencies_original=None,
+                    cache_dir=self.cache_dir,
+                    dataset_folder=self.filecache,
+                    record={},
+                    s3_open_neuro=self.s3_open_neuro,
+                )
+            self.filecache = downloader.download_s3_file(
+                self.s3file, self.filecache, self.s3_open_neuro
+            )
+            self.filenames = [self.filecache]
+        else:  # not preload, but the file is already cached locally
+            self.filenames = [self.filecache]
+        return super()._read_segment(start, stop, sel, data_buffer, verbose=verbose)
+
+    def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
+        """Read a chunk of data from a local file."""
+        _read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
+
+
+__all__ = ["EEGDashBaseDataset", "EEGDashBaseRaw"]
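Both classes defer any network access until the data is first touched. A hedged usage sketch for EEGDashBaseDataset follows; every record value below is invented for illustration (real records come from the EEGDash database query layer and follow the config["attributes"] schema in eegdash/const.py, plus the bidsdependencies field).

    # Hedged usage sketch: all record values are invented; real records are
    # produced by the EEGDash database and match config["attributes"].
    from eegdash.dataset.base import EEGDashBaseDataset

    record = {
        "data_name": "ds005505_sub-NDARAC904DMU_task-RestingState_eeg",
        "dataset": "ds005505",
        "bidspath": "ds005505/sub-NDARAC904DMU/eeg/"
                    "sub-NDARAC904DMU_task-RestingState_eeg.set",
        "subject": "NDARAC904DMU",
        "session": None,
        "task": "RestingState",
        "run": None,
        "modality": "eeg",
        "sampling_frequency": 500.0,
        "nchans": 129,
        "ntimes": 300,  # seconds, rounded down (see the note in const.py)
        "bidsdependencies": [
            "ds005505/dataset_description.json",
            "ds005505/participants.tsv",
        ],
    }

    ds = EEGDashBaseDataset(record, cache_dir="eegdash_cache")
    # No download yet: len() takes the metadata fast path when ntimes and
    # sampling_frequency are both present in the record.
    print(len(ds))  # int(300 * 500.0) == 150000 samples (approximate)
    raw = ds.raw    # first access downloads from S3 and reads via mne-bids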