eegdash 0.4.1__py3-none-any.whl → 0.4.1.dev185__py3-none-any.whl
This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
Potentially problematic release.
This version of eegdash might be problematic.
- eegdash/__init__.py +3 -3
- eegdash/api.py +2 -480
- eegdash/dataset/__init__.py +8 -2
- eegdash/dataset/base.py +311 -0
- eegdash/{data_utils.py → dataset/bids_dataset.py} +4 -295
- eegdash/dataset/dataset.py +507 -4
- eegdash/features/datasets.py +9 -8
- eegdash/features/decorators.py +3 -3
- eegdash/features/inspect.py +21 -18
- eegdash/features/serialization.py +14 -9
- eegdash/features/utils.py +20 -18
- eegdash/paths.py +6 -5
- {eegdash-0.4.1.dist-info → eegdash-0.4.1.dev185.dist-info}/METADATA +2 -1
- {eegdash-0.4.1.dist-info → eegdash-0.4.1.dev185.dist-info}/RECORD +17 -16
- {eegdash-0.4.1.dist-info → eegdash-0.4.1.dev185.dist-info}/WHEEL +0 -0
- {eegdash-0.4.1.dist-info → eegdash-0.4.1.dev185.dist-info}/licenses/LICENSE +0 -0
- {eegdash-0.4.1.dist-info → eegdash-0.4.1.dev185.dist-info}/top_level.txt +0 -0
eegdash/dataset/base.py
ADDED
@@ -0,0 +1,311 @@
+# Authors: The EEGDash contributors.
+# License: GNU General Public License
+# Copyright the EEGDash contributors.
+
+"""Data utilities and dataset classes for EEG data handling.
+
+This module provides core dataset classes for working with EEG data in the EEGDash ecosystem,
+including classes for individual recordings and collections of datasets. It integrates with
+braindecode for machine learning workflows and handles data loading from both local and remote sources.
+"""
+
+import io
+import os
+import traceback
+from contextlib import redirect_stderr
+from pathlib import Path
+from typing import Any
+
+import mne
+import mne_bids
+from mne._fiff.utils import _read_segments_file
+from mne.io import BaseRaw
+from mne_bids import BIDSPath
+
+from braindecode.datasets import BaseDataset
+
+from .. import downloader
+from ..bids_eeg_metadata import enrich_from_participants
+from ..logging import logger
+from ..paths import get_default_cache_dir
+
+
+class EEGDashBaseDataset(BaseDataset):
+    """A single EEG recording dataset.
+
+    Represents a single EEG recording, typically hosted on a remote server (like AWS S3)
+    and cached locally upon first access. This class is a subclass of
+    :class:`braindecode.datasets.BaseDataset` and can be used with braindecode's
+    preprocessing and training pipelines.
+
+    Parameters
+    ----------
+    record : dict
+        A fully resolved metadata record for the data to load.
+    cache_dir : str
+        The local directory where the data will be cached.
+    s3_bucket : str, optional
+        The S3 bucket to download data from. If not provided, defaults to the
+        OpenNeuro bucket.
+    **kwargs
+        Additional keyword arguments passed to the
+        :class:`braindecode.datasets.BaseDataset` constructor.
+
+    """
+
+    _AWS_BUCKET = "s3://openneuro.org"
+
+    def __init__(
+        self,
+        record: dict[str, Any],
+        cache_dir: str,
+        s3_bucket: str | None = None,
+        **kwargs,
+    ):
+        super().__init__(None, **kwargs)
+        self.record = record
+        self.cache_dir = Path(cache_dir)
+        self.bids_kwargs = self._get_raw_bids_args()
+
+        if s3_bucket:
+            self.s3_bucket = s3_bucket
+            self.s3_open_neuro = False
+        else:
+            self.s3_bucket = self._AWS_BUCKET
+            self.s3_open_neuro = True
+
+        # Compute a dataset folder name under cache_dir that encodes preprocessing
+        # (e.g., bdf, mini) to avoid overlapping with the original dataset cache.
+        self.dataset_folder = record.get("dataset", "")
+        # TODO: remove this hack when competition is over
+        if s3_bucket:
+            suffixes: list[str] = []
+            bucket_lower = str(s3_bucket).lower()
+            if "bdf" in bucket_lower:
+                suffixes.append("bdf")
+            if "mini" in bucket_lower:
+                suffixes.append("mini")
+            if suffixes:
+                self.dataset_folder = f"{self.dataset_folder}-{'-'.join(suffixes)}"
+
+        # Place files under the dataset-specific folder (with suffix if any)
+        rel = Path(record["bidspath"])  # usually starts with dataset id
+        if rel.parts and rel.parts[0] == record.get("dataset"):
+            rel = Path(self.dataset_folder, *rel.parts[1:])
+        else:
+            rel = Path(self.dataset_folder) / rel
+        self.filecache = self.cache_dir / rel
+        self.bids_root = self.cache_dir / self.dataset_folder
+
+        self.bidspath = BIDSPath(
+            root=self.bids_root,
+            datatype="eeg",
+            suffix="eeg",
+            **self.bids_kwargs,
+        )
+
+        self.s3file = downloader.get_s3path(self.s3_bucket, record["bidspath"])
+        self.bids_dependencies = record["bidsdependencies"]
+        self.bids_dependencies_original = record["bidsdependencies"]
+        # TODO: removing temporary fix for BIDS dependencies path
+        # when the competition is over and dataset is digested properly
+        if not self.s3_open_neuro:
+            self.bids_dependencies = [
+                dep.split("/", 1)[1] for dep in self.bids_dependencies
+            ]
+
+        self._raw = None
+
+    def _get_raw_bids_args(self) -> dict[str, Any]:
+        """Extract BIDS-related arguments from the metadata record."""
+        desired_fields = ["subject", "session", "task", "run"]
+        return {k: self.record[k] for k in desired_fields if self.record[k]}
+
+    def _ensure_raw(self) -> None:
+        """Ensure the raw data file and its dependencies are cached locally."""
+        # TO-DO: remove this once is fixed on the our side
+        # for the competition
+        if not self.s3_open_neuro:
+            self.bidspath = self.bidspath.update(extension=".bdf")
+            self.filecache = self.filecache.with_suffix(".bdf")
+
+        if not os.path.exists(self.filecache):  # not preload
+            if self.bids_dependencies:
+                downloader.download_dependencies(
+                    s3_bucket=self.s3_bucket,
+                    bids_dependencies=self.bids_dependencies,
+                    bids_dependencies_original=self.bids_dependencies_original,
+                    cache_dir=self.cache_dir,
+                    dataset_folder=self.dataset_folder,
+                    record=self.record,
+                    s3_open_neuro=self.s3_open_neuro,
+                )
+            self.filecache = downloader.download_s3_file(
+                self.s3file, self.filecache, self.s3_open_neuro
+            )
+            self.filenames = [self.filecache]
+        if self._raw is None:
+            try:
+                # mne-bids can emit noisy warnings to stderr; keep user logs clean
+                _stderr_buffer = io.StringIO()
+                with redirect_stderr(_stderr_buffer):
+                    self._raw = mne_bids.read_raw_bids(
+                        bids_path=self.bidspath, verbose="ERROR"
+                    )
+                # Enrich Raw.info and description with participants.tsv extras
+                enrich_from_participants(
+                    self.bids_root, self.bidspath, self._raw, self.description
+                )
+
+            except Exception as e:
+                logger.error(
+                    f"Error while reading BIDS file: {self.bidspath}\n"
+                    "This may be due to a missing or corrupted file.\n"
+                    "Please check the file and try again.\n"
+                    "Usually erasing the local cache and re-downloading helps.\n"
+                    f"`rm {self.bidspath}`"
+                )
+                logger.error(f"Exception: {e}")
+                logger.error(traceback.format_exc())
+                raise e
+
+    def __len__(self) -> int:
+        """Return the number of samples in the dataset."""
+        if self._raw is None:
+            if (
+                self.record["ntimes"] is None
+                or self.record["sampling_frequency"] is None
+            ):
+                self._ensure_raw()
+            else:
+                # FIXME: this is a bit strange and should definitely not change as a side effect
+                # of accessing the data (which it will, since ntimes is the actual length but rounded down)
+                return int(self.record["ntimes"] * self.record["sampling_frequency"])
+        return len(self._raw)
+
+    @property
+    def raw(self) -> BaseRaw:
+        """The MNE Raw object for this recording.
+
+        Accessing this property triggers the download and caching of the data
+        if it has not been accessed before.
+
+        Returns
+        -------
+        mne.io.BaseRaw
+            The loaded MNE Raw object.
+
+        """
+        if self._raw is None:
+            self._ensure_raw()
+        return self._raw
+
+    @raw.setter
+    def raw(self, raw: BaseRaw):
+        self._raw = raw
+
+
+class EEGDashBaseRaw(BaseRaw):
+    """MNE BaseRaw wrapper for automatic S3 data fetching.
+
+    This class extends :class:`mne.io.BaseRaw` to automatically fetch data
+    from an S3 bucket and cache it locally when data is first accessed.
+    It is intended for internal use within the EEGDash ecosystem.
+
+    Parameters
+    ----------
+    input_fname : str
+        The path to the file on the S3 bucket (relative to the bucket root).
+    metadata : dict
+        The metadata record for the recording, containing information like
+        sampling frequency, channel names, etc.
+    preload : bool, default False
+        If True, preload the data into memory.
+    cache_dir : str, optional
+        Local directory for caching data. If None, a default directory is used.
+    bids_dependencies : list of str, default []
+        A list of BIDS metadata files to download alongside the main recording.
+    verbose : str, int, or None, default None
+        The MNE verbosity level.
+
+    See Also
+    --------
+    mne.io.Raw : The base class for Raw objects in MNE.
+
+    """
+
+    _AWS_BUCKET = "s3://openneuro.org"
+
+    def __init__(
+        self,
+        input_fname: str,
+        metadata: dict[str, Any],
+        preload: bool = False,
+        *,
+        cache_dir: str | None = None,
+        bids_dependencies: list[str] | None = None,
+        verbose: Any = None,
+    ):
+        # Create a simple RawArray
+        sfreq = metadata["sfreq"]  # Sampling frequency
+        n_times = metadata["n_times"]
+        ch_names = metadata["ch_names"]
+        ch_types = []
+        for ch in metadata["ch_types"]:
+            chtype = ch.lower()
+            if chtype == "heog" or chtype == "veog":
+                chtype = "eog"
+            ch_types.append(chtype)
+        info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
+
+        self.s3file = downloader.get_s3path(self._AWS_BUCKET, input_fname)
+        self.cache_dir = Path(cache_dir) if cache_dir else get_default_cache_dir()
+        self.filecache = self.cache_dir / input_fname
+        if bids_dependencies is None:
+            bids_dependencies = []
+        self.bids_dependencies = bids_dependencies
+
+        if preload and not os.path.exists(self.filecache):
+            self.filecache = downloader.download_s3_file(
+                self.s3file, self.filecache, self.s3_open_neuro
+            )
+            self.filenames = [self.filecache]
+            preload = self.filecache
+
+        super().__init__(
+            info,
+            preload,
+            last_samps=[n_times - 1],
+            orig_format="single",
+            verbose=verbose,
+        )
+
+    def _read_segment(
+        self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
+    ):
+        """Read a segment of data, downloading if necessary."""
+        if not os.path.exists(self.filecache):  # not preload
+            if self.bids_dependencies:  # this is use only to sidecars for now
+                downloader.download_dependencies(
+                    s3_bucket=self._AWS_BUCKET,
+                    bids_dependencies=self.bids_dependencies,
+                    bids_dependencies_original=None,
+                    cache_dir=self.cache_dir,
+                    dataset_folder=self.filecache,
+                    record={},
+                    s3_open_neuro=self.s3_open_neuro,
+                )
+            self.filecache = downloader.download_s3_file(
+                self.s3file, self.filecache, self.s3_open_neuro
+            )
+            self.filenames = [self.filecache]
+        else:  # not preload and file is not cached
+            self.filenames = [self.filecache]
+        return super()._read_segment(start, stop, sel, data_buffer, verbose=verbose)
+
+    def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
+        """Read a chunk of data from a local file."""
+        _read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
+
+
+__all__ = ["EEGDashBaseDataset", "EEGDashBaseRaw"]
eegdash/data_utils.py → eegdash/dataset/bids_dataset.py
RENAMED
@@ -2,312 +2,21 @@
 # License: GNU General Public License
 # Copyright the EEGDash contributors.
 
-"""
+"""Local BIDS dataset interface for EEGDash.
 
-This module provides
-
-braindecode for machine learning workflows and handles data loading from both local and remote sources.
+This module provides the EEGBIDSDataset class for interfacing with local BIDS
+datasets on the filesystem, parsing metadata, and retrieving BIDS-related information.
 """
 
-import io
 import json
 import os
 import re
-import traceback
-from contextlib import redirect_stderr
 from pathlib import Path
 from typing import Any
 
-import mne
-import mne_bids
 import pandas as pd
-from mne._fiff.utils import _read_segments_file
-from mne.io import BaseRaw
 from mne_bids import BIDSPath, find_matching_paths
 
-from braindecode.datasets import BaseDataset
-
-from . import downloader
-from .bids_eeg_metadata import enrich_from_participants
-from .logging import logger
-from .paths import get_default_cache_dir
-
-
-class EEGDashBaseDataset(BaseDataset):
-    """A single EEG recording dataset.
-
-    Represents a single EEG recording, typically hosted on a remote server (like AWS S3)
-    and cached locally upon first access. This class is a subclass of
-    :class:`braindecode.datasets.BaseDataset` and can be used with braindecode's
-    preprocessing and training pipelines.
-
-    Parameters
-    ----------
-    record : dict
-        A fully resolved metadata record for the data to load.
-    cache_dir : str
-        The local directory where the data will be cached.
-    s3_bucket : str, optional
-        The S3 bucket to download data from. If not provided, defaults to the
-        OpenNeuro bucket.
-    **kwargs
-        Additional keyword arguments passed to the
-        :class:`braindecode.datasets.BaseDataset` constructor.
-
-    """
-
-    _AWS_BUCKET = "s3://openneuro.org"
-
-    def __init__(
-        self,
-        record: dict[str, Any],
-        cache_dir: str,
-        s3_bucket: str | None = None,
-        **kwargs,
-    ):
-        super().__init__(None, **kwargs)
-        self.record = record
-        self.cache_dir = Path(cache_dir)
-        self.bids_kwargs = self._get_raw_bids_args()
-
-        if s3_bucket:
-            self.s3_bucket = s3_bucket
-            self.s3_open_neuro = False
-        else:
-            self.s3_bucket = self._AWS_BUCKET
-            self.s3_open_neuro = True
-
-        # Compute a dataset folder name under cache_dir that encodes preprocessing
-        # (e.g., bdf, mini) to avoid overlapping with the original dataset cache.
-        self.dataset_folder = record.get("dataset", "")
-        # TODO: remove this hack when competition is over
-        if s3_bucket:
-            suffixes: list[str] = []
-            bucket_lower = str(s3_bucket).lower()
-            if "bdf" in bucket_lower:
-                suffixes.append("bdf")
-            if "mini" in bucket_lower:
-                suffixes.append("mini")
-            if suffixes:
-                self.dataset_folder = f"{self.dataset_folder}-{'-'.join(suffixes)}"
-
-        # Place files under the dataset-specific folder (with suffix if any)
-        rel = Path(record["bidspath"])  # usually starts with dataset id
-        if rel.parts and rel.parts[0] == record.get("dataset"):
-            rel = Path(self.dataset_folder, *rel.parts[1:])
-        else:
-            rel = Path(self.dataset_folder) / rel
-        self.filecache = self.cache_dir / rel
-        self.bids_root = self.cache_dir / self.dataset_folder
-
-        self.bidspath = BIDSPath(
-            root=self.bids_root,
-            datatype="eeg",
-            suffix="eeg",
-            **self.bids_kwargs,
-        )
-
-        self.s3file = downloader.get_s3path(self.s3_bucket, record["bidspath"])
-        self.bids_dependencies = record["bidsdependencies"]
-        self.bids_dependencies_original = record["bidsdependencies"]
-        # TODO: removing temporary fix for BIDS dependencies path
-        # when the competition is over and dataset is digested properly
-        if not self.s3_open_neuro:
-            self.bids_dependencies = [
-                dep.split("/", 1)[1] for dep in self.bids_dependencies
-            ]
-
-        self._raw = None
-
-    def _get_raw_bids_args(self) -> dict[str, Any]:
-        """Extract BIDS-related arguments from the metadata record."""
-        desired_fields = ["subject", "session", "task", "run"]
-        return {k: self.record[k] for k in desired_fields if self.record[k]}
-
-    def _ensure_raw(self) -> None:
-        """Ensure the raw data file and its dependencies are cached locally."""
-        # TO-DO: remove this once is fixed on the our side
-        # for the competition
-        if not self.s3_open_neuro:
-            self.bidspath = self.bidspath.update(extension=".bdf")
-            self.filecache = self.filecache.with_suffix(".bdf")
-
-        if not os.path.exists(self.filecache):  # not preload
-            if self.bids_dependencies:
-                downloader.download_dependencies(
-                    s3_bucket=self.s3_bucket,
-                    bids_dependencies=self.bids_dependencies,
-                    bids_dependencies_original=self.bids_dependencies_original,
-                    cache_dir=self.cache_dir,
-                    dataset_folder=self.dataset_folder,
-                    record=self.record,
-                    s3_open_neuro=self.s3_open_neuro,
-                )
-            self.filecache = downloader.download_s3_file(
-                self.s3file, self.filecache, self.s3_open_neuro
-            )
-            self.filenames = [self.filecache]
-        if self._raw is None:
-            try:
-                # mne-bids can emit noisy warnings to stderr; keep user logs clean
-                _stderr_buffer = io.StringIO()
-                with redirect_stderr(_stderr_buffer):
-                    self._raw = mne_bids.read_raw_bids(
-                        bids_path=self.bidspath, verbose="ERROR"
-                    )
-                # Enrich Raw.info and description with participants.tsv extras
-                enrich_from_participants(
-                    self.bids_root, self.bidspath, self._raw, self.description
-                )
-
-            except Exception as e:
-                logger.error(
-                    f"Error while reading BIDS file: {self.bidspath}\n"
-                    "This may be due to a missing or corrupted file.\n"
-                    "Please check the file and try again.\n"
-                    "Usually erasing the local cache and re-downloading helps.\n"
-                    f"`rm {self.bidspath}`"
-                )
-                logger.error(f"Exception: {e}")
-                logger.error(traceback.format_exc())
-                raise e
-
-    def __len__(self) -> int:
-        """Return the number of samples in the dataset."""
-        if self._raw is None:
-            if (
-                self.record["ntimes"] is None
-                or self.record["sampling_frequency"] is None
-            ):
-                self._ensure_raw()
-            else:
-                # FIXME: this is a bit strange and should definitely not change as a side effect
-                # of accessing the data (which it will, since ntimes is the actual length but rounded down)
-                return int(self.record["ntimes"] * self.record["sampling_frequency"])
-        return len(self._raw)
-
-    @property
-    def raw(self) -> BaseRaw:
-        """The MNE Raw object for this recording.
-
-        Accessing this property triggers the download and caching of the data
-        if it has not been accessed before.
-
-        Returns
-        -------
-        mne.io.BaseRaw
-            The loaded MNE Raw object.
-
-        """
-        if self._raw is None:
-            self._ensure_raw()
-        return self._raw
-
-    @raw.setter
-    def raw(self, raw: BaseRaw):
-        self._raw = raw
-
-
-class EEGDashBaseRaw(BaseRaw):
-    """MNE BaseRaw wrapper for automatic S3 data fetching.
-
-    This class extends :class:`mne.io.BaseRaw` to automatically fetch data
-    from an S3 bucket and cache it locally when data is first accessed.
-    It is intended for internal use within the EEGDash ecosystem.
-
-    Parameters
-    ----------
-    input_fname : str
-        The path to the file on the S3 bucket (relative to the bucket root).
-    metadata : dict
-        The metadata record for the recording, containing information like
-        sampling frequency, channel names, etc.
-    preload : bool, default False
-        If True, preload the data into memory.
-    cache_dir : str, optional
-        Local directory for caching data. If None, a default directory is used.
-    bids_dependencies : list of str, default []
-        A list of BIDS metadata files to download alongside the main recording.
-    verbose : str, int, or None, default None
-        The MNE verbosity level.
-
-    See Also
-    --------
-    mne.io.Raw : The base class for Raw objects in MNE.
-
-    """
-
-    _AWS_BUCKET = "s3://openneuro.org"
-
-    def __init__(
-        self,
-        input_fname: str,
-        metadata: dict[str, Any],
-        preload: bool = False,
-        *,
-        cache_dir: str | None = None,
-        bids_dependencies: list[str] = [],
-        verbose: Any = None,
-    ):
-        # Create a simple RawArray
-        sfreq = metadata["sfreq"]  # Sampling frequency
-        n_times = metadata["n_times"]
-        ch_names = metadata["ch_names"]
-        ch_types = []
-        for ch in metadata["ch_types"]:
-            chtype = ch.lower()
-            if chtype == "heog" or chtype == "veog":
-                chtype = "eog"
-            ch_types.append(chtype)
-        info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
-
-        self.s3file = downloader.get_s3path(self._AWS_BUCKET, input_fname)
-        self.cache_dir = Path(cache_dir) if cache_dir else get_default_cache_dir()
-        self.filecache = self.cache_dir / input_fname
-        self.bids_dependencies = bids_dependencies
-
-        if preload and not os.path.exists(self.filecache):
-            self.filecache = downloader.download_s3_file(
-                self.s3file, self.filecache, self.s3_open_neuro
-            )
-            self.filenames = [self.filecache]
-            preload = self.filecache
-
-        super().__init__(
-            info,
-            preload,
-            last_samps=[n_times - 1],
-            orig_format="single",
-            verbose=verbose,
-        )
-
-    def _read_segment(
-        self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
-    ):
-        """Read a segment of data, downloading if necessary."""
-        if not os.path.exists(self.filecache):  # not preload
-            if self.bids_dependencies:  # this is use only to sidecars for now
-                downloader.download_dependencies(
-                    s3_bucket=self._AWS_BUCKET,
-                    bids_dependencies=self.bids_dependencies,
-                    bids_dependencies_original=None,
-                    cache_dir=self.cache_dir,
-                    dataset_folder=self.filecache,
-                    record={},
-                    s3_open_neuro=self.s3_open_neuro,
-                )
-            self.filecache = downloader.download_s3_file(
-                self.s3file, self.filecache, self.s3_open_neuro
-            )
-            self.filenames = [self.filecache]
-        else:  # not preload and file is not cached
-            self.filenames = [self.filecache]
-        return super()._read_segment(start, stop, sel, data_buffer, verbose=verbose)
-
-    def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
-        """Read a chunk of data from a local file."""
-        _read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
-
 
 class EEGBIDSDataset:
     """An interface to a local BIDS dataset containing EEG recordings.
@@ -731,4 +440,4 @@ class EEGBIDSDataset:
         return self._get_json_with_inheritance(data_filepath, "eeg.json")
 
 
-__all__ = ["
+__all__ = ["EEGBIDSDataset"]