eegdash 0.4.0.dev173498563__py3-none-any.whl → 0.4.1.dev185__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- eegdash/__init__.py +3 -3
- eegdash/api.py +143 -526
- eegdash/bids_eeg_metadata.py +139 -39
- eegdash/const.py +25 -0
- eegdash/dataset/__init__.py +8 -2
- eegdash/dataset/base.py +311 -0
- eegdash/dataset/bids_dataset.py +443 -0
- eegdash/dataset/dataset.py +542 -17
- eegdash/dataset/dataset_summary.csv +255 -255
- eegdash/dataset/registry.py +69 -4
- eegdash/downloader.py +95 -9
- eegdash/features/datasets.py +326 -136
- eegdash/features/decorators.py +96 -3
- eegdash/features/extractors.py +212 -55
- eegdash/features/feature_bank/complexity.py +7 -3
- eegdash/features/feature_bank/dimensionality.py +1 -1
- eegdash/features/feature_bank/signal.py +11 -10
- eegdash/features/feature_bank/utils.py +8 -0
- eegdash/features/inspect.py +97 -11
- eegdash/features/serialization.py +56 -19
- eegdash/features/utils.py +90 -16
- eegdash/hbn/preprocessing.py +50 -17
- eegdash/hbn/windows.py +145 -32
- eegdash/logging.py +19 -0
- eegdash/mongodb.py +44 -27
- eegdash/paths.py +15 -5
- eegdash/utils.py +16 -1
- {eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dev185.dist-info}/METADATA +7 -8
- eegdash-0.4.1.dev185.dist-info/RECORD +38 -0
- eegdash/data_utils.py +0 -677
- eegdash-0.4.0.dev173498563.dist-info/RECORD +0 -37
- {eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dev185.dist-info}/WHEEL +0 -0
- {eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dev185.dist-info}/licenses/LICENSE +0 -0
- {eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dev185.dist-info}/top_level.txt +0 -0
eegdash/bids_eeg_metadata.py
CHANGED
@@ -33,12 +33,30 @@ __all__ = [
 
 
 def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
-    """Build and validate a MongoDB query from
+    """Build and validate a MongoDB query from keyword arguments.
+
+    This function converts user-friendly keyword arguments into a valid
+    MongoDB query dictionary. It handles scalar values as exact matches and
+    list-like values as ``$in`` queries. It also performs validation to
+    reject unsupported fields and empty values.
+
+    Parameters
+    ----------
+    **kwargs
+        Keyword arguments representing query filters. Allowed keys are defined
+        in ``eegdash.const.ALLOWED_QUERY_FIELDS``.
+
+    Returns
+    -------
+    dict
+        A MongoDB query dictionary.
+
+    Raises
+    ------
+    ValueError
+        If an unsupported query field is provided, or if a value is None or
+        an empty string/list.
 
-    Improvements:
-    - Reject None values and empty/whitespace-only strings
-    - For list/tuple/set values: strip strings, drop None/empties, deduplicate, and use `$in`
-    - Preserve scalars as exact matches
     """
     # 1. Validate that all provided keys are allowed for querying
     unknown_fields = set(kwargs.keys()) - ALLOWED_QUERY_FIELDS
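For orientation, a minimal sketch of the query shapes this docstring describes (the dataset/subject values are illustrative, and the exact ``$in`` shape is assumed from standard MongoDB syntax):

from eegdash.bids_eeg_metadata import build_query_from_kwargs

# Scalars become exact matches; list-likes become ``$in`` queries.
query = build_query_from_kwargs(dataset="ds005505", subject=["01", "02"])
# expected: {"dataset": "ds005505", "subject": {"$in": ["01", "02"]}}

# None and empty values are rejected, per the Raises section.
try:
    build_query_from_kwargs(task="")
except ValueError:
    pass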
@@ -89,24 +107,29 @@ def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
 
 
 def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any]:
-    """Build
+    """Build a metadata record for a BIDS file.
 
-
-
-
+    Extracts metadata attributes from a single BIDS EEG file within a given
+    BIDS dataset. The extracted attributes include BIDS entities, file paths,
+    and technical metadata required for database indexing.
 
     Parameters
     ----------
     bids_dataset : EEGBIDSDataset
         The BIDS dataset object containing the file.
     bids_file : str
-        The path to the BIDS file
+        The path to the BIDS file to process.
 
     Returns
     -------
-    dict
-        A dictionary
-
+    dict
+        A dictionary of metadata attributes for the file, suitable for
+        insertion into the database.
+
+    Raises
+    ------
+    ValueError
+        If ``bids_file`` is not found in the ``bids_dataset``.
 
     """
     if bids_file not in bids_dataset.files:
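A hedged sketch of how this function might be called during indexing (the helper below is hypothetical; only ``bids_dataset.files`` is taken from the guard clause shown above):

from eegdash.bids_eeg_metadata import load_eeg_attrs_from_bids_file

def index_first_recording(bids_dataset):
    """Build the metadata record for the first file in a BIDS dataset."""
    first_file = bids_dataset.files[0]  # `.files` is checked by the guard clause above
    return load_eeg_attrs_from_bids_file(bids_dataset, first_file)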
@@ -198,11 +221,23 @@ def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any
 
 
 def normalize_key(key: str) -> str:
-    """Normalize a
+    """Normalize a string key for robust matching.
+
+    Converts the key to lowercase, replaces non-alphanumeric characters with
+    underscores, and removes leading/trailing underscores. This allows for
+    tolerant matching of keys that may have different capitalization or
+    separators (e.g., "p-factor" becomes "p_factor").
+
+    Parameters
+    ----------
+    key : str
+        The key to normalize.
+
+    Returns
+    -------
+    str
+        The normalized key.
 
-    Lowercase and replace non-alphanumeric characters with underscores, then strip
-    leading/trailing underscores. This allows tolerant matching such as
-    "p-factor" ≈ "p_factor" ≈ "P Factor".
     """
     return re.sub(r"[^a-z0-9]+", "_", str(key).lower()).strip("_")
 
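The normalization rule is fully determined by the one-line implementation above, so its behavior can be checked directly:

from eegdash.bids_eeg_metadata import normalize_key

assert normalize_key("p-factor") == "p_factor"
assert normalize_key("P Factor") == "p_factor"
assert normalize_key("__Age (years)__") == "age_years"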
@@ -212,27 +247,27 @@ def merge_participants_fields(
     participants_row: dict[str, Any] | None,
     description_fields: list[str] | None = None,
 ) -> dict[str, Any]:
-    """Merge participants.tsv
+    """Merge fields from a participants.tsv row into a description dict.
 
-
-
-
-    unless a matching requested field already captured them.
+    Enriches a description dictionary with data from a subject's row in
+    ``participants.tsv``. It avoids overwriting existing keys in the
+    description.
 
     Parameters
     ----------
     description : dict
-
-    participants_row : dict
-        A
-
-
-
+        The description dictionary to enrich.
+    participants_row : dict or None
+        A dictionary representing a row from ``participants.tsv``. If None,
+        the original description is returned unchanged.
+    description_fields : list of str, optional
+        A list of specific fields to include in the description. Matching is
+        done using normalized keys.
 
     Returns
     -------
     dict
-        The enriched description
+        The enriched description dictionary.
 
     """
     if not isinstance(description, dict) or not isinstance(participants_row, dict):
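A sketch of the merge semantics described above (values are illustrative; the matching behavior for ``description_fields`` is assumed from the "normalized keys" note):

from eegdash.bids_eeg_metadata import merge_participants_fields

description = {"age": 12}
row = {"participant_id": "sub-01", "age": 13, "p-factor": 0.5}
merged = merge_participants_fields(description, row, description_fields=["p_factor"])
# Existing "age" is not overwritten; "p-factor" matches the requested
# "p_factor" after key normalization and is merged in.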
@@ -272,10 +307,26 @@ def participants_row_for_subject(
     subject: str,
     id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
 ) -> pd.Series | None:
-    """Load participants.tsv and return the row for a subject.
+    """Load participants.tsv and return the row for a specific subject.
+
+    Searches for a subject's data in the ``participants.tsv`` file within a
+    BIDS dataset. It can identify the subject with or without the "sub-"
+    prefix.
+
+    Parameters
+    ----------
+    bids_root : str or Path
+        The root directory of the BIDS dataset.
+    subject : str
+        The subject identifier (e.g., "01" or "sub-01").
+    id_columns : tuple of str, default ("participant_id", "participant", "subject")
+        A tuple of column names to search for the subject identifier.
+
+    Returns
+    -------
+    pandas.Series or None
+        A pandas Series containing the subject's data if found, otherwise None.
 
-    - Accepts either "01" or "sub-01" as the subject identifier.
-    - Returns a pandas Series for the first matching row, or None if not found.
     """
     try:
         participants_tsv = Path(bids_root) / "participants.tsv"
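Usage sketch (the BIDS root path is hypothetical):

from eegdash.bids_eeg_metadata import participants_row_for_subject

row = participants_row_for_subject("/data/ds005505", "sub-01")  # "01" also works
if row is not None:
    print(row.get("age"))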
@@ -311,9 +362,28 @@ def participants_extras_from_tsv(
     id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
     na_like: tuple[str, ...] = ("", "n/a", "na", "nan", "unknown", "none"),
 ) -> dict[str, Any]:
-    """
+    """Extract additional participant information from participants.tsv.
+
+    Retrieves all non-identifier and non-empty fields for a subject from
+    the ``participants.tsv`` file.
+
+    Parameters
+    ----------
+    bids_root : str or Path
+        The root directory of the BIDS dataset.
+    subject : str
+        The subject identifier.
+    id_columns : tuple of str, default ("participant_id", "participant", "subject")
+        Column names to be treated as identifiers and excluded from the
+        output.
+    na_like : tuple of str, default ("", "n/a", "na", "nan", "unknown", "none")
+        Values to be considered as "Not Available" and excluded.
+
+    Returns
+    -------
+    dict
+        A dictionary of extra participant information.
 
-    Uses vectorized pandas operations to drop id columns and NA-like values.
     """
     row = participants_row_for_subject(bids_root, subject, id_columns=id_columns)
     if row is None:
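Usage sketch (path and returned values are hypothetical):

from eegdash.bids_eeg_metadata import participants_extras_from_tsv

extras = participants_extras_from_tsv("/data/ds005505", "01")
# e.g. {"age": 13, "sex": "F"}: identifier columns and NA-like
# values such as "n/a" are dropped.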
@@ -331,10 +401,21 @@ def attach_participants_extras(
     description: Any,
     extras: dict[str, Any],
 ) -> None:
-    """Attach
+    """Attach extra participant data to a raw object and its description.
+
+    Updates the ``raw.info['subject_info']`` and the description object
+    (dict or pandas Series) with extra data from ``participants.tsv``.
+    It does not overwrite existing keys.
+
+    Parameters
+    ----------
+    raw : mne.io.Raw
+        The MNE Raw object to be updated.
+    description : dict or pandas.Series
+        The description object to be updated.
+    extras : dict
+        A dictionary of extra participant information to attach.
 
-    - Adds to ``raw.info['subject_info']['participants_extras']``.
-    - Adds to ``description`` if dict or pandas Series (only missing keys).
     """
     if not extras:
         return
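A sketch of the attach step, wrapped in a function since a real ``mne.io.Raw`` is needed (the extras dict is illustrative):

from eegdash.bids_eeg_metadata import attach_participants_extras

def attach_demo(raw, description):
    extras = {"age": 13, "sex": "F"}
    attach_participants_extras(raw, description, extras)
    # Per the removed bullets above, extras land in
    # raw.info["subject_info"]["participants_extras"], and `description`
    # gains only the keys it was missing.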
@@ -375,9 +456,28 @@ def enrich_from_participants(
     raw: Any,
     description: Any,
 ) -> dict[str, Any]:
-    """
+    """Read participants.tsv and attach extra info for the subject.
+
+    This is a convenience function that finds the subject from the
+    ``bidspath``, retrieves extra information from ``participants.tsv``,
+    and attaches it to the raw object and its description.
+
+    Parameters
+    ----------
+    bids_root : str or Path
+        The root directory of the BIDS dataset.
+    bidspath : mne_bids.BIDSPath
+        The BIDSPath object for the current data file.
+    raw : mne.io.Raw
+        The MNE Raw object to be updated.
+    description : dict or pandas.Series
+        The description object to be updated.
+
+    Returns
+    -------
+    dict
+        The dictionary of extras that were attached.
 
-    Returns the extras dictionary for further use if needed.
     """
     subject = getattr(bidspath, "subject", None)
    if not subject:
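This mirrors how the new ``eegdash/dataset/base.py`` (added below) calls the function right after reading a recording; a condensed sketch:

import mne_bids

from eegdash.bids_eeg_metadata import enrich_from_participants

def load_and_enrich(bids_root, bidspath, description):
    raw = mne_bids.read_raw_bids(bids_path=bidspath, verbose="ERROR")
    extras = enrich_from_participants(bids_root, bidspath, raw, description)
    return raw, extras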
eegdash/const.py
CHANGED
@@ -28,6 +28,8 @@ ALLOWED_QUERY_FIELDS = {
     "nchans",
     "ntimes",
 }
+"""set: A set of field names that are permitted in database queries constructed
+via :func:`~eegdash.api.EEGDash.find` with keyword arguments."""
 
 RELEASE_TO_OPENNEURO_DATASET_MAP = {
     "R11": "ds005516",
@@ -42,6 +44,8 @@ RELEASE_TO_OPENNEURO_DATASET_MAP = {
     "R2": "ds005506",
     "R1": "ds005505",
 }
+"""dict: A mapping from Healthy Brain Network (HBN) release identifiers (e.g., "R11")
+to their corresponding OpenNeuro dataset identifiers (e.g., "ds005516")."""
 
 SUBJECT_MINI_RELEASE_MAP = {
     "R11": [
@@ -287,6 +291,9 @@ SUBJECT_MINI_RELEASE_MAP = {
         "NDARFW972KFQ",
     ],
 }
+"""dict: A mapping from HBN release identifiers to a list of subject IDs.
+This is used to select a small, representative subset of subjects for creating
+"mini" datasets for testing and demonstration purposes."""
 
 config = {
     "required_fields": ["data_name"],
@@ -322,3 +329,21 @@ config = {
     ],
     "accepted_query_fields": ["data_name", "dataset"],
 }
+"""dict: A global configuration dictionary for the EEGDash package.
+
+Keys
+----
+required_fields : list
+    Fields that must be present in every database record.
+attributes : dict
+    A schema defining the expected primary attributes and their types for a
+    database record.
+description_fields : list
+    A list of fields considered to be descriptive metadata for a recording,
+    which can be used for filtering and display.
+bids_dependencies_files : list
+    A list of BIDS metadata filenames that are relevant for interpreting an
+    EEG recording.
+accepted_query_fields : list
+    Fields that are accepted for lightweight existence checks in the database.
+"""
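Both constants can be exercised directly; these checks follow from the values shown in the hunks above:

from eegdash.const import ALLOWED_QUERY_FIELDS, RELEASE_TO_OPENNEURO_DATASET_MAP

assert RELEASE_TO_OPENNEURO_DATASET_MAP["R1"] == "ds005505"
assert {"nchans", "ntimes"} <= ALLOWED_QUERY_FIELDS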
eegdash/dataset/__init__.py
CHANGED
@@ -1,7 +1,8 @@
 """Public API for dataset helpers and dynamically generated datasets."""
 
 from . import dataset as _dataset_mod  # triggers dynamic class registration
-from .
+from .bids_dataset import EEGBIDSDataset
+from .dataset import EEGChallengeDataset, EEGDashDataset
 from .registry import register_openneuro_datasets
 
 # Re-export dynamically generated dataset classes at the package level so that
@@ -17,6 +18,11 @@ for _name in getattr(_dataset_mod, "__all__", []):
         globals()[_name] = _obj
         _dyn_names.append(_name)
 
-__all__ = [
+__all__ = [
+    "EEGBIDSDataset",
+    "EEGDashDataset",
+    "EEGChallengeDataset",
+    "register_openneuro_datasets",
+] + _dyn_names
 
 del _dataset_mod, _name, _obj, _dyn_names
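With the updated ``__all__``, the explicit exports are importable directly from the subpackage (dynamically registered dataset classes are re-exported as well):

from eegdash.dataset import (
    EEGBIDSDataset,
    EEGChallengeDataset,
    EEGDashDataset,
    register_openneuro_datasets,
)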
eegdash/dataset/base.py
ADDED
@@ -0,0 +1,311 @@
+# Authors: The EEGDash contributors.
+# License: GNU General Public License
+# Copyright the EEGDash contributors.
+
+"""Data utilities and dataset classes for EEG data handling.
+
+This module provides core dataset classes for working with EEG data in the EEGDash ecosystem,
+including classes for individual recordings and collections of datasets. It integrates with
+braindecode for machine learning workflows and handles data loading from both local and remote sources.
+"""
+
+import io
+import os
+import traceback
+from contextlib import redirect_stderr
+from pathlib import Path
+from typing import Any
+
+import mne
+import mne_bids
+from mne._fiff.utils import _read_segments_file
+from mne.io import BaseRaw
+from mne_bids import BIDSPath
+
+from braindecode.datasets import BaseDataset
+
+from .. import downloader
+from ..bids_eeg_metadata import enrich_from_participants
+from ..logging import logger
+from ..paths import get_default_cache_dir
+
+
+class EEGDashBaseDataset(BaseDataset):
+    """A single EEG recording dataset.
+
+    Represents a single EEG recording, typically hosted on a remote server (like AWS S3)
+    and cached locally upon first access. This class is a subclass of
+    :class:`braindecode.datasets.BaseDataset` and can be used with braindecode's
+    preprocessing and training pipelines.
+
+    Parameters
+    ----------
+    record : dict
+        A fully resolved metadata record for the data to load.
+    cache_dir : str
+        The local directory where the data will be cached.
+    s3_bucket : str, optional
+        The S3 bucket to download data from. If not provided, defaults to the
+        OpenNeuro bucket.
+    **kwargs
+        Additional keyword arguments passed to the
+        :class:`braindecode.datasets.BaseDataset` constructor.
+
+    """
+
+    _AWS_BUCKET = "s3://openneuro.org"
+
+    def __init__(
+        self,
+        record: dict[str, Any],
+        cache_dir: str,
+        s3_bucket: str | None = None,
+        **kwargs,
+    ):
+        super().__init__(None, **kwargs)
+        self.record = record
+        self.cache_dir = Path(cache_dir)
+        self.bids_kwargs = self._get_raw_bids_args()
+
+        if s3_bucket:
+            self.s3_bucket = s3_bucket
+            self.s3_open_neuro = False
+        else:
+            self.s3_bucket = self._AWS_BUCKET
+            self.s3_open_neuro = True
+
+        # Compute a dataset folder name under cache_dir that encodes preprocessing
+        # (e.g., bdf, mini) to avoid overlapping with the original dataset cache.
+        self.dataset_folder = record.get("dataset", "")
+        # TODO: remove this hack when competition is over
+        if s3_bucket:
+            suffixes: list[str] = []
+            bucket_lower = str(s3_bucket).lower()
+            if "bdf" in bucket_lower:
+                suffixes.append("bdf")
+            if "mini" in bucket_lower:
+                suffixes.append("mini")
+            if suffixes:
+                self.dataset_folder = f"{self.dataset_folder}-{'-'.join(suffixes)}"
+
+        # Place files under the dataset-specific folder (with suffix if any)
+        rel = Path(record["bidspath"])  # usually starts with dataset id
+        if rel.parts and rel.parts[0] == record.get("dataset"):
+            rel = Path(self.dataset_folder, *rel.parts[1:])
+        else:
+            rel = Path(self.dataset_folder) / rel
+        self.filecache = self.cache_dir / rel
+        self.bids_root = self.cache_dir / self.dataset_folder
+
+        self.bidspath = BIDSPath(
+            root=self.bids_root,
+            datatype="eeg",
+            suffix="eeg",
+            **self.bids_kwargs,
+        )
+
+        self.s3file = downloader.get_s3path(self.s3_bucket, record["bidspath"])
+        self.bids_dependencies = record["bidsdependencies"]
+        self.bids_dependencies_original = record["bidsdependencies"]
+        # TODO: removing temporary fix for BIDS dependencies path
+        # when the competition is over and dataset is digested properly
+        if not self.s3_open_neuro:
+            self.bids_dependencies = [
+                dep.split("/", 1)[1] for dep in self.bids_dependencies
+            ]
+
+        self._raw = None
+
+    def _get_raw_bids_args(self) -> dict[str, Any]:
+        """Extract BIDS-related arguments from the metadata record."""
+        desired_fields = ["subject", "session", "task", "run"]
+        return {k: self.record[k] for k in desired_fields if self.record[k]}
+
+    def _ensure_raw(self) -> None:
+        """Ensure the raw data file and its dependencies are cached locally."""
+        # TO-DO: remove this once is fixed on the our side
+        # for the competition
+        if not self.s3_open_neuro:
+            self.bidspath = self.bidspath.update(extension=".bdf")
+            self.filecache = self.filecache.with_suffix(".bdf")
+
+        if not os.path.exists(self.filecache):  # not preload
+            if self.bids_dependencies:
+                downloader.download_dependencies(
+                    s3_bucket=self.s3_bucket,
+                    bids_dependencies=self.bids_dependencies,
+                    bids_dependencies_original=self.bids_dependencies_original,
+                    cache_dir=self.cache_dir,
+                    dataset_folder=self.dataset_folder,
+                    record=self.record,
+                    s3_open_neuro=self.s3_open_neuro,
+                )
+            self.filecache = downloader.download_s3_file(
+                self.s3file, self.filecache, self.s3_open_neuro
+            )
+        self.filenames = [self.filecache]
+        if self._raw is None:
+            try:
+                # mne-bids can emit noisy warnings to stderr; keep user logs clean
+                _stderr_buffer = io.StringIO()
+                with redirect_stderr(_stderr_buffer):
+                    self._raw = mne_bids.read_raw_bids(
+                        bids_path=self.bidspath, verbose="ERROR"
+                    )
+                # Enrich Raw.info and description with participants.tsv extras
+                enrich_from_participants(
+                    self.bids_root, self.bidspath, self._raw, self.description
+                )
+
+            except Exception as e:
+                logger.error(
+                    f"Error while reading BIDS file: {self.bidspath}\n"
+                    "This may be due to a missing or corrupted file.\n"
+                    "Please check the file and try again.\n"
+                    "Usually erasing the local cache and re-downloading helps.\n"
+                    f"`rm {self.bidspath}`"
+                )
+                logger.error(f"Exception: {e}")
+                logger.error(traceback.format_exc())
+                raise e
+
+    def __len__(self) -> int:
+        """Return the number of samples in the dataset."""
+        if self._raw is None:
+            if (
+                self.record["ntimes"] is None
+                or self.record["sampling_frequency"] is None
+            ):
+                self._ensure_raw()
+            else:
+                # FIXME: this is a bit strange and should definitely not change as a side effect
+                # of accessing the data (which it will, since ntimes is the actual length but rounded down)
+                return int(self.record["ntimes"] * self.record["sampling_frequency"])
+        return len(self._raw)
+
+    @property
+    def raw(self) -> BaseRaw:
+        """The MNE Raw object for this recording.
+
+        Accessing this property triggers the download and caching of the data
+        if it has not been accessed before.
+
+        Returns
+        -------
+        mne.io.BaseRaw
+            The loaded MNE Raw object.
+
+        """
+        if self._raw is None:
+            self._ensure_raw()
+        return self._raw
+
+    @raw.setter
+    def raw(self, raw: BaseRaw):
+        self._raw = raw
+
+
+class EEGDashBaseRaw(BaseRaw):
+    """MNE BaseRaw wrapper for automatic S3 data fetching.
+
+    This class extends :class:`mne.io.BaseRaw` to automatically fetch data
+    from an S3 bucket and cache it locally when data is first accessed.
+    It is intended for internal use within the EEGDash ecosystem.
+
+    Parameters
+    ----------
+    input_fname : str
+        The path to the file on the S3 bucket (relative to the bucket root).
+    metadata : dict
+        The metadata record for the recording, containing information like
+        sampling frequency, channel names, etc.
+    preload : bool, default False
+        If True, preload the data into memory.
+    cache_dir : str, optional
+        Local directory for caching data. If None, a default directory is used.
+    bids_dependencies : list of str, default []
+        A list of BIDS metadata files to download alongside the main recording.
+    verbose : str, int, or None, default None
+        The MNE verbosity level.
+
+    See Also
+    --------
+    mne.io.Raw : The base class for Raw objects in MNE.
+
+    """
+
+    _AWS_BUCKET = "s3://openneuro.org"
+
+    def __init__(
+        self,
+        input_fname: str,
+        metadata: dict[str, Any],
+        preload: bool = False,
+        *,
+        cache_dir: str | None = None,
+        bids_dependencies: list[str] | None = None,
+        verbose: Any = None,
+    ):
+        # Create a simple RawArray
+        sfreq = metadata["sfreq"]  # Sampling frequency
+        n_times = metadata["n_times"]
+        ch_names = metadata["ch_names"]
+        ch_types = []
+        for ch in metadata["ch_types"]:
+            chtype = ch.lower()
+            if chtype == "heog" or chtype == "veog":
+                chtype = "eog"
+            ch_types.append(chtype)
+        info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
+
+        self.s3file = downloader.get_s3path(self._AWS_BUCKET, input_fname)
+        self.cache_dir = Path(cache_dir) if cache_dir else get_default_cache_dir()
+        self.filecache = self.cache_dir / input_fname
+        if bids_dependencies is None:
+            bids_dependencies = []
+        self.bids_dependencies = bids_dependencies
+
+        if preload and not os.path.exists(self.filecache):
+            self.filecache = downloader.download_s3_file(
+                self.s3file, self.filecache, self.s3_open_neuro
+            )
+            self.filenames = [self.filecache]
+            preload = self.filecache
+
+        super().__init__(
+            info,
+            preload,
+            last_samps=[n_times - 1],
+            orig_format="single",
+            verbose=verbose,
+        )
+
+    def _read_segment(
+        self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
+    ):
+        """Read a segment of data, downloading if necessary."""
+        if not os.path.exists(self.filecache):  # not preload
+            if self.bids_dependencies:  # this is use only to sidecars for now
+                downloader.download_dependencies(
+                    s3_bucket=self._AWS_BUCKET,
+                    bids_dependencies=self.bids_dependencies,
+                    bids_dependencies_original=None,
+                    cache_dir=self.cache_dir,
+                    dataset_folder=self.filecache,
+                    record={},
+                    s3_open_neuro=self.s3_open_neuro,
+                )
+            self.filecache = downloader.download_s3_file(
+                self.s3file, self.filecache, self.s3_open_neuro
+            )
+            self.filenames = [self.filecache]
+        else:  # not preload and file is not cached
+            self.filenames = [self.filecache]
+        return super()._read_segment(start, stop, sel, data_buffer, verbose=verbose)
+
+    def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
+        """Read a chunk of data from a local file."""
+        _read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
+
+
+__all__ = ["EEGDashBaseDataset", "EEGDashBaseRaw"]
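A hedged construction sketch for the new class; the record below is hypothetical but limited to fields that ``__init__``, ``_get_raw_bids_args``, and ``__len__`` actually read in the code above:

from eegdash.dataset.base import EEGDashBaseDataset

record = {
    "dataset": "ds005505",
    "bidspath": "ds005505/sub-01/eeg/sub-01_task-rest_eeg.set",
    "bidsdependencies": [],
    "subject": "01",
    "session": None,
    "task": "rest",
    "run": None,
    "ntimes": 100.0,  # multiplied by sampling_frequency in __len__
    "sampling_frequency": 500.0,
}
ds = EEGDashBaseDataset(record, cache_dir="./cache")
print(len(ds))  # 50000, computed from the record without touching S3
# Accessing ds.raw would trigger the S3 download and the mne_bids read.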