braindecode 1.3.0.dev177069446__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braindecode/__init__.py +9 -0
- braindecode/augmentation/__init__.py +52 -0
- braindecode/augmentation/base.py +225 -0
- braindecode/augmentation/functional.py +1300 -0
- braindecode/augmentation/transforms.py +1356 -0
- braindecode/classifier.py +258 -0
- braindecode/datasets/__init__.py +44 -0
- braindecode/datasets/base.py +823 -0
- braindecode/datasets/bbci.py +693 -0
- braindecode/datasets/bcicomp.py +193 -0
- braindecode/datasets/bids/__init__.py +54 -0
- braindecode/datasets/bids/datasets.py +239 -0
- braindecode/datasets/bids/format.py +717 -0
- braindecode/datasets/bids/hub.py +987 -0
- braindecode/datasets/bids/hub_format.py +717 -0
- braindecode/datasets/bids/hub_io.py +197 -0
- braindecode/datasets/bids/hub_validation.py +114 -0
- braindecode/datasets/bids/iterable.py +220 -0
- braindecode/datasets/chb_mit.py +163 -0
- braindecode/datasets/mne.py +170 -0
- braindecode/datasets/moabb.py +219 -0
- braindecode/datasets/nmt.py +313 -0
- braindecode/datasets/registry.py +120 -0
- braindecode/datasets/siena.py +162 -0
- braindecode/datasets/sleep_physio_challe_18.py +411 -0
- braindecode/datasets/sleep_physionet.py +125 -0
- braindecode/datasets/tuh.py +591 -0
- braindecode/datasets/utils.py +67 -0
- braindecode/datasets/xy.py +96 -0
- braindecode/datautil/__init__.py +62 -0
- braindecode/datautil/channel_utils.py +114 -0
- braindecode/datautil/hub_formats.py +180 -0
- braindecode/datautil/serialization.py +359 -0
- braindecode/datautil/util.py +154 -0
- braindecode/eegneuralnet.py +372 -0
- braindecode/functional/__init__.py +22 -0
- braindecode/functional/functions.py +251 -0
- braindecode/functional/initialization.py +47 -0
- braindecode/models/__init__.py +117 -0
- braindecode/models/atcnet.py +830 -0
- braindecode/models/attentionbasenet.py +727 -0
- braindecode/models/attn_sleep.py +549 -0
- braindecode/models/base.py +574 -0
- braindecode/models/bendr.py +493 -0
- braindecode/models/biot.py +537 -0
- braindecode/models/brainmodule.py +845 -0
- braindecode/models/config.py +233 -0
- braindecode/models/contrawr.py +319 -0
- braindecode/models/ctnet.py +541 -0
- braindecode/models/deep4.py +376 -0
- braindecode/models/deepsleepnet.py +417 -0
- braindecode/models/eegconformer.py +475 -0
- braindecode/models/eeginception_erp.py +379 -0
- braindecode/models/eeginception_mi.py +379 -0
- braindecode/models/eegitnet.py +302 -0
- braindecode/models/eegminer.py +256 -0
- braindecode/models/eegnet.py +359 -0
- braindecode/models/eegnex.py +354 -0
- braindecode/models/eegsimpleconv.py +201 -0
- braindecode/models/eegsym.py +917 -0
- braindecode/models/eegtcnet.py +337 -0
- braindecode/models/fbcnet.py +225 -0
- braindecode/models/fblightconvnet.py +315 -0
- braindecode/models/fbmsnet.py +338 -0
- braindecode/models/hybrid.py +126 -0
- braindecode/models/ifnet.py +443 -0
- braindecode/models/labram.py +1316 -0
- braindecode/models/luna.py +891 -0
- braindecode/models/medformer.py +760 -0
- braindecode/models/msvtnet.py +377 -0
- braindecode/models/patchedtransformer.py +640 -0
- braindecode/models/reve.py +843 -0
- braindecode/models/sccnet.py +280 -0
- braindecode/models/shallow_fbcsp.py +212 -0
- braindecode/models/signal_jepa.py +1122 -0
- braindecode/models/sinc_shallow.py +339 -0
- braindecode/models/sleep_stager_blanco_2020.py +169 -0
- braindecode/models/sleep_stager_chambon_2018.py +159 -0
- braindecode/models/sparcnet.py +426 -0
- braindecode/models/sstdpn.py +869 -0
- braindecode/models/summary.csv +47 -0
- braindecode/models/syncnet.py +234 -0
- braindecode/models/tcn.py +275 -0
- braindecode/models/tidnet.py +397 -0
- braindecode/models/tsinception.py +295 -0
- braindecode/models/usleep.py +439 -0
- braindecode/models/util.py +369 -0
- braindecode/modules/__init__.py +92 -0
- braindecode/modules/activation.py +86 -0
- braindecode/modules/attention.py +883 -0
- braindecode/modules/blocks.py +160 -0
- braindecode/modules/convolution.py +330 -0
- braindecode/modules/filter.py +654 -0
- braindecode/modules/layers.py +216 -0
- braindecode/modules/linear.py +70 -0
- braindecode/modules/parametrization.py +38 -0
- braindecode/modules/stats.py +87 -0
- braindecode/modules/util.py +85 -0
- braindecode/modules/wrapper.py +90 -0
- braindecode/preprocessing/__init__.py +271 -0
- braindecode/preprocessing/eegprep_preprocess.py +1317 -0
- braindecode/preprocessing/mne_preprocess.py +240 -0
- braindecode/preprocessing/preprocess.py +579 -0
- braindecode/preprocessing/util.py +177 -0
- braindecode/preprocessing/windowers.py +1037 -0
- braindecode/regressor.py +234 -0
- braindecode/samplers/__init__.py +18 -0
- braindecode/samplers/base.py +399 -0
- braindecode/samplers/ssl.py +263 -0
- braindecode/training/__init__.py +23 -0
- braindecode/training/callbacks.py +23 -0
- braindecode/training/losses.py +105 -0
- braindecode/training/scoring.py +477 -0
- braindecode/util.py +419 -0
- braindecode/version.py +1 -0
- braindecode/visualization/__init__.py +8 -0
- braindecode/visualization/confusion_matrices.py +289 -0
- braindecode/visualization/gradients.py +62 -0
- braindecode-1.3.0.dev177069446.dist-info/METADATA +230 -0
- braindecode-1.3.0.dev177069446.dist-info/RECORD +124 -0
- braindecode-1.3.0.dev177069446.dist-info/WHEEL +5 -0
- braindecode-1.3.0.dev177069446.dist-info/licenses/LICENSE.txt +31 -0
- braindecode-1.3.0.dev177069446.dist-info/licenses/NOTICE.txt +20 -0
- braindecode-1.3.0.dev177069446.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dataset registry for Hub integration.
|
|
3
|
+
|
|
4
|
+
Datasets register themselves here so Hub code can look them up by name
|
|
5
|
+
without direct imports (avoiding circular dependencies).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
# Authors: Kuntal Kokate
|
|
9
|
+
#
|
|
10
|
+
# License: BSD (3-clause)
|
|
11
|
+
|
|
12
|
+
from typing import Any, Dict, Type
|
|
13
|
+
|
|
14
|
+
# Global registry mapping dataset class names to classes
|
|
15
|
+
# Filled in by ``register_dataset``. Entries are keyed by ``cls.__name__``, so
# registering a second class with the same name replaces the earlier entry.
_DATASET_REGISTRY: Dict[str, Type] = {}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def register_dataset(cls: Type) -> Type:
    """
    Record a dataset class in the global registry under its own name.

    Meant to be applied as a class decorator. An existing entry with the
    same ``__name__`` is overwritten.

    Parameters
    ----------
    cls : Type
        The dataset class to record; it is stored under ``cls.__name__``.

    Returns
    -------
    Type
        ``cls`` itself, unmodified, so the decorated name still refers to
        the original class.
    """
    _DATASET_REGISTRY.update({cls.__name__: cls})
    return cls
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _available_datasets_str() -> str:
    """Return the registered class names, comma-separated, for error messages.

    A fixed placeholder is returned when nothing has been registered yet.
    """
    names = list(_DATASET_REGISTRY)
    return ", ".join(names) if names else "<no registered datasets>"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_dataset_class(name: str) -> Type:
    """
    Look up a registered dataset class by its name.

    Parameters
    ----------
    name : str
        Name the class was registered under (e.g., 'WindowsDataset').

    Returns
    -------
    Type
        The class stored in the registry for ``name``.

    Raises
    ------
    KeyError
        If nothing is registered under ``name``. The message lists the
        names that are currently available.
    """
    try:
        dataset_cls = _DATASET_REGISTRY[name]
    except KeyError as exc:
        message = (
            f"Dataset class '{name}' not found in registry. "
            f"Available classes: {_available_datasets_str()}"
        )
        raise KeyError(message) from exc
    return dataset_cls
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def get_dataset_type(obj: Any) -> str:
    """
    Return the name of the first registered class that ``obj`` is an instance of.

    Registered classes are tried in registration order and matched with
    ``isinstance``, so an instance of a subclass also matches a registered
    base class.

    Parameters
    ----------
    obj : Any
        The object to classify.

    Returns
    -------
    str
        ``__name__`` of the matching class (e.g., 'WindowsDataset').

    Raises
    ------
    TypeError
        If ``obj`` is not an instance of any registered dataset class.
    """
    matching_names = (
        candidate.__name__
        for candidate in _DATASET_REGISTRY.values()
        if isinstance(obj, candidate)
    )
    first_match = next(matching_names, None)
    if first_match is not None:
        return first_match

    raise TypeError(
        f"Object of type {type(obj).__name__} is not a registered dataset class. "
        f"Available classes: {_available_datasets_str()}"
    )
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def is_registered_dataset(obj: Any, class_name: str) -> bool:
    """
    Tell whether ``obj`` is an instance of the dataset class registered as ``class_name``.

    Parameters
    ----------
    obj : Any
        The object to test.
    class_name : str
        Registry name of the dataset class to test against.

    Returns
    -------
    bool
        True if ``obj`` is an instance of the named class. False if it is
        not, or if no class is registered under ``class_name``.
    """
    try:
        target_cls = get_dataset_class(class_name)
    except KeyError:
        # Unknown names are answered with False rather than an error.
        matches = False
    else:
        matches = isinstance(obj, target_cls)
    return matches
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This dataset is a BIDS compatible version of the Siena Scalp EEG Database.
|
|
3
|
+
|
|
4
|
+
It reorganizes the file structure to comply with the BIDS specification. To this effect:
|
|
5
|
+
|
|
6
|
+
- Metadata was organized according to BIDS.
|
|
7
|
+
- Data in the EEG edf files was modified to keep only the 19 channels from a 10-20 EEG system.
|
|
8
|
+
- Annotations were formatted as BIDS-score compatible tsv files.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
# Authors: Dan, Jonathan
|
|
12
|
+
# Detti, Paolo
|
|
13
|
+
# Bruno Aristimunha <b.aristimunha@gmail.com>
|
|
14
|
+
#
|
|
15
|
+
# License: BSD (3-clause)
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
from mne.datasets import fetch_dataset
|
|
21
|
+
|
|
22
|
+
from braindecode.datasets import BIDSDataset
|
|
23
|
+
from braindecode.datasets.utils import _correct_dataset_path
|
|
24
|
+
|
|
25
|
+
# Download location and local naming of the BIDS-formatted Siena archive.
SIENA_URL = "https://zenodo.org/records/10640762/files/BIDS_Siena.zip"
SIENA_archive_name = "SIENA.zip"
SIENA_folder_name = "SIENA-BIDS-eeg-dataset"
SIENA_dataset_name = "SIENA-EEG-Corpus"

# Parameter bundle passed as ``dataset_params`` to ``mne.datasets.fetch_dataset``.
SIENA_dataset_params = dict(
    dataset_name=SIENA_dataset_name,
    url=SIENA_URL,
    archive_name=SIENA_archive_name,
    folder_name=SIENA_folder_name,
    hash="126e71e18570cf359a440ba5227494ecffca4b0b0057c733f90ec29ba5e15ff8",  # sha256
    config_key=SIENA_dataset_name,
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class SIENA(BIDSDataset):
    """The Siena EEG Dataset.

    The database consists of EEG recordings of 14 patients acquired at the Unit of Neurology
    and Neurophysiology of the University of Siena.

    Subjects include 9 males (ages 25-71) and 5 females (ages 20-58).
    Subjects were monitored with a Video-EEG with a sampling rate of 512 Hz,
    with electrodes arranged on the basis of the international 10-20 System.

    Most of the recordings also contain 1 or 2 EKG signals.
    The diagnosis of epilepsy and the classification of seizures according to the
    criteria of the International League Against Epilepsy were performed by an expert
    clinician after a careful review of the clinical and electrophysiological
    data of each patient.

    This BIDS-compatible version of the dataset was published by Jonathan Dan [Dan2025]_
    and is based on the original Siena Scalp EEG Database [Detti2020a]_, [Detti2020b]_.

    .. versionadded:: 1.3

    Parameters
    ----------
    root : pathlib.Path | str | None
        The root of the BIDS path. If ``None`` (default), the dataset is
        downloaded with :func:`mne.datasets.fetch_dataset`. Otherwise the
        directory must contain a ``participants.tsv`` file.
    subjects : str | array-like of str | None
        The subject ID. Corresponds to "sub".
    sessions : str | array-like of str | None
        The acquisition session. Corresponds to "ses".
    tasks : str | array-like of str | None
        The experimental task. Corresponds to "task".
    acquisitions : str | array-like of str | None
        The acquisition parameters. Corresponds to "acq".
    runs : str | array-like of str | None
        The run number. Corresponds to "run".
    processings : str | array-like of str | None
        The processing label. Corresponds to "proc".
    recordings : str | array-like of str | None
        The recording name. Corresponds to "rec".
    spaces : str | array-like of str | None
        The coordinate space for anatomical and sensor location
        files (e.g., ``*_electrodes.tsv``, ``*_markers.mrk``).
        Corresponds to "space".
        Note that valid values for ``space`` must come from a list
        of BIDS keywords as described in the BIDS specification.
    splits : str | array-like of str | None
        The split of the continuous recording file for ``.fif`` data.
        Corresponds to "split".
    descriptions : str | array-like of str | None
        This corresponds to the BIDS entity ``desc``. It is used to provide
        additional information for derivative data, e.g., preprocessed data
        may be assigned ``description='cleaned'``.
    suffixes : str | array-like of str | None
        The filename suffix. This is the entity after the
        last ``_`` before the extension. E.g., ``'channels'``.
        The following filename suffixes are accepted:
        'meg', 'markers', 'eeg', 'ieeg', 'T1w',
        'participants', 'scans', 'electrodes', 'coordsystem',
        'channels', 'events', 'headshape', 'digitizer',
        'beh', 'physio', 'stim'
    extensions : str | array-like of str | None
        The extension of the filename. E.g., ``'.json'``.
        This class always passes ``".edf"`` to the parent constructor, so
        it must not be supplied by the caller.
    datatypes : str | array-like of str | None
        The BIDS data type, e.g., ``'anat'``, ``'func'``, ``'eeg'``, ``'meg'``,
        ``'ieeg'``.
    check : bool
        If ``True``, only returns paths that conform to BIDS. If ``False``,
        the ``.check`` attribute of the returned
        :class:`mne_bids.BIDSPath` object will be set to ``True`` for paths that
        do conform to BIDS, and to ``False`` for those that don't.
        This class always passes ``False`` to the parent constructor, so it
        must not be supplied by the caller.
    preload : bool
        If True, preload the data. Defaults to False.
    n_jobs : int
        Number of jobs to run in parallel. Defaults to 1.

    Raises
    ------
    ValueError
        If ``root`` is given but does not contain ``participants.tsv``.

    References
    ----------
    .. [Detti2020a] Detti, P. (2020). Siena Scalp EEG Database (version 1.0.0).
       PhysioNet. RRID:SCR_007345. https://doi.org/10.13026/5d4a-j060
    .. [Detti2020b] Detti, P., Vatti, G., Zabalo Manrique de Lara, G.
       EEG Synchronization Analysis for Seizure Prediction:
       A Study on Data of Noninvasive Recordings.
       Processes 2020, 8(7), 846; https://doi.org/10.3390/pr8070846
    .. [Dan2025] Dan, J., Pale, U., Amirshahi, A., Cappelletti, W.,
       Ingolfsson, T. M., Wang, X., ... & Ryvlin, P. (2025).
       SzCORE: seizure community open-source research evaluation
       framework for the validation of electroencephalography-based
       automated seizure detection algorithms. Epilepsia, 66, 14-24.
    """

    def __init__(self, root=None, *args, **kwargs):
        # Download dataset if not present
        if root is None:
            path_root = fetch_dataset(
                dataset_params=SIENA_dataset_params,
                path=None,
                processor="unzip",
                force_update=False,
            )
            # First time we fetch the dataset, we need to move the files to the
            # correct directory.
            path_root = _correct_dataset_path(
                path_root, SIENA_archive_name, "BIDS_Siena"
            )
        else:
            # Validate that the provided root is a valid BIDS dataset
            if not Path(f"{root}/participants.tsv").exists():
                raise ValueError(
                    f"The provided root directory {root} does not contain a valid "
                    "BIDS dataset (missing participants.tsv). Please ensure the "
                    "root points directly to the BIDS dataset directory."
                )
            path_root = root

        if args:
            # Extra positional arguments follow ``root`` in the parameter list
            # documented above. Supplying ``root`` by keyword at the same time
            # would make ``args[0]`` collide with it, so put the resolved root
            # in front of them instead.
            args = (path_root, *args)
        else:
            kwargs["root"] = path_root

        super().__init__(
            *args,
            extensions=".edf",
            check=False,
            **kwargs,
        )
|
|
@@ -0,0 +1,411 @@
|
|
|
1
|
+
"""PhysioNet Challenge 2018 dataset."""
|
|
2
|
+
|
|
3
|
+
# Authors: Hubert Banville <hubert.jbanville@gmail.com>
|
|
4
|
+
# Bruno Aristimunha <b.aristimunha@gmail.com>
|
|
5
|
+
# License: BSD (3-clause)
|
|
6
|
+
# Code copied from the repository
|
|
7
|
+
# https://github.com/hubertjb/dynamic-spatial-filtering
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import os.path as op
|
|
11
|
+
import urllib
|
|
12
|
+
|
|
13
|
+
import mne
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pandas as pd
|
|
16
|
+
import wfdb
|
|
17
|
+
from joblib import Parallel, delayed
|
|
18
|
+
from mne.datasets.sleep_physionet._utils import _fetch_one
|
|
19
|
+
from mne.datasets.utils import _get_path
|
|
20
|
+
from mne.utils import warn
|
|
21
|
+
|
|
22
|
+
from braindecode.datasets import BaseConcatDataset, RawDataset
|
|
23
|
+
|
|
24
|
+
# Remote locations: the recordings themselves and the metadata files.
PC18_URL = "https://physionet.org/files/challenge-2018/1.0.0/"
PC18_METAINFO_URL = "https://zenodo.org/records/13823458/files/"

# Local metadata files live in a ``data/pc18`` folder next to this module.
PC18_DIR = op.join(op.dirname(__file__), "data", "pc18")
PC18_RECORDS, PC18_INFO, PC18_SHA1_TRAINING, PC18_SHA1_TEST = (
    op.join(PC18_DIR, basename)
    for basename in (
        "sleep_records.csv",
        "age-sex.csv",
        "training_SHA1SUMS",
        "test_SHA1SUMS",
    )
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Function to download a file if it doesn't exist
|
|
34
|
+
def _download_if_missing(file_path, url):
    """Download ``url`` to ``file_path`` unless the file is already there.

    The parent directory is created first when it does not exist. A warning
    is emitted both when the directory is created and when a download starts.

    Parameters
    ----------
    file_path : str
        Local destination of the file.
    url : str
        Address the file is retrieved from when it is missing locally.
    """
    # ``import urllib`` on its own does not load the ``request`` submodule;
    # import it explicitly so this does not depend on another package having
    # imported it first.
    import urllib.request

    folder_path = op.dirname(file_path)

    # Ensure the folder exists. ``dirname`` is empty for a bare file name,
    # which refers to the current directory and needs no creation.
    if folder_path and not op.exists(folder_path):
        warn(f"Directory {folder_path} not found. Creating directory.")
        # exist_ok: tolerate the directory appearing between check and creation.
        os.makedirs(folder_path, exist_ok=True)

    # Check if file exists, if not download it
    if not op.exists(file_path):
        warn(f"{file_path} not found. Downloading from {url}")
        urllib.request.urlretrieve(url, file_path)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def ensure_metafiles_exist():
    """Make sure the four PC18 metadata files are available locally.

    Each file (records table, age/sex table, training and test checksum
    lists) is fetched from ``PC18_METAINFO_URL`` if it is missing.
    """
    local_and_remote = (
        (PC18_RECORDS, "sleep_records.csv"),
        (PC18_INFO, "age-sex.csv"),
        (PC18_SHA1_TRAINING, "training_SHA1SUMS"),
        (PC18_SHA1_TEST, "test_SHA1SUMS"),
    )
    for local_path, remote_name in local_and_remote:
        _download_if_missing(local_path, PC18_METAINFO_URL + remote_name)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _update_pc18_sleep_records(fname=PC18_RECORDS):
    """Create CSV file with information about available PC18 recordings.

    The training and test checksum lists are combined, reduced to the
    per-record files, joined with the age/sex table on ``Record`` and
    written out with one row per file.

    Parameters
    ----------
    fname : str
        Destination of the generated CSV file.
    """
    # Load and massage the checksums.
    sha_train_df = pd.read_csv(
        PC18_SHA1_TRAINING,
        sep=" ",
        header=None,
        names=["sha", "fname"],
        engine="python",
    )
    sha_test_df = pd.read_csv(
        PC18_SHA1_TEST, sep=" ", header=None, names=["sha", "fname"], engine="python"
    )
    sha_train_df["Split"] = "training"
    sha_test_df["Split"] = "test"
    sha_df = pd.concat([sha_train_df, sha_test_df], axis=0, ignore_index=True)
    # Keep entries whose name starts with "tr"/"te", except "*arousal.mat".
    select_records = (
        sha_df.fname.str.startswith("tr") | sha_df.fname.str.startswith("te")
    ) & ~sha_df.fname.str.endswith("arousal.mat")
    # Explicit copy: columns are assigned on the selection below, which on a
    # plain boolean-mask slice is chained assignment (SettingWithCopyWarning).
    sha_df = sha_df[select_records].copy()
    # The record name is the first path component of the listed file name.
    sha_df["Record"] = sha_df["fname"].str.split("/", expand=True)[0]
    # Prefix the file name with its split so it reads "<split>/<fname>".
    sha_df["fname"] = sha_df[["Split", "fname"]].agg("/".join, axis=1)

    # Load and massage the data.
    data = pd.read_csv(PC18_INFO)

    # The row position in the age/sex table becomes the integer subject id.
    data = data.reset_index().rename({"index": "Subject"}, axis=1)
    data["Sex"] = (
        data["Sex"].map({"F": "female", "M": "male", "m": "male"}).astype("category")
    )
    data = sha_df.merge(data, on="Record")

    # Classify each file by the token that follows the first "." in its name.
    data["Record type"] = (
        data["fname"]
        .str.split(".", expand=True)[1]
        .map({"hea": "Header", "mat": "PSG", "arousal": "Arousal"})
        .astype("category")
    )
    data = data[
        ["Subject", "Record", "Record type", "Split", "Age", "Sex", "sha", "fname"]
    ].sort_values(by="Subject")

    # Save the data.
    data.to_csv(fname, index=False)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _data_path(path=None):
    """Resolve the directory that holds the local PC18 recordings.

    The base directory is obtained from ``_get_path`` using the config key
    ``PC18_DATASET_PATH``. If that directory already contains a ``training``
    or ``test`` entry it is returned as is; otherwise its
    ``pc18-sleep-data`` subdirectory is returned.
    """
    base_dir = _get_path(path, "PC18_DATASET_PATH", "PC18_DATASET_SLEEP")
    entries = os.listdir(base_dir)
    # The specified path may already be at the training/test folders level.
    already_at_split_level = "training" in entries or "test" in entries
    return base_dir if already_at_split_level else op.join(base_dir, "pc18-sleep-data")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def fetch_pc18_data(subjects, path=None, force_update=False, base_url=PC18_URL):
    """Get paths to local copies of PhysioNet Challenge 2018 dataset files.

    This will fetch data from the publicly available PhysioNet Computing in
    Cardiology Challenge 2018 dataset on sleep arousal detection [1]_ [2]_.
    This corresponds to 1983 recordings from individual subjects with
    (suspected) sleep apnea. The dataset is separated into a training set with
    994 recordings for which arousal annotation are available and a test set
    with 989 recordings for which the labels have not been revealed. Across the
    entire dataset, mean age is 55 years old and 65% of recordings are from
    male subjects.

    More information can be found on the
    `physionet website <https://physionet.org/content/challenge-2018/1.0.0/>`_.

    Parameters
    ----------
    subjects : list of int
        The subjects to use. Can be in the range of 0-1982 (inclusive). Test
        recordings are 0-988, while training recordings are 989-1982.
    path : None | str
        Location of where to look for the PC18 data storing location. If None,
        the environment variable or config parameter ``PC18_DATASET_PATH``
        is used. If it doesn't exist, the "~/mne_data" directory is used. If
        the dataset is not found under the given path, the data will be
        automatically downloaded to the specified folder.
    force_update : bool
        Force update of the dataset even if a local copy exists.
    base_url : str
        The URL root.

    Returns
    -------
    paths : list
        One ``[psg, header, arousal]`` entry per PSG record found for the
        requested subjects. Each element is whatever ``_fetch_one`` returned
        for that file; ``arousal`` is ``None`` for records outside the
        training split.

    References
    ----------
    .. [1] Mohammad M Ghassemi, Benjamin E Moody, Li-wei H Lehman, Christopher
      Song, Qiao Li, Haoqi Sun, Roger G Mark, M Brandon Westover, Gari D
      Clifford. You Snooze, You Win: the PhysioNet/Computing in Cardiology
      Challenge 2018.
    .. [2] Goldberger, A., Amaral, L., Glass, L., Hausdorff, J., Ivanov, P. C.,
      Mark, R., ... & Stanley, H. E. (2000). PhysioBank, PhysioToolkit, and
      PhysioNet: Components of a new research resource for complex physiologic
      signals. Circulation [Online]. 101 (23), pp. e215–e220.)
    """
    all_records = pd.read_csv(PC18_RECORDS)
    record_type = all_records["Record type"]
    psg_table = all_records[record_type == "PSG"]
    header_table = all_records[record_type == "Header"]
    arousal_table = all_records[record_type == "Arousal"]

    data_dir = _data_path(path=path)

    def _fetch_row(table, position):
        # Download (or locate) the file described by one row of ``table``.
        return _fetch_one(
            table["fname"].iloc[position],
            table["sha"].iloc[position],
            data_dir,
            force_update,
            base_url,
        )

    fnames = []
    for subject in subjects:
        for position in np.where(psg_table["Subject"] == subject)[0]:
            psg_fname = _fetch_row(psg_table, position)
            # The header row is taken at the same position as the PSG row.
            hea_fname = _fetch_row(header_table, position)
            arousal_fname = None
            if psg_table["Split"].iloc[position] == "training":
                # Arousal rows are looked up by subject instead of by position.
                arousal_position = np.where(arousal_table["Subject"] == subject)[0][0]
                arousal_fname = _fetch_row(arousal_table, arousal_position)
            fnames.append([psg_fname, hea_fname, arousal_fname])

    return fnames
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _convert_wfdb_anns_to_mne_annotations(annots):
    """Convert wfdb.io.Annotation format to MNE's.

    Annotations are processed channel by channel. Channels whose notes are
    all of the paired ``'(event'`` / ``'event)'`` form are skipped. For the
    remaining (sleep-stage-like) channels, each annotation lasts until the
    next one on the same channel; the last one is given a duration of 30.

    Parameters
    ----------
    annots : wfdb.io.Annotation
        Annotation object obtained by e.g. loading an annotation file with
        wfdb.rdann().

    Returns
    -------
    mne.Annotations :
        MNE Annotations object.

    Raises
    ------
    ValueError
        If the onsets of a channel are not in increasing order, which would
        produce a negative duration.
    """
    ann_chs = set(annots.chan)
    # Onsets are expressed as sample index divided by sampling frequency.
    onsets = annots.sample / annots.fs
    # Converted once here: it does not depend on the channel being processed.
    aux_notes = np.array(annots.aux_note)
    new_onset, new_duration, new_description = [], [], []
    for channel_name in ann_chs:
        mask = annots.chan == channel_name
        ch_onsets = onsets[mask]
        ch_descs = aux_notes[mask]

        # Events with beginning and end, defined by '(event' and 'event)',
        # are not converted.
        if all(desc.startswith("(") or desc.endswith(")") for desc in ch_descs):
            continue

        # Sleep stage-like annotations: duration is the gap to the next
        # onset; the final annotation has no successor and gets 30.
        ch_durations = np.concatenate([np.diff(ch_onsets), [30]])
        # This check used to build the exception without raising it, and
        # only when every duration was positive, so it could never trigger.
        if (ch_durations < 0).any():
            raise ValueError("Negative duration")
        new_onset.extend(ch_onsets)
        new_duration.extend(ch_durations)
        new_description.extend(ch_descs)

    mne_annots = mne.Annotations(
        new_onset, new_duration, new_description, orig_time=None
    )

    return mne_annots
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
class SleepPhysionetChallenge2018(BaseConcatDataset):
    """Physionet Challenge 2018 polysomnography dataset.

    Sleep dataset from https://physionet.org/content/challenge-2018/1.0.0/.
    Contains overnight recordings from 1983 subjects.

    The total size is 266 GB, so make sure you have enough space before
    downloading.

    See `fetch_pc18_data` for a more complete description.

    Parameters
    ----------
    subject_ids : list(int) | str | None
        (list of) int of subject(s) to be loaded.
        - If `None`, loads all subjects (both training and test sets [no label associated]).
        - If `"training"`, loads only the training set subjects.
        - If `"test"`, loads only the test set subjects, no label associated!
        - Otherwise, expects an iterable of subject IDs.
    path : None | str
        Location of where to look for the PC18 data storing location. If None,
        the environment variable or config parameter ``PC18_DATASET_PATH``
        is used. If it doesn't exist, the "~/mne_data" directory is used. If
        the dataset is not found under the given path, the data will be
        automatically downloaded to the specified folder.
    load_eeg_only : bool
        If True, only load the EEG channels and discard the others (EOG, EMG,
        temperature, respiration) to avoid resampling the other signals.
    preproc : list(Preprocessor) | None
        List of preprocessors to apply to each file individually. This way the
        data can e.g., be downsampled (temporally and spatially) to limit the
        memory usage of the entire Dataset object. This also enables applying
        preprocessing in parallel over the recordings.
    n_jobs : int
        Number of parallel processes.

    Raises
    ------
    ValueError
        If ``subject_ids`` is a string other than ``"training"`` or
        ``"test"``.
    """

    def __init__(
        self,
        subject_ids="training",
        path=None,
        load_eeg_only=True,
        preproc=None,
        n_jobs=1,
    ):
        if subject_ids is None:
            subject_ids = range(1983)
            warn(
                """
                You are loading the complete dataset (0 to 1982),
                which includes a portion of the test set (0 to 988)
                from the Physionet Challenge 2018. Note that the test set
                does not have associated labels, so supervised classification
                cannot be performed on these data.""",
                UserWarning,
            )
        elif isinstance(subject_ids, str):
            # Compare against the keywords only for real strings: ``==`` on a
            # NumPy array is element-wise and cannot be used as a condition.
            if subject_ids == "training":
                subject_ids = range(989, 1983)
            elif subject_ids == "test":
                subject_ids = range(989)
                warn(
                    """
                    This subset does not have associated labels, so supervised
                    classification (sleep stage) cannot be performed on this data.
                    You can also use the meta information as a label to perform
                    another task.
                    """,
                    UserWarning,
                )
            else:
                raise ValueError(
                    "subject_ids must be None, 'training', 'test' or an iterable "
                    f"of subject IDs, got {subject_ids!r}."
                )
        else:
            # Materialise once so that a one-shot iterable is not used up by
            # the test-set check below before the files are fetched.
            subject_ids = list(subject_ids)
            # Check if the selection includes any test set IDs
            if any(sid < 989 for sid in subject_ids):
                warn(
                    """
                    You are loading a subset of the data that includes test set
                    subjects (subject IDs: 0 to 988). These subjects do not have
                    associated labels, which means supervised classification
                    (sleep stage) cannot be performed on this data. You can also
                    use the meta information as a label to perform another task.
                    """,
                    UserWarning,
                )

        ensure_metafiles_exist()

        paths = fetch_pc18_data(subject_ids, path=path)

        self.info_df = pd.read_csv(PC18_INFO)

        # NOTE(review): pairing by ``zip`` assumes ``fetch_pc18_data`` returned
        # exactly one entry per requested subject, in the same order — confirm.
        if n_jobs == 1:
            all_base_ds = [
                self._load_raw(
                    subj_nb=subject_id,
                    raw_fname=p[0],
                    arousal_fname=p[2],
                    load_eeg_only=load_eeg_only,
                    preproc=preproc,
                )
                for subject_id, p in zip(subject_ids, paths)
            ]
        else:
            all_base_ds = Parallel(n_jobs=n_jobs)(
                delayed(self._load_raw)(
                    subject_id,
                    p[0],
                    p[2],
                    load_eeg_only=load_eeg_only,
                    preproc=preproc,
                )
                for subject_id, p in zip(subject_ids, paths)
            )
        super().__init__(all_base_ds)

    def _load_raw(self, subj_nb, raw_fname, arousal_fname, load_eeg_only, preproc):
        """Load one recording and wrap it in a ``RawDataset``.

        Parameters
        ----------
        subj_nb : int
            Subject number stored in the dataset description.
        raw_fname : sequence
            Entry returned by ``fetch_pc18_data`` for the PSG file; its first
            element is used as the file path.
        arousal_fname : object | None
            Entry for the arousal annotation file. Only tested against
            ``None``: when set, the ``arousal`` annotation stored next to
            the record is read.
        load_eeg_only : bool
            If True, read only the first seven channels.
        preproc : list(Preprocessor) | None
            Preprocessors applied to the loaded recording, if any.

        Returns
        -------
        RawDataset
            The recording with its description (subject, record, split, age,
            sex).
        """
        # The first seven channels are declared as EEG.
        channel_types = ["eeg"] * 7
        if load_eeg_only:
            channels = list(range(7))
        else:
            channel_types += ["emg", "misc", "misc", "misc", "misc", "ecg"]
            channels = None

        # Load raw signals and header (wfdb takes the path without extension)
        record = wfdb.io.rdrecord(op.splitext(raw_fname[0])[0], channels=channels)

        # Convert to right units for MNE (EEG should be in V)
        data = record.p_signal.T
        data[np.array(record.units) == "uV"] /= 1e6
        data[np.array(record.units) == "mV"] /= 1e3
        info = mne.create_info(record.sig_name, record.fs, channel_types)
        raw_file = mne.io.RawArray(data, info)

        # Extract annotations
        if arousal_fname is not None:
            annots = wfdb.rdann(
                op.splitext(raw_fname[0])[0],
                "arousal",
                sampfrom=0,
                sampto=None,
                shift_samps=False,
                return_label_elements=["symbol"],
                summarize_labels=False,
            )
            mne_annots = _convert_wfdb_anns_to_mne_annotations(annots)
            raw_file = raw_file.set_annotations(mne_annots)

        # Look up age/sex by the record name, i.e. the file name without
        # directory and extension.
        record_name = op.splitext(op.basename(raw_fname[0]))[0]
        record_info = self.info_df[self.info_df["Record"] == record_name].iloc[0]
        # The split is derived from the record name prefix.
        if record_info["Record"].startswith("tr"):
            split = "training"
        elif record_info["Record"].startswith("te"):
            split = "test"
        else:
            split = "unknown"

        desc = pd.Series(
            {
                "subject": subj_nb,
                "record": record_info["Record"],
                "split": split,
                "age": record_info["Age"],
                "sex": record_info["Sex"],
            },
            name="",
        )
        base_dataset = RawDataset(raw_file, desc)

        if preproc is not None:
            # Imported here rather than at module level.
            from braindecode.preprocessing.preprocess import _preprocess

            _preprocess(base_dataset, None, preproc)

        return base_dataset
|