braindecode 1.3.0.dev177069446__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. braindecode/__init__.py +9 -0
  2. braindecode/augmentation/__init__.py +52 -0
  3. braindecode/augmentation/base.py +225 -0
  4. braindecode/augmentation/functional.py +1300 -0
  5. braindecode/augmentation/transforms.py +1356 -0
  6. braindecode/classifier.py +258 -0
  7. braindecode/datasets/__init__.py +44 -0
  8. braindecode/datasets/base.py +823 -0
  9. braindecode/datasets/bbci.py +693 -0
  10. braindecode/datasets/bcicomp.py +193 -0
  11. braindecode/datasets/bids/__init__.py +54 -0
  12. braindecode/datasets/bids/datasets.py +239 -0
  13. braindecode/datasets/bids/format.py +717 -0
  14. braindecode/datasets/bids/hub.py +987 -0
  15. braindecode/datasets/bids/hub_format.py +717 -0
  16. braindecode/datasets/bids/hub_io.py +197 -0
  17. braindecode/datasets/bids/hub_validation.py +114 -0
  18. braindecode/datasets/bids/iterable.py +220 -0
  19. braindecode/datasets/chb_mit.py +163 -0
  20. braindecode/datasets/mne.py +170 -0
  21. braindecode/datasets/moabb.py +219 -0
  22. braindecode/datasets/nmt.py +313 -0
  23. braindecode/datasets/registry.py +120 -0
  24. braindecode/datasets/siena.py +162 -0
  25. braindecode/datasets/sleep_physio_challe_18.py +411 -0
  26. braindecode/datasets/sleep_physionet.py +125 -0
  27. braindecode/datasets/tuh.py +591 -0
  28. braindecode/datasets/utils.py +67 -0
  29. braindecode/datasets/xy.py +96 -0
  30. braindecode/datautil/__init__.py +62 -0
  31. braindecode/datautil/channel_utils.py +114 -0
  32. braindecode/datautil/hub_formats.py +180 -0
  33. braindecode/datautil/serialization.py +359 -0
  34. braindecode/datautil/util.py +154 -0
  35. braindecode/eegneuralnet.py +372 -0
  36. braindecode/functional/__init__.py +22 -0
  37. braindecode/functional/functions.py +251 -0
  38. braindecode/functional/initialization.py +47 -0
  39. braindecode/models/__init__.py +117 -0
  40. braindecode/models/atcnet.py +830 -0
  41. braindecode/models/attentionbasenet.py +727 -0
  42. braindecode/models/attn_sleep.py +549 -0
  43. braindecode/models/base.py +574 -0
  44. braindecode/models/bendr.py +493 -0
  45. braindecode/models/biot.py +537 -0
  46. braindecode/models/brainmodule.py +845 -0
  47. braindecode/models/config.py +233 -0
  48. braindecode/models/contrawr.py +319 -0
  49. braindecode/models/ctnet.py +541 -0
  50. braindecode/models/deep4.py +376 -0
  51. braindecode/models/deepsleepnet.py +417 -0
  52. braindecode/models/eegconformer.py +475 -0
  53. braindecode/models/eeginception_erp.py +379 -0
  54. braindecode/models/eeginception_mi.py +379 -0
  55. braindecode/models/eegitnet.py +302 -0
  56. braindecode/models/eegminer.py +256 -0
  57. braindecode/models/eegnet.py +359 -0
  58. braindecode/models/eegnex.py +354 -0
  59. braindecode/models/eegsimpleconv.py +201 -0
  60. braindecode/models/eegsym.py +917 -0
  61. braindecode/models/eegtcnet.py +337 -0
  62. braindecode/models/fbcnet.py +225 -0
  63. braindecode/models/fblightconvnet.py +315 -0
  64. braindecode/models/fbmsnet.py +338 -0
  65. braindecode/models/hybrid.py +126 -0
  66. braindecode/models/ifnet.py +443 -0
  67. braindecode/models/labram.py +1316 -0
  68. braindecode/models/luna.py +891 -0
  69. braindecode/models/medformer.py +760 -0
  70. braindecode/models/msvtnet.py +377 -0
  71. braindecode/models/patchedtransformer.py +640 -0
  72. braindecode/models/reve.py +843 -0
  73. braindecode/models/sccnet.py +280 -0
  74. braindecode/models/shallow_fbcsp.py +212 -0
  75. braindecode/models/signal_jepa.py +1122 -0
  76. braindecode/models/sinc_shallow.py +339 -0
  77. braindecode/models/sleep_stager_blanco_2020.py +169 -0
  78. braindecode/models/sleep_stager_chambon_2018.py +159 -0
  79. braindecode/models/sparcnet.py +426 -0
  80. braindecode/models/sstdpn.py +869 -0
  81. braindecode/models/summary.csv +47 -0
  82. braindecode/models/syncnet.py +234 -0
  83. braindecode/models/tcn.py +275 -0
  84. braindecode/models/tidnet.py +397 -0
  85. braindecode/models/tsinception.py +295 -0
  86. braindecode/models/usleep.py +439 -0
  87. braindecode/models/util.py +369 -0
  88. braindecode/modules/__init__.py +92 -0
  89. braindecode/modules/activation.py +86 -0
  90. braindecode/modules/attention.py +883 -0
  91. braindecode/modules/blocks.py +160 -0
  92. braindecode/modules/convolution.py +330 -0
  93. braindecode/modules/filter.py +654 -0
  94. braindecode/modules/layers.py +216 -0
  95. braindecode/modules/linear.py +70 -0
  96. braindecode/modules/parametrization.py +38 -0
  97. braindecode/modules/stats.py +87 -0
  98. braindecode/modules/util.py +85 -0
  99. braindecode/modules/wrapper.py +90 -0
  100. braindecode/preprocessing/__init__.py +271 -0
  101. braindecode/preprocessing/eegprep_preprocess.py +1317 -0
  102. braindecode/preprocessing/mne_preprocess.py +240 -0
  103. braindecode/preprocessing/preprocess.py +579 -0
  104. braindecode/preprocessing/util.py +177 -0
  105. braindecode/preprocessing/windowers.py +1037 -0
  106. braindecode/regressor.py +234 -0
  107. braindecode/samplers/__init__.py +18 -0
  108. braindecode/samplers/base.py +399 -0
  109. braindecode/samplers/ssl.py +263 -0
  110. braindecode/training/__init__.py +23 -0
  111. braindecode/training/callbacks.py +23 -0
  112. braindecode/training/losses.py +105 -0
  113. braindecode/training/scoring.py +477 -0
  114. braindecode/util.py +419 -0
  115. braindecode/version.py +1 -0
  116. braindecode/visualization/__init__.py +8 -0
  117. braindecode/visualization/confusion_matrices.py +289 -0
  118. braindecode/visualization/gradients.py +62 -0
  119. braindecode-1.3.0.dev177069446.dist-info/METADATA +230 -0
  120. braindecode-1.3.0.dev177069446.dist-info/RECORD +124 -0
  121. braindecode-1.3.0.dev177069446.dist-info/WHEEL +5 -0
  122. braindecode-1.3.0.dev177069446.dist-info/licenses/LICENSE.txt +31 -0
  123. braindecode-1.3.0.dev177069446.dist-info/licenses/NOTICE.txt +20 -0
  124. braindecode-1.3.0.dev177069446.dist-info/top_level.txt +1 -0
@@ -0,0 +1,120 @@
+ """
+ Dataset registry for Hub integration.
+
+ Datasets register themselves here so Hub code can look them up by name
+ without direct imports (avoiding circular dependencies).
+ """
+
+ # Authors: Kuntal Kokate
+ #
+ # License: BSD (3-clause)
+
+ from typing import Any, Dict, Type
+
+ # Global registry mapping dataset class names to classes
+ _DATASET_REGISTRY: Dict[str, Type] = {}
+
+
+ def register_dataset(cls: Type) -> Type:
+     """
+     Decorator to register a dataset class in the global registry.
+
+     Parameters
+     ----------
+     cls : Type
+         The dataset class to register.
+
+     Returns
+     -------
+     Type
+         The same class (unchanged), so this can be used as a decorator.
+     """
+     _DATASET_REGISTRY[cls.__name__] = cls
+     return cls
+
+
+ def _available_datasets_str() -> str:
+     """Return a human-readable list of registered dataset class names."""
+     if not _DATASET_REGISTRY:
+         return "<no registered datasets>"
+     return ", ".join(_DATASET_REGISTRY.keys())
+
+
+ def get_dataset_class(name: str) -> Type:
+     """
+     Retrieve a registered dataset class by name.
+
+     Parameters
+     ----------
+     name : str
+         Name of the dataset class (e.g., 'WindowsDataset').
+
+     Returns
+     -------
+     Type
+         The dataset class.
+
+     Raises
+     ------
+     KeyError
+         If the class name is not registered.
+     """
+     try:
+         return _DATASET_REGISTRY[name]
+     except KeyError as exc:
+         raise KeyError(
+             f"Dataset class '{name}' not found in registry. "
+             f"Available classes: {_available_datasets_str()}"
+         ) from exc
+
+
+ def get_dataset_type(obj: Any) -> str:
+     """
+     Get the registered type name for a dataset instance.
+
+     Parameters
+     ----------
+     obj : Any
+         The object to check.
+
+     Returns
+     -------
+     str
+         The name of the dataset class (e.g., 'WindowsDataset').
+
+     Raises
+     ------
+     TypeError
+         If the object is not an instance of any registered dataset class.
+     """
+     for cls in _DATASET_REGISTRY.values():
+         if isinstance(obj, cls):
+             return cls.__name__
+
+     raise TypeError(
+         f"Object of type {type(obj).__name__} is not a registered dataset class. "
+         f"Available classes: {_available_datasets_str()}"
+     )
+
+
+ def is_registered_dataset(obj: Any, class_name: str) -> bool:
+     """
+     Check if an object is an instance of a registered dataset class.
+
+     Parameters
+     ----------
+     obj : Any
+         The object to check.
+     class_name : str
+         Name of the dataset class to check against.
+
+     Returns
+     -------
+     bool
+         True if obj is an instance of the named class, False otherwise.
+     """
+     try:
+         cls = get_dataset_class(class_name)
+     except KeyError:
+         return False
+     return isinstance(obj, cls)
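
The registry above is a minimal name-keyed plugin table: datasets opt in with the `register_dataset` decorator, and Hub code resolves them later by string name. A usage sketch (the `MyDataset` class is a hypothetical stand-in, not part of the package):

    from braindecode.datasets.registry import (
        get_dataset_class,
        get_dataset_type,
        is_registered_dataset,
        register_dataset,
    )

    @register_dataset
    class MyDataset:  # hypothetical example class, for illustration only
        pass

    assert get_dataset_class("MyDataset") is MyDataset       # lookup by name
    assert get_dataset_type(MyDataset()) == "MyDataset"      # name from instance
    assert is_registered_dataset(MyDataset(), "MyDataset")   # membership check
    assert not is_registered_dataset(MyDataset(), "NoSuchName")  # unknown -> False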
@@ -0,0 +1,162 @@
+ """
+ This dataset is a BIDS-compatible version of the Siena Scalp EEG Database.
+
+ It reorganizes the file structure to comply with the BIDS specification. To this effect:
+
+ - Metadata was organized according to BIDS.
+ - Data in the EEG edf files was modified to keep only the 19 channels from a 10-20 EEG system.
+ - Annotations were formatted as BIDS-score-compatible tsv files.
+ """
+
+ # Authors: Dan, Jonathan
+ #          Detti, Paolo
+ #          Bruno Aristimunha <b.aristimunha@gmail.com>
+ #
+ # License: BSD (3-clause)
+ from __future__ import annotations
+
+ from pathlib import Path
+
+ from mne.datasets import fetch_dataset
+
+ from braindecode.datasets import BIDSDataset
+ from braindecode.datasets.utils import _correct_dataset_path
+
+ SIENA_URL = "https://zenodo.org/records/10640762/files/BIDS_Siena.zip"
+ SIENA_archive_name = "SIENA.zip"
+ SIENA_folder_name = "SIENA-BIDS-eeg-dataset"
+ SIENA_dataset_name = "SIENA-EEG-Corpus"
+
+ SIENA_dataset_params = {
+     "dataset_name": SIENA_dataset_name,
+     "url": SIENA_URL,
+     "archive_name": SIENA_archive_name,
+     "folder_name": SIENA_folder_name,
+     "hash": "126e71e18570cf359a440ba5227494ecffca4b0b0057c733f90ec29ba5e15ff8",  # sha256
+     "config_key": SIENA_dataset_name,
+ }
+
+
+ class SIENA(BIDSDataset):
+     """The Siena EEG Dataset.
+
+     The database consists of EEG recordings of 14 patients acquired at the Unit of Neurology
+     and Neurophysiology of the University of Siena.
+
+     Subjects include 9 males (ages 25-71) and 5 females (ages 20-58).
+     Subjects were monitored with a Video-EEG at a sampling rate of 512 Hz,
+     with electrodes arranged on the basis of the international 10-20 System.
+
+     Most of the recordings also contain 1 or 2 EKG signals.
+     The diagnosis of epilepsy and the classification of seizures according to the
+     criteria of the International League Against Epilepsy were performed by an expert
+     clinician after a careful review of the clinical and electrophysiological
+     data of each patient.
+
+     This BIDS-compatible version of the dataset was published by Jonathan Dan [Dan2025]_
+     and is based on the original Siena Scalp EEG Database [Detti2020a]_, [Detti2020b]_.
+
+     .. versionadded:: 1.3
+
+     Parameters
+     ----------
+     root : pathlib.Path | str
+         The root of the BIDS path.
+     subjects : str | array-like of str | None
+         The subject ID. Corresponds to "sub".
+     sessions : str | array-like of str | None
+         The acquisition session. Corresponds to "ses".
+     tasks : str | array-like of str | None
+         The experimental task. Corresponds to "task".
+     acquisitions : str | array-like of str | None
+         The acquisition parameters. Corresponds to "acq".
+     runs : str | array-like of str | None
+         The run number. Corresponds to "run".
+     processings : str | array-like of str | None
+         The processing label. Corresponds to "proc".
+     recordings : str | array-like of str | None
+         The recording name. Corresponds to "rec".
+     spaces : str | array-like of str | None
+         The coordinate space for anatomical and sensor location
+         files (e.g., ``*_electrodes.tsv``, ``*_markers.mrk``).
+         Corresponds to "space".
+         Note that valid values for ``space`` must come from a list
+         of BIDS keywords as described in the BIDS specification.
+     splits : str | array-like of str | None
+         The split of the continuous recording file for ``.fif`` data.
+         Corresponds to "split".
+     descriptions : str | array-like of str | None
+         This corresponds to the BIDS entity ``desc``. It is used to provide
+         additional information for derivative data, e.g., preprocessed data
+         may be assigned ``description='cleaned'``.
+     suffixes : str | array-like of str | None
+         The filename suffix. This is the entity after the
+         last ``_`` before the extension. E.g., ``'channels'``.
+         The following filename suffixes are accepted:
+         'meg', 'markers', 'eeg', 'ieeg', 'T1w',
+         'participants', 'scans', 'electrodes', 'coordsystem',
+         'channels', 'events', 'headshape', 'digitizer',
+         'beh', 'physio', 'stim'
+     extensions : str | array-like of str | None
+         The extension of the filename. E.g., ``'.json'``.
+         By default, uses the ones accepted by :func:`mne_bids.read_raw_bids`.
+     datatypes : str | array-like of str | None
+         The BIDS data type, e.g., ``'anat'``, ``'func'``, ``'eeg'``, ``'meg'``,
+         ``'ieeg'``.
+     check : bool
+         If ``True``, only returns paths that conform to BIDS. If ``False``
+         (default), the ``.check`` attribute of the returned
+         :class:`mne_bids.BIDSPath` object will be set to ``True`` for paths that
+         do conform to BIDS, and to ``False`` for those that don't.
+     preload : bool
+         If True, preload the data. Defaults to False.
+     n_jobs : int
+         Number of jobs to run in parallel. Defaults to 1.
+
+     References
+     ----------
+     .. [Detti2020a] Detti, P. (2020). Siena Scalp EEG Database (version 1.0.0).
+         PhysioNet. RRID:SCR_007345. https://doi.org/10.13026/5d4a-j060
+     .. [Detti2020b] Detti, P., Vatti, G., Zabalo Manrique de Lara, G.
+         EEG Synchronization Analysis for Seizure Prediction:
+         A Study on Data of Noninvasive Recordings.
+         Processes 2020, 8(7), 846; https://doi.org/10.3390/pr8070846
+     .. [Dan2025] Dan, J., Pale, U., Amirshahi, A., Cappelletti, W.,
+         Ingolfsson, T. M., Wang, X., ... & Ryvlin, P. (2025).
+         SzCORE: seizure community open-source research evaluation
+         framework for the validation of electroencephalography-based
+         automated seizure detection algorithms. Epilepsia, 66, 14-24.
+     """
+
+     def __init__(self, root=None, *args, **kwargs):
+         # Download dataset if not present
+         if root is None:
+             path_root = fetch_dataset(
+                 dataset_params=SIENA_dataset_params,
+                 path=None,
+                 processor="unzip",
+                 force_update=False,
+             )
+             # First time we fetch the dataset, we need to move the files to the
+             # correct directory.
+             path_root = _correct_dataset_path(
+                 path_root, SIENA_archive_name, "BIDS_Siena"
+             )
+         else:
+             # Validate that the provided root is a valid BIDS dataset
+             if not Path(f"{root}/participants.tsv").exists():
+                 raise ValueError(
+                     f"The provided root directory {root} does not contain a valid "
+                     "BIDS dataset (missing participants.tsv). Please ensure the "
+                     "root points directly to the BIDS dataset directory."
+                 )
+             path_root = root
+
+         kwargs["root"] = path_root
+
+         super().__init__(
+             *args,
+             extensions=".edf",
+             check=False,
+             **kwargs,
+         )
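
For context, a hedged instantiation sketch for the class above (the local path is an illustrative assumption; with `root=None` the archive is fetched from Zenodo via `fetch_dataset` instead):

    from braindecode.datasets.siena import SIENA

    # Hypothetical local copy; the directory must contain participants.tsv,
    # i.e. it must point directly at the BIDS_Siena dataset root.
    dataset = SIENA(root="/data/BIDS_Siena")
    print(dataset.description)  # per-recording metadata (BaseConcatDataset)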
@@ -0,0 +1,411 @@
+ """PhysioNet Challenge 2018 dataset."""
+
+ # Authors: Hubert Banville <hubert.jbanville@gmail.com>
+ #          Bruno Aristimunha <b.aristimunha@gmail.com>
+ # License: BSD (3-clause)
+ # Code copied from the repository
+ # https://github.com/hubertjb/dynamic-spatial-filtering
+
+ import os
+ import os.path as op
+ import urllib.request
+
+ import mne
+ import numpy as np
+ import pandas as pd
+ import wfdb
+ from joblib import Parallel, delayed
+ from mne.datasets.sleep_physionet._utils import _fetch_one
+ from mne.datasets.utils import _get_path
+ from mne.utils import warn
+
+ from braindecode.datasets import BaseConcatDataset, RawDataset
+
+ PC18_DIR = op.join(op.dirname(__file__), "data", "pc18")
+ PC18_RECORDS = op.join(PC18_DIR, "sleep_records.csv")
+ PC18_INFO = op.join(PC18_DIR, "age-sex.csv")
+ PC18_URL = "https://physionet.org/files/challenge-2018/1.0.0/"
+ PC18_SHA1_TRAINING = op.join(PC18_DIR, "training_SHA1SUMS")
+ PC18_SHA1_TEST = op.join(PC18_DIR, "test_SHA1SUMS")
+ PC18_METAINFO_URL = "https://zenodo.org/records/13823458/files/"
+
+
+ # Function to download a file if it doesn't exist
+ def _download_if_missing(file_path, url):
+     folder_path = op.dirname(file_path)
+
+     # Ensure the folder exists
+     if not op.exists(folder_path):
+         warn(f"Directory {folder_path} not found. Creating directory.")
+         os.makedirs(folder_path)
+
+     # Check if the file exists; if not, download it
+     if not op.exists(file_path):
+         warn(f"{file_path} not found. Downloading from {url}")
+         urllib.request.urlretrieve(url, file_path)
+
+
+ def ensure_metafiles_exist():
+     files_to_check = {
+         PC18_RECORDS: PC18_METAINFO_URL + "sleep_records.csv",
+         PC18_INFO: PC18_METAINFO_URL + "age-sex.csv",
+         PC18_SHA1_TRAINING: PC18_METAINFO_URL + "training_SHA1SUMS",
+         PC18_SHA1_TEST: PC18_METAINFO_URL + "test_SHA1SUMS",
+     }
+
+     for file_path, url in files_to_check.items():
+         _download_if_missing(file_path, url)
+
+
+ def _update_pc18_sleep_records(fname=PC18_RECORDS):
+     """Create CSV file with information about available PC18 recordings."""
+     # Load and massage the checksums.
+     sha_train_df = pd.read_csv(
+         PC18_SHA1_TRAINING,
+         sep=" ",
+         header=None,
+         names=["sha", "fname"],
+         engine="python",
+     )
+     sha_test_df = pd.read_csv(
+         PC18_SHA1_TEST, sep=" ", header=None, names=["sha", "fname"], engine="python"
+     )
+     sha_train_df["Split"] = "training"
+     sha_test_df["Split"] = "test"
+     sha_df = pd.concat([sha_train_df, sha_test_df], axis=0, ignore_index=True)
+     select_records = (
+         sha_df.fname.str.startswith("tr") | sha_df.fname.str.startswith("te")
+     ) & ~sha_df.fname.str.endswith("arousal.mat")
+     sha_df = sha_df[select_records]
+     sha_df["Record"] = sha_df["fname"].str.split("/", expand=True)[0]
+     sha_df["fname"] = sha_df[["Split", "fname"]].agg("/".join, axis=1)
+
+     # Load and massage the data.
+     data = pd.read_csv(PC18_INFO)
+
+     data = data.reset_index().rename({"index": "Subject"}, axis=1)
+     data["Sex"] = (
+         data["Sex"].map({"F": "female", "M": "male", "m": "male"}).astype("category")
+     )
+     data = sha_df.merge(data, on="Record")
+
+     data["Record type"] = (
+         data["fname"]
+         .str.split(".", expand=True)[1]
+         .map({"hea": "Header", "mat": "PSG", "arousal": "Arousal"})
+         .astype("category")
+     )
+     data = data[
+         ["Subject", "Record", "Record type", "Split", "Age", "Sex", "sha", "fname"]
+     ].sort_values(by="Subject")
+
+     # Save the data.
+     data.to_csv(fname, index=False)
+
+
+ def _data_path(path=None):
+     """Get path to local copy of PC18 dataset."""
+     key = "PC18_DATASET_PATH"
+     name = "PC18_DATASET_SLEEP"
+     path = _get_path(path, key, name)
+     subdirs = os.listdir(path)
+     if "training" in subdirs or "test" in subdirs:
+         # The specified path is already at the training/test folders level.
+         return path
+     else:
+         return op.join(path, "pc18-sleep-data")
+
+
+ def fetch_pc18_data(subjects, path=None, force_update=False, base_url=PC18_URL):
+     """Get paths to local copies of PhysioNet Challenge 2018 dataset files.
+
+     This will fetch data from the publicly available PhysioNet Computing in
+     Cardiology Challenge 2018 dataset on sleep arousal detection [1]_ [2]_.
+     This corresponds to 1983 recordings from individual subjects with
+     (suspected) sleep apnea. The dataset is separated into a training set with
+     994 recordings for which arousal annotations are available and a test set
+     with 989 recordings for which the labels have not been revealed. Across the
+     entire dataset, the mean age is 55 years and 65% of recordings are from
+     male subjects.
+
+     More information can be found on the
+     `physionet website <https://physionet.org/content/challenge-2018/1.0.0/>`_.
+
+     Parameters
+     ----------
+     subjects : list of int
+         The subjects to use. Can be in the range of 0-1982 (inclusive). Test
+         recordings are 0-988, while training recordings are 989-1982.
+     path : None | str
+         Location of where to look for the PC18 data storing location. If None,
+         the environment variable or config parameter ``PC18_DATASET_PATH``
+         is used. If it doesn't exist, the "~/mne_data" directory is used. If
+         the dataset is not found under the given path, the data will be
+         automatically downloaded to the specified folder.
+     force_update : bool
+         Force update of the dataset even if a local copy exists.
+     base_url : str
+         The URL root.
+
+     Returns
+     -------
+     paths : list
+         List of local data paths of the given type.
+
+     References
+     ----------
+     .. [1] Mohammad M Ghassemi, Benjamin E Moody, Li-wei H Lehman, Christopher
+         Song, Qiao Li, Haoqi Sun, Roger G Mark, M Brandon Westover, Gari D
+         Clifford. You Snooze, You Win: the PhysioNet/Computing in Cardiology
+         Challenge 2018.
+     .. [2] Goldberger, A., Amaral, L., Glass, L., Hausdorff, J., Ivanov, P. C.,
+         Mark, R., ... & Stanley, H. E. (2000). PhysioBank, PhysioToolkit, and
+         PhysioNet: Components of a new research resource for complex physiologic
+         signals. Circulation [Online]. 101 (23), pp. e215–e220.
+     """
+     records = pd.read_csv(PC18_RECORDS)
+     psg_records = records[records["Record type"] == "PSG"]
+     hea_records = records[records["Record type"] == "Header"]
+     arousal_records = records[records["Record type"] == "Arousal"]
+
+     path = _data_path(path=path)
+     params = [path, force_update, base_url]
+
+     fnames = []
+     for subject in subjects:
+         for idx in np.where(psg_records["Subject"] == subject)[0]:
+             psg_fname = _fetch_one(
+                 psg_records["fname"].iloc[idx], psg_records["sha"].iloc[idx], *params
+             )
+             hea_fname = _fetch_one(
+                 hea_records["fname"].iloc[idx], hea_records["sha"].iloc[idx], *params
+             )
+             if psg_records["Split"].iloc[idx] == "training":
+                 train_idx = np.where(arousal_records["Subject"] == subject)[0][0]
+                 arousal_fname = _fetch_one(
+                     arousal_records["fname"].iloc[train_idx],
+                     arousal_records["sha"].iloc[train_idx],
+                     *params,
+                 )
+             else:
+                 arousal_fname = None
+             fnames.append([psg_fname, hea_fname, arousal_fname])
+
+     return fnames
+
+
+ def _convert_wfdb_anns_to_mne_annotations(annots):
+     """Convert wfdb.io.Annotation format to MNE's.
+
+     Parameters
+     ----------
+     annots : wfdb.io.Annotation
+         Annotation object obtained by e.g. loading an annotation file with
+         wfdb.rdann().
+
+     Returns
+     -------
+     mne.Annotations
+         MNE Annotations object.
+     """
+     ann_chs = set(annots.chan)
+     onsets = annots.sample / annots.fs
+     new_onset, new_duration, new_description = list(), list(), list()
+     for channel_name in ann_chs:
+         mask = annots.chan == channel_name
+         ch_onsets = onsets[mask]
+         ch_descs = np.array(annots.aux_note)[mask]
+
+         # Events with beginning and end, defined by '(event' and 'event)',
+         # are skipped here.
+         if all([(i.startswith("(") or i.endswith(")")) for i in ch_descs]):
+             pass
+         else:  # Sleep stage-like annotations
+             # Durations run from one onset to the next; the last stage is
+             # assumed to last 30 s (one epoch).
+             ch_durations = np.concatenate([np.diff(ch_onsets), [30]])
+             if not all(ch_durations > 0):
+                 raise ValueError("Negative duration")
+             new_onset.extend(ch_onsets)
+             new_duration.extend(ch_durations)
+             new_description.extend(ch_descs)
+
+     mne_annots = mne.Annotations(
+         new_onset, new_duration, new_description, orig_time=None
+     )
+
+     return mne_annots
+
+
+ class SleepPhysionetChallenge2018(BaseConcatDataset):
+     """Physionet Challenge 2018 polysomnography dataset.
+
+     Sleep dataset from https://physionet.org/content/challenge-2018/1.0.0/.
+     Contains overnight recordings from 1983 subjects with (suspected) sleep
+     apnea.
+
+     The total size is 266 GB, so make sure you have enough space before
+     downloading.
+
+     See `fetch_pc18_data` for a more complete description.
+
+     Parameters
+     ----------
+     subject_ids : list(int) | str | None
+         (list of) int of subject(s) to be loaded.
+         - If `None`, loads all subjects (both training and test sets; the
+           test set has no labels associated).
+         - If `"training"`, loads only the training set subjects.
+         - If `"test"`, loads only the test set subjects (no labels associated).
+         - Otherwise, expects an iterable of subject IDs.
+     path : None | str
+         Location of where to look for the PC18 data storing location. If None,
+         the environment variable or config parameter ``PC18_DATASET_PATH``
+         is used. If it doesn't exist, the "~/mne_data" directory is used. If
+         the dataset is not found under the given path, the data will be
+         automatically downloaded to the specified folder.
+     load_eeg_only : bool
+         If True, only load the EEG channels and discard the others (EOG, EMG,
+         temperature, respiration) to avoid resampling the other signals.
+     preproc : list(Preprocessor) | None
+         List of preprocessors to apply to each file individually. This way the
+         data can e.g., be downsampled (temporally and spatially) to limit the
+         memory usage of the entire Dataset object. This also enables applying
+         preprocessing in parallel over the recordings.
+     n_jobs : int
+         Number of parallel processes.
+     """
+
+     def __init__(
+         self,
+         subject_ids="training",
+         path=None,
+         load_eeg_only=True,
+         preproc=None,
+         n_jobs=1,
+     ):
+         if subject_ids is None:
+             subject_ids = range(1983)
+             warn(
+                 "You are loading the complete dataset (0 to 1982), which "
+                 "includes the test set (0 to 988) from the Physionet "
+                 "Challenge 2018. Note that the test set does not have "
+                 "associated labels, so supervised classification cannot be "
+                 "performed on these data.",
+                 UserWarning,
+             )
+         elif subject_ids == "training":
+             subject_ids = range(989, 1983)
+         elif subject_ids == "test":
+             subject_ids = range(989)
+             warn(
+                 "This subset does not have associated labels, so supervised "
+                 "(sleep stage) classification cannot be performed on these "
+                 "data. You can still use the meta information as a label to "
+                 "perform another task.",
+                 UserWarning,
+             )
+         else:
+             # If subject_ids is an iterable, check if it includes any test set IDs
+             if any(sid < 989 for sid in subject_ids):
+                 warn(
+                     "You are loading a subset of the data that includes test "
+                     "set subjects (subject IDs 0 to 988). These subjects do "
+                     "not have associated labels, so supervised (sleep stage) "
+                     "classification cannot be performed on these data. You "
+                     "can still use the meta information as a label to "
+                     "perform another task.",
+                     UserWarning,
+                 )
+
+         ensure_metafiles_exist()
+
+         paths = fetch_pc18_data(subject_ids, path=path)
+
+         self.info_df = pd.read_csv(PC18_INFO)
+
+         if n_jobs == 1:
+             all_base_ds = [
+                 self._load_raw(
+                     subj_nb=subject_id,
+                     raw_fname=p[0],
+                     arousal_fname=p[2],
+                     load_eeg_only=load_eeg_only,
+                     preproc=preproc,
+                 )
+                 for subject_id, p in zip(subject_ids, paths)
+             ]
+         else:
+             all_base_ds = Parallel(n_jobs=n_jobs)(
+                 delayed(self._load_raw)(
+                     subject_id,
+                     p[0],
+                     p[2],
+                     load_eeg_only=load_eeg_only,
+                     preproc=preproc,
+                 )
+                 for subject_id, p in zip(subject_ids, paths)
+             )
+         super().__init__(all_base_ds)
+
+     def _load_raw(self, subj_nb, raw_fname, arousal_fname, load_eeg_only, preproc):
+         channel_types = ["eeg"] * 7
+         if load_eeg_only:
+             channels = list(range(7))
+         else:
+             channel_types += ["emg", "misc", "misc", "misc", "misc", "ecg"]
+             channels = None
+
+         # Load raw signals and header
+         record = wfdb.io.rdrecord(op.splitext(raw_fname[0])[0], channels=channels)
+
+         # Convert to the right units for MNE (EEG should be in V)
+         data = record.p_signal.T
+         data[np.array(record.units) == "uV"] /= 1e6
+         data[np.array(record.units) == "mV"] /= 1e3
+         info = mne.create_info(record.sig_name, record.fs, channel_types)
+         raw_file = mne.io.RawArray(data, info)
+
+         # Extract annotations
+         if arousal_fname is not None:
+             annots = wfdb.rdann(
+                 op.splitext(raw_fname[0])[0],
+                 "arousal",
+                 sampfrom=0,
+                 sampto=None,
+                 shift_samps=False,
+                 return_label_elements=["symbol"],
+                 summarize_labels=False,
+             )
+             mne_annots = _convert_wfdb_anns_to_mne_annotations(annots)
+             raw_file = raw_file.set_annotations(mne_annots)
+
+         record_name = op.splitext(op.basename(raw_fname[0]))[0]
+         record_info = self.info_df[self.info_df["Record"] == record_name].iloc[0]
+         if record_info["Record"].startswith("tr"):
+             split = "training"
+         elif record_info["Record"].startswith("te"):
+             split = "test"
+         else:
+             split = "unknown"
+
+         desc = pd.Series(
+             {
+                 "subject": subj_nb,
+                 "record": record_info["Record"],
+                 "split": split,
+                 "age": record_info["Age"],
+                 "sex": record_info["Sex"],
+             },
+             name="",
+         )
+         base_dataset = RawDataset(raw_file, desc)
+
+         if preproc is not None:
+             from braindecode.preprocessing.preprocess import _preprocess
+
+             _preprocess(base_dataset, None, preproc)
+
+         return base_dataset
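
Finally, a hedged usage sketch for the loader above (the two subject IDs are an arbitrary slice of the 989-1982 training range; each PSG recording is large, so even small subsets take a while to download):

    from braindecode.datasets.sleep_physio_challe_18 import (
        SleepPhysionetChallenge2018,
    )

    # Two training-set recordings, EEG channels only, loaded sequentially.
    ds = SleepPhysionetChallenge2018(
        subject_ids=[989, 990],
        load_eeg_only=True,
        n_jobs=1,
    )
    print(ds.description)  # subject, record, split, age, and sex per recording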