braindecode 1.3.0.dev177069446__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. braindecode/__init__.py +9 -0
  2. braindecode/augmentation/__init__.py +52 -0
  3. braindecode/augmentation/base.py +225 -0
  4. braindecode/augmentation/functional.py +1300 -0
  5. braindecode/augmentation/transforms.py +1356 -0
  6. braindecode/classifier.py +258 -0
  7. braindecode/datasets/__init__.py +44 -0
  8. braindecode/datasets/base.py +823 -0
  9. braindecode/datasets/bbci.py +693 -0
  10. braindecode/datasets/bcicomp.py +193 -0
  11. braindecode/datasets/bids/__init__.py +54 -0
  12. braindecode/datasets/bids/datasets.py +239 -0
  13. braindecode/datasets/bids/format.py +717 -0
  14. braindecode/datasets/bids/hub.py +987 -0
  15. braindecode/datasets/bids/hub_format.py +717 -0
  16. braindecode/datasets/bids/hub_io.py +197 -0
  17. braindecode/datasets/bids/hub_validation.py +114 -0
  18. braindecode/datasets/bids/iterable.py +220 -0
  19. braindecode/datasets/chb_mit.py +163 -0
  20. braindecode/datasets/mne.py +170 -0
  21. braindecode/datasets/moabb.py +219 -0
  22. braindecode/datasets/nmt.py +313 -0
  23. braindecode/datasets/registry.py +120 -0
  24. braindecode/datasets/siena.py +162 -0
  25. braindecode/datasets/sleep_physio_challe_18.py +411 -0
  26. braindecode/datasets/sleep_physionet.py +125 -0
  27. braindecode/datasets/tuh.py +591 -0
  28. braindecode/datasets/utils.py +67 -0
  29. braindecode/datasets/xy.py +96 -0
  30. braindecode/datautil/__init__.py +62 -0
  31. braindecode/datautil/channel_utils.py +114 -0
  32. braindecode/datautil/hub_formats.py +180 -0
  33. braindecode/datautil/serialization.py +359 -0
  34. braindecode/datautil/util.py +154 -0
  35. braindecode/eegneuralnet.py +372 -0
  36. braindecode/functional/__init__.py +22 -0
  37. braindecode/functional/functions.py +251 -0
  38. braindecode/functional/initialization.py +47 -0
  39. braindecode/models/__init__.py +117 -0
  40. braindecode/models/atcnet.py +830 -0
  41. braindecode/models/attentionbasenet.py +727 -0
  42. braindecode/models/attn_sleep.py +549 -0
  43. braindecode/models/base.py +574 -0
  44. braindecode/models/bendr.py +493 -0
  45. braindecode/models/biot.py +537 -0
  46. braindecode/models/brainmodule.py +845 -0
  47. braindecode/models/config.py +233 -0
  48. braindecode/models/contrawr.py +319 -0
  49. braindecode/models/ctnet.py +541 -0
  50. braindecode/models/deep4.py +376 -0
  51. braindecode/models/deepsleepnet.py +417 -0
  52. braindecode/models/eegconformer.py +475 -0
  53. braindecode/models/eeginception_erp.py +379 -0
  54. braindecode/models/eeginception_mi.py +379 -0
  55. braindecode/models/eegitnet.py +302 -0
  56. braindecode/models/eegminer.py +256 -0
  57. braindecode/models/eegnet.py +359 -0
  58. braindecode/models/eegnex.py +354 -0
  59. braindecode/models/eegsimpleconv.py +201 -0
  60. braindecode/models/eegsym.py +917 -0
  61. braindecode/models/eegtcnet.py +337 -0
  62. braindecode/models/fbcnet.py +225 -0
  63. braindecode/models/fblightconvnet.py +315 -0
  64. braindecode/models/fbmsnet.py +338 -0
  65. braindecode/models/hybrid.py +126 -0
  66. braindecode/models/ifnet.py +443 -0
  67. braindecode/models/labram.py +1316 -0
  68. braindecode/models/luna.py +891 -0
  69. braindecode/models/medformer.py +760 -0
  70. braindecode/models/msvtnet.py +377 -0
  71. braindecode/models/patchedtransformer.py +640 -0
  72. braindecode/models/reve.py +843 -0
  73. braindecode/models/sccnet.py +280 -0
  74. braindecode/models/shallow_fbcsp.py +212 -0
  75. braindecode/models/signal_jepa.py +1122 -0
  76. braindecode/models/sinc_shallow.py +339 -0
  77. braindecode/models/sleep_stager_blanco_2020.py +169 -0
  78. braindecode/models/sleep_stager_chambon_2018.py +159 -0
  79. braindecode/models/sparcnet.py +426 -0
  80. braindecode/models/sstdpn.py +869 -0
  81. braindecode/models/summary.csv +47 -0
  82. braindecode/models/syncnet.py +234 -0
  83. braindecode/models/tcn.py +275 -0
  84. braindecode/models/tidnet.py +397 -0
  85. braindecode/models/tsinception.py +295 -0
  86. braindecode/models/usleep.py +439 -0
  87. braindecode/models/util.py +369 -0
  88. braindecode/modules/__init__.py +92 -0
  89. braindecode/modules/activation.py +86 -0
  90. braindecode/modules/attention.py +883 -0
  91. braindecode/modules/blocks.py +160 -0
  92. braindecode/modules/convolution.py +330 -0
  93. braindecode/modules/filter.py +654 -0
  94. braindecode/modules/layers.py +216 -0
  95. braindecode/modules/linear.py +70 -0
  96. braindecode/modules/parametrization.py +38 -0
  97. braindecode/modules/stats.py +87 -0
  98. braindecode/modules/util.py +85 -0
  99. braindecode/modules/wrapper.py +90 -0
  100. braindecode/preprocessing/__init__.py +271 -0
  101. braindecode/preprocessing/eegprep_preprocess.py +1317 -0
  102. braindecode/preprocessing/mne_preprocess.py +240 -0
  103. braindecode/preprocessing/preprocess.py +579 -0
  104. braindecode/preprocessing/util.py +177 -0
  105. braindecode/preprocessing/windowers.py +1037 -0
  106. braindecode/regressor.py +234 -0
  107. braindecode/samplers/__init__.py +18 -0
  108. braindecode/samplers/base.py +399 -0
  109. braindecode/samplers/ssl.py +263 -0
  110. braindecode/training/__init__.py +23 -0
  111. braindecode/training/callbacks.py +23 -0
  112. braindecode/training/losses.py +105 -0
  113. braindecode/training/scoring.py +477 -0
  114. braindecode/util.py +419 -0
  115. braindecode/version.py +1 -0
  116. braindecode/visualization/__init__.py +8 -0
  117. braindecode/visualization/confusion_matrices.py +289 -0
  118. braindecode/visualization/gradients.py +62 -0
  119. braindecode-1.3.0.dev177069446.dist-info/METADATA +230 -0
  120. braindecode-1.3.0.dev177069446.dist-info/RECORD +124 -0
  121. braindecode-1.3.0.dev177069446.dist-info/WHEEL +5 -0
  122. braindecode-1.3.0.dev177069446.dist-info/licenses/LICENSE.txt +31 -0
  123. braindecode-1.3.0.dev177069446.dist-info/licenses/NOTICE.txt +20 -0
  124. braindecode-1.3.0.dev177069446.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1317 @@
1
+ """Preprocessors using the EEGPrep package."""
2
+
3
+ # Authors: Christian Kothe <christian.kothe@intheon.io>
4
+ #
5
+ # License: BSD-3
6
+
7
+ import logging
8
+ from abc import abstractmethod
9
+ from typing import Any, Sequence
10
+
11
+ import mne
12
+ import numpy as np
13
+ from mne.io import BaseRaw
14
+
15
+ from .preprocess import Preprocessor
16
+ from .util import mne_load_metadata, mne_store_metadata
17
+
# module-level logger, named after this module
log = logging.getLogger(__name__)

# names exported as the public API of this module
__all__ = [
    "EEGPrep",
    "RemoveDCOffset",
    "Resampling",
    "RemoveFlatChannels",
    "RemoveDrifts",
    "RemoveBadChannels",
    "RemoveBadChannelsNoLocs",
    "RemoveBursts",
    "RemoveBadWindows",
    "ReinterpolateRemovedChannels",
    "RemoveCommonAverageReference",
]

# eegprep is treated as an optional dependency: if it cannot be imported, the name
# is bound to None so that importing this module still succeeds; the preprocessors
# check for None at apply time and raise an error with install instructions.
try:
    import eegprep
except ImportError:
    eegprep = None
38
+
39
+
class EEGPrepBasePreprocessor(Preprocessor):
    """Abstract base class holding the machinery shared by all EEGPrep preprocessors.

    Subclasses only implement :meth:`apply_eeg`, which works on an EEGLAB-style EEG
    structure (a dict). This class converts the MNE Raw object to that structure and
    back, sets aside non-EEG channels while the operation runs, and writes the
    result back into the Raw object that was passed in.

    Parameters
    ----------
    can_change_duration : bool | str
        Whether the preprocessor can change the duration of the data during
        processing. May also be the name of the sub-operation that does so, which is
        then shown in a more actionable error message.
    record_orig_chanlocs : bool
        Whether to record the EEG channel locations found before processing in the
        MNE Raw structure for later use. Already recorded channel locations are
        never overridden, so this can safely be enabled on several preprocessors to
        keep whatever were the original channel locations.
    force_dtype : np.dtype | str | None
        Optionally force the in/out data to be converted to this dtype before and
        after processing. Can help ensure consistent dtypes across different
        preprocessors.

    """

    # payload key under which we store our original channel locations
    _chanlocs_key = "original_chanlocs"

    def __init__(
        self,
        *,
        can_change_duration: str | bool = False,
        record_orig_chanlocs: bool = False,
        force_dtype: np.dtype | str | None = None,
    ):
        super().__init__(fn=self._apply_op, apply_on_array=False)
        # a bare True is replaced by the class name, so the attribute is always
        # either falsy or the name of the operation to report
        self.can_change_duration = (
            self.__class__.__name__
            if can_change_duration is True
            else can_change_duration
        )
        self.record_orig_chanlocs = record_orig_chanlocs
        self.force_dtype = None if force_dtype is None else np.dtype(force_dtype)

    @property
    def _all_attrs(self):
        own_attrs = ["can_change_duration", "record_orig_chanlocs", "force_dtype"]
        return super()._all_attrs + own_attrs

    @abstractmethod
    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure. Overridden by subclass."""
        ...

    def _apply_op(self, raw: BaseRaw) -> None:
        """Internal method that does the actual work; this is called by Preprocessor.apply()."""
        # fail with install instructions if the optional dependency is missing
        if eegprep is None:
            raise RuntimeError(
                "The eegprep package is required to use the EEGPrep preprocessor.\n"
                " Please install braindecode with the [eegprep] extra added as in\n"
                " 'pip install braindecode[eegprep]' to use this functionality,\n"
                " or run 'pip install eegprep[eeglabio]' directly."
            )

        opname = self.__class__.__name__
        if isinstance(raw, mne.BaseEpochs):
            raise ValueError(
                f"{opname} is meant to be used on Raw (continuous) data, "
                f"not Epochs. Use before epoching."
            )

        # remember the data description so it can be restored after the roundtrip
        saved_description = raw.info["description"]

        # apply_eeg() expects EEG channels only, so any other channels are set
        # aside here (note that drop_channels() modifies raw in-place)
        original_order = raw.ch_names.copy()
        eeg_picks = mne.pick_types(raw.info, eeg=True, exclude=[])
        eeg_only = raw
        non_eeg = None
        if len(eeg_picks) < len(original_order):
            eeg_only = raw.copy().pick(eeg_picks)
            non_eeg = raw.drop_channels(eeg_only.ch_names)

        eeg = eegprep.mne2eeg(eeg_only)

        # keep a copy of the channel locations for potential later use
        chanlocs_backup = [loc.copy() for loc in eeg["chanlocs"]]

        # every event needs a 'duration' field for some of the EEGPrep operations
        # to succeed; fill in a default where it is missing
        for event in eeg["event"]:
            if "duration" not in event:
                event["duration"] = 1

        target_dtype = self.force_dtype
        if target_dtype is not None:
            eeg["data"] = eeg["data"].astype(target_dtype)

        # the subclass-specific operation
        eeg = self.apply_eeg(eeg, raw)

        if target_dtype is not None:
            eeg["data"] = eeg["data"].astype(target_dtype)

        # give EEGLAB-type boundary events a name that MNE recognizes as bad, so
        # that they (or epochs intersecting them) are ignored during potential
        # downstream epoching done by braindecode pipelines
        for event in eeg["event"]:
            if event["type"] == "boundary":
                event["type"] = "BAD boundary"

        # make sure a minimal chaninfo is present, which is not otherwise guaranteed
        chaninfo = eeg["chaninfo"]
        if not (isinstance(chaninfo, dict) and chaninfo):
            eeg["chaninfo"] = {"nosedir": "+X"}

        # convert back to MNE
        proc = eegprep.eeg2mne(eeg)

        # the description may not survive the roundtrip conversion (and we don't
        # control all the code involved there), so put it back explicitly
        proc.info["description"] = saved_description

        if self.record_orig_chanlocs:
            # stash the original channel locations unless some are already stored
            mne_store_metadata(
                raw=proc,
                payload=chanlocs_backup,
                key=self._chanlocs_key,
                no_overwrite=True,
            )

        if non_eeg is not None:
            # try to add back any non-EEG channels that were originally present
            offending_op_name = self.can_change_duration
            if not offending_op_name:
                # duration is unchanged: re-insert the non-EEG channels and
                # restore the original channel order
                proc.add_channels([non_eeg], force_update_info=True)
                if proc.ch_names != original_order:
                    proc.reorder_channels(original_order)
            else:
                # while re-adding may work due to luck on one or another session,
                # in general we do not attempt it in this case; in future we may
                # work out exactly what sample mask was removed from the EEG
                # channels and apply the same to non-EEG channels via MNE
                affected_chans = ", ".join(non_eeg.ch_names)
                detail = (
                    f"the {opname} Preprocessor"
                    if offending_op_name == opname
                    else f"{offending_op_name} in the {opname} Preprocessor"
                )
                log.error(
                    f"Could not add back non-EEG channels ({affected_chans}) after"
                    f" {opname} processing; these will be omitted from the processed"
                    f" data. If you want to retain these channels, you will have to"
                    f" disable {detail}; you may perform that step using other"
                    f" methods before and after {opname}, respectively."
                )

        # _apply_op() is in-place: the state of proc is written back into raw
        if not proc.preload:
            proc.load_data()
        raw.__dict__ = proc.__dict__

    @classmethod
    def _get_orig_chanlocs(cls, raw: BaseRaw) -> list[dict[str, Any]] | None:
        """Retrieve original channel locations stashed in the given MNE Raw
        structure, if any."""
        return mne_load_metadata(raw, key=cls._chanlocs_key)
209
+
210
+
class EEGPrep(EEGPrepBasePreprocessor):
    """Preprocessor for an MNE Raw object that applies the EEGPrep pipeline.
    This is based on [Mullen2015]_.

    .. figure:: https://cdn.ncbi.nlm.nih.gov/pmc/blobs/a79a/4710679/675fc2dee929/nihms733482f9.jpg
       :align: center
       :alt: Before/after comparison of EEGPrep processing on EEG data.

    This pipeline involves the stages:

    - DC offset subtraction (:class:`RemoveDCOffset`)
    - Optional resampling (:class:`Resampling`)
    - Flatline channel detection and removal (:class:`RemoveFlatChannels`)
    - High-pass filtering (:class:`RemoveDrifts`)
    - Bad channel detection and removal using correlation and HF noise criteria
      (:class:`RemoveBadChannels` with fallback to :class:`RemoveBadChannelsNoLocs`)
    - Burst artifact removal using ASR (Artifact Subspace Reconstruction)
      (:class:`RemoveBursts`)
    - Detection and removal of residual bad time windows (:class:`RemoveBadWindows`)
    - Optional reinterpolation of removed channels
      (:class:`ReinterpolateRemovedChannels`)
    - Optional common average referencing (:class:`RemoveCommonAverageReference`)

    These steps are also individually available as separate preprocessors in this module
    if you want to apply only a subset of them or customize some beyond the parameters
    available here. Note that it is important to apply them in the order given above;
    other orderings may lead to suboptimal results.

    Typically no signal processing (except potentially resampling or removal of unused
    channels or time windows) should be done before this pipeline. It is recommended to
    follow this with at least a low-pass filter to remove high-frequency artifacts
    (e.g., 40-45 Hz transition band).

    The main processing parameters can each be set to None to skip the respective
    stage (or False for boolean switches). Note this pipeline will only affect the
    EEG channels in your data, and will leave other channels unaffected. It is
    recommended to remove these channels yourself beforehand if you don't want them
    included in your downstream analysis.

    .. Note::
        This implementation of the pipeline is best used in the context of
        cross-session prediction; when using this with a within-session split, there is
        a risk of data leakage since the artifact removal will be calibrated on statistics
        of the entire session (and thus test sets). In practice the effect may be minor,
        unless your downstream analysis is strongly driven by artifacts (e.g., if you
        are trying to decode eye movements or muscle activity), but paper reviewers may
        not be convinced by that.


    Parameters
    ----------
    resample_to : float | None
        Optionally resample to this sampling rate (in Hz) before processing.
        Good choices are 200, 250, 256 Hz (consider keeping it a power of two
        if it was originally), but one may go as low as 100-128 Hz if memory, compute,
        or model complexity limitations demand it. Default is None (no resampling).
    flatline_maxdur : float | None
        Remove channels that are flat for longer than this duration (in seconds).
        This stage is almost never triggered in practice but can help with the
        occasional strange EEG configuration.
    highpass_frequencies : tuple[float, float] | None
        Tuple of lower and upper bound of the *transition band* for high-pass filtering
        before processing. This means that full suppression will be reached at the
        lower bound, and the upper bound is where the passband begins.
    bad_channel_corr_threshold : float | None
        Threshold for correlation-based bad channel detection. A good default range
        is 0.75-0.8. Becomes quite aggressive at and beyond 0.8; also, consider using
        lower values (e.g., 0.7-0.75) for <32ch EEG and higher (0.8-0.85) for >128ch.
    burst_removal_cutoff : float | None
        Amplitude threshold for burst artifact removal using ASR
        (Artifact Subspace Reconstruction). This parameter tends to have a large effect
        on the performance of downstream ML. 10-15 is a good range for ML pipelines
        (lower is more aggressive); for neuroscience analysis, more conservative values
        like 20-30 may be better. The unit is z-scores relative to a Gaussian component
        of background EEG, but real EEG can be super-Gaussian, thus the large values.
    bad_window_max_bad_channels : float | None
        Threshold for rejection of bad time windows based on fraction of simultaneously
        noisy channels. Lower is more aggressive. Typical values are 0.15 (quite
        aggressive) to 0.3 (quite lax).
    bad_channel_reinterpolate : bool
        Whether to reinterpolate bad channels that were detected and removed. Usually
        required when doing cross-session analysis (to have a consistent channel set).
    common_avg_ref : bool
        Whether to apply a common average reference after processing. Recommended
        when doing cross-study analysis to have a consistent referencing scheme.
    bad_channel_max_broken_time : float
        Max fraction of session length during which a channel may be bad before
        it is removed. Default is 0.4 (40%), max is 0.5 (breakdown point of stats).
        Pretty much never tuned.
    bad_channel_hf_threshold : float | None
        Threshold for high-frequency (>=45 Hz) noise-based bad channel detection,
        in z-scores. Lower is more aggressive. Default is 4.0. This is rarely tuned,
        but data with unusual higher-frequency activity could benefit from exploration
        in the 3.5-5.0 range. This criterion is not applied when
        bad_channel_corr_threshold is None.
    bad_window_tolerances : tuple[float, float] | None
        (min, max) z-score tolerance for identifying bad time window/channel pairs.
        This typically does not need to be changed (instead one may change the max
        bad channels that cross this threshold), but different implementations
        use different values here. The max value is the main parameter, where
        EEGLAB/EEGPrep uses 7 while the original pipeline [Mullen2015]_ used 5.5, and
        NeuroPype uses 6. Lower values are more aggressive. The min value is only
        triggered if the EEG data has signal dropouts (very low amplitude, e.g. due to
        something becoming unplugged) which is rare; some choices are (-inf, EEGPrep;
        -3.5, BCILAB; -4 NeuroPype).
    refdata_max_bad_channels : float | None
        Same function as bad_window_max_bad_channels, but used only to determine
        calibration data for burst removal. Usually more aggressive than the former
        (0.05-0.1) to get clean calibration data. This can be set to None to skip this
        and force all data to be used for calibration.
    refdata_max_tolerances : tuple[float, float] | None
        Same as bad_window_tolerances, but used only to determine calibration data for
        burst removal. Almost never touched, and defaults to a fairly aggressive
        (-inf, 5.5) to get clean calibration data.
    num_samples : int
        Number of channel subsets to draw for the RANSAC reconstruction during bad
        channel identification. Higher can be more robust but slower to calibrate.
        Default is 50.
    subset_size : float
        Size of channel subsets for RANSAC, as fraction (0-1) or count. Default 0.25.
        For higher-density EEG (e.g., 64-128ch), one can achieve somewhat better
        robustness to clusters of bad channels by setting this to 0.15 and increasing
        num_samples to 200.
    bad_channel_nolocs_threshold : float
        A fallback correlation threshold for bad-channel removal that is applied when
        no channel location information is available. The value here typically needs to
        be fairly low, e.g., 0.45-0.5 (lower is more aggressive). Ideally you have
        channel locations so that this fallback is not needed.
    bad_channel_nolocs_exclude_frac : float
        A fraction of most correlated channels to exclude in the case where no channel
        location information is available. Used to reject pairs of shorted or otherwise
        highly correlated sets of bad channels.
    max_mem_mb : int
        Max memory that ASR can use, in MB. Larger values can reduce overhead during
        processing, but usually 64MB is sufficient.

    References
    ----------
    .. [Mullen2015] Mullen, T.R., Kothe, C.A., Chi, Y.M., Ojeda, A., Kerth, T.,
       Makeig, S., Jung, T.P. and Cauwenberghs, G., 2015. Real-time neuroimaging and
       cognitive monitoring using wearable dry EEG. IEEE Transactions on Biomedical
       Engineering, 62(11), pp.2553-2567.

    """

    def __init__(
        self,
        *,
        # (main processing parameters)
        resample_to: float | None = None,
        flatline_maxdur: float | None = 5.0,
        highpass_frequencies: tuple[float, float] | None = (0.25, 0.75),
        bad_channel_corr_threshold: float | None = 0.8,
        burst_removal_cutoff: float | None = 10.0,
        bad_window_max_bad_channels: float | None = 0.25,
        bad_channel_reinterpolate: bool = True,
        common_avg_ref: bool = True,
        # additional tuning parameters
        bad_channel_max_broken_time: float = 0.4,
        bad_channel_hf_threshold: float | None = 4.0,
        bad_window_tolerances: tuple[float, float] | None = (-np.inf, 7),
        refdata_max_bad_channels: float | None = 0.075,
        refdata_max_tolerances: tuple[float, float] | None = (-np.inf, 5.5),
        num_samples: int = 50,
        subset_size: float = 0.25,
        bad_channel_nolocs_threshold: float = 0.45,
        bad_channel_nolocs_exclude_frac: float = 0.1,
        max_mem_mb: int = 64,
    ):
        # Collect the names of the enabled stages that can change the duration of
        # the data; the base class uses this to decide whether non-EEG channels can
        # be added back after processing. A stage counts as enabled unless its
        # parameter is None (or False), so a numeric value of 0 is NOT mistaken
        # for "disabled" here while still being passed on as an active setting.
        duration_changing_ops = []
        if self._stage_enabled(resample_to):
            duration_changing_ops.append("resample")
        if self._stage_enabled(bad_window_max_bad_channels):
            duration_changing_ops.append("bad time window removal")
        super().__init__(
            can_change_duration=" and ".join(duration_changing_ops) or False,
        )
        self.resample_to = resample_to
        self.bad_channel_reinterpolate = bad_channel_reinterpolate
        self.common_avg_ref = common_avg_ref
        self.burst_removal_cutoff = burst_removal_cutoff
        self.bad_window_max_bad_channels = bad_window_max_bad_channels
        self.bad_channel_corr_threshold = bad_channel_corr_threshold
        self.highpass_frequencies = highpass_frequencies
        self.flatline_maxdur = flatline_maxdur
        self.bad_channel_hf_threshold = bad_channel_hf_threshold
        self.bad_channel_max_broken_time = bad_channel_max_broken_time
        self.bad_window_tolerances = bad_window_tolerances
        self.refdata_max_bad_channels = refdata_max_bad_channels
        self.refdata_max_tolerances = refdata_max_tolerances
        self.num_samples = num_samples
        self.subset_size = subset_size
        self.bad_channel_nolocs_threshold = bad_channel_nolocs_threshold
        self.bad_channel_nolocs_exclude_frac = bad_channel_nolocs_exclude_frac
        self.max_mem_mb = max_mem_mb

    @staticmethod
    def _stage_enabled(value: Any) -> bool:
        """Return whether a stage parameter requests the stage to run.

        Only None (the documented way to skip a stage) and False are treated as
        "off"; every other value, including 0, counts as an active setting.
        """
        return value is not None and value is not False

    @property
    def clean_artifacts_params(self):
        """Keyword arguments passed to ``eegprep.clean_artifacts`` by apply_eeg()."""
        # the HF noise criterion is only passed on if the correlation-based
        # bad-channel criterion is set as well
        if self.bad_channel_corr_threshold is None:
            line_noise_crit = None
        else:
            line_noise_crit = self.bad_channel_hf_threshold
        return dict(
            ChannelCriterion=self.bad_channel_corr_threshold,
            LineNoiseCriterion=line_noise_crit,
            BurstCriterion=self.burst_removal_cutoff,
            WindowCriterion=self.bad_window_max_bad_channels,
            Highpass=self.highpass_frequencies,
            ChannelCriterionMaxBadTime=self.bad_channel_max_broken_time,
            BurstCriterionRefMaxBadChns=self.refdata_max_bad_channels,
            BurstCriterionRefTolerances=self.refdata_max_tolerances,
            WindowCriterionTolerances=self.bad_window_tolerances,
            FlatlineCriterion=self.flatline_maxdur,
            NumSamples=self.num_samples,
            SubsetSize=self.subset_size,
            NoLocsChannelCriterion=self.bad_channel_nolocs_threshold,
            NoLocsChannelCriterionExcluded=self.bad_channel_nolocs_exclude_frac,
            MaxMem=self.max_mem_mb,
            # For reference, the function additionally accepts these (legacy etc.)
            # arguments, which we're not exposing here (current defaults as below):
            # BurstRejection='off',
            # Distance='euclidian',
            # Channels=None,
            # Channels_ignore=None,
            # availableRAM_GB=None,
        )

    @property
    def _all_attrs(self):
        return super()._all_attrs + [
            "resample_to",
            "bad_channel_reinterpolate",
            "common_avg_ref",
            "burst_removal_cutoff",
            "bad_window_max_bad_channels",
            "bad_channel_corr_threshold",
            "highpass_frequencies",
            "flatline_maxdur",
            "bad_channel_hf_threshold",
            "bad_channel_max_broken_time",
            "bad_window_tolerances",
            "refdata_max_bad_channels",
            "refdata_max_tolerances",
            "num_samples",
            "subset_size",
            "bad_channel_nolocs_threshold",
            "bad_channel_nolocs_exclude_frac",
            "max_mem_mb",
        ]

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure.

        Raises
        ------
        NotImplementedError
            If burst removal is enabled and the (rounded) sampling rate of the data
            is not one of the supported rates.
        """
        # remove per-channel DC offset (can be huge)
        eeg["data"] -= np.median(eeg["data"], axis=1, keepdims=True)

        # optional resampling (same on/off rule as used in __init__)
        if self._stage_enabled(self.resample_to):
            eeg = eegprep.resample(eeg, self.resample_to)

        # do a check if the data has a supported sampling rate
        if self.burst_removal_cutoff is not None:
            supported_rates = (100, 128, 200, 250, 256, 300, 500, 512)
            cur_srate = int(eegprep.utils.round_mat(eeg["srate"]))
            if cur_srate not in supported_rates:
                # note: technically the method will run if you disable this error,
                # but you're likely getting (potentially quite) suboptimal results
                raise NotImplementedError(
                    f"The dataset has an uncommon sampling rate of {cur_srate} Hz,"
                    f" which is not supported by the EEGPrep Preprocessor"
                    f" implementation. Please enable resampling to"
                    f" resample the data to one of the supported rates"
                    f" ({', '.join(str(r) for r in supported_rates)})."
                )

        # preserve input channel locations for reinterpolation later
        orig_chanlocs = [channel_loc.copy() for channel_loc in eeg["chanlocs"]]

        # artifact removal stage; only the first return value (the cleaned EEG
        # structure) is used
        eeg, *_ = eegprep.clean_artifacts(eeg, **self.clean_artifacts_params)

        if self.force_dtype != np.float64:
            # cast to float32 for equivalence with multi-stage EEGPrep pipeline
            eeg["data"] = eeg["data"].astype(np.float32)

        # optionally reinterpolate dropped channels
        if self.bad_channel_reinterpolate and (
            len(orig_chanlocs) > len(eeg["chanlocs"])
        ):
            eeg = eegprep.eeg_interp(eeg, orig_chanlocs)

        # optionally apply common average reference
        if self.common_avg_ref:
            eeg = eegprep.reref(eeg, [])

        return eeg
508
+
509
+
class RemoveFlatChannels(EEGPrepBasePreprocessor):
    """Drop EEG channels that stay flat for an extended stretch of time.
    Follows [Mullen2015]_.

    This automated artifact rejection step makes sure that the data contains no
    flat-lined channels. Such channels are very rare, but when present they may
    throw off downstream preprocessing steps, which is why this preprocessor exists.

    Place this step very early in a preprocessing pipeline, before any filtering
    (since filter pre/post ringing can mask flatlines).

    A channel :math:`c` is flagged as flat if there exists a time interval
    :math:`[t_1, t_2]` where:

    .. math::

        |X_{c,t+1} - X_{c,t}| < \\varepsilon_{\\text{jitter}} \\quad \\forall t \\in [t_1, t_2]

        \\text{and} \\quad t_2 - t_1 > T_{\\text{max}}

    where :math:`\\varepsilon_{\\text{jitter}} = \\text{max_allowed_jitter} \\times \\varepsilon`
    (with :math:`\\varepsilon` being machine epsilon for float64), and
    :math:`T_{\\text{max}} = \\text{max_flatline_duration} \\times f_s` (with :math:`f_s`
    being the sampling rate).

    Parameters
    ----------
    max_flatline_duration : float
        Longest flatline, in seconds, that is still tolerated. A channel with a
        longer flatline than this is considered abnormal. Defaults to 5.0.
    max_allowed_jitter : float
        Largest jitter tolerated within a flatline, given as a multiple of epsilon
        for the 64-bit float data type (np.finfo(np.float64).eps). Defaults to 20.

    References
    ----------
    .. [Mullen2015] Mullen, T.R., Kothe, C.A., Chi, Y.M., Ojeda, A., Kerth, T.,
       Makeig, S., Jung, T.P. and Cauwenberghs, G., 2015. Real-time neuroimaging and
       cognitive monitoring using wearable dry EEG. IEEE Transactions on Biomedical
       Engineering, 62(11), pp.2553-2567.

    """

    def __init__(
        self,
        *,
        max_flatline_duration: float = 5.0,
        max_allowed_jitter: float = 20.0,
    ):
        # channels may be removed here, so have the base class record the
        # original channel locations
        super().__init__(record_orig_chanlocs=True)
        self.max_flatline_duration = max_flatline_duration
        self.max_allowed_jitter = max_allowed_jitter

    @property
    def _all_attrs(self):
        own_attrs = ["max_flatline_duration", "max_allowed_jitter"]
        return super()._all_attrs + own_attrs

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure."""
        flatline_options = dict(
            max_flatline_duration=self.max_flatline_duration,
            max_allowed_jitter=self.max_allowed_jitter,
        )
        return eegprep.clean_flatlines(eeg, **flatline_options)
580
+
581
+
class RemoveDCOffset(EEGPrepBasePreprocessor):
    """Subtract the per-channel median from the EEG data to remove its DC offset.

    Depending on the electrical characteristics of the hardware, some EEG data can
    carry a DC offset so large that highpass filters do not necessarily fully
    remove it unless some care is taken with filter settings (noted in EEGLAB
    documentation [Delorme2004]_); this preprocessor mainly exists for that case.

    The operation performed is:

    .. math::

        X'_{c,t} = X_{c,t} - \\text{median}_t(X_{c,t})

    where :math:`c` indexes the channel and :math:`t` indexes time.

    References
    ----------
    .. [Delorme2004] Delorme, A. and Makeig, S., 2004. EEGLAB: an open source toolbox
       for analysis of single-trial EEG dynamics including independent component
       analysis. Journal of Neuroscience Methods, 134(1), pp.9-21.

    """

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure."""
        # This could equally be done directly on the MNE data structure; it is
        # kept here because the EEGPrep machinery is already in place.
        channel_medians = np.median(eeg["data"], axis=1, keepdims=True)
        # in-place subtraction, so the data array is modified rather than replaced
        eeg["data"] -= channel_medians
        return eeg
612
+
613
+
614
class RemoveDrifts(EEGPrepBasePreprocessor):
    """Remove drifts from the EEG data using a forward-backward high-pass filter.
    See [Oppenheim1999]_.

    .. figure:: ../../docs/_static/preprocess/highpass.png
        :align: center
        :alt: Magnitude response for this filter with default settings.

    Note that MNE has its own suite of filters for this that offers more choices; use
    this filter if you are specifically interested in matching the EEGLAB and EEGPrep
    behavior, for example if you're building an EEGPrep-like pipeline from individual
    steps, e.g., to customize parts that are not exposed by the top-level EEGPrep
    preprocessor.

    .. Note::
        If your method involves causal analysis, either with applications to
        real-time single-trial brain-computer interfacing or for example involving
        autoregressive modeling or other causal measures, consider using a strictly
        causal highpass filter instead.

    Parameters
    ----------
    transition : Sequence[float]
        The transition band in Hz, i.e. lower and upper edge of the transition as in
        (lo, hi). Defaults to (0.25, 0.75). Choosing this can be tricky when your data
        contains long-duration event-related potentials that your method exploits, in
        which case you may need to carefully lower this somewhat to avoid attenuating
        them.
    attenuation : float
        The stop-band attenuation, in dB. Defaults to 80.0.
    method : str
        The method to use for filtering ('fft' or 'fir'). Defaults to 'fft' (uses more
        memory but is much faster than 'fir').

    References
    ----------
    .. [Oppenheim1999] Oppenheim, A.V., 1999. Discrete-time signal processing.
       Pearson Education India.

    """

    def __init__(
        self,
        transition: Sequence[float] = (0.25, 0.75),
        *,
        attenuation: float = 80.0,
        method: str = "fft",
    ):
        super().__init__()
        # Stored unchanged; apply_eeg forwards all three to eegprep.clean_drifts.
        self.method = method
        self.attenuation = attenuation
        self.transition = transition

    @property
    def _all_attrs(self):
        """Attribute names of the base class followed by this step's parameters."""
        own_attrs = ["transition", "attenuation", "method"]
        return super()._all_attrs + own_attrs

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure.

        Returns the result of ``eegprep.clean_drifts`` called with the configured
        transition band, attenuation and method. ``raw`` is not used by this step.
        """
        return eegprep.clean_drifts(
            eeg,
            transition=self.transition,
            attenuation=self.attenuation,
            method=self.method,
        )
685
+
686
+
687
class Resampling(EEGPrepBasePreprocessor):
    """Resample the data to a specified rate (EEGPrep version).
    Based on [Proakis2007]_ and included for equivalence with EEGPrep.

    .. figure:: ../../docs/_static/preprocess/downsample.png
        :align: center
        :alt: Example of resampling a time series.

    MNE has its own resampling routine (use as `Preprocessor("resample", sfreq=rate)`)
    but this will not necessarily match EEGPrep's behavior exactly. Typical
    differences include edge padding, the exact design rule for the filter kernel
    and its window function, and handling of resampling ratios with large rational
    factors.

    It's not necessarily clear which of the two implementations is "better" (likely
    both are fine for typical EEG applications). Use this one if you try to match
    EEGPrep and EEGLAB behavior specifically, for example when you migrate from a
    simple pipeline that uses the high-level EEGPrep preprocessor to a more
    custom pipeline built from individual steps and want to ensure identical
    results (up to float precision issues).

    Resampling can be placed quite early in a preprocessing pipeline to cut down on
    compute time and memory usage of downstream steps, e.g., before filtering, but
    note the sampling rate interacts with e.g. temporal convolution kernel sizes;
    when reproducing literature, ideally you first resample to the same rate as
    used there.

    .. Note::
        There can be a small timing accuracy penalty when resampling on continuous
        data (before epoching) when doing event-locked analysis, since epoch windows
        will be snapped to the nearest sample. However, this jitter is typically
        fairly minor relative to timing variability in the brain responses
        themselves, so will often not be a problem in practice.

    Parameters
    ----------
    sfreq : float | None
        The desired sampling rate in Hz. Skipped if set to None.

    References
    ----------
    .. [Proakis2007] Proakis, J.G., 2007. Digital signal processing: principles,
       algorithms, and applications, 4/E. Pearson Education India.

    """

    def __init__(
        self,
        sfreq: float | None,
    ):
        # Tell the base class that this step may alter the recording duration.
        super().__init__(can_change_duration=True)
        self.sfreq = sfreq

    @property
    def _all_attrs(self):
        """Attribute names of the base class followed by ``"sfreq"``."""
        own_attrs = ["sfreq"]
        return super()._all_attrs + own_attrs

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure.

        Returns ``eeg`` untouched when ``sfreq`` is None, otherwise the result of
        ``eegprep.resample`` at the requested rate. ``raw`` is not used by this step.
        """
        if self.sfreq is None:
            return eeg
        return eegprep.resample(eeg, self.sfreq)
751
+
752
+
753
class RemoveBadChannels(EEGPrepBasePreprocessor):
    """Remove EEG channels with problematic data; variant that uses channel locations.
    Implemented as in [Kothe2013]_.

    .. figure:: https://www.mdpi.com/sensors/sensors-22-07314/article_deploy/html/images/sensors-22-07314-g003.png
        :align: center
        :alt: Conceptual image of bad-channel removal.

    This is an automated artifact rejection function which ensures that the data
    contains no channels that record only noise for extended periods of time. This uses
    a hybrid criterion involving correlation and high-frequency noise thresholds:

    a) if a channel has lower correlation to its robust estimate (based on other
       channels) than a given threshold for a minimum period of time (or percentage of
       the recording), it will be removed.
    b) if a channel has more (high-frequency) noise relative to the (robust)
       population of other channels than a given threshold (in standard deviations),
       it will be removed.

    This method requires channels to have an associated location; when a location
    is not known or could not be inferred (e.g., from channel labels if using a standard
    montage such as the 10-20 system), use the :class:`RemoveBadChannelsNoLocs`
    preprocessor instead.

    Preconditions:

    - One of :class:`RemoveDrifts` or :class:`braindecode.preprocessing.Filter`
      (configured as a highpass filter) must have been applied beforehand.
    - 3D channel locations must be available in the data (can be automatic with some
      file types, but may require some MNE operations with others).
    - Consider applying :class:`RemoveDCOffset` beforehand as a general precaution.

    Parameters
    ----------
    corr_threshold : float
        Correlation threshold. If a channel over a short time window is correlated at
        less than this value to its robust estimate (based on other channels), it is
        considered abnormal during that time. A good default range is 0.75-0.8 and the
        default is 0.8. Becomes quite aggressive at and beyond 0.8; also, consider
        using lower values (eg 0.7-0.75) for <32ch EEG and higher (0.8-0.85) for >128ch.
        This is the main tunable parameter of the method.
    noise_threshold : float
        Threshold for high-frequency (>=45 Hz) noise-based bad channel detection,
        in robust z-scores (i.e., st. devs.). Lower is more aggressive. Default is 4.0.
        This is rarely tuned, but data with unusual higher-frequency activity could
        benefit from exploration in the 3.5-5.0 range.
    window_len : float
        Length of the time windows (in seconds) for which correlation statistics
        are computed; ideally short enough to reasonably capture periods of global
        artifacts or intermittent sensor dropouts, but not shorter (for statistical
        reasons). Default is 5.0 sec.
    subset_size : float
        Size of random channel subsets to compute robust reconstructions. This can be
        given as a fraction (0-1) of the total number of channels, or as an absolute
        number. Multiple (pseudo-)random subsets are sampled in a RANSAC-like process
        to obtain a robust reference estimate for each channel. Default is 0.25 (25% of
        channels). For higher-density EEG (e.g., 64-128ch) with potential clusters
        of bad channels, one can achieve somewhat better robustness by setting this
        to 0.15 and increasing num_samples to 200.
    num_samples : int
        Number of samples generated for the robust channel reconstruction. This is the
        number of samples to generate in a RANSAC-like process. The larger
        this value, the more robust but also slower the initial identification of
        bad channels will be. Default is 50.
    max_broken_time : float
        Maximum time (either in seconds or as fraction of the recording) during which
        a channel is allowed to have artifacts. If a channel exceeds this, it will be
        removed. Default is 0.4 (40%), max is 0.5 (breakdown point of stats). Pretty
        much never tuned.

    References
    ----------
    .. [Kothe2013] Kothe, C.A. and Makeig, S., 2013. BCILAB: a platform for
       brain–computer interface development. Journal of Neural Engineering, 10(5),
       p.056014.

    """

    def __init__(
        self,
        *,
        corr_threshold: float = 0.8,
        noise_threshold: float = 4.0,
        window_len: float = 5,
        max_broken_time: float = 0.4,
        subset_size: float = 0.25,
        num_samples: int = 50,
    ):
        # Ask the base class to record the original channel locations before this
        # step runs.
        super().__init__(record_orig_chanlocs=True)
        # Stored unchanged; apply_eeg forwards them to eegprep.clean_channels.
        self.corr_threshold = corr_threshold
        self.noise_threshold = noise_threshold
        self.window_len = window_len
        self.max_broken_time = max_broken_time
        self.subset_size = subset_size
        self.num_samples = num_samples

    @property
    def _all_attrs(self):
        """Attribute names of the base class followed by this step's parameters."""
        own_attrs = [
            "corr_threshold",
            "noise_threshold",
            "window_len",
            "max_broken_time",
            "num_samples",
            "subset_size",
        ]
        return super()._all_attrs + own_attrs

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure.

        Returns the result of ``eegprep.clean_channels`` called with the configured
        parameters. ``raw`` is not used by this step.
        """
        return eegprep.clean_channels(
            eeg,
            corr_threshold=self.corr_threshold,
            noise_threshold=self.noise_threshold,
            window_len=self.window_len,
            max_broken_time=self.max_broken_time,
            num_samples=self.num_samples,
            subset_size=self.subset_size,
        )
873
+
874
+
875
class RemoveBadChannelsNoLocs(EEGPrepBasePreprocessor):
    """Remove EEG channels with problematic data; variant that does not use channel
    locations. Implemented as in [Kothe2013]_.

    .. figure:: https://www.mdpi.com/sensors/sensors-22-07314/article_deploy/html/images/sensors-22-07314-g003.png
        :align: center
        :alt: Conceptual image of bad-channel removal.

    This is an automated artifact rejection function which ensures that the data
    contains no channels that record only noise for extended periods of time.
    The criterion is based on correlation: if a channel is decorrelated from all others
    (pairwise correlation < a given threshold), excluding a given fraction of most
    correlated channels, and if this holds on for a sufficiently long fraction of the
    data set, then the channel is removed.

    This method does not require or take into account channel locations; if you do have
    locations, you may get better results with the :class:`RemoveBadChannels`
    preprocessor instead.

    Preconditions:

    - One of :class:`RemoveDrifts` or :class:`braindecode.preprocessing.Filter`
      (configured as a highpass filter) must have been applied beforehand.
    - Consider applying :class:`RemoveDCOffset` beforehand as a general precaution.

    Parameters
    ----------
    min_corr : float
        Minimum correlation between a channel and any other channel (in a short
        period of time) below which the channel is considered abnormal for that time
        period. Reasonable range: 0.4 (very lax) to 0.6 (quite aggressive).
        Default is 0.45.
    ignored_quantile : float
        Fraction of channels that need to have at least the given min_corr value w.r.t.
        the channel under consideration. This allows to deal with channels or small
        groups of channels that measure the same noise source. Reasonable
        range: 0.05 (rather lax) to 0.2 (tolerates many disconnected/shorted channels).
        Default is 0.1.
    window_len : float
        Length of the windows (in seconds) over which correlation stats are computed.
        Reasonable values are 1.0 sec (more noisy estimates) to 5.0 sec (more reliable,
        but can miss brief artifacts). Default is 2.0 sec.
    max_broken_time : float
        Maximum time (either in seconds or as fraction of the recording) during which
        a channel is allowed to have artifacts. If a channel exceeds this, it will be
        removed. Default is 0.4 (40%), max is 0.5 (breakdown point of stats). Pretty
        much never tuned.
    linenoise_aware : bool
        Whether the operation should be performed in a line-noise
        aware manner. If enabled, the correlation measure will not be affected
        by the presence or absence of line noise (using a temporary notch filter).
        Default is True.

    References
    ----------
    .. [Kothe2013] Kothe, C.A. and Makeig, S., 2013. BCILAB: a platform for
       brain–computer interface development. Journal of Neural Engineering, 10(5),
       p.056014.

    """

    def __init__(
        self,
        *,
        min_corr: float = 0.45,
        ignored_quantile: float = 0.1,
        window_len: float = 2.0,
        max_broken_time: float = 0.4,
        linenoise_aware: bool = True,
    ):
        # Ask the base class to record the original channel locations before this
        # step runs.
        super().__init__(record_orig_chanlocs=True)
        # Stored unchanged; apply_eeg forwards them to eegprep.clean_channels_nolocs.
        self.linenoise_aware = linenoise_aware
        self.max_broken_time = max_broken_time
        self.window_len = window_len
        self.ignored_quantile = ignored_quantile
        self.min_corr = min_corr

    @property
    def _all_attrs(self):
        """Attribute names of the base class followed by this step's parameters."""
        own_attrs = [
            "min_corr",
            "ignored_quantile",
            "window_len",
            "max_broken_time",
            "linenoise_aware",
        ]
        return super()._all_attrs + own_attrs

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure.

        Calls ``eegprep.clean_channels_nolocs`` with the configured parameters and
        returns the first element of the pair it yields; the second element is
        discarded. ``raw`` is not used by this step.
        """
        cleaned, _unused = eegprep.clean_channels_nolocs(
            eeg,
            min_corr=self.min_corr,
            ignored_quantile=self.ignored_quantile,
            window_len=self.window_len,
            max_broken_time=self.max_broken_time,
            linenoise_aware=self.linenoise_aware,
        )
        return cleaned
972
+
973
+
974
class RemoveBursts(EEGPrepBasePreprocessor):
    """Run the Artifact Subspace Reconstruction (ASR) method on EEG data to
    remove burst-type artifacts. Follows [Mullen2015]_.

    .. figure:: https://cdn.ncbi.nlm.nih.gov/pmc/blobs/a79a/4710679/675fc2dee929/nihms733482f9.jpg
        :align: center
        :alt: Before/after comparison of ASR applied to EEG data.

    This is an automated artifact rejection function that ensures that the data
    contains no events that have abnormally strong power; the subspaces on which
    those events occur are reconstructed (interpolated) based on the rest of the
    EEG signal during these time periods.

    Preconditions:

    - One of :class:`RemoveDrifts` or :class:`braindecode.preprocessing.Filter`
      (configured as a highpass filter) must have been applied beforehand.
    - Must have removed flat-line channels beforehand with :class:`RemoveFlatChannels`.
    - If you are removing bad channels (:class:`RemoveBadChannels` or
      :class:`RemoveBadChannelsNoLocs`), use those before this step.
    - Consider applying :class:`RemoveDCOffset` beforehand as a general best practice.
    - If you are re-referencing to common average (:class:`RemoveCommonAverageReference`),
      this should normally *NOT* be done before this step, but after it.

    Parameters
    ----------
    cutoff : float
        Threshold for artifact rejection. Data portions whose variance is larger than
        this threshold relative to the calibration data are considered artifactual
        and removed. There is a fair amount of literature on what constitutes a good
        value. 7.5 is very aggressive, 10-15 is a good range for ML pipelines, 20-30
        is more forgiving and is more common in neuroscience applications. The unit is
        z-scores relative to a Gaussian component of background EEG, but since EEG
        phenomena of interest can stand out from the Gaussian background, typical
        thresholds are considerably larger than for a purely Gaussian distribution.
        Default is 10.0.
    window_len : float | None
        Length of the statistics window in seconds. Should not be much longer
        than artifact timescale. The number of samples in the window should
        be >= 1.5x channels. Default: max(0.5, 1.5 * nbchan / srate).
    step_size : int | None
        Step size for processing in samples. The reconstruction matrix is updated every
        this many samples. If None, defaults to window_len / 2 samples.
    max_dims : float
        Maximum dimensionality/fraction of dimensions to reconstruct. Default: 0.66.
        This can be understood to be the number of simultaneous artifact components that
        may be removed; normally needs no tuning, but on very low-channel data (e.g.,
        4ch) one may explore small integers between 1 and #channels-1.
    ref_maxbadchannels : float | None
        Parameter that controls automatic calibration data selection. This represents
        the max fraction (0-1) of bad channels tolerated in a window for it to be used
        as calibration data. Lower is more aggressive (e.g., 0.05). Default: 0.075.
        The parameter has the same meaning as the max_bad_channels parameter in the
        RemoveBadWindows preprocessor, but note that this stage is used here as a
        subroutine to identify calibration data only. The overall method will always
        output a data matrix of the same shape as the input data. If set to None,
        all data is used for calibration.
    ref_tolerances : tuple[float, float]
        Power tolerances (lower, upper) in SDs from robust EEG power for a channel to
        be considered 'bad' during calibration data selection. This parameter goes hand
        in hand with ref_maxbadchannels. Default: (-inf, 5.5).
    ref_wndlen : float
        Window length in seconds for calibration data selection granularity.
        Default: 1.0.
    maxmem : int
        Maximum memory (in MB) to use during processing. Larger values can reduce
        overhead during processing, but usually 64MB is sufficient. Default: 64.

    References
    ----------
    .. [Mullen2015] Mullen, T.R., Kothe, C.A., Chi, Y.M., Ojeda, A., Kerth, T.,
       Makeig, S., Jung, T.P. and Cauwenberghs, G., 2015. Real-time neuroimaging and
       cognitive monitoring using wearable dry EEG. IEEE Transactions on Biomedical
       Engineering, 62(11), pp.2553-2567.

    """

    def __init__(
        self,
        *,
        cutoff: float = 10.0,
        window_len: float | None = None,
        step_size: int | None = None,
        max_dims: float = 0.66,
        ref_maxbadchannels: float | None = 0.075,
        ref_tolerances: tuple[float, float] = (-np.inf, 5.5),
        ref_wndlen: float = 1.0,
        maxmem: int = 64,
    ):
        # Tell the base class that this step may alter the recording duration.
        super().__init__(can_change_duration=True)
        # Burst-detection settings.
        self.cutoff = cutoff
        self.window_len = window_len
        self.step_size = step_size
        self.max_dims = max_dims
        # Calibration-data selection settings.
        self.ref_maxbadchannels = ref_maxbadchannels
        self.ref_tolerances = ref_tolerances
        self.ref_wndlen = ref_wndlen
        # Resource limit.
        self.maxmem = maxmem

    @property
    def _all_attrs(self):
        """Attribute names of the base class followed by this step's parameters."""
        own_attrs = [
            "cutoff",
            "window_len",
            "step_size",
            "max_dims",
            "ref_maxbadchannels",
            "ref_tolerances",
            "ref_wndlen",
            "maxmem",
        ]
        return super()._all_attrs + own_attrs

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure.

        Returns the result of ``eegprep.clean_asr`` called with the configured
        parameters. ``raw`` is not used by this step.
        """
        return eegprep.clean_asr(
            eeg,
            cutoff=self.cutoff,
            window_len=self.window_len,
            step_size=self.step_size,
            max_dims=self.max_dims,
            ref_maxbadchannels=self.ref_maxbadchannels,
            ref_tolerances=self.ref_tolerances,
            ref_wndlen=self.ref_wndlen,
            maxmem=self.maxmem,
        )
1101
+
1102
+
1103
class RemoveBadWindows(EEGPrepBasePreprocessor):
    """Remove periods with abnormally high-power content from continuous data.
    Implemented as in [Kothe2013]_.

    .. figure:: https://www.jove.com/files/ftp_upload/65829/65829fig13.jpg
        :align: center
        :alt: Before/after comparison of bad-window removal.

    This function cuts segments from the data which contain high-power (or low-power)
    artifacts. Specifically, only time windows are retained which have less than a
    certain fraction of *bad* channels, where a channel is bad in a window if its RMS
    power is above or below some z-score threshold relative to a robust estimate
    of clean-EEG power in that channel.

    .. Note::
        When your method is meant to produce predictions for all time points in your
        continuous data (or all epochs of interest), you may not want to use this
        preprocessor, and enabling it may give you rosy performance estimates that do
        not reflect how your method works when used on gap-free data. It can
        nevertheless be useful to apply this to training data only in such cases,
        however, to get an artifact-unencumbered model.

    Preconditions:

    - One of :class:`RemoveDrifts` or :class:`braindecode.preprocessing.Filter`
      (configured as a highpass filter) must have been applied beforehand.

    Parameters
    ----------
    max_bad_channels : int | float
        Threshold for rejection of bad time windows based on fraction of simultaneously
        noisy channels. This is the main tuning parameter; lower is more aggressive.
        Typical values are 0.15 (quite aggressive) to 0.3 (quite lax). Can also be
        specified as an absolute number of channels. Default is 0.25 (25% of channels).
    zthresholds : tuple(float, float)
        (min, max) z-score tolerance for identifying bad time window/channel pairs.
        This typically does not need to be changed (instead one may change the max
        bad channels that cross this threshold), but different implementations
        use different values here. The max value is the main parameter, where
        EEGLAB/EEGPrep uses 7 while the original pipeline [1] used 5.5, and NeuroPype
        uses 6. Lower values are more aggressive. The min value is only triggered if the
        EEG data has signal dropouts (very low amplitude, e.g. due to something becoming
        unplugged) which is rare; some choices are (-inf, EEGPrep; -3.5, BCILAB;
        -4, NeuroPype). Default is (-inf, 7).
    window_len : float
        The window length that is used to check the data for artifact content, in
        seconds. This is ideally as long as the expected time scale of the artifacts,
        but short enough for there to be enough windows to compute statistics over.
        Default is 1.0 sec, but this may be lowered to 0.5 sec to catch very brief
        artifacts.
    window_overlap : float
        Fractional overlap between consecutive windows (0-1). Higher overlap
        finds more artefacts but is slower. Default is 0.66 (about 2/3 overlap).
    max_dropout_fraction : float
        Maximum fraction of windows that may have arbitrarily low amplitude
        (e.g. sensor unplugged). Default is 0.1.
    min_clean_fraction : float
        Minimum fraction of windows expected to be clean (essentially
        uncontaminated EEG). Default is 0.25.
    truncate_quant : tuple(float, float)
        Quantile range of the truncated Gaussian to fit (default (0.022,0.6)).
    step_sizes : tuple(float, float)
        Grid-search step sizes in quantiles for lower/upper edge. Default is
        (0.01,0.01).
    shape_range : sequence(float)
        Range for the beta shape parameter in the generalised Gaussian used
        for distribution fitting. Default is np.arange(1.7, 3.6, 0.15). Arrays are
        copied on construction, so later in-place edits of the passed array (or of
        the default) do not affect this instance.

    References
    ----------
    .. [Kothe2013] Kothe, C.A. and Makeig, S., 2013. BCILAB: a platform for
       brain–computer interface development. Journal of Neural Engineering, 10(5),
       p.056014.

    """

    def __init__(
        self,
        *,
        max_bad_channels: int | float = 0.25,
        zthresholds: tuple[float, float] = (-np.inf, 7),
        window_len: float = 1.0,
        window_overlap: float = 0.66,
        max_dropout_fraction: float = 0.1,
        min_clean_fraction: float = 0.25,
        truncate_quant: tuple[float, float] = (0.022, 0.6),
        step_sizes: tuple[float, float] = (0.01, 0.01),
        shape_range: np.ndarray | Sequence[float] = np.arange(1.7, 3.6, 0.15),
    ):
        # Tell the base class that this step may alter the recording duration.
        super().__init__(can_change_duration=True)
        self.max_bad_channels = max_bad_channels
        self.zthresholds = zthresholds
        self.window_len = window_len
        self.window_overlap = window_overlap
        self.max_dropout_fraction = max_dropout_fraction
        self.min_clean_fraction = min_clean_fraction
        self.truncate_quant = truncate_quant
        self.step_sizes = step_sizes
        # The default ``shape_range`` array is created once, when this function is
        # defined, and would otherwise be shared by every instance. Keep a private
        # copy of ndarray inputs so an in-place edit on one instance cannot leak
        # into other instances or into the default itself. Non-array sequences are
        # stored as given.
        if isinstance(shape_range, np.ndarray):
            shape_range = shape_range.copy()
        self.shape_range = shape_range

    @property
    def _all_attrs(self):
        """Attribute names of the base class followed by this step's parameters."""
        return super()._all_attrs + [
            "max_bad_channels",
            "zthresholds",
            "window_len",
            "window_overlap",
            "max_dropout_fraction",
            "min_clean_fraction",
            "truncate_quant",
            "step_sizes",
            "shape_range",
        ]

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure.

        Calls ``eegprep.clean_windows`` with the configured parameters and returns
        the first element of the pair it yields; the second element is discarded.
        ``raw`` is not used by this step.
        """
        eeg, _ = eegprep.clean_windows(
            eeg,
            max_bad_channels=self.max_bad_channels,
            zthresholds=self.zthresholds,
            window_len=self.window_len,
            window_overlap=self.window_overlap,
            max_dropout_fraction=self.max_dropout_fraction,
            min_clean_fraction=self.min_clean_fraction,
            truncate_quant=self.truncate_quant,
            step_sizes=self.step_sizes,
            shape_range=self.shape_range,
        )

        return eeg
1232
+
1233
+
1234
class ReinterpolateRemovedChannels(EEGPrepBasePreprocessor):
    """Reinterpolate previously removed EEG channels to restore original channel set.

    .. figure:: ../../docs/_static/preprocess/sph_spline_interp.png
        :align: center
        :alt: Spherical spline interpolation example.

    This reinterpolates EEG channels that were previously dropped via one of the EEGPrep
    channel removal operations and restores the original order of EEG channels. This
    is typically necessary when you are using automatic channel removal but you need
    a consistent channel set across multiple recordings/sessions. Uses spherical-spline
    interpolation (based on [Perrin1989]_).

    The typical place to perform this is after all other EEGPrep-related artifact
    removal steps, except re-referencing. If no channel locations were recorded,
    this preprocessor has no effect.

    Preconditions:

    - Must have 3D channel locations.
    - This filter will only have an effect if one or more of the preceding steps
      recorded original channel locations (e.g., :class:`RemoveBadChannels`,
      :class:`RemoveBadChannelsNoLocs`, or :class:`RemoveFlatChannels`).
    - If you are re-referencing to common average (:class:`RemoveCommonAverageReference`),
      this should normally *NOT* be done before this step, but after it (otherwise
      your reference will depend on which channels were removed).

    References
    ----------
    .. [Perrin1989] Perrin, F., Pernier, J., Bertrand, O. and Echallier, J.F., 1989.
       Spherical splines for scalp potential and current density mapping.
       Electroencephalography and Clinical Neurophysiology, 72(2), pp.184-187.

    """

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure.

        Looks up the recorded original channel locations for ``raw``. If none were
        recorded, an info message is logged and ``eeg`` is returned unchanged. If
        the recorded set is not larger than the current ``eeg["chanlocs"]``,
        ``eeg`` is also returned unchanged. Otherwise the result of
        ``eegprep.eeg_interp`` is returned.
        """
        recorded_locs = self._get_orig_chanlocs(raw)

        if recorded_locs is None:
            log.info(
                "ReinterpolateRemovedChannels: No original channel locations were "
                "recorded by a preceding step; skipping reinterpolation."
            )
            return eeg

        # Nothing to restore unless channels have been dropped since recording.
        if len(recorded_locs) <= len(eeg["chanlocs"]):
            return eeg

        return eegprep.eeg_interp(eeg, recorded_locs)
1282
+
1283
+
1284
class RemoveCommonAverageReference(EEGPrepBasePreprocessor):
    """Subtract the common average reference from the EEG data (EEGPrep version).
    This is useful for having a consistent referencing scheme across recordings
    (cf. [Offner1950]_).

    Generally, common average re-referencing is `data -= mean(data, axis=0)`, but
    both EEGLAB/eegprep and to a greater extent MNE have additional bookkeeping around
    re-referencing, in the latter case due to its focus on source localization. This
    will have little effect on most machine-learning use cases; nevertheless, this
    operation is included here to allow users to mirror the behavior of the end-to-end
    EEGPrep pipeline by means of individual operations (for example when migrating
    from one to the other form) without introducing perhaps unexpected side effects
    on the MNE data structure.

    The operation performed is:

    .. math::

        X'_{c,t} = X_{c,t} - \\frac{1}{C}\\sum_{c=1}^{C} X_{c,t}

    where :math:`C` is the number of channels, :math:`c` indexes the channel, and
    :math:`t` indexes time.

    References
    ----------
    .. [Offner1950] Offner, F. F. (1950). The EEG as potential mapping: the value of the
       average monopolar reference. Electroencephalography and Clinical Neurophysiology,
       2(2), 213-214.

    """

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure.

        Returns the result of ``eegprep.reref`` called with an empty reference list.
        ``raw`` is not used by this step.
        """
        # NOTE(review): the empty list presumably selects average referencing, as in
        # the EEGLAB reref convention; confirm against the eegprep documentation.
        rereferenced = eegprep.reref(eeg, [])
        return rereferenced