braindecode 1.3.0.dev180329405__py3-none-any.whl → 1.3.0.dev182330353__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. braindecode/augmentation/base.py +1 -1
  2. braindecode/datasets/__init__.py +12 -4
  3. braindecode/datasets/base.py +115 -151
  4. braindecode/datasets/bcicomp.py +4 -4
  5. braindecode/datasets/bids.py +3 -3
  6. braindecode/datasets/experimental.py +2 -2
  7. braindecode/datasets/mne.py +3 -5
  8. braindecode/datasets/moabb.py +17 -7
  9. braindecode/datasets/nmt.py +2 -2
  10. braindecode/datasets/sleep_physio_challe_18.py +2 -2
  11. braindecode/datasets/sleep_physionet.py +2 -2
  12. braindecode/datasets/tuh.py +2 -2
  13. braindecode/datasets/xy.py +2 -2
  14. braindecode/datautil/__init__.py +11 -1
  15. braindecode/datautil/channel_utils.py +114 -0
  16. braindecode/datautil/serialization.py +7 -7
  17. braindecode/functional/functions.py +6 -2
  18. braindecode/functional/initialization.py +2 -3
  19. braindecode/models/__init__.py +6 -0
  20. braindecode/models/atcnet.py +26 -27
  21. braindecode/models/attentionbasenet.py +37 -32
  22. braindecode/models/attn_sleep.py +2 -0
  23. braindecode/models/base.py +280 -2
  24. braindecode/models/bendr.py +469 -0
  25. braindecode/models/biot.py +2 -0
  26. braindecode/models/contrawr.py +2 -0
  27. braindecode/models/ctnet.py +8 -3
  28. braindecode/models/deepsleepnet.py +28 -19
  29. braindecode/models/eegconformer.py +2 -2
  30. braindecode/models/eeginception_erp.py +31 -25
  31. braindecode/models/eegitnet.py +2 -0
  32. braindecode/models/eegminer.py +2 -0
  33. braindecode/models/eegnet.py +1 -1
  34. braindecode/models/eegsym.py +917 -0
  35. braindecode/models/eegtcnet.py +2 -0
  36. braindecode/models/fbcnet.py +5 -1
  37. braindecode/models/fblightconvnet.py +2 -0
  38. braindecode/models/fbmsnet.py +20 -6
  39. braindecode/models/ifnet.py +2 -0
  40. braindecode/models/labram.py +33 -26
  41. braindecode/models/medformer.py +758 -0
  42. braindecode/models/msvtnet.py +2 -0
  43. braindecode/models/patchedtransformer.py +1 -1
  44. braindecode/models/signal_jepa.py +111 -27
  45. braindecode/models/sinc_shallow.py +12 -9
  46. braindecode/models/sstdpn.py +11 -11
  47. braindecode/models/summary.csv +3 -0
  48. braindecode/models/syncnet.py +2 -0
  49. braindecode/models/tcn.py +2 -0
  50. braindecode/models/usleep.py +26 -21
  51. braindecode/models/util.py +3 -0
  52. braindecode/modules/attention.py +10 -10
  53. braindecode/modules/blocks.py +3 -3
  54. braindecode/modules/filter.py +2 -9
  55. braindecode/modules/layers.py +18 -17
  56. braindecode/preprocessing/__init__.py +232 -3
  57. braindecode/preprocessing/eegprep_preprocess.py +1202 -0
  58. braindecode/preprocessing/mne_preprocess.py +142 -10
  59. braindecode/preprocessing/preprocess.py +28 -18
  60. braindecode/preprocessing/util.py +166 -0
  61. braindecode/preprocessing/windowers.py +26 -20
  62. braindecode/samplers/base.py +8 -8
  63. braindecode/version.py +1 -1
  64. {braindecode-1.3.0.dev180329405.dist-info → braindecode-1.3.0.dev182330353.dist-info}/METADATA +6 -2
  65. braindecode-1.3.0.dev182330353.dist-info/RECORD +109 -0
  66. braindecode-1.3.0.dev180329405.dist-info/RECORD +0 -103
  67. {braindecode-1.3.0.dev180329405.dist-info → braindecode-1.3.0.dev182330353.dist-info}/WHEEL +0 -0
  68. {braindecode-1.3.0.dev180329405.dist-info → braindecode-1.3.0.dev182330353.dist-info}/licenses/LICENSE.txt +0 -0
  69. {braindecode-1.3.0.dev180329405.dist-info → braindecode-1.3.0.dev182330353.dist-info}/licenses/NOTICE.txt +0 -0
  70. {braindecode-1.3.0.dev180329405.dist-info → braindecode-1.3.0.dev182330353.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1202 @@
1
+ """Preprocessors using the EEGPrep package."""
2
+
3
+ # Authors: Christian Kothe <christian.kothe@intheon.io>
4
+ #
5
+ # License: BSD-3
6
+
7
+ import logging
8
+ from abc import abstractmethod
9
+ from typing import Any, Sequence
10
+
11
+ import mne
12
+ import numpy as np
13
+ from mne.io import BaseRaw
14
+
15
+ from .preprocess import Preprocessor
16
+ from .util import mne_load_metadata, mne_store_metadata
17
+
18
+ log = logging.getLogger(__name__)
19
+
20
+ __all__ = [
21
+ "EEGPrep",
22
+ "RemoveDCOffset",
23
+ "Resampling",
24
+ "RemoveFlatChannels",
25
+ "RemoveDrifts",
26
+ "RemoveBadChannels",
27
+ "RemoveBadChannelsNoLocs",
28
+ "RemoveBursts",
29
+ "RemoveBadWindows",
30
+ "ReinterpolateRemovedChannels",
31
+ "RemoveCommonAverageReference",
32
+ ]
33
+
34
+ try:
35
+ import eegprep
36
+ except ImportError:
37
+ eegprep = None
38
+
39
+
40
class EEGPrepBasePreprocessor(Preprocessor):
    """Abstract base for EEGPrep-backed preprocessors; holds the shared plumbing.

    Subclasses implement :meth:`apply_eeg`, which works on an EEGLAB-style EEG
    dict. This class converts the MNE Raw object to that structure and back, sets
    non-EEG channels aside while the operation runs, and finally writes the result
    back into the Raw object that was passed in.

    Parameters
    ----------
    can_change_duration : bool | str
        Whether the preprocessor can change the duration of the data during
        processing. May also be the name of the sub-operation that does so, which
        is then shown in a more actionable error message. ``True`` is replaced by
        the name of the concrete class.
    record_orig_chanlocs : bool
        Whether to record the EEG channel locations, as they were before
        processing, in the MNE Raw structure for later use. Already recorded
        channel locations are not overwritten, so this can safely be enabled on
        several preprocessors in a row; whatever was recorded first is kept.
    force_dtype : np.dtype | str | None
        Optionally force the in/out data to be converted to this dtype before and
        after processing. Can help ensure consistent dtypes across different
        preprocessors.

    """

    # payload key under which the original channel locations are stored
    _chanlocs_key = "original_chanlocs"

    def __init__(
        self,
        *,
        can_change_duration: str | bool = False,
        record_orig_chanlocs: bool = False,
        force_dtype: np.dtype | str | None = None,
    ):
        super().__init__(fn=self._apply_op, apply_on_array=False)
        # a bare True means "this whole preprocessor"; store its class name so the
        # log message in _apply_op() can name the culprit
        self.can_change_duration = (
            self.__class__.__name__
            if can_change_duration is True
            else can_change_duration
        )
        self.record_orig_chanlocs = record_orig_chanlocs
        if force_dtype is None:
            self.force_dtype = None
        else:
            self.force_dtype = np.dtype(force_dtype)

    @abstractmethod
    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure. Overridden by subclass."""
        ...

    def _apply_op(self, raw: BaseRaw) -> None:
        """Run :meth:`apply_eeg` on the EEG channels of ``raw`` and update ``raw`` in-place.

        This is the function handed to the ``Preprocessor`` base class in
        ``__init__``.

        Raises
        ------
        RuntimeError
            If the optional ``eegprep`` package could not be imported.
        ValueError
            If ``raw`` is an Epochs object rather than continuous data.
        """
        if eegprep is None:
            raise RuntimeError(
                "The eegprep package is required to use the EEGPrep preprocessor.\n"
                " Please install braindecode with the [eegprep] extra added as in\n"
                " 'pip install braindecode[eegprep]' to use this functionality,\n"
                " or run 'pip install eegprep[eeglabio]' directly."
            )

        opname = type(self).__name__
        if isinstance(raw, mne.BaseEpochs):
            raise ValueError(
                f"{opname} is meant to be used on Raw (continuous) data, "
                "not Epochs. Use before epoching."
            )

        # the description may not survive the conversion roundtrip; keep it so it
        # can be put back on the processed object below
        saved_description = raw.info["description"]

        # apply_eeg() expects EEG channels only, so set everything else aside
        original_order = raw.ch_names.copy()
        eeg_picks = mne.pick_types(raw.info, eeg=True, exclude=[])
        if len(eeg_picks) < len(original_order):
            eeg_raw = raw.copy().pick(eeg_picks)
            # NOTE(review): drop_channels() appears to operate on `raw` itself (MNE
            # semantics), so from here on `raw` - including the one handed to
            # apply_eeg() below - presumably holds only the non-EEG channels; confirm.
            non_eeg = raw.drop_channels(eeg_raw.ch_names)
        else:
            eeg_raw = raw
            non_eeg = None

        eeg = eegprep.mne2eeg(eeg_raw)
        # drop our reference to the intermediate Raw, as the conversion result is
        # all that is needed from here on
        del eeg_raw

        # snapshot of the channel locations as they were before processing
        chanlocs_before = [loc.copy() for loc in eeg["chanlocs"]]

        # some EEGPrep operations need every event to carry a 'duration' field
        for event in eeg["event"]:
            if "duration" not in event:
                event["duration"] = 1

        if self.force_dtype is not None:
            eeg["data"] = eeg["data"].astype(self.force_dtype)

        # the subclass-specific operation
        eeg = self.apply_eeg(eeg, raw)

        if self.force_dtype is not None:
            eeg["data"] = eeg["data"].astype(self.force_dtype)

        # rename EEGLAB-type boundary events to a form that's recognized by MNE so
        # they (or intersecting epochs) are ignored during potential downstream
        # epoching done by braindecode pipelines
        for event in eeg["event"]:
            if event["type"] == "boundary":
                event["type"] = "BAD boundary"

        # make sure a minimal chaninfo is present; it is not guaranteed otherwise
        chaninfo = eeg["chaninfo"]
        if not (isinstance(chaninfo, dict) and chaninfo):
            eeg["chaninfo"] = {"nosedir": "+X"}

        # convert back to MNE and restore the description saved above
        proc = eegprep.eeg2mne(eeg)
        proc.info["description"] = saved_description

        if self.record_orig_chanlocs:
            # stash the original channel locations unless some are already stored
            mne_store_metadata(
                raw=proc,
                payload=chanlocs_before,
                key=self._chanlocs_key,
                no_overwrite=True,
            )

        if non_eeg is not None:
            duration_changer = self.can_change_duration
            if not duration_changer:
                # re-insert non-EEG channels, and restore original channel order
                proc.add_channels([non_eeg], force_update_info=True)
                if proc.ch_names != original_order:
                    proc.reorder_channels(original_order)
            else:
                # The processed EEG may no longer line up with the untouched
                # non-EEG channels, so we do not attempt to add them back (it
                # could work by luck on some sessions). A future version might
                # work out which samples were removed and apply the same mask to
                # the non-EEG channels via MNE.
                affected_chans = ", ".join(non_eeg.ch_names)
                if duration_changer == opname:
                    detail = f"the {opname} Preprocessor"
                else:
                    detail = f"{duration_changer} in the {opname} Preprocessor"
                log.error(
                    f"Could not add back non-EEG channels ({affected_chans}) after"
                    f" {opname} processing; these will be omitted from the processed"
                    f" data. If you want to retain these channels, you will have to"
                    f" disable {detail}; you may perform that step using other"
                    f" methods before and after {opname}, respectively."
                )

        # _apply_op() is in-place: move the processed state into `raw` and let
        # `proc` go
        if not proc.preload:
            proc.load_data()
        raw.__dict__ = proc.__dict__

    @classmethod
    def _get_orig_chanlocs(cls, raw: BaseRaw) -> list[dict[str, Any]] | None:
        """Return the original channel locations stashed in ``raw``, if any.

        Looks up the metadata stored under ``_chanlocs_key`` via
        ``mne_load_metadata``.
        """
        return mne_load_metadata(raw, key=cls._chanlocs_key)
201
+
202
+
203
class EEGPrep(EEGPrepBasePreprocessor):
    """Preprocessor for an MNE Raw object that applies the EEGPrep pipeline.
    This is based on [Mullen2015]_.

    .. figure:: https://cdn.ncbi.nlm.nih.gov/pmc/blobs/a79a/4710679/675fc2dee929/nihms733482f9.jpg
       :align: center
       :alt: Before/after comparison of EEGPrep processing on EEG data.

    This pipeline involves the stages:

    - DC offset subtraction (:class:`RemoveDCOffset`)
    - Optional resampling (:class:`Resampling`)
    - Flatline channel detection and removal (:class:`RemoveFlatChannels`)
    - High-pass filtering (:class:`RemoveDrifts`)
    - Bad channel detection and removal using correlation and HF noise criteria
      (:class:`RemoveBadChannels` with fallback to :class:`RemoveBadChannelsNoLocs`)
    - Burst artifact removal using ASR (Artifact Subspace Reconstruction)
      (:class:`RemoveBursts`)
    - Detection and removal of residual bad time windows (:class:`RemoveBadWindows`)
    - Optional reinterpolation of removed channels
      (:class:`ReinterpolateRemovedChannels`)
    - Optional common average referencing (:class:`RemoveCommonAverageReference`)

    These steps are also individually available as separate preprocessors in this module
    if you want to apply only a subset of them or customize some beyond the parameters
    available here. Note that it is important to apply them in the order given above;
    other orderings may lead to suboptimal results.

    Typically no signal processing (except potentially resampling or removal of unused
    channels or time windows) should be done before this pipeline. It is recommended to
    follow this with at least a low-pass filter to remove high-frequency artifacts
    (e.g., 40-45 Hz transition band).

    The main processing parameters can each be set to None to skip the respective
    stage (or False for boolean switches). Note this pipeline will only affect the
    EEG channels in your data, and will leave other channels unaffected. It is
    recommended to remove these channels yourself beforehand if you don't want them
    included in your downstream analysis.

    .. Note::
       This implementation of the pipeline is best used in the context of
       cross-session prediction; when using this with a within-session split, there is
       a risk of data leakage since the artifact removal will be calibrated on statistics
       of the entire session (and thus test sets). In practice the effect may be minor,
       unless your downstream analysis is strongly driven by artifacts (e.g., if you
       are trying to decode eye movements or muscle activity), but paper reviewers may
       not be convinced by that.


    Parameters
    ----------
    resample_to : float | None
        Optionally resample to this sampling rate (in Hz) before processing.
        Good choices are 200, 250, 256 Hz (consider keeping it a power of two
        if it was originally), but one may go as low as 100-128 Hz if memory, compute,
        or model complexity limitations demand it. Defaults to None (no resampling).
    flatline_maxdur : float | None
        Remove channels that are flat for longer than this duration (in seconds).
        This stage is almost never triggered in practice but can help with the
        occasional strange EEG configuration.
    highpass_frequencies : tuple[float, float] | None
        Tuple of lower and upper bound of the *transition band* for high-pass filtering
        before processing. This means that full suppression will be reached at the
        lower bound, and the upper bound is where the passband begins.
    bad_channel_corr_threshold : float | None
        Threshold for correlation-based bad channel detection. A good default range
        is 0.75-0.8. Becomes quite aggressive at and beyond 0.8; also, consider using
        lower values (eg 0.7-0.75) for <32ch EEG and higher (0.8-0.85) for >128ch.
        Setting this to None also disables the high-frequency noise criterion
        (``bad_channel_hf_threshold``).
    burst_removal_cutoff : float | None
        Amplitude threshold for burst artifact removal using ASR
        (Artifact Subspace Reconstruction). This parameter tends to have a large effect
        on the performance of downstream ML. 10-15 is a good range for ML pipelines
        (lower is more aggressive); for neuroscience analysis, more conservative values
        like 20-30 may be better. The unit is z-scores relative to a Gaussian component
        of background EEG, but real EEG can be super-Gaussian, thus the large values.
    bad_window_max_bad_channels : float | None
        Threshold for rejection of bad time windows based on fraction of simultaneously
        noisy channels. Lower is more aggressive. Typical values are 0.15 (quite
        aggressive) to 0.3 (quite lax).
    bad_channel_reinterpolate : bool
        Whether to reinterpolate bad channels that were detected and removed. Usually
        required when doing cross-session analysis (to have a consistent channel set).
    common_avg_ref : bool
        Whether to apply a common average reference after processing. Recommended
        when doing cross-study analysis to have a consistent referencing scheme.
    bad_channel_max_broken_time : float
        Max fraction of session length during which a channel may be bad before
        it is removed. Default is 0.4 (40%), max is 0.5 (breakdown point of stats).
        Pretty much never tuned.
    bad_channel_hf_threshold : float | None
        Threshold for high-frequency (>=45 Hz) noise-based bad channel detection,
        in z-scores. Lower is more aggressive. Default is 4.0. This is rarely tuned,
        but data with unusual higher-frequency activity could benefit from exploration
        in the 3.5-5.0 range.
    bad_window_tolerances : tuple[float, float] | None
        (min, max) z-score tolerance for identifying bad time window/channel pairs.
        This typically does not need to be changed (instead one may change the max
        bad channels that cross this threshold), but different implementations
        use different values here. The max value is the main parameter, where
        EEGLAB/EEGPrep uses 7 while the original pipeline [Mullen2015]_ used 5.5,
        and NeuroPype uses 6. Lower values are more aggressive. The min value is only
        triggered if the EEG data has signal dropouts (very low amplitude, e.g. due to
        something becoming unplugged) which is rare; some choices are (-inf, EEGPrep;
        -3.5, BCILAB; -4 NeuroPype).
    refdata_max_bad_channels : float | None
        Same function as bad_window_max_bad_channels, but used only to determine
        calibration data for burst removal. Usually more aggressive than the former
        (0.05-0.1) to get clean calibration data. This can be set to None to skip this
        and force all data to be used for calibration.
    refdata_max_tolerances : tuple[float, float] | None
        Same as bad_window_tolerances, but used only to determine calibration data for
        burst removal. Almost never touched, and defaults to a fairly aggressive
        (-inf, 5.5) to get clean calibration data.
    num_samples : int
        Number of channel subsets to draw for the RANSAC reconstruction during bad
        channel identification. Higher can be more robust but slower to calibrate.
        Default is 50.
    subset_size : float
        Size of channel subsets for RANSAC, as fraction (0-1) or count. Default 0.25.
        For higher-density EEG (e.g., 64-128ch), one can achieve somewhat better
        robustness to clusters of bad channels by setting this to 0.15 and increasing
        num_samples to 200.
    bad_channel_nolocs_threshold : float
        A fallback correlation threshold for bad-channel removal that is applied when
        no channel location information is available. The value here typically needs to
        be fairly low, e.g., 0.45-0.5 (lower is more aggressive). Ideally you have
        channel locations so that this fallback is not needed.
    bad_channel_nolocs_exclude_frac : float
        A fraction of most correlated channels to exclude in the case where no channel
        location information is available. Used to reject pairs of shorted or otherwise
        highly correlated sets of bad channels.
    max_mem_mb : int
        Max memory that ASR can use, in MB. Larger values can reduce overhead during
        processing, but usually 64MB is sufficient.

    Raises
    ------
    NotImplementedError
        When applied with burst removal enabled to data whose (rounded) sampling
        rate is not one of 100, 128, 200, 250, 256, 300, 500 or 512 Hz.

    References
    ----------
    .. [Mullen2015] Mullen, T.R., Kothe, C.A., Chi, Y.M., Ojeda, A., Kerth, T.,
       Makeig, S., Jung, T.P. and Cauwenberghs, G., 2015. Real-time neuroimaging and
       cognitive monitoring using wearable dry EEG. IEEE Transactions on Biomedical
       Engineering, 62(11), pp.2553-2567.

    """

    def __init__(
        self,
        *,
        # (main processing parameters)
        resample_to: float | None = None,
        flatline_maxdur: float | None = 5.0,
        highpass_frequencies: tuple[float, float] | None = (0.25, 0.75),
        bad_channel_corr_threshold: float | None = 0.8,
        burst_removal_cutoff: float | None = 10.0,
        bad_window_max_bad_channels: float | None = 0.25,
        bad_channel_reinterpolate: bool = True,
        common_avg_ref: bool = True,
        # additional tuning parameters
        bad_channel_max_broken_time: float = 0.4,
        bad_channel_hf_threshold: float | None = 4.0,
        bad_window_tolerances: tuple[float, float] | None = (-np.inf, 7),
        refdata_max_bad_channels: float | None = 0.075,
        refdata_max_tolerances: tuple[float, float] | None = (-np.inf, 5.5),
        num_samples: int = 50,
        subset_size: float = 0.25,
        bad_channel_nolocs_threshold: float = 0.45,
        bad_channel_nolocs_exclude_frac: float = 0.1,
        max_mem_mb: int = 64,
    ):
        def _stage_enabled(setting: Any) -> bool:
            # A stage is skipped only when its setting is None (or False). A
            # numeric zero is a real, if extreme, setting and must not be mistaken
            # for "disabled": apply_eeg() tests `resample_to is not None`, and the
            # window criterion is forwarded to clean_artifacts() as given.
            return setting is not None and setting is not False

        # Name the enabled stages that may alter the number of samples, so the
        # base class knows it cannot re-attach non-EEG channels and can tell the
        # user which stage to disable.
        duration_changing_stages = []
        if _stage_enabled(resample_to):
            duration_changing_stages.append("resample")
        if _stage_enabled(bad_window_max_bad_channels):
            duration_changing_stages.append("bad time window removal")
        super().__init__(
            can_change_duration=" and ".join(duration_changing_stages) or False,
        )
        self.resample_to = resample_to
        self.reinterpolate = bad_channel_reinterpolate
        self.common_avg_ref = common_avg_ref
        self.burst_removal_cutoff = burst_removal_cutoff
        self.bad_window_max_bad_channels = bad_window_max_bad_channels

        # the HF noise criterion is tied to the correlation criterion: disabling
        # bad-channel detection disables both
        if bad_channel_corr_threshold is None:
            line_noise_crit = None
        else:
            line_noise_crit = bad_channel_hf_threshold
        self.clean_artifacts_params = dict(
            ChannelCriterion=bad_channel_corr_threshold,
            LineNoiseCriterion=line_noise_crit,
            BurstCriterion=burst_removal_cutoff,
            WindowCriterion=bad_window_max_bad_channels,
            Highpass=highpass_frequencies,
            ChannelCriterionMaxBadTime=bad_channel_max_broken_time,
            BurstCriterionRefMaxBadChns=refdata_max_bad_channels,
            BurstCriterionRefTolerances=refdata_max_tolerances,
            WindowCriterionTolerances=bad_window_tolerances,
            FlatlineCriterion=flatline_maxdur,
            NumSamples=num_samples,
            SubsetSize=subset_size,
            NoLocsChannelCriterion=bad_channel_nolocs_threshold,
            NoLocsChannelCriterionExcluded=bad_channel_nolocs_exclude_frac,
            MaxMem=max_mem_mb,
            # For reference, the function additionally accepts these (legacy etc.)
            # arguments, which we're not exposing here (current defaults as below):
            # BurstRejection='off',
            # Distance='euclidian',
            # Channels=None,
            # Channels_ignore=None,
            # availableRAM_GB=None,
        )

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure.

        Runs, in order: per-channel median subtraction, optional resampling,
        ``eegprep.clean_artifacts`` with the parameters assembled in ``__init__``,
        optional reinterpolation of dropped channels and optional common average
        referencing. ``raw`` is not used here.
        """
        # remove per-channel DC offset (can be huge)
        eeg["data"] -= np.median(eeg["data"], axis=1, keepdims=True)

        # optional resampling
        if (srate := self.resample_to) is not None:
            eeg = eegprep.resample(eeg, srate)

        # do a check if the data has a supported sampling rate
        if self.burst_removal_cutoff is not None:
            supported_rates = (100, 128, 200, 250, 256, 300, 500, 512)
            cur_srate = int(eegprep.utils.round_mat(eeg["srate"]))
            if cur_srate not in supported_rates:
                # note: technically the method will run if you disable this error,
                # but you're likely getting (potentially quite) suboptimal results
                raise NotImplementedError(
                    f"The dataset has an uncommon sampling rate of {cur_srate} Hz,"
                    f" which is not supported by the EEGPrep Preprocessor"
                    f" implementation. Please enable resampling to"
                    f" resample the data to one of the supported rates"
                    f" ({', '.join(str(r) for r in supported_rates)})."
                )

        # preserve input channel locations for reinterpolation later
        orig_chanlocs = [channel_loc.copy() for channel_loc in eeg["chanlocs"]]

        # artifact removal stage; only the first returned value is needed
        eeg, *_ = eegprep.clean_artifacts(eeg, **self.clean_artifacts_params)

        # force_dtype is None here unless it was assigned after construction (this
        # class does not pass it on to the base class), so this cast is the default
        if self.force_dtype != np.float64:
            # cast to float32 for equivalence with multi-stage EEGPrep pipeline
            eeg["data"] = eeg["data"].astype(np.float32)

        # optionally reinterpolate dropped channels
        if self.reinterpolate and (len(orig_chanlocs) > len(eeg["chanlocs"])):
            eeg = eegprep.eeg_interp(eeg, orig_chanlocs)

        # optionally apply common average reference
        if self.common_avg_ref:
            eeg = eegprep.reref(eeg, [])

        return eeg
460
+
461
+
462
class RemoveFlatChannels(EEGPrepBasePreprocessor):
    r"""Drop EEG channels that stay flat for an extended period of time.
    Follows [Mullen2015]_.

    This is an automated artifact rejection step which ensures that the data
    contains no flat-lined channels. Such channels are very rare, but the
    preprocessor exists because their presence may throw off downstream
    preprocessing steps.

    This step is best placed very early in a preprocessing pipeline, before any
    filtering (since filter pre/post ringing can mask flatlines).

    A channel :math:`c` is flagged as flat if there exists a time interval
    :math:`[t_1, t_2]` where:

    .. math::

        |X_{c,t+1} - X_{c,t}| < \varepsilon_{\text{jitter}} \quad \forall t \in [t_1, t_2]

        \text{and} \quad t_2 - t_1 > T_{\text{max}}

    where :math:`\varepsilon_{\text{jitter}} = \text{max_allowed_jitter} \times \varepsilon`
    (with :math:`\varepsilon` being machine epsilon for float64), and
    :math:`T_{\text{max}} = \text{max_flatline_duration} \times f_s` (with :math:`f_s`
    being the sampling rate).

    Parameters
    ----------
    max_flatline_duration : float
        Maximum tolerated flatline duration. In seconds. If a channel has a longer
        flatline than this, it will be considered abnormal. Defaults to 5.0.
    max_allowed_jitter : float
        Maximum tolerated jitter during flatlines. As a multiple of epsilon for the
        64-bit float data type (np.finfo(np.float64).eps). Defaults to 20.

    References
    ----------
    .. [Mullen2015] Mullen, T.R., Kothe, C.A., Chi, Y.M., Ojeda, A., Kerth, T.,
       Makeig, S., Jung, T.P. and Cauwenberghs, G., 2015. Real-time neuroimaging and
       cognitive monitoring using wearable dry EEG. IEEE Transactions on Biomedical
       Engineering, 62(11), pp.2553-2567.

    """

    def __init__(
        self,
        *,
        max_flatline_duration: float = 5.0,
        max_allowed_jitter: float = 20.0,
    ):
        # channels may be removed here, so have the base class record the
        # original channel locations
        super().__init__(record_orig_chanlocs=True)
        self.max_flatline_duration = max_flatline_duration
        self.max_allowed_jitter = max_allowed_jitter

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure."""
        flatline_settings = {
            "max_flatline_duration": self.max_flatline_duration,
            "max_allowed_jitter": self.max_allowed_jitter,
        }
        return eegprep.clean_flatlines(eeg, **flatline_settings)
525
+
526
+
527
class RemoveDCOffset(EEGPrepBasePreprocessor):
    r"""Subtract the per-channel median from the EEG data to remove its DC offset.

    This preprocessor mainly exists because some EEG data (depending on the
    electrical characteristics of the hardware) can have such a large DC offset
    that highpass filters do not necessarily fully remove it, unless some care is
    taken with filter settings (noted in EEGLAB documentation [Delorme2004]_).

    The operation performed is:

    .. math::

        X'_{c,t} = X_{c,t} - \text{median}_t(X_{c,t})

    where :math:`c` indexes the channel and :math:`t` indexes time.

    References
    ----------
    .. [Delorme2004] Delorme, A. and Makeig, S., 2004. EEGLAB: an open source toolbox
       for analysis of single-trial EEG dynamics including independent component
       analysis. Journal of Neuroscience Methods, 134(1), pp.9-21.

    """

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure."""
        # This could equally be done directly on the MNE data structure; it lives
        # here because the EEGPrep machinery is already in place.
        channel_medians = np.median(eeg["data"], axis=1, keepdims=True)
        eeg["data"] -= channel_medians
        return eeg
557
+
558
+
559
class RemoveDrifts(EEGPrepBasePreprocessor):
    """High-pass filter the EEG data (forward-backward) to remove drifts.
    See [Oppenheim1999]_.

    .. figure:: ../../docs/_static/preprocess/highpass.png
       :align: center
       :alt: Magnitude response for this filter with default settings.

    Note that MNE has its own suite of filters for this that offers more choices; use
    this filter if you are specifically interested in matching the EEGLAB and EEGPrep
    behavior, for example if you're building an EEGPrep-like pipeline from individual
    steps, e.g., to customize parts that are not exposed by the top-level EEGPrep
    preprocessor.

    .. Note::
       If your method involves causal analysis, either with applications to real-time
       single-trial brain-computer interfacing or for example involving autoregressive
       modeling or other causal measures, consider using a strictly causal highpass
       filter instead.

    Parameters
    ----------
    transition : Sequence[float]
        The transition band in Hz, i.e. lower and upper edge of the transition as in
        (lo, hi). Defaults to (0.25, 0.75). Choosing this can be tricky when your data
        contains long-duration event-related potentials that your method exploits, in
        which case you may need to carefully lower this somewhat to avoid attenuating
        them.
    attenuation : float
        The stop-band attenuation, in dB. Defaults to 80.0.
    method : str
        The method to use for filtering ('fft' or 'fir'). Defaults to 'fft' (uses more
        memory but is much faster than 'fir').

    References
    ----------
    .. [Oppenheim1999] Oppenheim, A.V., 1999. Discrete-time signal processing.
       Pearson Education India.

    """

    def __init__(
        self,
        transition: Sequence[float] = (0.25, 0.75),
        *,
        attenuation: float = 80.0,
        method: str = "fft",
    ):
        super().__init__()
        self.method = method
        self.attenuation = attenuation
        self.transition = transition

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure."""
        filter_settings = {
            "transition": self.transition,
            "attenuation": self.attenuation,
            "method": self.method,
        }
        return eegprep.clean_drifts(eeg, **filter_settings)
622
+
623
+
624
class Resampling(EEGPrepBasePreprocessor):
    """Resample the data to a specified rate (EEGPrep version).
    Based on [Proakis2007]_ and included for equivalence with EEGPrep.

    .. figure:: ../../docs/_static/preprocess/downsample.png
       :align: center
       :alt: Example of resampling a time series.

    MNE has its resampling routine (use as `Preprocessor("resample", sfreq=rate)`)
    but this will not necessarily match EEGPrep's behavior exactly. Typical
    differences include edge padding, the exact design rule for the filter kernel
    and its window function, and handling of resampling ratios with large rational
    factors.

    It's not necessarily clear which of the two implementations is "better" (likely
    both are fine for typical EEG applications). Use this one if you try to match
    EEGPrep and EEGLAB behavior specifically, for example when you migrate from a
    simple pipeline that uses the high-level EEGPrep preprocessor to a more
    custom pipeline built from individual steps and want to ensure identical
    results (up to float precision issues).

    Resampling can be placed quite early in a preprocessing pipeline to cut down on
    compute time and memory usage of downstream steps, e.g., before filtering, but
    note the sampling rate interacts with e.g. temporal convolution kernel sizes;
    when reproducing literature, ideally you first resample to the same rate as
    used there.

    .. Note::
       There can be a small timing accuracy penalty when resampling on continuous data
       (before epoching) when doing event-locked analysis, since epoch windows will be
       snapped to the nearest sample. However, this jitter is typically fairly minor
       relative to timing variability in the brain responses themselves, so will often
       not be a problem in practice.

    Parameters
    ----------
    sfreq : float | None
        The desired sampling rate in Hz. Skipped if set to None, in which case
        the data (including any non-EEG channels) passes through unchanged.


    References
    ----------
    .. [Proakis2007] Proakis, J.G., 2007. Digital signal processing: principles,
       algorithms, and applications, 4/E. Pearson Education India.

    """

    def __init__(
        self,
        sfreq: float | None,
    ):
        # Only an actual resampling changes the number of samples. With sfreq=None
        # apply_eeg() leaves the data as is, so the base class must stay free to
        # re-attach non-EEG channels instead of dropping them with an error.
        super().__init__(can_change_duration=sfreq is not None)
        self.sfreq = sfreq

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Apply the preprocessor to an EEGLAB EEG structure."""
        if self.sfreq is not None:
            eeg = eegprep.resample(eeg, self.sfreq)

        return eeg
684
+
685
+
686
class RemoveBadChannels(EEGPrepBasePreprocessor):
    """Remove EEG channels with problematic data, using channel locations.

    Implemented as in [Kothe2013]_.

    .. figure:: https://www.mdpi.com/sensors/sensors-22-07314/article_deploy/html/images/sensors-22-07314-g003.png
        :align: center
        :alt: Conceptual image of bad-channel removal.

    This is an automated artifact rejection function which ensures that the data
    contains no channels that record only noise for extended periods of time. It
    uses a hybrid criterion involving correlation and high-frequency noise
    thresholds:

    a) if a channel has lower correlation to its robust estimate (based on other
       channels) than a given threshold for a minimum period of time (or percentage
       of the recording), it will be removed.
    b) if a channel has more (high-frequency) noise relative to the (robust)
       population of other channels than a given threshold (in standard
       deviations), it will be removed.

    This method requires channels to have an associated location; when a location
    is not known or could not be inferred (e.g., from channel labels if using a
    standard montage such as the 10-20 system), use the
    :class:`RemoveBadChannelsNoLocs` preprocessor instead.

    Preconditions:

    - One of :class:`RemoveDrifts` or :class:`braindecode.preprocessing.Filter`
      (configured as a highpass filter) must have been applied beforehand.
    - 3D channel locations must be available in the data (can be automatic with
      some file types, but may require some MNE operations with others).
    - Consider applying :class:`RemoveDCOffset` beforehand as a general precaution.

    Parameters
    ----------
    corr_threshold : float
        Correlation threshold. If a channel over a short time window is correlated
        at less than this value to its robust estimate (based on other channels),
        it is considered abnormal during that time. A good default range is
        0.75-0.8 and the default is 0.8. Becomes quite aggressive at and beyond
        0.8; also, consider using lower values (e.g., 0.7-0.75) for <32ch EEG and
        higher (0.8-0.85) for >128ch. This is the main tunable parameter of the
        method.
    noise_threshold : float
        Threshold for high-frequency (>=45 Hz) noise-based bad channel detection,
        in robust z-scores (i.e., st. devs.). Lower is more aggressive. Default is
        4.0. This is rarely tuned, but data with unusual higher-frequency activity
        could benefit from exploration in the 3.5-5.0 range.
    window_len : float
        Length of the time windows (in seconds) for which correlation statistics
        are computed; ideally short enough to reasonably capture periods of global
        artifacts or intermittent sensor dropouts, but not shorter (for statistical
        reasons). Default is 5.0 sec.
    subset_size : float
        Size of random channel subsets to compute robust reconstructions. This can
        be given as a fraction (0-1) of the total number of channels, or as an
        absolute number. Multiple (pseudo-)random subsets are sampled in a
        RANSAC-like process to obtain a robust reference estimate for each channel.
        Default is 0.25 (25% of channels). For higher-density EEG (e.g., 64-128ch)
        with potential clusters of bad channels, one can achieve somewhat better
        robustness by setting this to 0.15 and increasing num_samples to 200.
    num_samples : int
        Number of samples generated for the robust channel reconstruction. This is
        the number of samples to generate in a RANSAC-like process. The larger
        this value, the more robust but also slower the initial identification of
        bad channels will be. Default is 50.
    max_broken_time : float
        Maximum time (either in seconds or as fraction of the recording) during
        which a channel is allowed to have artifacts. If a channel exceeds this,
        it will be removed. Default is 0.4 (40%), max is 0.5 (breakdown point of
        stats). Pretty much never tuned.

    References
    ----------
    .. [Kothe2013] Kothe, C.A. and Makeig, S., 2013. BCILAB: a platform for
       brain–computer interface development. Journal of Neural Engineering, 10(5),
       p.056014.

    """

    def __init__(
        self,
        *,
        corr_threshold: float = 0.8,
        noise_threshold: float = 4.0,
        window_len: float = 5,
        max_broken_time: float = 0.4,
        subset_size: float = 0.25,
        num_samples: int = 50,
    ):
        """Store the detection thresholds; see the class docstring for details."""
        # NOTE(review): record_orig_chanlocs presumably makes the base class keep
        # the pre-removal channel locations for later reinterpolation -- confirm.
        super().__init__(record_orig_chanlocs=True)
        self.corr_threshold = corr_threshold
        self.noise_threshold = noise_threshold
        self.window_len = window_len
        self.max_broken_time = max_broken_time
        self.num_samples = num_samples
        self.subset_size = subset_size

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Run location-aware bad-channel removal on an EEGLAB EEG structure.

        ``raw`` is not used by this step; the result of
        ``eegprep.clean_channels`` is returned as is.
        """
        settings = {
            "corr_threshold": self.corr_threshold,
            "noise_threshold": self.noise_threshold,
            "window_len": self.window_len,
            "max_broken_time": self.max_broken_time,
            "num_samples": self.num_samples,
            "subset_size": self.subset_size,
        }
        return eegprep.clean_channels(eeg, **settings)
795
+
796
+
797
class RemoveBadChannelsNoLocs(EEGPrepBasePreprocessor):
    """Remove EEG channels with problematic data, without using channel locations.

    Implemented as in [Kothe2013]_.

    .. figure:: https://www.mdpi.com/sensors/sensors-22-07314/article_deploy/html/images/sensors-22-07314-g003.png
        :align: center
        :alt: Conceptual image of bad-channel removal.

    This is an automated artifact rejection function which ensures that the data
    contains no channels that record only noise for extended periods of time.
    The criterion is based on correlation: if a channel is decorrelated from all
    others (pairwise correlation < a given threshold), excluding a given fraction
    of most correlated channels, and if this holds for a sufficiently long
    fraction of the data set, then the channel is removed.

    This method does not require or take into account channel locations; if you do
    have locations, you may get better results with the
    :class:`RemoveBadChannels` preprocessor instead.

    Preconditions:

    - One of :class:`RemoveDrifts` or :class:`braindecode.preprocessing.Filter`
      (configured as a highpass filter) must have been applied beforehand.
    - Consider applying :class:`RemoveDCOffset` beforehand as a general precaution.

    Parameters
    ----------
    min_corr : float
        Minimum correlation between a channel and any other channel (in a short
        period of time) below which the channel is considered abnormal for that
        time period. Reasonable range: 0.4 (very lax) to 0.6 (quite aggressive).
        Default is 0.45.
    ignored_quantile : float
        Fraction of channels that need to have at least the given min_corr value
        w.r.t. the channel under consideration. This allows dealing with channels
        or small groups of channels that measure the same noise source. Reasonable
        range: 0.05 (rather lax) to 0.2 (tolerates many disconnected/shorted
        channels). Default is 0.1.
    window_len : float
        Length of the windows (in seconds) over which correlation stats are
        computed. Reasonable values are 1.0 sec (more noisy estimates) to 5.0 sec
        (more reliable, but can miss brief artifacts). Default is 2.0 sec.
    max_broken_time : float
        Maximum time (either in seconds or as fraction of the recording) during
        which a channel is allowed to have artifacts. If a channel exceeds this,
        it will be removed. Default is 0.4 (40%), max is 0.5 (breakdown point of
        stats). Pretty much never tuned.
    linenoise_aware : bool
        Whether the operation should be performed in a line-noise aware manner.
        If enabled, the correlation measure will not be affected by the presence
        or absence of line noise (using a temporary notch filter). Default is True.

    References
    ----------
    .. [Kothe2013] Kothe, C.A. and Makeig, S., 2013. BCILAB: a platform for
       brain–computer interface development. Journal of Neural Engineering, 10(5),
       p.056014.

    """

    def __init__(
        self,
        *,
        min_corr: float = 0.45,
        ignored_quantile: float = 0.1,
        window_len: float = 2.0,
        max_broken_time: float = 0.4,
        linenoise_aware: bool = True,
    ):
        """Store the correlation criteria; see the class docstring for details."""
        # NOTE(review): record_orig_chanlocs presumably makes the base class keep
        # the pre-removal channel locations for later reinterpolation -- confirm.
        super().__init__(record_orig_chanlocs=True)
        self.min_corr = min_corr
        self.ignored_quantile = ignored_quantile
        self.window_len = window_len
        self.max_broken_time = max_broken_time
        self.linenoise_aware = linenoise_aware

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Run location-free bad-channel removal on an EEGLAB EEG structure.

        ``raw`` is not used by this step. ``eegprep.clean_channels_nolocs``
        returns a pair; only its first element is passed on.
        """
        settings = {
            "min_corr": self.min_corr,
            "ignored_quantile": self.ignored_quantile,
            "window_len": self.window_len,
            "max_broken_time": self.max_broken_time,
            "linenoise_aware": self.linenoise_aware,
        }
        cleaned, _ = eegprep.clean_channels_nolocs(eeg, **settings)
        return cleaned
884
+
885
+
886
class RemoveBursts(EEGPrepBasePreprocessor):
    """Remove burst-type artifacts via Artifact Subspace Reconstruction (ASR).

    Follows [Mullen2015]_.

    .. figure:: https://cdn.ncbi.nlm.nih.gov/pmc/blobs/a79a/4710679/675fc2dee929/nihms733482f9.jpg
        :align: center
        :alt: Before/after comparison of ASR applied to EEG data.

    This is an automated artifact rejection function that ensures that the data
    contains no events that have abnormally strong power; the subspaces on which
    those events occur are reconstructed (interpolated) based on the rest of the
    EEG signal during these time periods.

    Preconditions:

    - One of :class:`RemoveDrifts` or :class:`braindecode.preprocessing.Filter`
      (configured as a highpass filter) must have been applied beforehand.
    - Must have removed flat-line channels beforehand with
      :class:`RemoveFlatChannels`.
    - If you are removing bad channels (:class:`RemoveBadChannels` or
      :class:`RemoveBadChannelsNoLocs`), use those before this step.
    - Consider applying :class:`RemoveDCOffset` beforehand as a general best
      practice.
    - If you are re-referencing to common average
      (:class:`RemoveCommonAverageReference`), this should normally *NOT* be done
      before this step, but after it.

    Parameters
    ----------
    cutoff : float
        Threshold for artifact rejection. Data portions whose variance is larger
        than this threshold relative to the calibration data are considered
        artifactual and removed. There is a fair amount of literature on what
        constitutes a good value. 7.5 is very aggressive, 10-15 is a good range for
        ML pipelines, 20-30 is more forgiving and is more common in neuroscience
        applications. The unit is z-scores relative to a Gaussian component of
        background EEG, but since EEG phenomena of interest can stand out from the
        Gaussian background, typical thresholds are considerably larger than for a
        purely Gaussian distribution. Default is 10.0.
    window_len : float | None
        Length of the statistics window in seconds. Should not be much longer
        than artifact timescale. The number of samples in the window should
        be >= 1.5x channels. Default: max(0.5, 1.5 * nbchan / srate).
    step_size : int | None
        Step size for processing in samples. The reconstruction matrix is updated
        every this many samples. If None, defaults to window_len / 2 samples.
    max_dims : float
        Maximum dimensionality/fraction of dimensions to reconstruct. Default:
        0.66. This can be understood to be the number of simultaneous artifact
        components that may be removed; normally needs no tuning, but on very
        low-channel data (e.g., 4ch) one may explore small integers between 1 and
        #channels-1.
    ref_maxbadchannels : float | None
        Parameter that controls automatic calibration data selection. This
        represents the max fraction (0-1) of bad channels tolerated in a window for
        it to be used as calibration data. Lower is more aggressive (e.g., 0.05).
        Default: 0.075. The parameter has the same meaning as the max_bad_channels
        parameter in the :class:`RemoveBadWindows` preprocessor, but note that this
        stage is used here as a subroutine to identify calibration data only. The
        overall method will always output a data matrix of the same shape as the
        input data. If set to None, all data is used for calibration.
    ref_tolerances : tuple[float, float]
        Power tolerances (lower, upper) in SDs from robust EEG power for a channel
        to be considered 'bad' during calibration data selection. This parameter
        goes hand in hand with ref_maxbadchannels. Default: (-inf, 5.5).
    ref_wndlen : float
        Window length in seconds for calibration data selection granularity.
        Default: 1.0.
    maxmem : int
        Maximum memory (in MB) to use during processing. Larger values can reduce
        overhead during processing, but usually 64MB (the default) is sufficient.

    References
    ----------
    .. [Mullen2015] Mullen, T.R., Kothe, C.A., Chi, Y.M., Ojeda, A., Kerth, T.,
       Makeig, S., Jung, T.P. and Cauwenberghs, G., 2015. Real-time neuroimaging
       and cognitive monitoring using wearable dry EEG. IEEE Transactions on
       Biomedical Engineering, 62(11), pp.2553-2567.

    """

    def __init__(
        self,
        *,
        cutoff: float = 10.0,
        window_len: float | None = None,
        step_size: int | None = None,
        max_dims: float = 0.66,
        ref_maxbadchannels: float | None = 0.075,
        ref_tolerances: tuple[float, float] = (-np.inf, 5.5),
        ref_wndlen: float = 1.0,
        maxmem: int = 64,
    ):
        """Store the ASR settings; see the class docstring for details."""
        # NOTE(review): can_change_duration presumably tells the base class that
        # the output may differ in length from the input -- confirm in the base.
        super().__init__(can_change_duration=True)
        self.cutoff = cutoff
        self.window_len = window_len
        self.step_size = step_size
        self.max_dims = max_dims
        self.ref_maxbadchannels = ref_maxbadchannels
        self.ref_tolerances = ref_tolerances
        self.ref_wndlen = ref_wndlen
        self.maxmem = maxmem

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Run ASR burst removal on an EEGLAB EEG structure.

        ``raw`` is not used by this step; the result of ``eegprep.clean_asr`` is
        returned as is.
        """
        settings = {
            "cutoff": self.cutoff,
            "window_len": self.window_len,
            "step_size": self.step_size,
            "max_dims": self.max_dims,
            "ref_maxbadchannels": self.ref_maxbadchannels,
            "ref_tolerances": self.ref_tolerances,
            "ref_wndlen": self.ref_wndlen,
            "maxmem": self.maxmem,
        }
        return eegprep.clean_asr(eeg, **settings)
1000
+
1001
+
1002
class RemoveBadWindows(EEGPrepBasePreprocessor):
    """Remove periods with abnormally high-power content from continuous data.

    Implemented as in [Kothe2013]_.

    .. figure:: https://www.jove.com/files/ftp_upload/65829/65829fig13.jpg
        :align: center
        :alt: Before/after comparison of bad-window removal.

    This function cuts segments from the data which contain high-power (or
    low-power) artifacts. Specifically, only time windows are retained which have
    less than a certain fraction of *bad* channels, where a channel is bad in a
    window if its RMS power is above or below some z-score threshold relative to a
    robust estimate of clean-EEG power in that channel.

    .. Note::
        When your method is meant to produce predictions for all time points in
        your continuous data (or all epochs of interest), you may not want to use
        this preprocessor, and enabling it may give you rosy performance estimates
        that do not reflect how your method works when used on gap-free data. It
        can nevertheless be useful to apply this to training data only in such
        cases, to get an artifact-unencumbered model.

    Preconditions:

    - One of :class:`RemoveDrifts` or :class:`braindecode.preprocessing.Filter`
      (configured as a highpass filter) must have been applied beforehand.

    Parameters
    ----------
    max_bad_channels : int | float
        Threshold for rejection of bad time windows based on fraction of
        simultaneously noisy channels. This is the main tuning parameter; lower is
        more aggressive. Typical values are 0.15 (quite aggressive) to 0.3 (quite
        lax). Can also be specified as an absolute number of channels. Default is
        0.25 (25% of channels).
    zthresholds : tuple(float, float)
        (min, max) z-score tolerance for identifying bad time window/channel pairs.
        This typically does not need to be changed (instead one may change the max
        bad channels that cross this threshold), but different implementations
        use different values here. The max value is the main parameter, where
        EEGLAB/EEGPrep uses 7 while the original pipeline [Kothe2013]_ used 5.5,
        and NeuroPype uses 6. Lower values are more aggressive. The min value is
        only triggered if the EEG data has signal dropouts (very low amplitude,
        e.g. due to something becoming unplugged) which is rare; some choices are
        -inf (EEGPrep), -3.5 (BCILAB), and -4 (NeuroPype).
    window_len : float
        The window length that is used to check the data for artifact content, in
        seconds. This is ideally as long as the expected time scale of the
        artifacts, but short enough for there to be enough windows to compute
        statistics over. Default is 1.0 sec, but this may be lowered to 0.5 sec to
        catch very brief artifacts.
    window_overlap : float
        Fractional overlap between consecutive windows (0-1). Higher overlap
        finds more artifacts but is slower. Default is 0.66 (about 2/3 overlap).
    max_dropout_fraction : float
        Maximum fraction of windows that may have arbitrarily low amplitude
        (e.g. sensor unplugged). Default is 0.1.
    min_clean_fraction : float
        Minimum fraction of windows expected to be clean (essentially
        uncontaminated EEG). Default is 0.25.
    truncate_quant : tuple(float, float)
        Quantile range of the truncated Gaussian to fit. Default is (0.022, 0.6).
    step_sizes : tuple(float, float)
        Grid-search step sizes in quantiles for lower/upper edge. Default is
        (0.01, 0.01).
    shape_range : sequence(float)
        Range for the beta shape parameter in the generalised Gaussian used
        for distribution fitting. Default is np.arange(1.7, 3.6, 0.15). NumPy
        arrays are copied on construction, so each instance owns its values.

    References
    ----------
    .. [Kothe2013] Kothe, C.A. and Makeig, S., 2013. BCILAB: a platform for
       brain–computer interface development. Journal of Neural Engineering, 10(5),
       p.056014.

    """

    def __init__(
        self,
        *,
        max_bad_channels: int | float = 0.25,
        zthresholds: tuple[float, float] = (-np.inf, 7),
        window_len: float = 1.0,
        window_overlap: float = 0.66,
        max_dropout_fraction: float = 0.1,
        min_clean_fraction: float = 0.25,
        truncate_quant: tuple[float, float] = (0.022, 0.6),
        step_sizes: tuple[float, float] = (0.01, 0.01),
        shape_range: np.ndarray | Sequence[float] = np.arange(1.7, 3.6, 0.15),
    ):
        """Store the window-rejection settings; see the class docstring."""
        # NOTE(review): can_change_duration presumably tells the base class that
        # the output may differ in length from the input -- confirm in the base.
        super().__init__(can_change_duration=True)
        self.max_bad_channels = max_bad_channels
        self.zthresholds = zthresholds
        self.window_len = window_len
        self.window_overlap = window_overlap
        self.max_dropout_fraction = max_dropout_fraction
        self.min_clean_fraction = min_clean_fraction
        self.truncate_quant = truncate_quant
        self.step_sizes = step_sizes
        # The signature default is one ndarray built when the class is defined.
        # Storing it by reference would let an in-place edit on one instance
        # change the default seen by every later instance, so arrays are copied.
        if isinstance(shape_range, np.ndarray):
            shape_range = shape_range.copy()
        self.shape_range = shape_range

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Run bad-window removal on an EEGLAB EEG structure.

        ``raw`` is not used by this step. ``eegprep.clean_windows`` returns a
        pair; only its first element is passed on.
        """
        eeg, _ = eegprep.clean_windows(
            eeg,
            max_bad_channels=self.max_bad_channels,
            zthresholds=self.zthresholds,
            window_len=self.window_len,
            window_overlap=self.window_overlap,
            max_dropout_fraction=self.max_dropout_fraction,
            min_clean_fraction=self.min_clean_fraction,
            truncate_quant=self.truncate_quant,
            step_sizes=self.step_sizes,
            shape_range=self.shape_range,
        )

        return eeg
1117
+
1118
+
1119
class ReinterpolateRemovedChannels(EEGPrepBasePreprocessor):
    """Reinterpolate previously removed EEG channels to restore the channel set.

    .. figure:: ../../docs/_static/preprocess/sph_spline_interp.png
        :align: center
        :alt: Spherical spline interpolation example.

    This reinterpolates EEG channels that were previously dropped via one of the
    EEGPrep channel removal operations and restores the original order of EEG
    channels. This is typically necessary when you are using automatic channel
    removal but need a consistent channel set across multiple
    recordings/sessions. Uses spherical-spline interpolation (based on
    [Perrin1989]_).

    The typical place to perform this is after all other EEGPrep-related artifact
    removal steps, except re-referencing. If no channel locations were recorded,
    this preprocessor has no effect.

    Preconditions:

    - Must have 3D channel locations.
    - This filter will only have an effect if one or more of the preceding steps
      recorded original channel locations (e.g., :class:`RemoveBadChannels`,
      :class:`RemoveBadChannelsNoLocs`, or :class:`RemoveFlatChannels`).
    - If you are re-referencing to common average
      (:class:`RemoveCommonAverageReference`), this should normally *NOT* be done
      before this step, but after it (otherwise your reference will depend on
      which channels were removed).

    References
    ----------
    .. [Perrin1989] Perrin, F., Pernier, J., Bertrand, O. and Echallier, J.F., 1989.
       Spherical splines for scalp potential and current density mapping.
       Electroencephalography and Clinical Neurophysiology, 72(2), pp.184-187.

    """

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Restore removed channels in an EEGLAB EEG structure, if any.

        The structure is returned unchanged when no original channel locations
        are available for ``raw``, or when the recorded set is not larger than
        the current ``eeg["chanlocs"]``.
        """
        recorded = self._get_orig_chanlocs(raw)
        if recorded is None:
            log.info(
                "ReinterpolateRemovedChannels: No original channel locations were "
                "recorded by a preceding step; skipping reinterpolation."
            )
            return eeg

        # Only interpolate when the recorded set has more channels than remain.
        if len(recorded) > len(eeg["chanlocs"]):
            return eegprep.eeg_interp(eeg, recorded)
        return eeg
1167
+
1168
+
1169
class RemoveCommonAverageReference(EEGPrepBasePreprocessor):
    """Subtract the common average reference from the EEG data (EEGPrep version).

    This is useful for having a consistent referencing scheme across recordings
    (cf. [Offner1950]_).

    Generally, common average re-referencing is ``data -= mean(data, axis=0)``,
    but both EEGLAB/eegprep and, to a greater extent, MNE have additional
    bookkeeping around re-referencing, in the latter case due to its focus on
    source localization. This will have little effect on most machine-learning use
    cases; nevertheless, this operation is included here to allow users to mirror
    the behavior of the end-to-end EEGPrep pipeline by means of individual
    operations (for example when migrating from one form to the other) without
    introducing perhaps unexpected side effects on the MNE data structure.

    The operation performed is:

    .. math::

        X'_{c,t} = X_{c,t} - \\frac{1}{C}\\sum_{k=1}^{C} X_{k,t}

    where :math:`C` is the number of channels, :math:`c` indexes the channel, and
    :math:`t` indexes time.

    References
    ----------
    .. [Offner1950] Offner, F. F. (1950). The EEG as potential mapping: the value
       of the average monopolar reference. Electroencephalography and Clinical
       Neurophysiology, 2(2), 213-214.

    """

    def apply_eeg(self, eeg: dict[str, Any], raw: BaseRaw) -> dict[str, Any]:
        """Re-reference an EEGLAB EEG structure via ``eegprep.reref``.

        ``raw`` is not used by this step.
        """
        # An empty list is passed as the reference argument.
        # NOTE(review): presumably this selects the average reference -- confirm
        # against the eegprep.reref documentation.
        rereferenced = eegprep.reref(eeg, [])
        return rereferenced