braindecode 1.3.0.dev177069446__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124):
  1. braindecode/__init__.py +9 -0
  2. braindecode/augmentation/__init__.py +52 -0
  3. braindecode/augmentation/base.py +225 -0
  4. braindecode/augmentation/functional.py +1300 -0
  5. braindecode/augmentation/transforms.py +1356 -0
  6. braindecode/classifier.py +258 -0
  7. braindecode/datasets/__init__.py +44 -0
  8. braindecode/datasets/base.py +823 -0
  9. braindecode/datasets/bbci.py +693 -0
  10. braindecode/datasets/bcicomp.py +193 -0
  11. braindecode/datasets/bids/__init__.py +54 -0
  12. braindecode/datasets/bids/datasets.py +239 -0
  13. braindecode/datasets/bids/format.py +717 -0
  14. braindecode/datasets/bids/hub.py +987 -0
  15. braindecode/datasets/bids/hub_format.py +717 -0
  16. braindecode/datasets/bids/hub_io.py +197 -0
  17. braindecode/datasets/bids/hub_validation.py +114 -0
  18. braindecode/datasets/bids/iterable.py +220 -0
  19. braindecode/datasets/chb_mit.py +163 -0
  20. braindecode/datasets/mne.py +170 -0
  21. braindecode/datasets/moabb.py +219 -0
  22. braindecode/datasets/nmt.py +313 -0
  23. braindecode/datasets/registry.py +120 -0
  24. braindecode/datasets/siena.py +162 -0
  25. braindecode/datasets/sleep_physio_challe_18.py +411 -0
  26. braindecode/datasets/sleep_physionet.py +125 -0
  27. braindecode/datasets/tuh.py +591 -0
  28. braindecode/datasets/utils.py +67 -0
  29. braindecode/datasets/xy.py +96 -0
  30. braindecode/datautil/__init__.py +62 -0
  31. braindecode/datautil/channel_utils.py +114 -0
  32. braindecode/datautil/hub_formats.py +180 -0
  33. braindecode/datautil/serialization.py +359 -0
  34. braindecode/datautil/util.py +154 -0
  35. braindecode/eegneuralnet.py +372 -0
  36. braindecode/functional/__init__.py +22 -0
  37. braindecode/functional/functions.py +251 -0
  38. braindecode/functional/initialization.py +47 -0
  39. braindecode/models/__init__.py +117 -0
  40. braindecode/models/atcnet.py +830 -0
  41. braindecode/models/attentionbasenet.py +727 -0
  42. braindecode/models/attn_sleep.py +549 -0
  43. braindecode/models/base.py +574 -0
  44. braindecode/models/bendr.py +493 -0
  45. braindecode/models/biot.py +537 -0
  46. braindecode/models/brainmodule.py +845 -0
  47. braindecode/models/config.py +233 -0
  48. braindecode/models/contrawr.py +319 -0
  49. braindecode/models/ctnet.py +541 -0
  50. braindecode/models/deep4.py +376 -0
  51. braindecode/models/deepsleepnet.py +417 -0
  52. braindecode/models/eegconformer.py +475 -0
  53. braindecode/models/eeginception_erp.py +379 -0
  54. braindecode/models/eeginception_mi.py +379 -0
  55. braindecode/models/eegitnet.py +302 -0
  56. braindecode/models/eegminer.py +256 -0
  57. braindecode/models/eegnet.py +359 -0
  58. braindecode/models/eegnex.py +354 -0
  59. braindecode/models/eegsimpleconv.py +201 -0
  60. braindecode/models/eegsym.py +917 -0
  61. braindecode/models/eegtcnet.py +337 -0
  62. braindecode/models/fbcnet.py +225 -0
  63. braindecode/models/fblightconvnet.py +315 -0
  64. braindecode/models/fbmsnet.py +338 -0
  65. braindecode/models/hybrid.py +126 -0
  66. braindecode/models/ifnet.py +443 -0
  67. braindecode/models/labram.py +1316 -0
  68. braindecode/models/luna.py +891 -0
  69. braindecode/models/medformer.py +760 -0
  70. braindecode/models/msvtnet.py +377 -0
  71. braindecode/models/patchedtransformer.py +640 -0
  72. braindecode/models/reve.py +843 -0
  73. braindecode/models/sccnet.py +280 -0
  74. braindecode/models/shallow_fbcsp.py +212 -0
  75. braindecode/models/signal_jepa.py +1122 -0
  76. braindecode/models/sinc_shallow.py +339 -0
  77. braindecode/models/sleep_stager_blanco_2020.py +169 -0
  78. braindecode/models/sleep_stager_chambon_2018.py +159 -0
  79. braindecode/models/sparcnet.py +426 -0
  80. braindecode/models/sstdpn.py +869 -0
  81. braindecode/models/summary.csv +47 -0
  82. braindecode/models/syncnet.py +234 -0
  83. braindecode/models/tcn.py +275 -0
  84. braindecode/models/tidnet.py +397 -0
  85. braindecode/models/tsinception.py +295 -0
  86. braindecode/models/usleep.py +439 -0
  87. braindecode/models/util.py +369 -0
  88. braindecode/modules/__init__.py +92 -0
  89. braindecode/modules/activation.py +86 -0
  90. braindecode/modules/attention.py +883 -0
  91. braindecode/modules/blocks.py +160 -0
  92. braindecode/modules/convolution.py +330 -0
  93. braindecode/modules/filter.py +654 -0
  94. braindecode/modules/layers.py +216 -0
  95. braindecode/modules/linear.py +70 -0
  96. braindecode/modules/parametrization.py +38 -0
  97. braindecode/modules/stats.py +87 -0
  98. braindecode/modules/util.py +85 -0
  99. braindecode/modules/wrapper.py +90 -0
  100. braindecode/preprocessing/__init__.py +271 -0
  101. braindecode/preprocessing/eegprep_preprocess.py +1317 -0
  102. braindecode/preprocessing/mne_preprocess.py +240 -0
  103. braindecode/preprocessing/preprocess.py +579 -0
  104. braindecode/preprocessing/util.py +177 -0
  105. braindecode/preprocessing/windowers.py +1037 -0
  106. braindecode/regressor.py +234 -0
  107. braindecode/samplers/__init__.py +18 -0
  108. braindecode/samplers/base.py +399 -0
  109. braindecode/samplers/ssl.py +263 -0
  110. braindecode/training/__init__.py +23 -0
  111. braindecode/training/callbacks.py +23 -0
  112. braindecode/training/losses.py +105 -0
  113. braindecode/training/scoring.py +477 -0
  114. braindecode/util.py +419 -0
  115. braindecode/version.py +1 -0
  116. braindecode/visualization/__init__.py +8 -0
  117. braindecode/visualization/confusion_matrices.py +289 -0
  118. braindecode/visualization/gradients.py +62 -0
  119. braindecode-1.3.0.dev177069446.dist-info/METADATA +230 -0
  120. braindecode-1.3.0.dev177069446.dist-info/RECORD +124 -0
  121. braindecode-1.3.0.dev177069446.dist-info/WHEEL +5 -0
  122. braindecode-1.3.0.dev177069446.dist-info/licenses/LICENSE.txt +31 -0
  123. braindecode-1.3.0.dev177069446.dist-info/licenses/NOTICE.txt +20 -0
  124. braindecode-1.3.0.dev177069446.dist-info/top_level.txt +1 -0
@@ -0,0 +1,727 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+
5
+ from einops.layers.torch import Rearrange
6
+ from mne.utils import warn
7
+ from torch import nn
8
+
9
+ from braindecode.models.base import EEGModuleMixin
10
+ from braindecode.modules import Ensure4d
11
+ from braindecode.modules.attention import (
12
+ CAT,
13
+ CBAM,
14
+ ECA,
15
+ FCA,
16
+ GCT,
17
+ SRM,
18
+ CATLite,
19
+ EncNet,
20
+ GatherExcite,
21
+ GSoP,
22
+ SqueezeAndExcitation,
23
+ )
24
+
25
+
26
class AttentionBaseNet(EEGModuleMixin, nn.Module):
    r"""AttentionBaseNet from Wimpff M et al (2023) [Martin2023]_.

    :bdg-success:`Convolution` :bdg-info:`Attention/Transformer`

    .. figure:: https://content.cld.iop.org/journals/1741-2552/21/3/036020/revision2/jnead48b9f2_hr.jpg
        :align: center
        :alt: AttentionBaseNet Architecture
        :width: 640px

    .. rubric:: Architectural Overview

    AttentionBaseNet is a *convolution-first* network with a *channel-attention* stage.
    The end-to-end flow is:

    - (i) :class:`_FeatureExtractor` learns a temporal filter bank and per-filter spatial
      projections (depthwise across electrodes), then condenses time by pooling;
    - (ii) **Channel Expansion** uses a ``1x1`` convolution to set the feature width;
    - (iii) :class:`_ChannelAttentionBlock` refines features via depthwise–pointwise temporal
      convs and an optional channel-attention module (SE/CBAM/ECA/…);
    - (iv) **Classifier** flattens the sequence and applies a linear readout.

    This design mirrors shallow CNN pipelines (EEGNet-style stem) but inserts a pluggable
    attention unit that *re-weights channels* (and optionally temporal positions) before
    classification.

    .. rubric:: Macro Components

    - :class:`_FeatureExtractor` **(Shallow conv stem → condensed feature map)**

        - *Operations.*
        - **Temporal conv** (:class:`torch.nn.Conv2d`) with kernel ``(1, L_t)`` creates a learned
          FIR-like filter bank with ``n_temporal_filters`` maps.
        - **Depthwise spatial conv** (:class:`torch.nn.Conv2d`, ``groups=n_temporal_filters``)
          with kernel ``(n_chans, 1)`` learns per-filter spatial projections over the full montage.
        - **BatchNorm → ELU → AvgPool → Dropout** stabilize and downsample time.
        - Output shape: ``(B, F2, 1, T₁)`` with ``F2 = n_temporal_filters x spatial_expansion``.

        *Interpretability/robustness.* Temporal kernels behave as analyzable FIR filters; the
        depthwise spatial step yields rhythm-specific topographies. Pooling acts as a local
        integrator that reduces variance on short EEG windows.

    - **Channel Expansion**

        - *Operations.*
        - A ``1x1`` conv → BN → activation maps ``F2 → ch_dim`` without changing
          the temporal length ``T₁`` (shape: ``(B, ch_dim, 1, T₁)``).
          This sets the embedding width for the attention block.

    - :class:`_ChannelAttentionBlock` **(temporal refinement + channel attention)**

        - *Operations.*
        - **Depthwise temporal conv** ``(1, L_a)`` (groups=``ch_dim``) + **pointwise ``1x1``**,
          BN and activation → preserves shape ``(B, ch_dim, 1, T₁)`` while refining timing.
        - **Optional attention module** (see *Additional Mechanisms*) applies channel reweighting
          (some variants also apply temporal gating).
        - **AvgPool (1, P₂)** with stride ``(1, S₂)`` and **Dropout** → outputs
          ``(B, ch_dim, 1, T₂)``.

        *Role.* Emphasizes informative channels (and, in certain modes, salient time steps)
        before the classifier; complements the convolutional priors with adaptive re-weighting.

    - **Classifier (aggregation + readout)**

        *Operations.* :class:`torch.nn.Flatten` → :class:`torch.nn.Linear` from
        ``(B, ch_dim·T₂)`` to classes.

    .. rubric:: Convolutional Details

    - **Temporal (where time-domain patterns are learned).**
      Wide kernels in the stem (``(1, L_t)``) act as a learned filter bank for oscillatory
      bands/transients; the attention block's depthwise temporal conv (``(1, L_a)``) sharpens
      short-term dynamics after downsampling. Pool sizes/strides (``P₁,S₁`` then ``P₂,S₂``)
      set the token rate and effective temporal resolution.

    - **Spatial (how electrodes are processed).**
      A depthwise spatial conv with kernel ``(n_chans, 1)`` spans the full montage to
      learn *per-temporal-filter* spatial projections (no cross-filter mixing at this step),
      mirroring the interpretable spatial stage in shallow CNNs.

    - **Spectral (how frequency content is captured).**
      No explicit Fourier/wavelet transform is used in the stem—spectral selectivity
      emerges from learned temporal kernels. When ``attention_mode="fca"``, a frequency
      channel attention (DCT-based) summarizes frequencies to drive channel weights.

    .. rubric:: Attention / Sequential Modules

    - **Type.** Channel attention chosen by ``attention_mode`` (SE, GSoP, FCA, EncNet, ECA,
      GE, GCT, SRM, CBAM, CAT, CATLite). Most operate purely on channels; CBAM/CAT
      additionally include temporal attention.

    - **Shapes.** Input/Output around attention: ``(B, ch_dim, 1, T₁)``. Re-arrangements
      (if any) are internal to the module; the block returns the same shape before pooling.

    - **Role.** Re-weights channels (and optionally time) to highlight informative sources
      and suppress distractors, improving SNR ahead of the linear head.

    .. rubric:: Additional Mechanisms

    **Attention variants at a glance:**

    - ``"se"``: Squeeze-and-Excitation (global pooling → bottleneck → gates).
    - ``"gsop"``: Global second-order pooling (covariance-aware channel weights).
    - ``"fca"``: Frequency Channel Attention (DCT summary; uses ``freq_idx`` and the
      internally computed sequence length).
    - ``"encnet"``: EncNet with learned codewords (uses ``n_codewords``).
    - ``"eca"``: Efficient Channel Attention (local 1-D conv over channel descriptor; uses ``kernel_size``).
    - ``"ge"``: Gather–Excite (context pooling with optional MLP; can use ``extra_params``).
    - ``"gct"``: Gated Channel Transformation (global context normalization + gating).
    - ``"srm"``: Style-based recalibration (mean–std descriptors; optional MLP).
    - ``"cbam"``: Channel then temporal attention (uses ``kernel_size``).
    - ``"cat"`` / ``"catlite"``: Collaborative (channel ± temporal) attention; *lite* omits temporal.

    **Auto-compatibility on short inputs:**

    If the input duration is too short for the configured kernels/pools, the implementation
    **automatically rescales** temporal lengths/strides downward (with a warning) to keep
    shapes valid and preserve the pipeline semantics.

    .. rubric:: Usage and Configuration

    - ``n_temporal_filters``, ``temp_filter_length_inp`` and ``spatial_expansion``:
      control the capacity and the number of spatial projections in the stem.
    - ``pool_length_inp``, ``pool_stride_inp`` then ``pool_length``, ``pool_stride``:
      trade temporal resolution for compute; they determine the final sequence length ``T₂``.
    - ``ch_dim``: width after the ``1x1`` expansion and the effective embedding size for attention.
    - ``attention_mode`` + its specific hyperparameters (``reduction_rate``,
      ``kernel_size``, ``freq_idx``, ``n_codewords``, ``use_mlp``):
      select and tune the reweighting mechanism. The sequence length needed by some
      modes is derived from ``n_times`` and is not a user-facing argument.
    - ``drop_prob_inp`` and ``drop_prob_attn``: regularize stem and attention stages.
    - **Training tips.**

      Start with moderate pooling (e.g., ``P₁=75,S₁=15``) and ELU activations; enable attention
      only after the stem learns stable filters. For small datasets, prefer simpler modes
      (``"se"``, ``"eca"``) before heavier ones (``"gsop"``, ``"encnet"``).

    Parameters
    ----------
    n_temporal_filters : int, optional
        Number of temporal convolutional filters in the first layer. This defines
        the number of output channels after the temporal convolution.
        Default is 40.
    temp_filter_length_inp : int, default=25
        The length of the temporal filters in the input (stem) temporal convolution.
    spatial_expansion : int, optional
        Multiplicative factor to expand the spatial dimensions. Used to increase
        the capacity of the model by expanding spatial features. Default is 1.
    pool_length_inp : int, optional
        Length of the pooling window in the input layer. Determines how much
        temporal information is aggregated during pooling. Default is 75.
    pool_stride_inp : int, optional
        Stride of the pooling operation in the input layer. Controls the
        downsampling factor in the temporal dimension. Default is 15.
    drop_prob_inp : float, optional
        Dropout rate applied after the input layer. This is the probability of
        zeroing out elements during training to prevent overfitting.
        Default is 0.5.
    ch_dim : int, optional
        Number of channels in the subsequent convolutional layers. This controls
        the depth of the network after the initial layer. Default is 16.
    temp_filter_length : int, default=15
        The length of the temporal filters in the depthwise convolution of the
        channel-attention block.
    attention_mode : str, optional
        The type of attention mechanism to apply. If `None`, no attention is applied.

        - "se" for Squeeze-and-excitation network
        - "gsop" for Global Second-Order Pooling
        - "fca" for Frequency Channel Attention Network
        - "encnet" for context encoding module
        - "eca" for Efficient channel attention for deep convolutional neural networks
        - "ge" for Gather-Excite
        - "gct" for Gated Channel Transformation
        - "srm" for Style-based Recalibration Module
        - "cbam" for Convolutional Block Attention Module
        - "cat" for Learning to collaborate channel and temporal attention
          from multi-information fusion
        - "catlite" for Learning to collaborate channel attention
          from multi-information fusion (lite version, cat w/o temporal attention)

    pool_length : int, default=8
        The length of the window for the average pooling operation.
    pool_stride : int, default=8
        The stride of the average pooling operation.
    drop_prob_attn : float, default=0.5
        The dropout rate for regularization for the attention layer. Values should be between 0 and 1.
    reduction_rate : int, default=4
        The reduction rate used in the attention mechanism to reduce dimensionality
        and computational complexity.
    use_mlp : bool, default=False
        Flag to indicate whether an MLP (Multi-Layer Perceptron) should be used within
        the attention mechanism for further processing.
    freq_idx : int, default=0
        DCT index used in fca attention mechanism.
    n_codewords : int, default=4
        The number of codewords (clusters) used in attention mechanisms that employ
        quantization or clustering strategies.
    kernel_size : int, default=9
        The kernel size used in certain types of attention mechanisms for convolution
        operations.
    activation : type[nn.Module], default=nn.ELU
        Activation function class to apply. Should be a PyTorch activation
        module class like ``nn.ReLU`` or ``nn.ELU``. Default is ``nn.ELU``.
    extra_params : bool, default=False
        Flag to indicate whether additional, custom parameters should be passed to
        the attention mechanism.

    Notes
    -----
    - Sequence length after each stage is computed internally; the final classifier expects
      a flattened ``ch_dim x T₂`` vector.
    - Attention operates on *channel* dimension by design; temporal gating exists only in
      specific variants (CBAM/CAT).
    - The paper and original code with more details about the methodological
      choices are available at the [Martin2023]_ and [MartinCode]_.

    .. versionadded:: 0.9

    References
    ----------
    .. [Martin2023] Wimpff, M., Gizzi, L., Zerfowski, J. and Yang, B., 2023.
        EEG motor imagery decoding: A framework for comparative analysis with
        channel attention mechanisms. arXiv preprint arXiv:2310.11198.
    .. [MartinCode] Wimpff, M., Gizzi, L., Zerfowski, J. and Yang, B.
        GitHub https://github.com/martinwimpff/channel-attention (accessed 2024-03-28)
    """

    def __init__(
        self,
        n_times=None,
        n_chans=None,
        n_outputs=None,
        chs_info=None,
        sfreq=None,
        input_window_seconds=None,
        # Module parameters
        n_temporal_filters: int = 40,
        temp_filter_length_inp: int = 25,
        spatial_expansion: int = 1,
        pool_length_inp: int = 75,
        pool_stride_inp: int = 15,
        drop_prob_inp: float = 0.5,
        ch_dim: int = 16,
        temp_filter_length: int = 15,
        pool_length: int = 8,
        pool_stride: int = 8,
        drop_prob_attn: float = 0.5,
        attention_mode: str | None = None,
        reduction_rate: int = 4,
        use_mlp: bool = False,
        freq_idx: int = 0,
        n_codewords: int = 4,
        kernel_size: int = 9,
        activation: type[nn.Module] = nn.ELU,
        extra_params: bool = False,
    ):
        # One initialiser call is enough: ``super(AttentionBaseNet, self)`` and
        # ``super()`` resolve to the same next class in the MRO, so a preceding
        # argument-less call would only repeat this one with default values.
        super().__init__(
            n_outputs=n_outputs,
            n_chans=n_chans,
            chs_info=chs_info,
            n_times=n_times,
            sfreq=sfreq,
            input_window_seconds=input_window_seconds,
        )
        # Drop the raw arguments so the code below can only read the values
        # exposed on ``self``.
        del n_outputs, n_chans, chs_info, n_times, sfreq, input_window_seconds

        min_n_times_required = self._get_min_n_times(
            pool_length_inp,
            pool_stride_inp,
            pool_length,
        )

        if self.n_times < min_n_times_required:
            scaling_factor = self.n_times / min_n_times_required
            warn(
                f"n_times ({self.n_times}) is smaller than the minimum required "
                f"({min_n_times_required}) for the current model parameters "
                "configuration. Adjusting parameters to ensure compatibility. "
                "Reducing the kernel, pooling, and stride sizes accordingly.\n"
                f"Scaling factor: {scaling_factor:.2f}",
                UserWarning,
            )
            # Scale every temporal hyperparameter by the same factor; the
            # ``max(1, ...)`` floor keeps kernels and strides valid (>= 1).
            temp_filter_length_inp = max(
                1, int(temp_filter_length_inp * scaling_factor)
            )
            pool_length_inp = max(1, int(pool_length_inp * scaling_factor))
            pool_stride_inp = max(1, int(pool_stride_inp * scaling_factor))
            temp_filter_length = max(1, int(temp_filter_length * scaling_factor))
            pool_length = max(1, int(pool_length * scaling_factor))
            pool_stride = max(1, int(pool_stride * scaling_factor))

        self.input_block = _FeatureExtractor(
            n_chans=self.n_chans,
            n_temporal_filters=n_temporal_filters,
            temporal_filter_length=temp_filter_length_inp,
            spatial_expansion=spatial_expansion,
            pool_length=pool_length_inp,
            pool_stride=pool_stride_inp,
            drop_prob=drop_prob_inp,
            activation=activation,
        )

        # 1x1 convolution that maps the stem width onto ``ch_dim`` channels.
        self.channel_expansion = nn.Sequential(
            nn.Conv2d(
                n_temporal_filters * spatial_expansion, ch_dim, (1, 1), bias=False
            ),
            nn.BatchNorm2d(ch_dim),
            activation(),
        )

        # seq_lengths[0]: temporal length after the stem (conv + pool);
        # seq_lengths[-1]: temporal length after the attention block's pool.
        seq_lengths = self._calculate_sequence_lengths(
            self.n_times,
            [temp_filter_length_inp, temp_filter_length],
            [pool_length_inp, pool_length],
            [pool_stride_inp, pool_stride],
        )

        # NOTE(review): the attention module runs after the block's temporal
        # conv (padding ``k // 2``), so with an even ``temp_filter_length`` its
        # input is one sample longer than ``seq_lengths[0]`` — confirm that the
        # seq_len-dependent attention modes tolerate this.
        self.channel_attention_block = _ChannelAttentionBlock(
            attention_mode=attention_mode,
            in_channels=ch_dim,
            temp_filter_length=temp_filter_length,
            pool_length=pool_length,
            pool_stride=pool_stride,
            drop_prob=drop_prob_attn,
            reduction_rate=reduction_rate,
            use_mlp=use_mlp,
            seq_len=seq_lengths[0],
            freq_idx=freq_idx,
            n_codewords=n_codewords,
            kernel_size=kernel_size,
            extra_params=extra_params,
            activation=activation,
        )

        self.final_layer = nn.Sequential(
            nn.Flatten(), nn.Linear(seq_lengths[-1] * ch_dim, self.n_outputs)
        )

    def forward(self, x):
        """Run the stem, channel expansion, attention block and linear readout.

        Parameters
        ----------
        x : torch.Tensor
            Input batch, forwarded unchanged to the feature extractor.

        Returns
        -------
        torch.Tensor
            Output of the final flatten + linear layer.
        """
        x = self.input_block(x)
        x = self.channel_expansion(x)
        x = self.channel_attention_block(x)
        x = self.final_layer(x)
        return x

    @staticmethod
    def _calculate_sequence_lengths(
        input_window_samples: int,
        kernel_lengths: list,
        pool_lengths: list,
        pool_strides: list,
    ) -> list[int]:
        """Compute the temporal length after each conv + average-pool stage.

        Parameters
        ----------
        input_window_samples : int
            Number of time samples entering the first stage.
        kernel_lengths : list of int
            Temporal kernel length of the convolution in each stage.
        pool_lengths : list of int
            Pooling window length of each stage.
        pool_strides : list of int
            Pooling stride of each stage.

        Returns
        -------
        list of int
            Temporal length after each stage, each clamped to at least 1.
        """
        seq_lengths = []
        out = input_window_samples
        for k, pl, ps in zip(kernel_lengths, pool_lengths, pool_strides):
            # Stride-1 convolution with padding ``k // 2``: the length is kept
            # for odd kernels and grows by one sample for even kernels.
            out = out + 2 * (k // 2) - k + 1
            # Average pooling without padding.
            out = math.floor((out - pl) / ps + 1)
            # Ensure output is at least 1 to avoid zero-sized tensors
            out = max(1, out)
            seq_lengths.append(int(out))
        return seq_lengths

    @staticmethod
    def _get_min_n_times(
        pool_length_inp: int,
        pool_stride_inp: int,
        pool_length: int,
    ) -> int:
        """Calculate the minimum ``n_times`` required by the given pooling setup.

        The calculation reverses the pooling operations so that the input to
        each of them is valid. Only the pooling layers enter the bound; the
        temporal kernel lengths are not taken into account here.

        Parameters
        ----------
        pool_length_inp : int
            Pooling window length of the input (stem) block.
        pool_stride_inp : int
            Pooling stride of the input (stem) block.
        pool_length : int
            Pooling window length of the attention block.

        Returns
        -------
        int
            Smallest number of time samples for which the second pooling layer
            still receives at least ``pool_length`` samples.
        """
        # The input to the second pooling layer must be at least its kernel size.
        min_len_for_second_pool = pool_length

        # Reverse the first pooling operation to find the required input size.
        # Formula: min_L_in = Stride * (min_L_out - 1) + Kernel
        min_len = pool_stride_inp * (min_len_for_second_pool - 1) + pool_length_inp
        return min_len
409
+
410
+
411
class _FeatureExtractor(nn.Module):
    r"""Convolutional stem applying temporal and spatial transformations.

    The input goes through the following layers in order: conversion to a 4-D
    tensor, axis rearrangement, temporal convolution, batch normalization,
    depthwise spatial convolution, a second batch normalization, the
    non-linearity, average pooling over time, and dropout.

    Parameters
    ----------
    n_chans : int
        The number of channels in the input data.
    n_temporal_filters : int, optional
        The number of filters to use in the temporal convolution layer. Default is 40.
    temporal_filter_length : int, optional
        The size of each filter in the temporal convolution layer. Default is 25.
    spatial_expansion : int, optional
        The expansion factor of the spatial convolution layer, determining the number
        of output channels relative to the number of temporal filters. Default is 1.
    pool_length : int, optional
        The size of the window for the average pooling operation. Default is 75.
    pool_stride : int, optional
        The stride of the average pooling operation. Default is 15.
    drop_prob : float, optional
        The dropout rate for regularization. Default is 0.5.
    activation : type[nn.Module], default=nn.ELU
        Activation function class to apply. Should be a PyTorch activation
        module class like ``nn.ReLU`` or ``nn.ELU``. Default is ``nn.ELU``.
    """

    def __init__(
        self,
        n_chans: int,
        n_temporal_filters: int = 40,
        temporal_filter_length: int = 25,
        spatial_expansion: int = 1,
        pool_length: int = 75,
        pool_stride: int = 15,
        drop_prob: float = 0.5,
        activation: type[nn.Module] = nn.ELU,
    ):
        super().__init__()

        # Width of the feature map after the depthwise spatial projection.
        n_spatial_filters = n_temporal_filters * spatial_expansion
        time_kernel = (1, temporal_filter_length)
        time_padding = (0, temporal_filter_length // 2)

        self.ensure4d = Ensure4d()
        # Move the trailing singleton axis to the front as the conv input plane.
        self.rearrange_input = Rearrange("b c t 1 -> b 1 c t")
        self.temporal_conv = nn.Conv2d(
            in_channels=1,
            out_channels=n_temporal_filters,
            kernel_size=time_kernel,
            padding=time_padding,
            bias=False,
        )
        self.intermediate_bn = nn.BatchNorm2d(n_temporal_filters)
        # One group per temporal filter: no mixing across filters at this step.
        self.spatial_conv = nn.Conv2d(
            in_channels=n_temporal_filters,
            out_channels=n_spatial_filters,
            kernel_size=(n_chans, 1),
            groups=n_temporal_filters,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(n_spatial_filters)
        self.nonlinearity = activation()
        self.pool = nn.AvgPool2d((1, pool_length), stride=(1, pool_stride))
        self.dropout = nn.Dropout(drop_prob)

    def forward(self, x):
        """Apply every layer of the stem, in order, to ``x``."""
        pipeline = (
            self.ensure4d,
            self.rearrange_input,
            self.temporal_conv,
            self.intermediate_bn,
            self.spatial_conv,
            self.bn,
            self.nonlinearity,
            self.pool,
            self.dropout,
        )
        for layer in pipeline:
            x = layer(x)
        return x
489
+
490
+
491
+ class _ChannelAttentionBlock(nn.Module):
492
+ r"""
493
+ A neural network module implementing channel-wise attention mechanisms to enhance.
494
+
495
+ feature representations by selectively emphasizing important channels and suppressing
496
+ less useful ones. This block integrates convolutional layers, pooling, dropout, and
497
+ an optional attention mechanism that can be customized based on the given mode.
498
+
499
+ Parameters
500
+ ----------
501
+ attention_mode : str, optional
502
+ The type of attention mechanism to apply. If `None`, no attention is applied.
503
+
504
+ - "se" for Squeeze-and-excitation network
505
+ - "gsop" for Global Second-Order Pooling
506
+ - "fca" for Frequency Channel Attention Network
507
+ - "encnet" for context encoding module
508
+ - "eca" for Efficient channel attention for deep convolutional neural networks
509
+ - "ge" for Gather-Excite
510
+ - "gct" for Gated Channel Transformation
511
+ - "srm" for Style-based Recalibration Module
512
+ - "cbam" for Convolutional Block Attention Module
513
+ - "cat" for Learning to collaborate channel and temporal attention
514
+ from multi-information fusion
515
+ - "catlite" for Learning to collaborate channel attention
516
+ from multi-information fusion (lite version, cat w/o temporal attention)
517
+
518
+ in_channels : int, default=16
519
+ The number of input channels to the block.
520
+ temp_filter_length : int, default=15
521
+ The length of the temporal filters in the convolutional layers.
522
+ pool_length : int, default=8
523
+ The length of the window for the average pooling operation.
524
+ pool_stride : int, default=8
525
+ The stride of the average pooling operation.
526
+ drop_prob : float, default=0.5
527
+ The dropout rate for regularization. Values should be between 0 and 1.
528
+ reduction_rate : int, default=4
529
+ The reduction rate used in the attention mechanism to reduce dimensionality
530
+ and computational complexity.
531
+ use_mlp : bool, default=False
532
+ Flag to indicate whether an MLP (Multi-Layer Perceptron) should be used within
533
+ the attention mechanism for further processing.
534
+ seq_len : int, default=62
535
+ The sequence length, used in certain types of attention mechanisms to process
536
+ temporal dimensions.
537
+ freq_idx : int, default=0
538
+ DCT index used in fca attention mechanism.
539
+ n_codewords : int, default=4
540
+ The number of codewords (clusters) used in attention mechanisms that employ
541
+ quantization or clustering strategies.
542
+ kernel_size : int, default=9
543
+ The kernel size used in certain types of attention mechanisms for convolution
544
+ operations.
545
+ extra_params : bool, default=False
546
+ Flag to indicate whether additional, custom parameters should be passed to
547
+ the attention mechanism.
548
+ activation : nn.Module, default=nn.ELU
549
+ Activation function class to apply. Should be a PyTorch activation
550
+ module class like ``nn.ReLU`` or ``nn.ELU``. Default is ``nn.ELU``.
551
+
552
+ Attributes
553
+ ----------
554
+ conv : torch.nn.Sequential
555
+ Sequential model of convolutional layers, batch normalization, and ELU
556
+ activation, designed to process input features.
557
+ pool : torch.nn.AvgPool2d
558
+ Average pooling layer to reduce the dimensionality of the feature maps.
559
+ dropout : torch.nn.Dropout
560
+ Dropout layer for regularization.
561
+ attention_block : torch.nn.Module or None
562
+ The attention mechanism applied to the output of the convolutional layers,
563
+ if `attention_mode` is not None. Otherwise, it's set to None.
564
+ activation : nn.Module, default=nn.ELU
565
+ Activation function class to apply. Should be a PyTorch activation
566
+ module class like ``nn.ReLU`` or ``nn.ELU``. Default is ``nn.ELU``.
567
+
568
+ Examples
569
+ --------
570
+ >>> channel_attention_block = _ChannelAttentionBlock(attention_mode='cbam', in_channels=16, reduction_rate=4, kernel_size=7)
571
+ >>> x = torch.randn(1, 16, 64, 64) # Example input tensor
572
+ >>> output = channel_attention_block(x)
573
+ The output tensor then can be further processed or used as input to another block.
574
+ """
575
+
576
def __init__(
    self,
    attention_mode: str | None = None,
    in_channels: int = 16,
    temp_filter_length: int = 15,
    pool_length: int = 8,
    pool_stride: int = 8,
    drop_prob: float = 0.5,
    reduction_rate: int = 4,
    use_mlp: bool = False,
    seq_len: int = 62,
    freq_idx: int = 0,
    n_codewords: int = 4,
    kernel_size: int = 9,
    extra_params: bool = False,
    activation: type[nn.Module] = nn.ELU,
):
    """Assemble the convolution stack, pooling, dropout and optional attention.

    ``self.conv``, ``self.pool``, ``self.dropout`` and
    ``self.attention_block`` are assigned in that order.
    ``self.attention_block`` is ``None`` when ``attention_mode`` is ``None``;
    otherwise it is whatever ``get_attention_block`` returns for that mode.
    """
    super().__init__()

    # Depthwise convolution (one group per channel) sliding along the last
    # axis, followed by a 1x1 convolution that mixes the channels.
    depthwise = nn.Conv2d(
        in_channels,
        in_channels,
        kernel_size=(1, temp_filter_length),
        padding=(0, temp_filter_length // 2),
        groups=in_channels,
        bias=False,
    )
    pointwise = nn.Conv2d(in_channels, in_channels, kernel_size=(1, 1), bias=False)
    self.conv = nn.Sequential(
        depthwise,
        pointwise,
        nn.BatchNorm2d(in_channels),
        activation(),
    )

    # Pooling acts on the last axis only (window/stride of 1 on the other).
    self.pool = nn.AvgPool2d(kernel_size=(1, pool_length), stride=(1, pool_stride))
    self.dropout = nn.Dropout(p=drop_prob)

    if attention_mode is None:
        self.attention_block = None
    else:
        self.attention_block = get_attention_block(
            attention_mode,
            ch_dim=in_channels,
            reduction_rate=reduction_rate,
            use_mlp=use_mlp,
            seq_len=seq_len,
            freq_idx=freq_idx,
            n_codewords=n_codewords,
            kernel_size=kernel_size,
            extra_params=extra_params,
        )
625
+
626
def forward(self, x):
    """Apply conv, then attention (when configured), then pool and dropout.

    Parameters
    ----------
    x
        Input passed straight to ``self.conv``.

    Returns
    -------
    The result of ``self.dropout(self.pool(...))`` applied to the
    (optionally attended) convolution output.
    """
    features = self.conv(x)
    attend = self.attention_block
    # The attention stage is skipped entirely when no block was configured.
    if attend is not None:
        features = attend(features)
    return self.dropout(self.pool(features))
633
+
634
+
635
+ def get_attention_block(
636
+ attention_mode: str,
637
+ ch_dim: int = 16,
638
+ reduction_rate: int = 4,
639
+ use_mlp: bool = False,
640
+ seq_len: int | None = None,
641
+ freq_idx: int = 0,
642
+ n_codewords: int = 4,
643
+ kernel_size: int = 9,
644
+ extra_params: bool = False,
645
+ ):
646
+ """
647
+ Util function to the attention block based on the attention mode.
648
+
649
+ Parameters
650
+ ----------
651
+ attention_mode : str
652
+ The type of attention mechanism to apply.
653
+ ch_dim : int
654
+ The number of input channels to the block.
655
+ reduction_rate : int
656
+ The reduction rate used in the attention mechanism to reduce
657
+ dimensionality and computational complexity.
658
+ Used in all the methods, except for the
659
+ encnet and eca.
660
+ use_mlp : bool
661
+ Flag to indicate whether an MLP (Multi-Layer Perceptron) should be used
662
+ within the attention mechanism for further processing. Used in the ge
663
+ and srm attention mechanism.
664
+ seq_len : int
665
+ The sequence length, used in certain types of attention mechanisms to
666
+ process temporal dimensions. Used in the ge or fca attention mechanism.
667
+ freq_idx : int
668
+ DCT index used in fca attention mechanism.
669
+ n_codewords : int
670
+ The number of codewords (clusters) used in attention mechanisms
671
+ that employ quantization or clustering strategies, encnet.
672
+ kernel_size : int
673
+ The kernel size used in certain types of attention mechanisms for convolution
674
+ operations, used in the cbam, eca, and cat attention mechanisms.
675
+ extra_params : bool
676
+ Parameter to pass additional parameters to the GatherExcite mechanism.
677
+
678
+ Returns
679
+ -------
680
+ nn.Module
681
+ The attention block based on the attention mode.
682
+ """
683
+ if attention_mode == "se":
684
+ return SqueezeAndExcitation(in_channels=ch_dim, reduction_rate=reduction_rate)
685
+ # improving the squeeze module
686
+ elif attention_mode == "gsop":
687
+ return GSoP(in_channels=ch_dim, reduction_rate=reduction_rate)
688
+ elif attention_mode == "fca":
689
+ assert seq_len is not None
690
+ return FCA(
691
+ in_channels=ch_dim,
692
+ seq_len=seq_len,
693
+ reduction_rate=reduction_rate,
694
+ freq_idx=freq_idx,
695
+ )
696
+ elif attention_mode == "encnet":
697
+ return EncNet(in_channels=ch_dim, n_codewords=n_codewords)
698
+ # improving the excitation module
699
+ elif attention_mode == "eca":
700
+ return ECA(in_channels=ch_dim, kernel_size=kernel_size)
701
+ # improving the squeeze and the excitation module
702
+ elif attention_mode == "ge":
703
+ assert seq_len is not None
704
+ return GatherExcite(
705
+ in_channels=ch_dim,
706
+ seq_len=seq_len,
707
+ extra_params=extra_params,
708
+ use_mlp=use_mlp,
709
+ reduction_rate=reduction_rate,
710
+ )
711
+ elif attention_mode == "gct":
712
+ return GCT(in_channels=ch_dim)
713
+ elif attention_mode == "srm":
714
+ return SRM(in_channels=ch_dim, use_mlp=use_mlp, reduction_rate=reduction_rate)
715
+ # temporal and channel attention
716
+ elif attention_mode == "cbam":
717
+ return CBAM(
718
+ in_channels=ch_dim, reduction_rate=reduction_rate, kernel_size=kernel_size
719
+ )
720
+ elif attention_mode == "cat":
721
+ return CAT(
722
+ in_channels=ch_dim, reduction_rate=reduction_rate, kernel_size=kernel_size
723
+ )
724
+ elif attention_mode == "catlite":
725
+ return CATLite(ch_dim, reduction_rate=reduction_rate)
726
+ else:
727
+ raise NotImplementedError