braindecode 1.2.0.dev169062562__tar.gz → 1.2.0.dev175337561__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of braindecode might be problematic. Click here for more details.

Files changed (122) hide show
  1. {braindecode-1.2.0.dev169062562/braindecode.egg-info → braindecode-1.2.0.dev175337561}/PKG-INFO +1 -1
  2. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/atcnet.py +152 -12
  3. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/attentionbasenet.py +140 -18
  4. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/eegconformer.py +111 -15
  5. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/eegnet.py +54 -4
  6. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/modules/attention.py +6 -3
  7. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/modules/convolution.py +10 -7
  8. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/modules/filter.py +8 -6
  9. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/training/scoring.py +2 -8
  10. braindecode-1.2.0.dev175337561/braindecode/version.py +1 -0
  11. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/visualization/gradients.py +6 -1
  12. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561/braindecode.egg-info}/PKG-INFO +1 -1
  13. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/docs/conf.py +3 -0
  14. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/docs/whats_new.rst +8 -0
  15. braindecode-1.2.0.dev169062562/braindecode/version.py +0 -1
  16. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/LICENSE.txt +0 -0
  17. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/MANIFEST.in +0 -0
  18. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/NOTICE.txt +0 -0
  19. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/README.rst +0 -0
  20. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/__init__.py +0 -0
  21. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/augmentation/__init__.py +0 -0
  22. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/augmentation/base.py +0 -0
  23. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/augmentation/functional.py +0 -0
  24. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/augmentation/transforms.py +0 -0
  25. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/classifier.py +0 -0
  26. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datasets/__init__.py +0 -0
  27. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datasets/base.py +0 -0
  28. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datasets/bbci.py +0 -0
  29. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datasets/bcicomp.py +0 -0
  30. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datasets/bids.py +0 -0
  31. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datasets/mne.py +0 -0
  32. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datasets/moabb.py +0 -0
  33. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datasets/nmt.py +0 -0
  34. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datasets/sleep_physio_challe_18.py +0 -0
  35. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datasets/sleep_physionet.py +0 -0
  36. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datasets/tuh.py +0 -0
  37. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datasets/xy.py +0 -0
  38. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datautil/__init__.py +0 -0
  39. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datautil/serialization.py +0 -0
  40. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/datautil/util.py +0 -0
  41. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/eegneuralnet.py +0 -0
  42. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/functional/__init__.py +0 -0
  43. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/functional/functions.py +0 -0
  44. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/functional/initialization.py +0 -0
  45. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/__init__.py +0 -0
  46. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/base.py +0 -0
  47. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/biot.py +0 -0
  48. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/contrawr.py +0 -0
  49. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/ctnet.py +0 -0
  50. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/deep4.py +0 -0
  51. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/deepsleepnet.py +0 -0
  52. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/eeginception_erp.py +0 -0
  53. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/eeginception_mi.py +0 -0
  54. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/eegitnet.py +0 -0
  55. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/eegminer.py +0 -0
  56. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/eegnex.py +0 -0
  57. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/eegresnet.py +0 -0
  58. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/eegsimpleconv.py +0 -0
  59. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/eegtcnet.py +0 -0
  60. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/fbcnet.py +0 -0
  61. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/fblightconvnet.py +0 -0
  62. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/fbmsnet.py +0 -0
  63. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/hybrid.py +0 -0
  64. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/ifnet.py +0 -0
  65. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/labram.py +0 -0
  66. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/msvtnet.py +0 -0
  67. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/sccnet.py +0 -0
  68. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/shallow_fbcsp.py +0 -0
  69. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/signal_jepa.py +0 -0
  70. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/sinc_shallow.py +0 -0
  71. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/sleep_stager_blanco_2020.py +0 -0
  72. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/sleep_stager_chambon_2018.py +0 -0
  73. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/sleep_stager_eldele_2021.py +0 -0
  74. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/sparcnet.py +0 -0
  75. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/summary.csv +0 -0
  76. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/syncnet.py +0 -0
  77. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/tcn.py +0 -0
  78. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/tidnet.py +0 -0
  79. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/tsinception.py +0 -0
  80. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/usleep.py +0 -0
  81. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/models/util.py +0 -0
  82. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/modules/__init__.py +0 -0
  83. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/modules/activation.py +0 -0
  84. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/modules/blocks.py +0 -0
  85. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/modules/layers.py +0 -0
  86. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/modules/linear.py +0 -0
  87. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/modules/parametrization.py +0 -0
  88. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/modules/stats.py +0 -0
  89. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/modules/util.py +0 -0
  90. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/modules/wrapper.py +0 -0
  91. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/preprocessing/__init__.py +0 -0
  92. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/preprocessing/mne_preprocess.py +0 -0
  93. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/preprocessing/preprocess.py +0 -0
  94. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/preprocessing/windowers.py +0 -0
  95. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/regressor.py +0 -0
  96. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/samplers/__init__.py +0 -0
  97. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/samplers/base.py +0 -0
  98. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/samplers/ssl.py +0 -0
  99. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/training/__init__.py +0 -0
  100. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/training/callbacks.py +0 -0
  101. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/training/losses.py +0 -0
  102. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/util.py +0 -0
  103. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/visualization/__init__.py +0 -0
  104. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode/visualization/confusion_matrices.py +0 -0
  105. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode.egg-info/SOURCES.txt +0 -0
  106. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode.egg-info/dependency_links.txt +0 -0
  107. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode.egg-info/requires.txt +0 -0
  108. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/braindecode.egg-info/top_level.txt +0 -0
  109. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/docs/Makefile +0 -0
  110. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/docs/_templates/autosummary/class.rst +0 -0
  111. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/docs/_templates/autosummary/function.rst +0 -0
  112. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/docs/api.rst +0 -0
  113. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/docs/cite.rst +0 -0
  114. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/docs/help.rst +0 -0
  115. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/docs/index.rst +0 -0
  116. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/docs/install/install.rst +0 -0
  117. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/docs/install/install_pip.rst +0 -0
  118. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/docs/install/install_source.rst +0 -0
  119. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/docs/models_summary.rst +0 -0
  120. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/docs/sg_execution_times.rst +0 -0
  121. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/pyproject.toml +0 -0
  122. {braindecode-1.2.0.dev169062562 → braindecode-1.2.0.dev175337561}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: braindecode
3
- Version: 1.2.0.dev169062562
3
+ Version: 1.2.0.dev175337561
4
4
  Summary: Deep learning software to decode EEG, ECG or MEG signals
5
5
  Author-email: Robin Tibor Schirrmeister <robintibor@gmail.com>
6
6
  Maintainer-email: Alexandre Gramfort <agramfort@meta.com>, Bruno Aristimunha Pinto <b.aristimunha@gmail.com>, Robin Tibor Schirrmeister <robintibor@gmail.com>
@@ -13,13 +13,154 @@ from braindecode.modules import CausalConv1d, Ensure4d, MaxNormLinear
13
13
 
14
14
 
15
15
  class ATCNet(EEGModuleMixin, nn.Module):
16
- """ATCNet model from Altaheri et al. (2022) [1]_
16
+ """ATCNet from Altaheri et al. (2022) [1]_.
17
17
 
18
- Pytorch implementation based on official tensorflow code [2]_.
18
+ :bdg-success:`Convolution` :bdg-info:`Small Attention`
19
19
 
20
20
  .. figure:: https://user-images.githubusercontent.com/25565236/185449791-e8539453-d4fa-41e1-865a-2cf7e91f60ef.png
21
- :align: center
22
- :alt: ATCNet Architecture
21
+ :align: center
22
+ :alt: ATCNet Architecture
23
+ :width: 650px
24
+
25
+ .. rubric:: Architectural Overview
26
+
27
+ ATCNet is a *convolution-first* architecture augmented with a *lightweight attention–TCN*
28
+ sequence module. The end-to-end flow is:
29
+
30
+ - (i) :class:`_ConvBlock` learns temporal filter-banks and spatial projections (EEGNet-style),
31
+ downsampling time to a compact feature map;
32
+
33
+ - (ii) Sliding Windows carve overlapping temporal windows from this map;
34
+
35
+ - (iii) for each window, :class:`_AttentionBlock` applies small multi-head self-attention
36
+ over time, followed by a :class:`_TCNResidualBlock` stack (causal, dilated);
37
+
38
+ - (iv) window-level features are aggregated (mean of window logits or concatenation)
39
+ and mapped via a max-norm–constrained linear layer.
40
+
41
+ Relative to ViT, ATCNet replaces linear patch projection with learned *temporal–spatial*
42
+ convolutions; it processes *parallel* window encoders (attention→TCN) instead of a deep
43
+ stack; and swaps the MLP head for a TCN suited to 1-D EEG sequences.
44
+
45
+ .. rubric:: Macro Components
46
+
47
+ - :class:`_ConvBlock` **(Shallow conv stem → feature map)**
48
+
49
+ - *Operations.*
50
+ - **Temporal conv** (:class:`torch.nn.Conv2d`) with kernel ``(L_t, 1)`` builds a
51
+ FIR-like filter bank (``F1`` maps).
52
+ - **Depthwise spatial conv** (:class:`torch.nn.Conv2d`, ``groups=F1``) with kernel
53
+ ``(1, n_chans)`` learns per-filter spatial projections (akin to EEGNet’s CSP-like step).
54
+ - **BN → ELU → AvgPool → Dropout** to stabilize and condense activations.
55
+ - **Refining temporal conv** (:class:`torch.nn.Conv2d`) with kernel ``(L_r, 1)`` +
56
+ **BN → ELU → AvgPool → Dropout**.
57
+
58
+ The output shape is ``(B, F2, T_c, 1)`` with ``F2 = F1·D`` and ``T_c = T/(P1·P2)``.
59
+ Temporal kernels behave as FIR filters; the depthwise-spatial conv yields frequency-specific
60
+ topographies. Pooling acts as a local integrator, reducing variance and imposing a
61
+ useful inductive bias on short EEG windows.
62
+
63
+ - **Sliding-Window Sequencer**
64
+
65
+ From the condensed time axis (length ``T_c``), ATCNet forms ``n`` overlapping windows
66
+ of width ``T_w = T_c - n + 1`` (one start per index). Each window produces a sequence
67
+ ``(B, F2, T_w)`` forwarded to its own attention–TCN branch. This creates *parallel*
68
+ encoders over shifted contexts and is key to robustness on nonstationary EEG.
69
+
70
+ - :class:`_AttentionBlock` **(small MHA on temporal positions)**
71
+
72
+ - *Operations.*
73
+ - Rearrange to ``(B, T_w, F2)``,
74
+ - Normalization :class:`torch.nn.LayerNorm`
75
+ - Custom MultiHeadAttention :class:`_MHA` (``num_heads=H``, per-head dim ``d_h``) + residual add,
76
+ - Dropout :class:`torch.nn.Dropout`
77
+ - Rearrange back to ``(B, F2, T_w)``.
78
+
79
+
80
+ **Note**: Attention is *local to a window* and purely temporal.
81
+
82
+ *Role.* Re-weights evidence across the window, letting the model emphasize informative
83
+ segments (onsets, bursts) before causal convolutions aggregate history.
84
+
85
+ - :class:`_TCNResidualBlock` **(causal dilated temporal CNN)**
86
+
87
+ - *Operations.*
88
+ - Two :class:`braindecode.modules.CausalConv1d` layers per block with dilation ``1, 2, 4, …``
89
+ - Across blocks of (:class:`torch.nn.ELU` + :class:`torch.nn.BatchNorm1d` + :class:`torch.nn.Dropout`) +
90
+ a residual (identity or 1x1 mapping).
91
+ - The final feature used per window is the *last* causal step ``[..., -1]`` (forecast-style).
92
+
93
+ *Role.* Efficient long-range temporal integration with stable gradients; the dilated
94
+ receptive field complements attention’s soft selection.
95
+
96
+ - **Aggregation & Classifier**
97
+
98
+ - *Operations.*
99
+ - Either (a) map each window feature ``(B, F2)`` to logits via :class:`braindecode.modules.MaxNormLinear`
100
+ and **average** across windows (default, matching official code), or
101
+ - (b) **concatenate** all window features ``(B, n·F2)`` and apply a single :class:`MaxNormLinear`.
102
+ The max-norm constraint regularizes the readout.
103
+
104
+ .. rubric:: Convolutional Details
105
+
106
+ - **Temporal.** Temporal structure is learned in three places:
107
+ - (1) the stem’s wide ``(L_t, 1)`` conv (learned filter bank),
108
+ - (2) the refining ``(L_r, 1)`` conv after pooling (short-term dynamics), and
109
+ - (3) the TCN’s causal 1-D convolutions with exponentially increasing dilation
110
+ (long-range dependencies). The minimum sequence length required by the TCN stack is
111
+ ``(K_t - 1)·2^{L-1} + 1``; the implementation *auto-scales* kernels/pools/windows
112
+ when inputs are shorter to preserve feasibility.
113
+
114
+ - **Spatial.** A depthwise spatial conv spans the **full montage** (kernel ``(1, n_chans)``),
115
+ producing *per-temporal-filter* spatial projections (no cross-filter mixing at this step).
116
+ This mirrors EEGNet’s interpretability: each temporal filter has its own spatial pattern.
117
+
118
+
119
+ .. rubric:: Attention / Sequential Modules
120
+
121
+ - **Type.** Multi-head self-attention with ``H`` heads and per-head dim ``d_h`` implemented
122
+ in :class:`_MHA`, allowing ``embed_dim = H·d_h`` independent of input and output dims.
123
+ - **Shapes.** ``(B, F2, T_w) → (B, T_w, F2) → (B, F2, T_w)``. Attention operates along
124
+ the **temporal** axis within a window; channels/features stay in the embedding dim ``F2``.
125
+ - **Role.** Highlights salient temporal positions prior to causal convolution; small attention
126
+ keeps compute modest while improving context modeling over pooled features.
127
+
128
+ .. rubric:: Additional Mechanisms
129
+
130
+ - **Parallel encoders over shifted windows.** Improves montage/phase robustness by
131
+ ensembling nearby contexts rather than committing to a single segmentation.
132
+ - **Max-norm classifier.** Enforces weight norm constraints at the readout, a common
133
+ stabilization trick in EEG decoding.
134
+ - **ViT vs. ATCNet (design choices).** Convolutional *nonlinear* projection rather than
135
+ linear patchification; attention followed by **TCN** (not MLP); *parallel* window
136
+ encoders rather than stacked encoders.
137
+
138
+ .. rubric:: Usage and Configuration
139
+
140
+ - ``conv_block_n_filters (F1)``, ``conv_block_depth_mult (D)`` → capacity of the stem
141
+ (with ``F2 = F1·D`` feeding attention/TCN), dimensions aligned to ``F2``, like `EEGNetv4`.
142
+ - Pool sizes ``P1,P2`` trade temporal resolution for stability/compute; they set
143
+ ``T_c = T/(P1·P2)`` and thus window width ``T_w``.
144
+ - ``n_windows`` controls the ensemble over shifts (compute ∝ windows).
145
+ - ``att_num_heads``, ``att_head_dim`` set attention capacity; keep ``H·d_h ≈ F2``.
146
+ - ``tcn_depth``, ``tcn_kernel_size`` govern receptive field; larger values demand
147
+ longer inputs (see minimum length above). The implementation warns and *rescales*
148
+ kernels/pools/windows if inputs are too short.
149
+ - **Aggregation choice.** ``concat=False`` (default, average of per-window logits) matches
150
+ the official code; ``concat=True`` mirrors the paper’s concatenation variant.
151
+
152
+
153
+ Notes
154
+ -----
155
+ - Inputs substantially shorter than the implied minimum length trigger **automatic
156
+ downscaling** of kernels, pools, windows, and TCN kernel size to maintain validity.
157
+ - The attention–TCN sequence operates **per window**; the last causal step is used as the
158
+ window feature, aligning the temporal semantics across windows.
159
+
160
+ .. versionadded:: 1.1
161
+
162
+ - More detailed documentation of the model.
163
+
23
164
 
24
165
  Parameters
25
166
  ----------
@@ -85,15 +226,13 @@ class ATCNet(EEGModuleMixin, nn.Module):
85
226
  Maximum L2-norm constraint imposed on weights of the last
86
227
  fully-connected layer. Defaults to 0.25.
87
228
 
88
-
89
229
  References
90
230
  ----------
91
- .. [1] H. Altaheri, G. Muhammad and M. Alsulaiman,
92
- Physics-informed attention temporal convolutional network for EEG-based
93
- motor imagery classification in IEEE Transactions on Industrial Informatics,
94
- 2022, doi: 10.1109/TII.2022.3197419.
95
- .. [2] EEE-ATCNet implementation.
96
- https://github.com/Altaheri/EEG-ATCNet/blob/main/models.py
231
+ .. [1] H. Altaheri, G. Muhammad, M. Alsulaiman (2022).
232
+ *Physics-informed attention temporal convolutional network for EEG-based motor imagery classification.*
233
+ IEEE Transactions on Industrial Informatics. doi:10.1109/TII.2022.3197419.
234
+ .. [2] Official EEG-ATCNet implementation (TensorFlow):
235
+ https://github.com/Altaheri/EEG-ATCNet/blob/main/models.py
97
236
  """
98
237
 
99
238
  def __init__(
@@ -556,7 +695,8 @@ class _TCNResidualBlock(nn.Module):
556
695
  # Reshape the input for the residual connection when necessary
557
696
  if in_channels != n_filters:
558
697
  self.reshaping_conv = nn.Conv1d(
559
- n_filters,
698
+ in_channels=in_channels,
699
+ out_channels=n_filters,
560
700
  kernel_size=1,
561
701
  padding="same",
562
702
  )
@@ -24,26 +24,148 @@ from braindecode.modules.attention import (
24
24
 
25
25
 
26
26
  class AttentionBaseNet(EEGModuleMixin, nn.Module):
27
- """AttentionBaseNet from Wimpff M et al. (2023) [Martin2023]_.
27
+ """AttentionBaseNet from Wimpff M et al. (2023) [Martin2023]_.
28
+
29
+ :bdg-success:`Convolution` :bdg-info:`Small Attention`
28
30
 
29
31
  .. figure:: https://content.cld.iop.org/journals/1741-2552/21/3/036020/revision2/jnead48b9f2_hr.jpg
30
- :align: center
31
- :alt: Attention Base Net
32
-
33
- Neural Network from the paper: EEG motor imagery decoding:
34
- A framework for comparative analysis with channel attention
35
- mechanisms
36
-
37
- The paper and original code with more details about the methodological
38
- choices are available at the [Martin2023]_ and [MartinCode]_.
39
-
40
- The AttentionBaseNet architecture is composed of four modules:
41
- - Input Block that performs a temporal convolution and a spatial
42
- convolution.
43
- - Channel Expansion that modifies the number of channels.
44
- - An attention block that performs channel attention with several
45
- options
46
- - ClassificationHead
32
+ :align: center
33
+ :alt: AttentionBaseNet Architecture
34
+ :width: 640px
35
+
36
+
37
+ .. rubric:: Architectural Overview
38
+
39
+ AttentionBaseNet is a *convolution-first* network with a *channel-attention* stage.
40
+ The end-to-end flow is:
41
+
42
+ - (i) :class:`_FeatureExtractor` learns a temporal filter bank and per-filter spatial
43
+ projections (depthwise across electrodes), then condenses time by pooling;
44
+ - (ii) **Channel Expansion** uses a ``1x1`` convolution to set the feature width;
45
+ - (iii) :class:`_ChannelAttentionBlock` refines features via depthwise–pointwise temporal
46
+ convs and an optional channel-attention module (SE/CBAM/ECA/…);
47
+ - (iv) **Classifier** flattens the sequence and applies a linear readout.
48
+
49
+ This design mirrors shallow CNN pipelines (EEGNet-style stem) but inserts a pluggable
50
+ attention unit that *re-weights channels* (and optionally temporal positions) before
51
+ classification.
52
+
53
+
54
+ .. rubric:: Macro Components
55
+
56
+ - :class:`_FeatureExtractor` **(Shallow conv stem → condensed feature map)**
57
+
58
+ - *Operations.*
59
+ - **Temporal conv** (:class:`torch.nn.Conv2d`) with kernel ``(1, L_t)`` creates a learned
60
+ FIR-like filter bank with ``n_temporal_filters`` maps.
61
+ - **Depthwise spatial conv** (:class:`torch.nn.Conv2d`, ``groups=n_temporal_filters``)
62
+ with kernel ``(n_chans, 1)`` learns per-filter spatial projections over the full montage.
63
+ - **BatchNorm → ELU → AvgPool → Dropout** stabilize and downsample time.
64
+ - Output shape: ``(B, F2, 1, T₁)`` with ``F2 = n_temporal_filters x spatial_expansion``.
65
+
66
+ *Interpretability/robustness.* Temporal kernels behave as analyzable FIR filters; the
67
+ depthwise spatial step yields rhythm-specific topographies. Pooling acts as a local
68
+ integrator that reduces variance on short EEG windows.
69
+
70
+ - **Channel Expansion**
71
+
72
+ - *Operations.*
73
+ - A ``1x1`` conv → BN → activation maps ``F2 → ch_dim`` without changing
74
+ the temporal length ``T₁`` (shape: ``(B, ch_dim, 1, T₁)``).
75
+ This sets the embedding width for the attention block.
76
+
77
+ - :class:`_ChannelAttentionBlock` **(temporal refinement + channel attention)**
78
+
79
+ - *Operations.*
80
+ - **Depthwise temporal conv** ``(1, L_a)`` (groups=``ch_dim``) + **pointwise ``1x1``**,
81
+ BN and activation → preserves shape ``(B, ch_dim, 1, T₁)`` while refining timing.
82
+ - **Optional attention module** (see *Additional Mechanisms*) applies channel reweighting
83
+ (some variants also apply temporal gating).
84
+ - **AvgPool (1, P₂)** with stride ``(1, S₂)`` and **Dropout** → outputs
85
+ ``(B, ch_dim, 1, T₂)``.
86
+
87
+ *Role.* Emphasizes informative channels (and, in certain modes, salient time steps)
88
+ before the classifier; complements the convolutional priors with adaptive re-weighting.
89
+
90
+ - **Classifier (aggregation + readout)**
91
+
92
+ *Operations.* :class:`torch.nn.Flatten` → :class:`torch.nn.Linear` from
93
+ ``(B, ch_dim·T₂)`` to classes.
94
+
95
+
96
+ .. rubric:: Convolutional Details
97
+
98
+ - **Temporal (where time-domain patterns are learned).**
99
+ Wide kernels in the stem (``(1, L_t)``) act as a learned filter bank for oscillatory
100
+ bands/transients; the attention block’s depthwise temporal conv (``(1, L_a)``) sharpens
101
+ short-term dynamics after downsampling. Pool sizes/strides (``P₁,S₁`` then ``P₂,S₂``)
102
+ set the token rate and effective temporal resolution.
103
+
104
+ - **Spatial (how electrodes are processed).**
105
+ A depthwise spatial conv with kernel ``(n_chans, 1)`` spans the full montage to
106
+ learn *per-temporal-filter* spatial projections (no cross-filter mixing at this step),
107
+ mirroring the interpretable spatial stage in shallow CNNs.
108
+
109
+ - **Spectral (how frequency content is captured).**
110
+ No explicit Fourier/wavelet transform is used in the stem—spectral selectivity
111
+ emerges from learned temporal kernels. When ``attention_mode="fca"``, a frequency
112
+ channel attention (DCT-based) summarizes frequencies to drive channel weights.
113
+
114
+
115
+ .. rubric:: Attention / Sequential Modules
116
+
117
+ - **Type.** Channel attention chosen by ``attention_mode`` (SE, ECA, CBAM, CAT, GSoP, FCA,
118
+ EncNet, GE, GCT, SRM, CATLite). Most operate purely on channels; CBAM/CAT additionally
119
+ include temporal attention.
120
+ - **Shapes.** Input/Output around attention: ``(B, ch_dim, 1, T₁)``. Re-arrangements
121
+ (if any) are internal to the module; the block returns the same shape before pooling.
122
+ - **Role.** Re-weights channels (and optionally time) to highlight informative sources
123
+ and suppress distractors, improving SNR ahead of the linear head.
124
+
125
+
126
+ .. rubric:: Additional Mechanisms
127
+
128
+ - **Attention variants at a glance.**
129
+ - ``"se"``: Squeeze-and-Excitation (global pooling → bottleneck → gates).
130
+ - ``"gsop"``: Global second-order pooling (covariance-aware channel weights).
131
+ - ``"fca"``: Frequency Channel Attention (DCT summary; uses ``seq_len`` and ``freq_idx``).
132
+ - ``"encnet"``: EncNet with learned codewords (uses ``n_codewords``).
133
+ - ``"eca"``: Efficient Channel Attention (local 1-D conv over channel descriptor; uses ``kernel_size``).
134
+ - ``"ge"``: Gather–Excite (context pooling with optional MLP; can use ``extra_params``).
135
+ - ``"gct"``: Gated Channel Transformation (global context normalization + gating).
136
+ - ``"srm"``: Style-based recalibration (mean–std descriptors; optional MLP).
137
+ - ``"cbam"``: Channel then temporal attention (uses ``kernel_size``).
138
+ - ``"cat"`` / ``"catlite"``: Collaborative (channel ± temporal) attention; *lite* omits temporal.
139
+ - **Auto-compatibility on short inputs.**
140
+
141
+ If the input duration is too short for the configured kernels/pools, the implementation
142
+ **automatically rescales** temporal lengths/strides downward (with a warning) to keep
143
+ shapes valid and preserve the pipeline semantics.
144
+
145
+
146
+ .. rubric:: Usage and Configuration
147
+
148
+ - ``n_temporal_filters``, ``temporal_filter_length`` and ``spatial_expansion``:
149
+ control the capacity and the number of spatial projections in the stem.
150
+ - ``pool_length_inp``, ``pool_stride_inp`` then ``pool_length``, ``pool_stride``:
151
+ trade temporal resolution for compute; they determine the final sequence length ``T₂``.
152
+ - ``ch_dim``: width after the ``1x1`` expansion and the effective embedding size for attention.
153
+ - ``attention_mode`` + its specific hyperparameters (``reduction_rate``,
154
+ ``kernel_size``, ``seq_len``, ``freq_idx``, ``n_codewords``, ``use_mlp``):
155
+ select and tune the reweighting mechanism.
156
+ - ``drop_prob_inp`` and ``drop_prob_attn``: regularize stem and attention stages.
157
+ - **Training tips.**
158
+
159
+ Start with moderate pooling (e.g., ``P₁=75,S₁=15``) and ELU activations; enable attention
160
+ only after the stem learns stable filters. For small datasets, prefer simpler modes
161
+ (``"se"``, ``"eca"``) before heavier ones (``"gsop"``, ``"encnet"``).
162
+
163
+ Notes
164
+ -----
165
+ - Sequence length after each stage is computed internally; the final classifier expects
166
+ a flattened ``ch_dim x T₂`` vector.
167
+ - Attention operates on *channel* dimension by design; temporal gating exists only in
168
+ specific variants (CBAM/CAT).
47
169
 
48
170
  .. versionadded:: 0.9
49
171
 
@@ -12,33 +12,126 @@ from braindecode.modules import FeedForwardBlock, MultiHeadAttention
12
12
 
13
13
 
14
14
  class EEGConformer(EEGModuleMixin, nn.Module):
15
- """EEG Conformer from Song et al. (2022) from [song2022]_.
15
+ """EEG Conformer from Song et al. (2022) [song2022]_.
16
16
 
17
- .. figure:: https://raw.githubusercontent.com/eeyhsong/EEG-Conformer/refs/heads/main/visualization/Fig1.png
17
+ :bdg-success:`Convolution` :bdg-info:`Small Attention`
18
+
19
+ .. figure:: https://raw.githubusercontent.com/eeyhsong/EEG-Conformer/refs/heads/main/visualization/Fig1.png
18
20
  :align: center
19
21
  :alt: EEGConformer Architecture
22
+ :width: 600px
23
+
24
+
25
+ .. rubric:: Architectural Overview
26
+
27
+ EEG-Conformer is a *convolution-first* model augmented with a *lightweight transformer
28
+ encoder*. The end-to-end flow is:
29
+
30
+ - (i) :class:`_PatchEmbedding` converts the continuous EEG into a compact sequence of tokens via a :class:`ShallowFBCSPNet` temporal–spatial conv stem and temporal pooling;
31
+ - (ii) :class:`_TransformerEncoder` applies small multi-head self-attention to integrate longer-range temporal context across tokens;
32
+ - (iii) :class:`_ClassificationHead` aggregates the sequence and performs a linear readout.
33
+ This preserves the strong inductive biases of shallow CNN filter banks while adding
34
+ just enough attention to capture dependencies beyond the pooling horizon [song2022]_.
35
+
36
+ .. rubric:: Macro Components
37
+
38
+ - :class:`_PatchEmbedding` **(Shallow conv stem → tokens)**
39
+
40
+ - *Operations.*
41
+ - A temporal convolution (:class:`torch.nn.Conv2d`) ``(1 x L_t)`` forms a data-driven "filter bank";
42
+ - A spatial convolution (:class:`torch.nn.Conv2d`) ``(n_chans x 1)`` projects across electrodes, collapsing the channel axis into a virtual channel.
43
+ - **Normalization function** :class:`torch.nn.BatchNorm2d`
44
+ - **Activation function** :class:`torch.nn.ELU`
45
+ - **Average Pooling** :class:`torch.nn.AvgPool2d` along time (kernel ``(1, P)`` with stride ``(1, S)``)
46
+ - final ``1x1`` :class:`torch.nn.Linear` projection.
47
+
48
+ The result is rearranged to a token sequence ``(B, S_tokens, D)``, where ``D = n_filters_time``.
49
+
50
+ *Interpretability/robustness.* Temporal kernels can be inspected as FIR filters;
51
+ the spatial conv yields channel projections analogous to :class:`ShallowFBCSPNet`’s learned
52
+ spatial filters. Temporal pooling stabilizes statistics and reduces sequence length.
53
+
54
+ - :class:`_TransformerEncoder` **(context over temporal tokens)**
55
+
56
+ - *Operations.*
57
+ - A stack of ``att_depth`` encoder blocks (:class:`_TransformerEncoderBlock`).
58
+ - Each block applies LayerNorm :class:`torch.nn.LayerNorm`
59
+ - Multi-Head Self-Attention (``att_heads``) with dropout + residual :class:`MultiHeadAttention` (:class:`torch.nn.Dropout`)
60
+ - LayerNorm :class:`torch.nn.LayerNorm`
61
+ - 2-layer feed-forward (≈4x expansion, :class:`torch.nn.GELU`) with dropout + residual.
62
+
63
+ Shapes remain ``(B, S_tokens, D)`` throughout.
20
64
 
21
- Convolutional Transformer for EEG decoding.
65
+ *Role.* Small attention focuses on interactions among *temporal patches* (not channels),
66
+ extending effective receptive fields at modest cost.
22
67
 
23
- The paper and original code with more details about the methodological
24
- choices are available at the [song2022]_ and [ConformerCode]_.
68
+ - :class:`_ClassificationHead` **(aggregation + readout)**
25
69
 
26
- This neural network architecture receives a traditional braindecode input.
27
- The input shape should be three-dimensional matrix representing the EEG
28
- signals.
70
+ - *Operations*.
71
+ - Flatten (:class:`torch.nn.Flatten`) the sequence to ``(B, S_tokens·D)``
72
+ - MLP (:class:`torch.nn.Linear` → activation (default: :class:`torch.nn.ELU`) → :class:`torch.nn.Dropout` → :class:`torch.nn.Linear`)
73
+ - final Linear to classes.
29
74
 
30
- `(batch_size, n_channels, n_timesteps)`.
75
+ With ``return_features=True``, features before the last Linear can be exported for
76
+ linear probing or downstream tasks.
31
77
 
32
- The EEG Conformer architecture is composed of three modules:
33
- - PatchEmbedding
34
- - TransformerEncoder
35
- - ClassificationHead
78
+ .. rubric:: Convolutional Details
79
+
80
+ - **Temporal (where time-domain patterns are learned).**
81
+ The initial ``(1 x L_t)`` conv per channel acts as a *learned filter bank* for oscillatory
82
+ bands and transients. Subsequent **AvgPool** along time performs local integration,
83
+ converting activations into “patches” (tokens). Pool length/stride control the
84
+ token rate and set the lower bound on temporal context within each token.
85
+
86
+ - **Spatial (how electrodes are processed).**
87
+ A single conv with kernel ``(n_chans x 1)`` spans the full montage to learn spatial
88
+ projections for each temporal feature map, collapsing the channel axis into a
89
+ virtual channel before tokenization. This mirrors the shallow spatial step in
90
+ :class:`ShallowFBCSPNet` (temporal filters → spatial projection → temporal condensation).
91
+
92
+ - **Spectral (how frequency content is captured).**
93
+ No explicit Fourier/wavelet stage is used. Spectral selectivity emerges implicitly
94
+ from the learned temporal kernels; pooling further smooths high-frequency noise.
95
+ The effective spectral resolution is thus governed by ``L_t`` and the pooling
96
+ configuration.
97
+
98
+ .. rubric:: Attention / Sequential Modules
99
+
100
+ - **Type.** Standard multi-head self-attention (MHA) with ``att_heads`` heads over the token sequence.
101
+ - **Shapes.** Input/Output: ``(B, S_tokens, D)``; attention operates along the ``S_tokens`` axis.
102
+ - **Role.** Re-weights and integrates evidence across pooled windows, capturing dependencies
103
+ longer than any single token while leaving channel relationships to the convolutional stem.
104
+ The design is intentionally *small*—attention refines rather than replaces convolutional feature extraction.
105
+
106
+ .. rubric:: Additional Mechanisms
107
+
108
+ - **Parallel with ShallowFBCSPNet.** Both begin with a learned temporal filter bank,
109
+ spatial projection across electrodes, and early temporal condensation.
110
+ :class:`ShallowFBCSPNet` then computes band-power (via squaring/log-variance), whereas
111
+ EEG-Conformer applies BN/ELU and **continues with attention** over tokens to
112
+ refine temporal context before classification.
113
+
114
+ - **Tokenization knob.** ``pool_time_length`` and especially ``pool_time_stride`` set
115
+ the number of tokens ``S_tokens``. Smaller strides → more tokens and higher attention
116
+ capacity (but higher compute); larger strides → fewer tokens and stronger inductive bias.
117
+
118
+ - **Embedding dimension = filters.** ``n_filters_time`` serves double duty as both the
119
+ number of temporal filters in the stem and the transformer’s embedding size ``D``,
120
+ simplifying dimensional alignment.
121
+
122
+ .. rubric:: Usage and Configuration
123
+
124
+ - **Instantiation.** Choose ``n_filters_time`` (embedding size ``D``) and
125
+ ``filter_time_length`` to match the rhythms of interest. Tune
126
+ ``pool_time_length/stride`` to trade temporal resolution for sequence length.
127
+ Keep ``att_depth`` modest (e.g., 4–6) and set ``att_heads`` to divide ``D``.
128
+ ``final_fc_length="auto"`` infers the flattened size from PatchEmbedding.
36
129
 
37
130
  Notes
38
131
  -----
39
132
  The authors recommend using data augmentation before using Conformer,
40
133
  e.g. segmentation and recombination,
41
- Please refer to the original paper and code for more details.
134
+ Please refer to the original paper and code for more details [ConformerCode]_.
42
135
 
43
136
  The model was initially tuned on 4 seconds of 250 Hz data.
44
137
  Please adjust the scale of the temporal convolutional layer,
@@ -47,7 +140,10 @@ class EEGConformer(EEGModuleMixin, nn.Module):
47
140
  .. versionadded:: 0.8
48
141
 
49
142
  We aggregate the parameters based on the parts of the models, or
50
- when the parameters were used first, e.g. n_filters_time.
143
+ when the parameters were used first, e.g. ``n_filters_time``.
144
+
145
+ .. versionadded:: 1.1
146
+
51
147
 
52
148
  Parameters
53
149
  ----------
@@ -20,13 +20,60 @@ from braindecode.modules import (
20
20
 
21
21
 
22
22
  class EEGNetv4(EEGModuleMixin, nn.Sequential):
23
- """EEGNet v4 model from Lawhern et al. (2018) [EEGNet4]_.
23
+ """EEGNet v4 model from Lawhern et al. (2018) [Lawhern2018]_.
24
+
25
+ :bdg-success:`Convolution` :bdg-secondary:`Depthwise–Separable`
24
26
 
25
27
  .. figure:: https://content.cld.iop.org/journals/1741-2552/15/5/056013/revision2/jneaace8cf01_hr.jpg
26
28
  :align: center
27
- :alt: EEGNet4 Architecture
29
+ :alt: EEGNetv4 Architecture
30
+ :width: 600px
31
+
32
+ .. rubric:: Architectural Overview
33
+
34
+ EEGNetv4 is a compact convolutional network designed for EEG decoding with a
35
+ pipeline that mirrors classical EEG processing:
36
+ - (i) learn temporal frequency-selective filters,
37
+ - (ii) learn spatial filters for those frequencies, and
38
+ - (iii) condense features with depthwise–separable convolutions before a lightweight classifier.
39
+
40
+ The architecture is deliberately small, built around temporal convolutions and spatial filters [Lawhern2018]_.
41
+
42
+ .. rubric:: Macro Components
43
+
44
+ - **Temporal convolution.**
45
+ Temporal convolution applied per channel; learns ``F1`` kernels that act as data-driven band-pass filters.
46
+ - **Depthwise Spatial Filtering.**
47
+ Depthwise convolution spanning the channel dimension with ``groups = F1``,
48
+ yielding ``D`` spatial filters for each temporal filter (no cross-filter mixing).
49
+ - **Norm–Nonlinearity–Pooling (+ dropout).**
50
+ Batch normalization → ELU → temporal pooling, with dropout.
51
+ - **Depthwise–Separable Convolution Block.**
52
+ (a) depthwise temporal conv to refine temporal structure;
53
+ (b) pointwise 1x1 conv to mix feature maps into ``F2`` combinations.
54
+ - **Classifier Head.**
55
+ Lightweight 1x1 conv or dense layer (often with max-norm constraint).
28
56
 
29
- See details in [EEGNet4]_.
57
+ .. rubric:: Convolutional Details
58
+
59
+ **Temporal.** The initial temporal convs serve as a *learned filter bank*:
60
+ long 1-D kernels (implemented as 2-D with singleton spatial extent) emphasize oscillatory bands and transients.
61
+ Because this stage is linear prior to BN/ELU, kernels can be analyzed as FIR filters to reveal each feature’s spectrum [Lawhern2018]_.
62
+
63
+ **Spatial.** The depthwise spatial conv spans the full channel axis (kernel height = #electrodes; temporal size = 1).
64
+ With ``groups = F1``, each temporal filter learns its own set of ``D`` spatial projections—akin to CSP, learned end-to-end and
65
+ typically regularized with max-norm.
66
+
67
+ **Spectral.** No explicit Fourier/wavelet transform is used. Frequency structure
68
+ is captured implicitly by the temporal filter bank; later depthwise temporal kernels act as short-time integrators/refiners.
69
+
70
+ .. rubric:: Additional Comments
71
+
72
+ - **Filter-bank structure:** Parallel temporal kernels (``F1``) emulate classical filter banks; pairing them with frequency-specific spatial filters
73
+ yields features mappable to rhythms and topographies.
74
+ - **Depthwise & separable convs:** Parameter-efficient decomposition (depthwise + pointwise) retains power while limiting overfitting
75
+ [Chollet2017]_ and keeps temporal vs. mixing steps interpretable.
76
+ - **Regularization:** Batch norm, dropout, pooling, and optional max-norm on spatial kernels aid stability on small EEG datasets.
30
77
 
31
78
  Parameters
32
79
  ----------
@@ -68,10 +115,13 @@ class EEGNetv4(EEGModuleMixin, nn.Sequential):
68
115
 
69
116
  References
70
117
  ----------
71
- .. [EEGNet4] Lawhern, V. J., Solon, A. J., Waytowich, N. R., Gordon, S. M.,
118
+ .. [Lawhern2018] Lawhern, V. J., Solon, A. J., Waytowich, N. R., Gordon, S. M.,
72
119
  Hung, C. P., & Lance, B. J. (2018). EEGNet: a compact convolutional
73
120
  neural network for EEG-based brain–computer interfaces. Journal of
74
121
  neural engineering, 15(5), 056013.
122
+ .. [Chollet2017] Chollet, F., *Xception: Deep Learning with Depthwise Separable
123
+ Convolutions*, CVPR, 2017.
124
+
75
125
  """
76
126
 
77
127
  def __init__(
@@ -157,7 +157,8 @@ class FCA(nn.Module):
157
157
  ):
158
158
  super(FCA, self).__init__()
159
159
  mapper_y = [freq_idx]
160
- assert in_channels % len(mapper_y) == 0
160
+ if in_channels % len(mapper_y) != 0:
161
+ raise ValueError("in_channels must be divisible by number of DCT filters")
161
162
 
162
163
  self.weight = nn.Parameter(
163
164
  self.get_dct_filter(seq_len, mapper_y, in_channels), requires_grad=False
@@ -295,7 +296,8 @@ class ECA(nn.Module):
295
296
  def __init__(self, in_channels: int, kernel_size: int):
296
297
  super(ECA, self).__init__()
297
298
  self.gap = nn.AdaptiveAvgPool2d(1)
298
- assert kernel_size % 2 == 1, "kernel size must be odd for same padding"
299
+ if kernel_size % 2 != 1:
300
+ raise ValueError("kernel size must be odd for same padding")
299
301
  self.conv = nn.Conv1d(
300
302
  1, 1, kernel_size=kernel_size, padding=kernel_size // 2, bias=False
301
303
  )
@@ -530,7 +532,8 @@ class CBAM(nn.Module):
530
532
  nn.ReLU(),
531
533
  nn.Conv2d(in_channels // reduction_rate, in_channels, 1, bias=False),
532
534
  )
533
- assert kernel_size % 2 == 1, "kernel size must be odd for same padding"
535
+ if kernel_size % 2 != 1:
536
+ raise ValueError("kernel size must be odd for same padding")
534
537
  self.conv = nn.Conv2d(2, 1, (1, kernel_size), padding=(0, kernel_size // 2))
535
538
 
536
539
  def forward(self, x):
@@ -136,7 +136,8 @@ class CombinedConv(nn.Module):
136
136
  # Calculate bias terms
137
137
  if self.bias_time:
138
138
  time_bias = self.conv_time.bias
139
- assert time_bias is not None
139
+ if time_bias is None:
140
+ raise RuntimeError("conv_time.bias is None despite bias_time=True")
140
141
  calculated_bias = (
141
142
  self.conv_spat.weight.squeeze()
142
143
  .sum(-1)
@@ -145,7 +146,8 @@ class CombinedConv(nn.Module):
145
146
  )
146
147
  if self.bias_spat:
147
148
  spat_bias = self.conv_spat.bias
148
- assert spat_bias is not None
149
+ if spat_bias is None:
150
+ raise RuntimeError("conv_spat.bias is None despite bias_spat=True")
149
151
  if calculated_bias is None:
150
152
  calculated_bias = spat_bias
151
153
  else:
@@ -190,11 +192,12 @@ class CausalConv1d(nn.Conv1d):
190
192
  dilation=1,
191
193
  **kwargs,
192
194
  ):
193
- assert "padding" not in kwargs, (
194
- "The padding parameter is controlled internally by "
195
- f"{type(self).__name__} class. You should not try to override this"
196
- " parameter."
197
- )
195
+ if "padding" in kwargs:
196
+ raise ValueError(
197
+ "The padding parameter is controlled internally by "
198
+ f"{type(self).__name__} class. You should not try to override this"
199
+ " parameter."
200
+ )
198
201
 
199
202
  super().__init__(
200
203
  in_channels=in_channels,
@@ -452,12 +452,14 @@ class GeneralizedGaussianFilter(nn.Module):
452
452
  self.inverse_fourier = inverse_fourier
453
453
  self.affine_group_delay = affine_group_delay
454
454
  self.clamp_f_mean = clamp_f_mean
455
- assert out_channels % in_channels == 0, (
456
- "out_channels has to be multiple of in_channels"
457
- )
458
- assert len(f_mean) * in_channels == out_channels
459
- assert len(bandwidth) * in_channels == out_channels
460
- assert len(shape) * in_channels == out_channels
455
+ if out_channels % in_channels != 0:
456
+ raise ValueError("out_channels has to be multiple of in_channels")
457
+ if len(f_mean) * in_channels != out_channels:
458
+ raise ValueError("len(f_mean) * in_channels must equal out_channels")
459
+ if len(bandwidth) * in_channels != out_channels:
460
+ raise ValueError("len(bandwidth) * in_channels must equal out_channels")
461
+ if len(shape) * in_channels != out_channels:
462
+ raise ValueError("len(shape) * in_channels must equal out_channels")
461
463
 
462
464
  # Range from 0 to half sample rate, normalized
463
465
  self.n_range = nn.Parameter(
@@ -11,7 +11,6 @@ from contextlib import contextmanager
11
11
 
12
12
  import numpy as np
13
13
  import torch
14
- from mne.utils.check import check_version
15
14
  from skorch.callbacks.scoring import EpochScoring
16
15
  from skorch.dataset import unpack_data
17
16
  from skorch.utils import to_numpy
@@ -370,13 +369,8 @@ class PostEpochTrainScoring(EpochScoring):
370
369
  y_preds = []
371
370
  y_test = []
372
371
  for batch in iterator:
373
- batch_X, batch_y = unpack_data(batch)
374
- # TODO: remove after skorch 0.10 release
375
- if not check_version("skorch", min_version="0.10.1"):
376
- yp = net.evaluation_step(batch_X, training=False)
377
- # X, y unpacking has been pushed downstream in skorch 0.10
378
- else:
379
- yp = net.evaluation_step(batch, training=False)
372
+ _, batch_y = unpack_data(batch)
373
+ yp = net.evaluation_step(batch, training=False)
380
374
  yp = yp.to(device="cpu")
381
375
  y_test.append(self.target_extractor(batch_y))
382
376
  y_preds.append(yp)
@@ -0,0 +1 @@
1
+ __version__ = "1.2.0.dev175337561"
@@ -6,8 +6,13 @@ import numpy as np
6
6
  import torch
7
7
  from skorch.utils import to_numpy, to_tensor
8
8
 
9
+ from braindecode.util import set_random_seeds
9
10
 
10
- def compute_amplitude_gradients(model, dataset, batch_size):
11
+
12
+ def compute_amplitude_gradients(model, dataset, batch_size, seed=20240205):
13
+ """Compute amplitude gradients after seeding for reproducibility."""
14
+ cuda = next(model.parameters()).is_cuda
15
+ set_random_seeds(seed=seed, cuda=cuda)
11
16
  loader = torch.utils.data.DataLoader(
12
17
  dataset, batch_size=batch_size, drop_last=False, shuffle=False
13
18
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: braindecode
3
- Version: 1.2.0.dev169062562
3
+ Version: 1.2.0.dev175337561
4
4
  Summary: Deep learning software to decode EEG, ECG or MEG signals
5
5
  Author-email: Robin Tibor Schirrmeister <robintibor@gmail.com>
6
6
  Maintainer-email: Alexandre Gramfort <agramfort@meta.com>, Bruno Aristimunha Pinto <b.aristimunha@gmail.com>, Robin Tibor Schirrmeister <robintibor@gmail.com>
@@ -166,6 +166,9 @@ templates_path = ["_templates"]
166
166
  # source_suffix = ['.rst', '.md']
167
167
  source_suffix = ".rst"
168
168
 
169
+ rst_prolog = """
170
+ .. role:: tag(bdg-success)
171
+ """
169
172
  # The master toctree document.
170
173
  master_doc = "index"
171
174
 
@@ -22,6 +22,14 @@ Current 1.2 (dev)
22
22
 
23
23
  Enhancements
24
24
  ~~~~~~~~~~~~
25
+ - Improving the docstring for :class:`braindecode.models.EEGNetv4` (:gh:`768` by `Bruno Aristimunha`_)
26
+ - Improving the docstring for :class:`braindecode.models.EEGConformer` (:gh:`769` by `Bruno Aristimunha`_)
27
+ - Improving the docstring for :class:`braindecode.models.ATCNet` (:gh:`771` by `Bruno Aristimunha`_)
28
+ - Improving the docstring for :class:`braindecode.models.AttentionBaseNet` (:gh:`772` by `Bruno Aristimunha`_)
29
+
30
+
31
+ API changes
32
+ ~~~~~~~~~~~
25
33
 
26
34
  Bugs
27
35
  ~~~~
@@ -1 +0,0 @@
1
- __version__ = "1.2.0.dev169062562"