braindecode 1.2.0.dev182094932__py3-none-any.whl → 1.3.0.dev176728557__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of braindecode might be problematic. Click here for more details.

Files changed (34) hide show
  1. braindecode/datasets/experimental.py +218 -0
  2. braindecode/models/__init__.py +6 -8
  3. braindecode/models/atcnet.py +152 -12
  4. braindecode/models/attentionbasenet.py +151 -26
  5. braindecode/models/{sleep_stager_eldele_2021.py → attn_sleep.py} +12 -2
  6. braindecode/models/ctnet.py +1 -1
  7. braindecode/models/deep4.py +6 -2
  8. braindecode/models/deepsleepnet.py +118 -5
  9. braindecode/models/eegconformer.py +114 -15
  10. braindecode/models/eeginception_erp.py +76 -7
  11. braindecode/models/eeginception_mi.py +2 -0
  12. braindecode/models/eegnet.py +25 -189
  13. braindecode/models/eegnex.py +113 -6
  14. braindecode/models/eegsimpleconv.py +2 -0
  15. braindecode/models/eegtcnet.py +1 -1
  16. braindecode/models/sccnet.py +81 -8
  17. braindecode/models/shallow_fbcsp.py +2 -0
  18. braindecode/models/sleep_stager_blanco_2020.py +2 -0
  19. braindecode/models/sleep_stager_chambon_2018.py +2 -0
  20. braindecode/models/sparcnet.py +2 -0
  21. braindecode/models/summary.csv +39 -41
  22. braindecode/models/tidnet.py +2 -0
  23. braindecode/models/tsinception.py +15 -3
  24. braindecode/models/usleep.py +103 -9
  25. braindecode/models/util.py +5 -5
  26. braindecode/preprocessing/preprocess.py +20 -26
  27. braindecode/version.py +1 -1
  28. {braindecode-1.2.0.dev182094932.dist-info → braindecode-1.3.0.dev176728557.dist-info}/METADATA +7 -2
  29. {braindecode-1.2.0.dev182094932.dist-info → braindecode-1.3.0.dev176728557.dist-info}/RECORD +33 -33
  30. braindecode/models/eegresnet.py +0 -362
  31. {braindecode-1.2.0.dev182094932.dist-info → braindecode-1.3.0.dev176728557.dist-info}/WHEEL +0 -0
  32. {braindecode-1.2.0.dev182094932.dist-info → braindecode-1.3.0.dev176728557.dist-info}/licenses/LICENSE.txt +0 -0
  33. {braindecode-1.2.0.dev182094932.dist-info → braindecode-1.3.0.dev176728557.dist-info}/licenses/NOTICE.txt +0 -0
  34. {braindecode-1.2.0.dev182094932.dist-info → braindecode-1.3.0.dev176728557.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,218 @@
1
+ from __future__ import annotations
2
+
3
+ import random
4
+ from pathlib import Path
5
+ from typing import Callable, Sequence
6
+
7
+ import mne_bids
8
+ from torch.utils.data import IterableDataset, get_worker_info
9
+
10
+
11
class BIDSIterableDataset(IterableDataset):
    """Iterable dataset that streams samples from the recordings of a BIDS dataset.

    .. warning::
        This class is experimental and may change in the future.

    .. warning::
        This dataset is not consistent with the Braindecode API.

    Recordings are visited in random order. At most ``pool_size`` recordings are
    kept open at any time; every yielded sample comes from a recording picked
    uniformly at random in that pool, and the samples of a recording are drawn
    without replacement. A recording is dropped from the pool once all of its
    samples were yielded, and the next unread recording takes its place.

    When iterated inside a :class:`torch.utils.data.DataLoader` worker, each
    worker only reads its own interleaved share of ``bids_paths``.

    This class has the same parameters as the :func:`mne_bids.find_matching_paths`
    function as it will be used to find the files to load. The default
    ``extensions`` parameter was changed.

    More information on BIDS (Brain Imaging Data Structure)
    can be found at https://bids.neuroimaging.io

    Examples
    --------
    >>> import mne_bids
    >>> from braindecode.datasets import BaseDataset, BaseConcatDataset
    >>> from braindecode.datasets.bids import _description_from_bids_path
    >>> from braindecode.datasets.experimental import BIDSIterableDataset
    >>> from braindecode.preprocessing import create_fixed_length_windows
    >>>
    >>> def my_reader_fn(path):
    ...     raw = mne_bids.read_raw_bids(path)
    ...     desc = _description_from_bids_path(path)
    ...     ds = BaseDataset(raw, description=desc)
    ...     windows_ds = create_fixed_length_windows(
    ...         BaseConcatDataset([ds]),
    ...         window_size_samples=400,
    ...         window_stride_samples=200,
    ...     )
    ...     return windows_ds
    >>>
    >>> dataset = BIDSIterableDataset(
    ...     reader_fn=my_reader_fn,
    ...     root="root/of/my/bids/dataset/",
    ... )

    Parameters
    ----------
    reader_fn : Callable[[mne_bids.BIDSPath], Sequence]
        A function that takes a BIDSPath and returns a dataset, i.e. an object
        supporting ``len()`` and integer indexing. It may return ``None`` to
        signal that the recording must be skipped.
    pool_size : int
        The number of recordings to read and sample from. Must be at least 1.
    bids_paths : list[mne_bids.BIDSPath] | None
        A list of BIDSPaths to load. If None, will use the paths found by
        :func:`mne_bids.find_matching_paths` and the arguments below.
    root : pathlib.Path | str
        The root of the BIDS path.
    subjects : str | array-like of str | None
        The subject ID. Corresponds to "sub".
    sessions : str | array-like of str | None
        The acquisition session. Corresponds to "ses".
    tasks : str | array-like of str | None
        The experimental task. Corresponds to "task".
    acquisitions: str | array-like of str | None
        The acquisition parameters. Corresponds to "acq".
    runs : str | array-like of str | None
        The run number. Corresponds to "run".
    processings : str | array-like of str | None
        The processing label. Corresponds to "proc".
    recordings : str | array-like of str | None
        The recording name. Corresponds to "rec".
    spaces : str | array-like of str | None
        The coordinate space for anatomical and sensor location
        files (e.g., ``*_electrodes.tsv``, ``*_markers.mrk``).
        Corresponds to "space".
        Note that valid values for ``space`` must come from a list
        of BIDS keywords as described in the BIDS specification.
    splits : str | array-like of str | None
        The split of the continuous recording file for ``.fif`` data.
        Corresponds to "split".
    descriptions : str | array-like of str | None
        This corresponds to the BIDS entity ``desc``. It is used to provide
        additional information for derivative data, e.g., preprocessed data
        may be assigned ``description='cleaned'``.
    suffixes : str | array-like of str | None
        The filename suffix. This is the entity after the
        last ``_`` before the extension. E.g., ``'channels'``.
        The following filename suffixes are accepted:
        'meg', 'markers', 'eeg', 'ieeg', 'T1w',
        'participants', 'scans', 'electrodes', 'coordsystem',
        'channels', 'events', 'headshape', 'digitizer',
        'beh', 'physio', 'stim'
    extensions : str | array-like of str | None
        The extension of the filename. E.g., ``'.json'``.
        By default, uses the ones accepted by :func:`mne_bids.read_raw_bids`.
    datatypes : str | array-like of str | None
        The BIDS data type, e.g., ``'anat'``, ``'func'``, ``'eeg'``, ``'meg'``,
        ``'ieeg'``.
    check : bool
        If ``True``, only returns paths that conform to BIDS. If ``False``
        (default), the ``.check`` attribute of the returned
        :class:`mne_bids.BIDSPath` object will be set to ``True`` for paths that
        do conform to BIDS, and to ``False`` for those that don't.

    Raises
    ------
    ValueError
        If ``pool_size`` is smaller than 1.
    """

    # Immutable on purpose: it is used as a signature default, and a shared
    # mutable default could be altered between calls.
    _DEFAULT_EXTENSIONS = (
        ".con",
        ".sqd",
        ".pdf",
        ".fif",
        ".ds",
        ".vhdr",
        ".set",
        ".edf",
        ".bdf",
        ".EDF",
        ".snirf",
        ".cdt",
        ".mef",
        ".nwb",
    )

    def __init__(
        self,
        reader_fn: Callable[[mne_bids.BIDSPath], Sequence],
        pool_size: int = 4,
        bids_paths: list[mne_bids.BIDSPath] | None = None,
        root: Path | str | None = None,
        subjects: str | list[str] | None = None,
        sessions: str | list[str] | None = None,
        tasks: str | list[str] | None = None,
        acquisitions: str | list[str] | None = None,
        runs: str | list[str] | None = None,
        processings: str | list[str] | None = None,
        recordings: str | list[str] | None = None,
        spaces: str | list[str] | None = None,
        splits: str | list[str] | None = None,
        descriptions: str | list[str] | None = None,
        suffixes: str | list[str] | None = None,
        extensions: str | Sequence[str] | None = _DEFAULT_EXTENSIONS,
        datatypes: str | list[str] | None = None,
        check: bool = False,
    ):
        if pool_size < 1:
            raise ValueError(f"pool_size must be at least 1, got {pool_size}.")

        if bids_paths is None:
            # Hand mne_bids the same kind of object as before (a list), whatever
            # sequence type the caller or the default provides.
            if extensions is not None and not isinstance(extensions, str):
                extensions = list(extensions)
            bids_paths = mne_bids.find_matching_paths(
                root=root,
                subjects=subjects,
                sessions=sessions,
                tasks=tasks,
                acquisitions=acquisitions,
                runs=runs,
                processings=processings,
                recordings=recordings,
                spaces=spaces,
                splits=splits,
                descriptions=descriptions,
                suffixes=suffixes,
                extensions=extensions,
                datatypes=datatypes,
                check=check,
                ignore_json=True,
            )
            # Filter out _epo.fif files:
            bids_paths = [
                bids_path
                for bids_path in bids_paths
                if not (bids_path.suffix == "epo" and bids_path.extension == ".fif")
            ]
        # Own a private copy so that ``+=`` never mutates a list that the caller
        # still holds a reference to.
        self.bids_paths = list(bids_paths)
        self.reader_fn = reader_fn
        self.pool_size = pool_size

    def __add__(self, other):
        """Return a new dataset reading the paths of ``self`` then ``other``.

        The result reuses the ``reader_fn`` and ``pool_size`` of ``self``.
        """
        assert isinstance(other, BIDSIterableDataset)
        return BIDSIterableDataset(
            reader_fn=self.reader_fn,
            bids_paths=self.bids_paths + other.bids_paths,
            pool_size=self.pool_size,
        )

    def __iadd__(self, other):
        """Append the paths of ``other`` to this dataset, in place."""
        assert isinstance(other, BIDSIterableDataset)
        self.bids_paths += other.bids_paths
        return self

    def __iter__(self):
        """Yield samples drawn at random from a rolling pool of recordings.

        Yields
        ------
        object
            Whatever indexing a dataset returned by ``reader_fn`` produces.
        """
        worker_info = get_worker_info()
        if worker_info is None:  # single-process data loading, return the full iterator
            bids_paths = self.bids_paths
        else:  # in a worker process
            # split workload
            bids_paths = self.bids_paths[worker_info.id :: worker_info.num_workers]

        pool = []
        exhausted = False
        paths_it = iter(random.sample(bids_paths, k=len(bids_paths)))
        while True:
            # Top the pool up with unread recordings.
            while not exhausted and len(pool) < self.pool_size:
                # Only ``next`` is guarded: a StopIteration escaping from
                # ``reader_fn`` must not be mistaken for "no more recordings".
                try:
                    bids_path = next(paths_it)
                except StopIteration:
                    exhausted = True
                    break
                ds = self.reader_fn(bids_path)
                if ds is None:
                    print(f"Skipping {bids_path} as it is too short.")
                    continue
                order = iter(random.sample(range(len(ds)), k=len(ds)))
                pool.append((ds, order))

            # Nothing left to sample from. Checking this *after* the refill is
            # what prevents ``random.randint(0, -1)`` when the last pooled
            # recording and the path iterator run out in the same pass
            # (e.g. ``pool_size=1``, no paths at all, or only skipped recordings).
            if not pool:
                return

            i_pool = random.randint(0, len(pool) - 1)
            ds, order = pool[i_pool]
            try:
                i_ds = next(order)
            except StopIteration:
                # Every sample of this recording was yielded: free its slot.
                pool.pop(i_pool)
                continue
            yield ds[i_ds]
@@ -4,6 +4,7 @@ Some predefined network architectures for EEG decoding.
4
4
 
5
5
  from .atcnet import ATCNet
6
6
  from .attentionbasenet import AttentionBaseNet
7
+ from .attn_sleep import AttnSleep
7
8
  from .base import EEGModuleMixin
8
9
  from .biot import BIOT
9
10
  from .contrawr import ContraWR
@@ -15,9 +16,8 @@ from .eeginception_erp import EEGInceptionERP
15
16
  from .eeginception_mi import EEGInceptionMI
16
17
  from .eegitnet import EEGITNet
17
18
  from .eegminer import EEGMiner
18
- from .eegnet import EEGNetv1, EEGNetv4
19
+ from .eegnet import EEGNet, EEGNetv4
19
20
  from .eegnex import EEGNeX
20
- from .eegresnet import EEGResNet
21
21
  from .eegsimpleconv import EEGSimpleConv
22
22
  from .eegtcnet import EEGTCNet
23
23
  from .fbcnet import FBCNet
@@ -38,12 +38,11 @@ from .signal_jepa import (
38
38
  from .sinc_shallow import SincShallowNet
39
39
  from .sleep_stager_blanco_2020 import SleepStagerBlanco2020
40
40
  from .sleep_stager_chambon_2018 import SleepStagerChambon2018
41
- from .sleep_stager_eldele_2021 import SleepStagerEldele2021
42
41
  from .sparcnet import SPARCNet
43
42
  from .syncnet import SyncNet
44
43
  from .tcn import BDTCN, TCN
45
44
  from .tidnet import TIDNet
46
- from .tsinception import TSceptionV1
45
+ from .tsinception import TSception
47
46
  from .usleep import USleep
48
47
  from .util import _init_models_dict, models_mandatory_parameters
49
48
 
@@ -53,6 +52,7 @@ _init_models_dict()
53
52
 
54
53
  __all__ = [
55
54
  "ATCNet",
55
+ "AttnSleep",
56
56
  "AttentionBaseNet",
57
57
  "EEGModuleMixin",
58
58
  "BIOT",
@@ -65,10 +65,9 @@ __all__ = [
65
65
  "EEGInceptionMI",
66
66
  "EEGITNet",
67
67
  "EEGMiner",
68
- "EEGNetv1",
68
+ "EEGNet",
69
69
  "EEGNetv4",
70
70
  "EEGNeX",
71
- "EEGResNet",
72
71
  "EEGSimpleConv",
73
72
  "EEGTCNet",
74
73
  "FBCNet",
@@ -87,13 +86,12 @@ __all__ = [
87
86
  "SincShallowNet",
88
87
  "SleepStagerBlanco2020",
89
88
  "SleepStagerChambon2018",
90
- "SleepStagerEldele2021",
91
89
  "SPARCNet",
92
90
  "SyncNet",
93
91
  "BDTCN",
94
92
  "TCN",
95
93
  "TIDNet",
96
- "TSceptionV1",
94
+ "TSception",
97
95
  "USleep",
98
96
  "_init_models_dict",
99
97
  "models_mandatory_parameters",
@@ -13,13 +13,154 @@ from braindecode.modules import CausalConv1d, Ensure4d, MaxNormLinear
13
13
 
14
14
 
15
15
  class ATCNet(EEGModuleMixin, nn.Module):
16
- """ATCNet model from Altaheri et al. (2022) [1]_
16
+ """ATCNet from Altaheri et al. (2022) [1]_.
17
17
 
18
- Pytorch implementation based on official tensorflow code [2]_.
18
+ :bdg-success:`Convolution` :bdg-info:`Small Attention`
19
19
 
20
20
  .. figure:: https://user-images.githubusercontent.com/25565236/185449791-e8539453-d4fa-41e1-865a-2cf7e91f60ef.png
21
- :align: center
22
- :alt: ATCNet Architecture
21
+ :align: center
22
+ :alt: ATCNet Architecture
23
+ :width: 650px
24
+
25
+ .. rubric:: Architectural Overview
26
+
27
+ ATCNet is a *convolution-first* architecture augmented with a *lightweight attention–TCN*
28
+ sequence module. The end-to-end flow is:
29
+
30
+ - (i) :class:`_ConvBlock` learns temporal filter-banks and spatial projections (EEGNet-style),
31
+ downsampling time to a compact feature map;
32
+
33
+ - (ii) Sliding Windows carve overlapping temporal windows from this map;
34
+
35
+ - (iii) for each window, :class:`_AttentionBlock` applies small multi-head self-attention
36
+ over time, followed by a :class:`_TCNResidualBlock` stack (causal, dilated);
37
+
38
+ - (iv) window-level features are aggregated (mean of window logits or concatenation)
39
+ and mapped via a max-norm–constrained linear layer.
40
+
41
+ Relative to ViT, ATCNet replaces linear patch projection with learned *temporal–spatial*
42
+ convolutions; it processes *parallel* window encoders (attention→TCN) instead of a deep
43
+ stack; and swaps the MLP head for a TCN suited to 1-D EEG sequences.
44
+
45
+ .. rubric:: Macro Components
46
+
47
+ - :class:`_ConvBlock` **(Shallow conv stem → feature map)**
48
+
49
+ - *Operations.*
50
+ - **Temporal conv** (:class:`torch.nn.Conv2d`) with kernel ``(L_t, 1)`` builds a
51
+ FIR-like filter bank (``F1`` maps).
52
+ - **Depthwise spatial conv** (:class:`torch.nn.Conv2d`, ``groups=F1``) with kernel
53
+ ``(1, n_chans)`` learns per-filter spatial projections (akin to EEGNet’s CSP-like step).
54
+ - **BN → ELU → AvgPool → Dropout** to stabilize and condense activations.
55
+ - **Refining temporal conv** (:class:`torch.nn.Conv2d`) with kernel ``(L_r, 1)`` +
56
+ **BN → ELU → AvgPool → Dropout**.
57
+
58
+ The output shape is ``(B, F2, T_c, 1)`` with ``F2 = F1·D`` and ``T_c = T/(P1·P2)``.
59
+ Temporal kernels behave as FIR filters; the depthwise-spatial conv yields frequency-specific
60
+ topographies. Pooling acts as a local integrator, reducing variance and imposing a
61
+ useful inductive bias on short EEG windows.
62
+
63
+ - **Sliding-Window Sequencer**
64
+
65
+ From the condensed time axis (length ``T_c``), ATCNet forms ``n`` overlapping windows
66
+ of width ``T_w = T_c - n + 1`` (one start per index). Each window produces a sequence
67
+ ``(B, F2, T_w)`` forwarded to its own attention–TCN branch. This creates *parallel*
68
+ encoders over shifted contexts and is key to robustness on nonstationary EEG.
69
+
70
+ - :class:`_AttentionBlock` **(small MHA on temporal positions)**
71
+
72
+ - *Operations.*
73
+ - Rearrange to ``(B, T_w, F2)``,
74
+ - Normalization :class:`torch.nn.LayerNorm`
75
+ - Custom MultiHeadAttention :class:`_MHA` (``num_heads=H``, per-head dim ``d_h``) + residual add,
76
+ - Dropout :class:`torch.nn.Dropout`
77
+ - Rearrange back to ``(B, F2, T_w)``.
78
+
79
+
80
+ **Note**: Attention is *local to a window* and purely temporal.
81
+
82
+ *Role.* Re-weights evidence across the window, letting the model emphasize informative
83
+ segments (onsets, bursts) before causal convolutions aggregate history.
84
+
85
+ - :class:`_TCNResidualBlock` **(causal dilated temporal CNN)**
86
+
87
+ - *Operations.*
88
+ - Two :class:`braindecode.modules.CausalConv1d` layers per block with dilation ``1, 2, 4, …``
89
+ - Across blocks: (`torch.nn.ELU` + `torch.nn.BatchNorm1d` + `torch.nn.Dropout`) +
90
+ a residual (identity or 1x1 mapping).
91
+ - The final feature used per window is the *last* causal step ``[..., -1]`` (forecast-style).
92
+
93
+ *Role.* Efficient long-range temporal integration with stable gradients; the dilated
94
+ receptive field complements attention’s soft selection.
95
+
96
+ - **Aggregation & Classifier**
97
+
98
+ - *Operations.*
99
+ - Either (a) map each window feature ``(B, F2)`` to logits via :class:`braindecode.modules.MaxNormLinear`
100
+ and **average** across windows (default, matching official code), or
101
+ - (b) **concatenate** all window features ``(B, n·F2)`` and apply a single :class:`MaxNormLinear`.
102
+ The max-norm constraint regularizes the readout.
103
+
104
+ .. rubric:: Convolutional Details
105
+
106
+ - **Temporal.** Temporal structure is learned in three places:
107
+ - (1) the stem’s wide ``(L_t, 1)`` conv (learned filter bank),
108
+ - (2) the refining ``(L_r, 1)`` conv after pooling (short-term dynamics), and
109
+ - (3) the TCN’s causal 1-D convolutions with exponentially increasing dilation
110
+ (long-range dependencies). The minimum sequence length required by the TCN stack is
111
+ ``(K_t - 1)·2^{L-1} + 1``; the implementation *auto-scales* kernels/pools/windows
112
+ when inputs are shorter to preserve feasibility.
113
+
114
+ - **Spatial.** A depthwise spatial conv spans the **full montage** (kernel ``(1, n_chans)``),
115
+ producing *per-temporal-filter* spatial projections (no cross-filter mixing at this step).
116
+ This mirrors EEGNet’s interpretability: each temporal filter has its own spatial pattern.
117
+
118
+
119
+ .. rubric:: Attention / Sequential Modules
120
+
121
+ - **Type.** Multi-head self-attention with ``H`` heads and per-head dim ``d_h`` implemented
122
+ in :class:`_MHA`, allowing ``embed_dim = H·d_h`` independent of input and output dims.
123
+ - **Shapes.** ``(B, F2, T_w) → (B, T_w, F2) → (B, F2, T_w)``. Attention operates along
124
+ the **temporal** axis within a window; channels/features stay in the embedding dim ``F2``.
125
+ - **Role.** Highlights salient temporal positions prior to causal convolution; small attention
126
+ keeps compute modest while improving context modeling over pooled features.
127
+
128
+ .. rubric:: Additional Mechanisms
129
+
130
+ - **Parallel encoders over shifted windows.** Improves montage/phase robustness by
131
+ ensembling nearby contexts rather than committing to a single segmentation.
132
+ - **Max-norm classifier.** Enforces weight norm constraints at the readout, a common
133
+ stabilization trick in EEG decoding.
134
+ - **ViT vs. ATCNet (design choices).** Convolutional *nonlinear* projection rather than
135
+ linear patchification; attention followed by **TCN** (not MLP); *parallel* window
136
+ encoders rather than stacked encoders.
137
+
138
+ .. rubric:: Usage and Configuration
139
+
140
+ - ``conv_block_n_filters (F1)``, ``conv_block_depth_mult (D)`` → capacity of the stem
141
+ (with ``F2 = F1·D`` feeding attention/TCN), dimensions aligned to ``F2``, like :class:`EEGNet`.
142
+ - Pool sizes ``P1,P2`` trade temporal resolution for stability/compute; they set
143
+ ``T_c = T/(P1·P2)`` and thus window width ``T_w``.
144
+ - ``n_windows`` controls the ensemble over shifts (compute ∝ windows).
145
+ - ``att_num_heads``, ``att_head_dim`` set attention capacity; keep ``H·d_h ≈ F2``.
146
+ - ``tcn_depth``, ``tcn_kernel_size`` govern receptive field; larger values demand
147
+ longer inputs (see minimum length above). The implementation warns and *rescales*
148
+ kernels/pools/windows if inputs are too short.
149
+ - **Aggregation choice.** ``concat=False`` (default, average of per-window logits) matches
150
+ the official code; ``concat=True`` mirrors the paper’s concatenation variant.
151
+
152
+
153
+ Notes
154
+ -----
155
+ - Inputs substantially shorter than the implied minimum length trigger **automatic
156
+ downscaling** of kernels, pools, windows, and TCN kernel size to maintain validity.
157
+ - The attention–TCN sequence operates **per window**; the last causal step is used as the
158
+ window feature, aligning the temporal semantics across windows.
159
+
160
+ .. versionadded:: 1.1
161
+
162
+ - More detailed documentation of the model.
163
+
23
164
 
24
165
  Parameters
25
166
  ----------
@@ -85,15 +226,13 @@ class ATCNet(EEGModuleMixin, nn.Module):
85
226
  Maximum L2-norm constraint imposed on weights of the last
86
227
  fully-connected layer. Defaults to 0.25.
87
228
 
88
-
89
229
  References
90
230
  ----------
91
- .. [1] H. Altaheri, G. Muhammad and M. Alsulaiman,
92
- Physics-informed attention temporal convolutional network for EEG-based
93
- motor imagery classification in IEEE Transactions on Industrial Informatics,
94
- 2022, doi: 10.1109/TII.2022.3197419.
95
- .. [2] EEE-ATCNet implementation.
96
- https://github.com/Altaheri/EEG-ATCNet/blob/main/models.py
231
+ .. [1] H. Altaheri, G. Muhammad, M. Alsulaiman (2022).
232
+ *Physics-informed attention temporal convolutional network for EEG-based motor imagery classification.*
233
+ IEEE Transactions on Industrial Informatics. doi:10.1109/TII.2022.3197419.
234
+ .. [2] Official EEG-ATCNet implementation (TensorFlow):
235
+ https://github.com/Altaheri/EEG-ATCNet/blob/main/models.py
97
236
  """
98
237
 
99
238
  def __init__(
@@ -556,7 +695,8 @@ class _TCNResidualBlock(nn.Module):
556
695
  # Reshape the input for the residual connection when necessary
557
696
  if in_channels != n_filters:
558
697
  self.reshaping_conv = nn.Conv1d(
559
- n_filters,
698
+ in_channels=in_channels,
699
+ out_channels=n_filters,
560
700
  kernel_size=1,
561
701
  padding="same",
562
702
  )
@@ -26,25 +26,150 @@ from braindecode.modules.attention import (
26
26
  class AttentionBaseNet(EEGModuleMixin, nn.Module):
27
27
  """AttentionBaseNet from Wimpff M et al. (2023) [Martin2023]_.
28
28
 
29
+ :bdg-success:`Convolution` :bdg-info:`Small Attention`
30
+
29
31
  .. figure:: https://content.cld.iop.org/journals/1741-2552/21/3/036020/revision2/jnead48b9f2_hr.jpg
30
- :align: center
31
- :alt: Attention Base Net
32
+ :align: center
33
+ :alt: AttentionBaseNet Architecture
34
+ :width: 640px
35
+
36
+
37
+ .. rubric:: Architectural Overview
38
+
39
+ AttentionBaseNet is a *convolution-first* network with a *channel-attention* stage.
40
+ The end-to-end flow is:
41
+
42
+ - (i) :class:`_FeatureExtractor` learns a temporal filter bank and per-filter spatial
43
+ projections (depthwise across electrodes), then condenses time by pooling;
44
+ - (ii) **Channel Expansion** uses a ``1x1`` convolution to set the feature width;
45
+ - (iii) :class:`_ChannelAttentionBlock` refines features via depthwise–pointwise temporal
46
+ convs and an optional channel-attention module (SE/CBAM/ECA/…);
47
+ - (iv) **Classifier** flattens the sequence and applies a linear readout.
48
+
49
+ This design mirrors shallow CNN pipelines (EEGNet-style stem) but inserts a pluggable
50
+ attention unit that *re-weights channels* (and optionally temporal positions) before
51
+ classification.
52
+
53
+
54
+ .. rubric:: Macro Components
55
+
56
+ - :class:`_FeatureExtractor` **(Shallow conv stem → condensed feature map)**
57
+
58
+ - *Operations.*
59
+ - **Temporal conv** (:class:`torch.nn.Conv2d`) with kernel ``(1, L_t)`` creates a learned
60
+ FIR-like filter bank with ``n_temporal_filters`` maps.
61
+ - **Depthwise spatial conv** (:class:`torch.nn.Conv2d`, ``groups=n_temporal_filters``)
62
+ with kernel ``(n_chans, 1)`` learns per-filter spatial projections over the full montage.
63
+ - **BatchNorm → ELU → AvgPool → Dropout** stabilize and downsample time.
64
+ - Output shape: ``(B, F2, 1, T₁)`` with ``F2 = n_temporal_filters x spatial_expansion``.
65
+
66
+ *Interpretability/robustness.* Temporal kernels behave as analyzable FIR filters; the
67
+ depthwise spatial step yields rhythm-specific topographies. Pooling acts as a local
68
+ integrator that reduces variance on short EEG windows.
69
+
70
+ - **Channel Expansion**
71
+
72
+ - *Operations.*
73
+ - A ``1x1`` conv → BN → activation maps ``F2 → ch_dim`` without changing
74
+ the temporal length ``T₁`` (shape: ``(B, ch_dim, 1, T₁)``).
75
+ This sets the embedding width for the attention block.
76
+
77
+ - :class:`_ChannelAttentionBlock` **(temporal refinement + channel attention)**
78
+
79
+ - *Operations.*
80
+ - **Depthwise temporal conv** ``(1, L_a)`` (groups=``ch_dim``) + **pointwise ``1x1``**,
81
+ BN and activation → preserves shape ``(B, ch_dim, 1, T₁)`` while refining timing.
82
+ - **Optional attention module** (see *Additional Mechanisms*) applies channel reweighting
83
+ (some variants also apply temporal gating).
84
+ - **AvgPool (1, P₂)** with stride ``(1, S₂)`` and **Dropout** → outputs
85
+ ``(B, ch_dim, 1, T₂)``.
86
+
87
+ *Role.* Emphasizes informative channels (and, in certain modes, salient time steps)
88
+ before the classifier; complements the convolutional priors with adaptive re-weighting.
89
+
90
+ - **Classifier (aggregation + readout)**
91
+
92
+ *Operations.* :class:`torch.nn.Flatten` → :class:`torch.nn.Linear` from
93
+ ``(B, ch_dim·T₂)`` to classes.
94
+
95
+
96
+ .. rubric:: Convolutional Details
97
+
98
+ - **Temporal (where time-domain patterns are learned).**
99
+ Wide kernels in the stem (``(1, L_t)``) act as a learned filter bank for oscillatory
100
+ bands/transients; the attention block’s depthwise temporal conv (``(1, L_a)``) sharpens
101
+ short-term dynamics after downsampling. Pool sizes/strides (``P₁,S₁`` then ``P₂,S₂``)
102
+ set the token rate and effective temporal resolution.
103
+
104
+ - **Spatial (how electrodes are processed).**
105
+ A depthwise spatial conv with kernel ``(n_chans, 1)`` spans the full montage to
106
+ learn *per-temporal-filter* spatial projections (no cross-filter mixing at this step),
107
+ mirroring the interpretable spatial stage in shallow CNNs.
32
108
 
33
- Neural Network from the paper: EEG motor imagery decoding:
34
- A framework for comparative analysis with channel attention
35
- mechanisms
109
+ - **Spectral (how frequency content is captured).**
110
+ No explicit Fourier/wavelet transform is used in the stem—spectral selectivity
111
+ emerges from learned temporal kernels. When ``attention_mode="fca"``, a frequency
112
+ channel attention (DCT-based) summarizes frequencies to drive channel weights.
36
113
 
37
- The paper and original code with more details about the methodological
38
- choices are available at the [Martin2023]_ and [MartinCode]_.
39
114
 
40
- The AttentionBaseNet architecture is composed of four modules:
41
- - Input Block that performs a temporal convolution and a spatial
42
- convolution.
43
- - Channel Expansion that modifies the number of channels.
44
- - An attention block that performs channel attention with several
45
- options
46
- - ClassificationHead
115
+ .. rubric:: Attention / Sequential Modules
47
116
 
117
+ - **Type.** Channel attention chosen by ``attention_mode`` (SE, ECA, CBAM, CAT, GSoP,
118
+ EncNet, GE, GCT, SRM, CATLite). Most operate purely on channels; CBAM/CAT additionally
119
+ include temporal attention.
120
+
121
+ - **Shapes.** Input/Output around attention: ``(B, ch_dim, 1, T₁)``. Re-arrangements
122
+ (if any) are internal to the module; the block returns the same shape before pooling.
123
+
124
+ - **Role.** Re-weights channels (and optionally time) to highlight informative sources
125
+ and suppress distractors, improving SNR ahead of the linear head.
126
+
127
+
128
+ .. rubric:: Additional Mechanisms
129
+
130
+ - **Attention variants at a glance.**
131
+ - ``"se"``: Squeeze-and-Excitation (global pooling → bottleneck → gates).
132
+ - ``"gsop"``: Global second-order pooling (covariance-aware channel weights).
133
+ - ``"fca"``: Frequency Channel Attention (DCT summary; uses ``seq_len`` and ``freq_idx``).
134
+ - ``"encnet"``: EncNet with learned codewords (uses ``n_codewords``).
135
+ - ``"eca"``: Efficient Channel Attention (local 1-D conv over channel descriptor; uses ``kernel_size``).
136
+ - ``"ge"``: Gather–Excite (context pooling with optional MLP; can use ``extra_params``).
137
+ - ``"gct"``: Gated Channel Transformation (global context normalization + gating).
138
+ - ``"srm"``: Style-based recalibration (mean–std descriptors; optional MLP).
139
+ - ``"cbam"``: Channel then temporal attention (uses ``kernel_size``).
140
+ - ``"cat"`` / ``"catlite"``: Collaborative (channel ± temporal) attention; *lite* omits temporal.
141
+ - **Auto-compatibility on short inputs.**
142
+
143
+ If the input duration is too short for the configured kernels/pools, the implementation
144
+ **automatically rescales** temporal lengths/strides downward (with a warning) to keep
145
+ shapes valid and preserve the pipeline semantics.
146
+
147
+
148
+ .. rubric:: Usage and Configuration
149
+
150
+ - ``n_temporal_filters``, ``temporal_filter_length`` and ``spatial_expansion``:
151
+ control the capacity and the number of spatial projections in the stem.
152
+ - ``pool_length_inp``, ``pool_stride_inp`` then ``pool_length``, ``pool_stride``:
153
+ trade temporal resolution for compute; they determine the final sequence length ``T₂``.
154
+ - ``ch_dim``: width after the ``1x1`` expansion and the effective embedding size for attention.
155
+ - ``attention_mode`` + its specific hyperparameters (``reduction_rate``,
156
+ ``kernel_size``, ``seq_len``, ``freq_idx``, ``n_codewords``, ``use_mlp``):
157
+ select and tune the reweighting mechanism.
158
+ - ``drop_prob_inp`` and ``drop_prob_attn``: regularize stem and attention stages.
159
+ - **Training tips.**
160
+
161
+ Start with moderate pooling (e.g., ``P₁=75,S₁=15``) and ELU activations; enable attention
162
+ only after the stem learns stable filters. For small datasets, prefer simpler modes
163
+ (``"se"``, ``"eca"``) before heavier ones (``"gsop"``, ``"encnet"``).
164
+
165
+ Notes
166
+ -----
167
+ - Sequence length after each stage is computed internally; the final classifier expects
168
+ a flattened ``ch_dim x T₂`` vector.
169
+ - Attention operates on *channel* dimension by design; temporal gating exists only in
170
+ specific variants (CBAM/CAT).
171
+ - The paper and original code with more details about the methodological
172
+ choices are available at [Martin2023]_ and [MartinCode]_.
48
173
  .. versionadded:: 0.9
49
174
 
50
175
  Parameters
@@ -73,18 +198,18 @@ class AttentionBaseNet(EEGModuleMixin, nn.Module):
73
198
  the depth of the network after the initial layer. Default is 16.
74
199
  attention_mode : str, optional
75
200
  The type of attention mechanism to apply. If `None`, no attention is applied.
76
- - "se" for Squeeze-and-excitation network
77
- - "gsop" for Global Second-Order Pooling
78
- - "fca" for Frequency Channel Attention Network
79
- - "encnet" for context encoding module
80
- - "eca" for Efficient channel attention for deep convolutional neural networks
81
- - "ge" for Gather-Excite
82
- - "gct" for Gated Channel Transformation
83
- - "srm" for Style-based Recalibration Module
84
- - "cbam" for Convolutional Block Attention Module
85
- - "cat" for Learning to collaborate channel and temporal attention
86
- from multi-information fusion
87
- - "catlite" for Learning to collaborate channel attention
201
+ - "se" for Squeeze-and-excitation network
202
+ - "gsop" for Global Second-Order Pooling
203
+ - "fca" for Frequency Channel Attention Network
204
+ - "encnet" for context encoding module
205
+ - "eca" for Efficient channel attention for deep convolutional neural networks
206
+ - "ge" for Gather-Excite
207
+ - "gct" for Gated Channel Transformation
208
+ - "srm" for Style-based Recalibration Module
209
+ - "cbam" for Convolutional Block Attention Module
210
+ - "cat" for Learning to collaborate channel and temporal attention
211
+ from multi-information fusion
212
+ - "catlite" for Learning to collaborate channel attention
88
213
  from multi-information fusion (lite version, cat w/o temporal attention)
89
214
  pool_length : int, default=8
90
215
  The length of the window for the average pooling operation.