PyPI - braindecode - Versions diffs - 1.2.0.dev182094932__py3-none-any.whl → 1.3.0.dev168011974__py3-none-any.whl - Mend

braindecode 1.2.0.dev182094932__py3-none-any.whl → 1.3.0.dev168011974__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of braindecode might be problematic. Click here for more details.

Files changed (39) hide show

braindecode/augmentation/functional.py +154 -54
braindecode/augmentation/transforms.py +2 -2
braindecode/datasets/experimental.py +218 -0
braindecode/datasets/sleep_physio_challe_18.py +2 -1
braindecode/datautil/serialization.py +11 -6
braindecode/models/__init__.py +6 -8
braindecode/models/atcnet.py +156 -16
braindecode/models/attentionbasenet.py +151 -26
braindecode/models/{sleep_stager_eldele_2021.py → attn_sleep.py} +12 -2
braindecode/models/biot.py +1 -1
braindecode/models/ctnet.py +1 -1
braindecode/models/deep4.py +6 -2
braindecode/models/deepsleepnet.py +118 -5
braindecode/models/eegconformer.py +114 -15
braindecode/models/eeginception_erp.py +76 -7
braindecode/models/eeginception_mi.py +2 -0
braindecode/models/eegnet.py +27 -190
braindecode/models/eegnex.py +113 -6
braindecode/models/eegsimpleconv.py +2 -0
braindecode/models/eegtcnet.py +1 -1
braindecode/models/sccnet.py +81 -8
braindecode/models/shallow_fbcsp.py +2 -0
braindecode/models/sleep_stager_blanco_2020.py +2 -0
braindecode/models/sleep_stager_chambon_2018.py +2 -0
braindecode/models/sparcnet.py +2 -0
braindecode/models/summary.csv +39 -41
braindecode/models/tidnet.py +2 -0
braindecode/models/tsinception.py +15 -3
braindecode/models/usleep.py +103 -9
braindecode/models/util.py +5 -5
braindecode/preprocessing/preprocess.py +31 -28
braindecode/version.py +1 -1
{braindecode-1.2.0.dev182094932.dist-info → braindecode-1.3.0.dev168011974.dist-info}/METADATA +7 -2
{braindecode-1.2.0.dev182094932.dist-info → braindecode-1.3.0.dev168011974.dist-info}/RECORD +38 -38
braindecode/models/eegresnet.py +0 -362
{braindecode-1.2.0.dev182094932.dist-info → braindecode-1.3.0.dev168011974.dist-info}/WHEEL +0 -0
{braindecode-1.2.0.dev182094932.dist-info → braindecode-1.3.0.dev168011974.dist-info}/licenses/LICENSE.txt +0 -0
{braindecode-1.2.0.dev182094932.dist-info → braindecode-1.3.0.dev168011974.dist-info}/licenses/NOTICE.txt +0 -0
{braindecode-1.2.0.dev182094932.dist-info → braindecode-1.3.0.dev168011974.dist-info}/top_level.txt +0 -0

braindecode/datasets/sleep_physio_challe_18.py CHANGED Viewed

@@ -22,7 +22,6 @@ from mne.datasets.utils import _get_path
 from mne.utils import warn
 from braindecode.datasets import BaseConcatDataset, BaseDataset
-from braindecode.preprocessing.preprocess import _preprocess
 PC18_DIR = op.join(op.dirname(__file__), "data", "pc18")
 PC18_RECORDS = op.join(PC18_DIR, "sleep_records.csv")
@@ -407,6 +406,8 @@ class SleepPhysionetChallenge2018(BaseConcatDataset):
         base_dataset = BaseDataset(raw_file, desc)
         if preproc is not None:
+            from braindecode.preprocessing.preprocess import _preprocess
             _preprocess(base_dataset, None, preproc)
         return base_dataset

braindecode/datautil/serialization.py CHANGED Viewed

@@ -138,12 +138,17 @@ def _load_signals(fif_file, preload, is_raw):
         with open(pkl_file, "rb") as f:
             signals = pickle.load(f)
-        # If the file has been moved together with the pickle file, make sure
-        # the path links to correct fif file.
-        signals._fname = str(fif_file)
-        if preload:
-            signals.load_data()
-        return signals
+        if all(f.exists() for f in signals.filenames):
+            if preload:
+                signals.load_data()
+            return signals
+        else:  # This may happen if the file has been moved together with the pickle file.
+            warnings.warn(
+                f"Pickle file {pkl_file} exists, but the referenced fif "
+                "file(s) do not exist. Will read the fif file(s) directly "
+                "and re-create the pickle file.",
+                UserWarning,
+            )
     # If pickle didn't exist read via mne (likely slower) and save pkl after
     if is_raw:

braindecode/models/__init__.py CHANGED Viewed

@@ -4,6 +4,7 @@ Some predefined network architectures for EEG decoding.
 from .atcnet import ATCNet
 from .attentionbasenet import AttentionBaseNet
+from .attn_sleep import AttnSleep
 from .base import EEGModuleMixin
 from .biot import BIOT
 from .contrawr import ContraWR
@@ -15,9 +16,8 @@ from .eeginception_erp import EEGInceptionERP
 from .eeginception_mi import EEGInceptionMI
 from .eegitnet import EEGITNet
 from .eegminer import EEGMiner
-from .eegnet import EEGNetv1, EEGNetv4
+from .eegnet import EEGNet, EEGNetv4
 from .eegnex import EEGNeX
-from .eegresnet import EEGResNet
 from .eegsimpleconv import EEGSimpleConv
 from .eegtcnet import EEGTCNet
 from .fbcnet import FBCNet
@@ -38,12 +38,11 @@ from .signal_jepa import (
 from .sinc_shallow import SincShallowNet
 from .sleep_stager_blanco_2020 import SleepStagerBlanco2020
 from .sleep_stager_chambon_2018 import SleepStagerChambon2018
-from .sleep_stager_eldele_2021 import SleepStagerEldele2021
 from .sparcnet import SPARCNet
 from .syncnet import SyncNet
 from .tcn import BDTCN, TCN
 from .tidnet import TIDNet
-from .tsinception import TSceptionV1
+from .tsinception import TSception
 from .usleep import USleep
 from .util import _init_models_dict, models_mandatory_parameters
@@ -53,6 +52,7 @@ _init_models_dict()
 __all__ = [
     "ATCNet",
+    "AttnSleep",
     "AttentionBaseNet",
     "EEGModuleMixin",
     "BIOT",
@@ -65,10 +65,9 @@ __all__ = [
     "EEGInceptionMI",
     "EEGITNet",
     "EEGMiner",
-    "EEGNetv1",
+    "EEGNet",
     "EEGNetv4",
     "EEGNeX",
-    "EEGResNet",
     "EEGSimpleConv",
     "EEGTCNet",
     "FBCNet",
@@ -87,13 +86,12 @@ __all__ = [
     "SincShallowNet",
     "SleepStagerBlanco2020",
     "SleepStagerChambon2018",
-    "SleepStagerEldele2021",
     "SPARCNet",
     "SyncNet",
     "BDTCN",
     "TCN",
     "TIDNet",
-    "TSceptionV1",
+    "TSception",
     "USleep",
     "_init_models_dict",
     "models_mandatory_parameters",

braindecode/models/atcnet.py CHANGED Viewed

@@ -13,13 +13,154 @@ from braindecode.modules import CausalConv1d, Ensure4d, MaxNormLinear
 class ATCNet(EEGModuleMixin, nn.Module):
-    """ATCNet model from Altaheri et al. (2022) [1]_
+    """ATCNet from Altaheri et al. (2022) [1]_.
-    Pytorch implementation based on official tensorflow code [2]_.
+    :bdg-success:`Convolution` :bdg-info:`Small Attention`
     .. figure:: https://user-images.githubusercontent.com/25565236/185449791-e8539453-d4fa-41e1-865a-2cf7e91f60ef.png
-       :align: center
-       :alt: ATCNet Architecture
+        :align: center
+        :alt: ATCNet Architecture
+        :width: 650px
+    .. rubric:: Architectural Overview
+    ATCNet is a *convolution-first* architecture augmented with a *lightweight attention–TCN*
+    sequence module. The end-to-end flow is:
+    - (i) :class:`_ConvBlock` learns temporal filter-banks and spatial projections (EEGNet-style),
+      downsampling time to a compact feature map;
+    - (ii) Sliding Windows carve overlapping temporal windows from this map;
+    - (iii) for each window, :class:`_AttentionBlock` applies small multi-head self-attention
+      over time, followed by a :class:`_TCNResidualBlock` stack (causal, dilated);
+    - (iv) window-level features are aggregated (mean of window logits or concatenation)
+      and mapped via a max-norm–constrained linear layer.
+    Relative to ViT, ATCNet replaces linear patch projection with learned *temporal–spatial*
+    convolutions; it processes *parallel* window encoders (attention→TCN) instead of a deep
+    stack; and swaps the MLP head for a TCN suited to 1-D EEG sequences.
+    .. rubric:: Macro Components
+    - :class:`_ConvBlock` **(Shallow conv stem → feature map)**
+        - *Operations.*
+        - **Temporal conv** (:class:`torch.nn.Conv2d`) with kernel ``(L_t, 1)`` builds a
+            FIR-like filter bank (``F1`` maps).
+        - **Depthwise spatial conv** (:class:`torch.nn.Conv2d`, ``groups=F1``) with kernel
+          ``(1, n_chans)`` learns per-filter spatial projections (akin to EEGNet’s CSP-like step).
+        - **BN → ELU → AvgPool → Dropout** to stabilize and condense activations.
+        - **Refining temporal conv** (:class:`torch.nn.Conv2d`) with kernel ``(L_r, 1)`` +
+          **BN → ELU → AvgPool → Dropout**.
+    The output shape is ``(B, F2, T_c, 1)`` with ``F2 = F1·D`` and ``T_c = T/(P1·P2)``.
+    Temporal kernels behave as FIR filters; the depthwise-spatial conv yields frequency-specific
+    topographies. Pooling acts as a local integrator, reducing variance and imposing a
+    useful inductive bias on short EEG windows.
+    - **Sliding-Window Sequencer**
+      From the condensed time axis (length ``T_c``), ATCNet forms ``n`` overlapping windows
+      of width ``T_w = T_c - n + 1`` (one start per index). Each window produces a sequence
+      ``(B, F2, T_w)`` forwarded to its own attention–TCN branch. This creates *parallel*
+      encoders over shifted contexts and is key to robustness on nonstationary EEG.
+    - :class:`_AttentionBlock` **(small MHA on temporal positions)**
+        - *Operations.*
+        - Rearrange to ``(B, T_w, F2)``,
+        - Normalization :class:`torch.nn.LayerNorm`
+        - Custom MultiHeadAttention :class:`_MHA` (``num_heads=H``, per-head dim ``d_h``) + residual add,
+        - Dropout :class:`torch.nn.Dropout`
+        - Rearrange back to ``(B, F2, T_w)``.
+    **Note**: Attention is *local to a window* and purely temporal.
+    *Role.* Re-weights evidence across the window, letting the model emphasize informative
+    segments (onsets, bursts) before causal convolutions aggregate history.
+    - :class:`_TCNResidualBlock` **(causal dilated temporal CNN)**
+        - *Operations.*
+        - Two :class:`braindecode.modules.CausalConv1d` layers per block with dilation  ``1, 2, 4, …``
+        - Across blocks of (`torch.nn.ELU` + `torch.nn.BatchNorm1d` + `torch.nn.Dropout`) +
+          a residual (identity or 1x1 mapping).
+        - The final feature used per window is the *last* causal step ``[..., -1]`` (forecast-style).
+    *Role.* Efficient long-range temporal integration with stable gradients; the dilated
+    receptive field complements attention’s soft selection.
+    - **Aggregation & Classifier**
+        - *Operations.*
+        - Either (a) map each window feature ``(B, F2)`` to logits via :class:`braindecode.modules.MaxNormLinear`
+        and **average** across windows (default, matching official code), or
+        - (b) **concatenate** all window features ``(B, n·F2)`` and apply a single :class:`MaxNormLinear`.
+        The max-norm constraint regularizes the readout.
+    .. rubric:: Convolutional Details
+    - **Temporal.** Temporal structure is learned in three places:
+        - (1) the stem’s wide ``(L_t, 1)`` conv (learned filter bank),
+        - (2) the refining ``(L_r, 1)`` conv after pooling (short-term dynamics), and
+        - (3) the TCN’s causal 1-D convolutions with exponentially increasing dilation
+          (long-range dependencies). The minimum sequence length required by the TCN stack is
+          ``(K_t - 1)·2^{L-1} + 1``; the implementation *auto-scales* kernels/pools/windows
+          when inputs are shorter to preserve feasibility.
+    - **Spatial.** A depthwise spatial conv spans the **full montage** (kernel ``(1, n_chans)``),
+        producing *per-temporal-filter* spatial projections (no cross-filter mixing at this step).
+        This mirrors EEGNet’s interpretability: each temporal filter has its own spatial pattern.
+    .. rubric:: Attention / Sequential Modules
+    - **Type.** Multi-head self-attention with ``H`` heads and per-head dim ``d_h`` implemented
+      in :class:`_MHA`, allowing ``embed_dim = H·d_h`` independent of input and output dims.
+    - **Shapes.** ``(B, F2, T_w) → (B, T_w, F2) → (B, F2, T_w)``. Attention operates along
+      the **temporal** axis within a window; channels/features stay in the embedding dim ``F2``.
+    - **Role.** Highlights salient temporal positions prior to causal convolution; small attention
+      keeps compute modest while improving context modeling over pooled features.
+    .. rubric:: Additional Mechanisms
+    - **Parallel encoders over shifted windows.** Improves montage/phase robustness by
+      ensembling nearby contexts rather than committing to a single segmentation.
+    - **Max-norm classifier.** Enforces weight norm constraints at the readout, a common
+      stabilization trick in EEG decoding.
+    - **ViT vs. ATCNet (design choices).** Convolutional *nonlinear* projection rather than
+      linear patchification; attention followed by **TCN** (not MLP); *parallel* window
+      encoders rather than stacked encoders.
+    .. rubric:: Usage and Configuration
+        - ``conv_block_n_filters (F1)``, ``conv_block_depth_mult (D)`` → capacity of the stem
+        (with ``F2 = F1·D`` feeding attention/TCN), dimensions aligned to ``F2``, like :class:`EEGNet`.
+        - Pool sizes ``P1,P2`` trade temporal resolution for stability/compute; they set
+        ``T_c = T/(P1·P2)`` and thus window width ``T_w``.
+        - ``n_windows`` controls the ensemble over shifts (compute ∝ windows).
+        - ``att_num_heads``, ``att_head_dim`` set attention capacity; keep ``H·d_h ≈ F2``.
+        - ``tcn_depth``, ``tcn_kernel_size`` govern receptive field; larger values demand
+        longer inputs (see minimum length above). The implementation warns and *rescales*
+        kernels/pools/windows if inputs are too short.
+        - **Aggregation choice.** ``concat=False`` (default, average of per-window logits) matches
+        the official code; ``concat=True`` mirrors the paper’s concatenation variant.
+    Notes
+    -----
+    - Inputs substantially shorter than the implied minimum length trigger **automatic
+      downscaling** of kernels, pools, windows, and TCN kernel size to maintain validity.
+    - The attention–TCN sequence operates **per window**; the last causal step is used as the
+      window feature, aligning the temporal semantics across windows.
+    .. versionadded:: 1.1
+        - More detailed documentation of the model.
     Parameters
     ----------
@@ -85,15 +226,13 @@ class ATCNet(EEGModuleMixin, nn.Module):
         Maximum L2-norm constraint imposed on weights of the last
         fully-connected layer. Defaults to 0.25.
     References
     ----------
-    .. [1] H. Altaheri, G. Muhammad and M. Alsulaiman,
-        Physics-informed attention temporal convolutional network for EEG-based
-        motor imagery classification in IEEE Transactions on Industrial Informatics,
-        2022, doi: 10.1109/TII.2022.3197419.
-    .. [2] EEE-ATCNet implementation.
-       https://github.com/Altaheri/EEG-ATCNet/blob/main/models.py
+    .. [1] H. Altaheri, G. Muhammad, M. Alsulaiman (2022).
+        *Physics-informed attention temporal convolutional network for EEG-based motor imagery classification.*
+        IEEE Transactions on Industrial Informatics. doi:10.1109/TII.2022.3197419.
+    .. [2] Official EEG-ATCNet implementation (TensorFlow):
+        https://github.com/Altaheri/EEG-ATCNet/blob/main/models.py
     """
     def __init__(
@@ -231,7 +370,7 @@ class ATCNet(EEGModuleMixin, nn.Module):
                 nn.Sequential(
                     *[
                         _TCNResidualBlock(
-                            in_channels=self.F2,
+                            in_channels=self.F2 if i == 0 else self.tcn_n_filters,
                             kernel_size=self.tcn_kernel_size,
                             n_filters=self.tcn_n_filters,
                             dropout=self.tcn_dropout,
@@ -249,7 +388,7 @@ class ATCNet(EEGModuleMixin, nn.Module):
             self.final_layer = nn.ModuleList(
                 [
                     MaxNormLinear(
-                        in_features=self.F2 * self.n_windows,
+                        in_features=self.tcn_n_filters * self.n_windows,
                         out_features=self.n_outputs,
                         max_norm_val=self.max_norm_const,
                     )
@@ -259,7 +398,7 @@ class ATCNet(EEGModuleMixin, nn.Module):
             self.final_layer = nn.ModuleList(
                 [
                     MaxNormLinear(
-                        in_features=self.F2,
+                        in_features=self.tcn_n_filters,
                         out_features=self.n_outputs,
                         max_norm_val=self.max_norm_const,
                     )
@@ -556,7 +695,8 @@ class _TCNResidualBlock(nn.Module):
         # Reshape the input for the residual connection when necessary
         if in_channels != n_filters:
             self.reshaping_conv = nn.Conv1d(
-                n_filters,
+                in_channels=in_channels,  # Specify input channels
+                out_channels=n_filters,  # Specify output channels
                 kernel_size=1,
                 padding="same",
             )
@@ -576,7 +716,7 @@ class _TCNResidualBlock(nn.Module):
         out = self.activation(out)
         out = self.drop2(out)
-        out = self.reshaping_conv(out)
+        X = self.reshaping_conv(X)
         # ----- Residual connection -----
         out = X + out

braindecode/models/attentionbasenet.py CHANGED Viewed

@@ -26,25 +26,150 @@ from braindecode.modules.attention import (
 class AttentionBaseNet(EEGModuleMixin, nn.Module):
     """AttentionBaseNet from Wimpff M et al. (2023) [Martin2023]_.
+    :bdg-success:`Convolution` :bdg-info:`Small Attention`
     .. figure:: https://content.cld.iop.org/journals/1741-2552/21/3/036020/revision2/jnead48b9f2_hr.jpg
-       :align: center
-       :alt: Attention Base Net
+        :align: center
+        :alt: AttentionBaseNet Architecture
+        :width: 640px
+    .. rubric:: Architectural Overview
+    AttentionBaseNet is a *convolution-first* network with a *channel-attention* stage.
+    The end-to-end flow is:
+    - (i) :class:`_FeatureExtractor` learns a temporal filter bank and per-filter spatial
+      projections (depthwise across electrodes), then condenses time by pooling;
+    - (ii) **Channel Expansion** uses a ``1x1`` convolution to set the feature width;
+    - (iii) :class:`_ChannelAttentionBlock` refines features via depthwise–pointwise temporal
+      convs and an optional channel-attention module (SE/CBAM/ECA/…);
+    - (iv) **Classifier** flattens the sequence and applies a linear readout.
+    This design mirrors shallow CNN pipelines (EEGNet-style stem) but inserts a pluggable
+    attention unit that *re-weights channels* (and optionally temporal positions) before
+    classification.
+    .. rubric:: Macro Components
+    - :class:`_FeatureExtractor` **(Shallow conv stem → condensed feature map)**
+        - *Operations.*
+        - **Temporal conv** (:class:`torch.nn.Conv2d`) with kernel ``(1, L_t)`` creates a learned
+          FIR-like filter bank with ``n_temporal_filters`` maps.
+        - **Depthwise spatial conv** (:class:`torch.nn.Conv2d`, ``groups=n_temporal_filters``)
+          with kernel ``(n_chans, 1)`` learns per-filter spatial projections over the full montage.
+        - **BatchNorm → ELU → AvgPool → Dropout** stabilize and downsample time.
+        - Output shape: ``(B, F2, 1, T₁)`` with ``F2 = n_temporal_filters x spatial_expansion``.
+    *Interpretability/robustness.* Temporal kernels behave as analyzable FIR filters; the
+    depthwise spatial step yields rhythm-specific topographies. Pooling acts as a local
+    integrator that reduces variance on short EEG windows.
+    - **Channel Expansion**
+        - *Operations.*
+        - A ``1x1`` conv → BN → activation maps ``F2 → ch_dim`` without changing
+          the temporal length ``T₁`` (shape: ``(B, ch_dim, 1, T₁)``).
+          This sets the embedding width for the attention block.
+    - :class:`_ChannelAttentionBlock` **(temporal refinement + channel attention)**
+        - *Operations.*
+        - **Depthwise temporal conv** ``(1, L_a)`` (groups=``ch_dim``) + **pointwise ``1x1``**,
+          BN and activation → preserves shape ``(B, ch_dim, 1, T₁)`` while refining timing.
+        - **Optional attention module** (see *Additional Mechanisms*) applies channel reweighting
+          (some variants also apply temporal gating).
+        - **AvgPool (1, P₂)** with stride ``(1, S₂)`` and **Dropout** → outputs
+          ``(B, ch_dim, 1, T₂)``.
+    *Role.* Emphasizes informative channels (and, in certain modes, salient time steps)
+    before the classifier; complements the convolutional priors with adaptive re-weighting.
+    - **Classifier (aggregation + readout)**
+    *Operations.* :class:`torch.nn.Flatten` → :class:`torch.nn.Linear` from
+    ``(B, ch_dim·T₂)`` to classes.
+    .. rubric:: Convolutional Details
+    - **Temporal (where time-domain patterns are learned).**
+        Wide kernels in the stem (``(1, L_t)``) act as a learned filter bank for oscillatory
+        bands/transients; the attention block’s depthwise temporal conv (``(1, L_a)``) sharpens
+        short-term dynamics after downsampling. Pool sizes/strides (``P₁,S₁`` then ``P₂,S₂``)
+        set the token rate and effective temporal resolution.
+    - **Spatial (how electrodes are processed).**
+        A depthwise spatial conv with kernel ``(n_chans, 1)`` spans the full montage to
+        learn *per-temporal-filter* spatial projections (no cross-filter mixing at this step),
+        mirroring the interpretable spatial stage in shallow CNNs.
-    Neural Network from the paper: EEG motor imagery decoding:
-    A framework for comparative analysis with channel attention
-    mechanisms
+    - **Spectral (how frequency content is captured).**
+        No explicit Fourier/wavelet transform is used in the stem—spectral selectivity
+        emerges from learned temporal kernels. When ``attention_mode="fca"``, a frequency
+        channel attention (DCT-based) summarizes frequencies to drive channel weights.
-    The paper and original code with more details about the methodological
-    choices are available at the [Martin2023]_ and [MartinCode]_.
-    The AttentionBaseNet architecture is composed of four modules:
-    - Input Block that performs a temporal convolution and a spatial
-    convolution.
-    - Channel Expansion that modifies the number of channels.
-    - An attention block that performs channel attention with several
-    options
-    - ClassificationHead
+    .. rubric:: Attention / Sequential Modules
+    - **Type.** Channel attention chosen by ``attention_mode`` (SE, ECA, CBAM, CAT, GSoP,
+        EncNet, GE, GCT, SRM, CATLite). Most operate purely on channels; CBAM/CAT additionally
+        include temporal attention.
+    - **Shapes.** Input/Output around attention: ``(B, ch_dim, 1, T₁)``. Re-arrangements
+        (if any) are internal to the module; the block returns the same shape before pooling.
+    - **Role.** Re-weights channels (and optionally time) to highlight informative sources
+        and suppress distractors, improving SNR ahead of the linear head.
+    .. rubric:: Additional Mechanisms
+        - **Attention variants at a glance.**
+        - ``"se"``: Squeeze-and-Excitation (global pooling → bottleneck → gates).
+        - ``"gsop"``: Global second-order pooling (covariance-aware channel weights).
+        - ``"fca"``: Frequency Channel Attention (DCT summary; uses ``seq_len`` and ``freq_idx``).
+        - ``"encnet"``: EncNet with learned codewords (uses ``n_codewords``).
+        - ``"eca"``: Efficient Channel Attention (local 1-D conv over channel descriptor; uses ``kernel_size``).
+        - ``"ge"``: Gather–Excite (context pooling with optional MLP; can use ``extra_params``).
+        - ``"gct"``: Gated Channel Transformation (global context normalization + gating).
+        - ``"srm"``: Style-based recalibration (mean–std descriptors; optional MLP).
+        - ``"cbam"``: Channel then temporal attention (uses ``kernel_size``).
+        - ``"cat"`` / ``"catlite"``: Collaborative (channel ± temporal) attention; *lite* omits temporal.
+        - **Auto-compatibility on short inputs.**
+    If the input duration is too short for the configured kernels/pools, the implementation
+    **automatically rescales** temporal lengths/strides downward (with a warning) to keep
+    shapes valid and preserve the pipeline semantics.
+    .. rubric:: Usage and Configuration
+    - ``n_temporal_filters``, ``temporal_filter_length`` and ``spatial_expansion``:
+        control the capacity and the number of spatial projections in the stem.
+    - ``pool_length_inp``, ``pool_stride_inp`` then ``pool_length``, ``pool_stride``:
+        trade temporal resolution for compute; they determine the final sequence length ``T₂``.
+    - ``ch_dim``: width after the ``1x1`` expansion and the effective embedding size for attention.
+    - ``attention_mode`` + its specific hyperparameters (``reduction_rate``,
+        ``kernel_size``, ``seq_len``, ``freq_idx``, ``n_codewords``, ``use_mlp``):
+        select and tune the reweighting mechanism.
+    - ``drop_prob_inp`` and ``drop_prob_attn``: regularize stem and attention stages.
+    - **Training tips.**
+    Start with moderate pooling (e.g., ``P₁=75,S₁=15``) and ELU activations; enable attention
+    only after the stem learns stable filters. For small datasets, prefer simpler modes
+    (``"se"``, ``"eca"``) before heavier ones (``"gsop"``, ``"encnet"``).
+    Notes
+    -----
+    - Sequence length after each stage is computed internally; the final classifier expects
+      a flattened ``ch_dim x T₂`` vector.
+    - Attention operates on *channel* dimension by design; temporal gating exists only in
+      specific variants (CBAM/CAT).
+    - The paper and original code with more details about the methodological
+      choices are available at the [Martin2023]_ and [MartinCode]_.
     .. versionadded:: 0.9
     Parameters
@@ -73,18 +198,18 @@ class AttentionBaseNet(EEGModuleMixin, nn.Module):
         the depth of the network after the initial layer. Default is 16.
     attention_mode : str, optional
         The type of attention mechanism to apply. If `None`, no attention is applied.
-        - "se" for Squeeze-and-excitation network
-        - "gsop" for Global Second-Order Pooling
-        - "fca" for Frequency Channel Attention Network
-        - "encnet" for context encoding module
-        - "eca" for Efficient channel attention for deep convolutional neural networks
-        - "ge" for Gather-Excite
-        - "gct" for Gated Channel Transformation
-        - "srm" for Style-based Recalibration Module
-        - "cbam" for Convolutional Block Attention Module
-        - "cat" for Learning to collaborate channel and temporal attention
-        from multi-information fusion
-        - "catlite" for Learning to collaborate channel attention
+            - "se" for Squeeze-and-excitation network
+            - "gsop" for Global Second-Order Pooling
+            - "fca" for Frequency Channel Attention Network
+            - "encnet" for context encoding module
+            - "eca" for Efficient channel attention for deep convolutional neural networks
+            - "ge" for Gather-Excite
+            - "gct" for Gated Channel Transformation
+            - "srm" for Style-based Recalibration Module
+            - "cbam" for Convolutional Block Attention Module
+            - "cat" for Learning to collaborate channel and temporal attention
+            from multi-information fusion
+            - "catlite" for Learning to collaborate channel attention
         from multi-information fusion (lite version, cat w/o temporal attention)
     pool_length : int, default=8
         The length of the window for the average pooling operation.

braindecode/models/{sleep_stager_eldele_2021.py → attn_sleep.py} RENAMED Viewed

@@ -8,18 +8,19 @@ from copy import deepcopy
 import torch
 import torch.nn.functional as F
+from mne.utils import deprecated
 from torch import nn
 from braindecode.models.base import EEGModuleMixin
 from braindecode.modules import CausalConv1d
-class SleepStagerEldele2021(EEGModuleMixin, nn.Module):
+class AttnSleep(EEGModuleMixin, nn.Module):
     """Sleep Staging Architecture from Eldele et al. (2021) [Eldele2021]_.
     .. figure:: https://raw.githubusercontent.com/emadeldeen24/AttnSleep/refs/heads/main/imgs/AttnSleep.png
         :align: center
-        :alt: SleepStagerEldele2021 Architecture
+        :alt: AttnSleep Architecture
     Attention based Neural Net for sleep staging as described in [Eldele2021]_.
     The code for the paper and this model is also available at [1]_.
@@ -533,3 +534,12 @@ class _PositionwiseFeedForward(nn.Module):
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Implements FFN equation."""
         return self.w_2(self.dropout(self.activate(self.w_1(x))))
+@deprecated(
+    "`SleepStagerEldele2021` was renamed to `AttnSleep` in v1.12 to follow original author's name; this alias will be removed in v1.14."
+)
+class SleepStagerEldele2021(AttnSleep):
+    """Deprecated alias for :class:`AttnSleep`."""
+    pass

braindecode/models/biot.py CHANGED Viewed

@@ -17,7 +17,7 @@ class BIOT(EEGModuleMixin, nn.Module):
     BIOT: Cross-data Biosignal Learning in the Wild.
-    BIOT is a large language model for biosignal classification. It is
+    BIOT is a large brain model for biosignal classification. It is
     a wrapper around the `BIOTEncoder` and `ClassificationHead` modules.
     It is designed for N-dimensional biosignal data such as EEG, ECG, etc.

braindecode/models/ctnet.py CHANGED Viewed

@@ -39,7 +39,7 @@ class CTNet(EEGModuleMixin, nn.Module):
     The architecture consists of three main components:
     1. **Convolutional Module**:
-        - Apply EEGNetV4 to perform some feature extraction, denoted here as
+        - Apply :class:`EEGNet` to perform some feature extraction, denoted here as
         _PatchEmbeddingEEGNet module.
     2. **Transformer Encoder Module**:

braindecode/models/deep4.py CHANGED Viewed

@@ -19,9 +19,13 @@ from braindecode.modules import (
 class Deep4Net(EEGModuleMixin, nn.Sequential):
     """Deep ConvNet model from Schirrmeister et al (2017) [Schirrmeister2017]_.
-     .. figure:: https://onlinelibrary.wiley.com/cms/asset/fc200ccc-d8c4-45b4-8577-56ce4d15999a/hbm23730-fig-0001-m.jpg
+    :bdg-success:`Convolution`
+    .. figure:: https://onlinelibrary.wiley.com/cms/asset/fc200ccc-d8c4-45b4-8577-56ce4d15999a/hbm23730-fig-0001-m.jpg
         :align: center
-        :alt: CTNet Architecture
+        :alt: Deep4Net Architecture
+        :width: 600px
     Model described in [Schirrmeister2017]_.

braindecode 1.2.0.dev182094932__py3-none-any.whl → 1.3.0.dev168011974__py3-none-any.whl

Potentially problematic release.

braindecode 1.2.0.dev182094932__py3-none-any.whl → 1.3.0.dev168011974__py3-none-any.whl