braindecode 1.3.0.dev177069446__py3-none-any.whl → 1.3.0.dev177628147__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (106)
  1. braindecode/augmentation/__init__.py +3 -5
  2. braindecode/augmentation/base.py +5 -8
  3. braindecode/augmentation/functional.py +22 -25
  4. braindecode/augmentation/transforms.py +42 -51
  5. braindecode/classifier.py +16 -11
  6. braindecode/datasets/__init__.py +3 -5
  7. braindecode/datasets/base.py +13 -17
  8. braindecode/datasets/bbci.py +14 -13
  9. braindecode/datasets/bcicomp.py +5 -4
  10. braindecode/datasets/{bids/datasets.py → bids.py} +18 -12
  11. braindecode/datasets/{bids/iterable.py → experimental.py} +6 -8
  12. braindecode/datasets/{bids/hub.py → hub.py} +350 -375
  13. braindecode/datasets/{bids/hub_validation.py → hub_validation.py} +1 -2
  14. braindecode/datasets/mne.py +19 -19
  15. braindecode/datasets/moabb.py +10 -10
  16. braindecode/datasets/nmt.py +56 -58
  17. braindecode/datasets/sleep_physio_challe_18.py +5 -3
  18. braindecode/datasets/sleep_physionet.py +5 -5
  19. braindecode/datasets/tuh.py +18 -21
  20. braindecode/datasets/xy.py +9 -10
  21. braindecode/datautil/__init__.py +3 -3
  22. braindecode/datautil/serialization.py +20 -22
  23. braindecode/datautil/util.py +7 -120
  24. braindecode/eegneuralnet.py +52 -22
  25. braindecode/functional/functions.py +10 -7
  26. braindecode/functional/initialization.py +2 -3
  27. braindecode/models/__init__.py +3 -5
  28. braindecode/models/atcnet.py +39 -43
  29. braindecode/models/attentionbasenet.py +41 -37
  30. braindecode/models/attn_sleep.py +24 -26
  31. braindecode/models/base.py +6 -6
  32. braindecode/models/bendr.py +26 -50
  33. braindecode/models/biot.py +30 -61
  34. braindecode/models/contrawr.py +5 -5
  35. braindecode/models/ctnet.py +35 -35
  36. braindecode/models/deep4.py +5 -5
  37. braindecode/models/deepsleepnet.py +7 -7
  38. braindecode/models/eegconformer.py +26 -31
  39. braindecode/models/eeginception_erp.py +2 -2
  40. braindecode/models/eeginception_mi.py +6 -6
  41. braindecode/models/eegitnet.py +5 -5
  42. braindecode/models/eegminer.py +1 -1
  43. braindecode/models/eegnet.py +3 -3
  44. braindecode/models/eegnex.py +2 -2
  45. braindecode/models/eegsimpleconv.py +2 -2
  46. braindecode/models/eegsym.py +7 -7
  47. braindecode/models/eegtcnet.py +6 -6
  48. braindecode/models/fbcnet.py +2 -2
  49. braindecode/models/fblightconvnet.py +3 -3
  50. braindecode/models/fbmsnet.py +3 -3
  51. braindecode/models/hybrid.py +2 -2
  52. braindecode/models/ifnet.py +5 -5
  53. braindecode/models/labram.py +46 -70
  54. braindecode/models/luna.py +5 -60
  55. braindecode/models/medformer.py +21 -23
  56. braindecode/models/msvtnet.py +15 -15
  57. braindecode/models/patchedtransformer.py +55 -55
  58. braindecode/models/sccnet.py +2 -2
  59. braindecode/models/shallow_fbcsp.py +3 -5
  60. braindecode/models/signal_jepa.py +12 -39
  61. braindecode/models/sinc_shallow.py +4 -3
  62. braindecode/models/sleep_stager_blanco_2020.py +2 -2
  63. braindecode/models/sleep_stager_chambon_2018.py +2 -2
  64. braindecode/models/sparcnet.py +8 -8
  65. braindecode/models/sstdpn.py +869 -869
  66. braindecode/models/summary.csv +17 -19
  67. braindecode/models/syncnet.py +2 -2
  68. braindecode/models/tcn.py +5 -5
  69. braindecode/models/tidnet.py +3 -3
  70. braindecode/models/tsinception.py +3 -3
  71. braindecode/models/usleep.py +7 -7
  72. braindecode/models/util.py +14 -165
  73. braindecode/modules/__init__.py +1 -9
  74. braindecode/modules/activation.py +3 -29
  75. braindecode/modules/attention.py +0 -123
  76. braindecode/modules/blocks.py +1 -53
  77. braindecode/modules/convolution.py +0 -53
  78. braindecode/modules/filter.py +0 -31
  79. braindecode/modules/layers.py +0 -84
  80. braindecode/modules/linear.py +1 -22
  81. braindecode/modules/stats.py +0 -10
  82. braindecode/modules/util.py +0 -9
  83. braindecode/modules/wrapper.py +0 -17
  84. braindecode/preprocessing/preprocess.py +0 -3
  85. braindecode/regressor.py +18 -15
  86. braindecode/samplers/ssl.py +1 -1
  87. braindecode/util.py +28 -38
  88. braindecode/version.py +1 -1
  89. braindecode-1.3.0.dev177628147.dist-info/METADATA +202 -0
  90. braindecode-1.3.0.dev177628147.dist-info/RECORD +114 -0
  91. braindecode/datasets/bids/__init__.py +0 -54
  92. braindecode/datasets/bids/format.py +0 -717
  93. braindecode/datasets/bids/hub_format.py +0 -717
  94. braindecode/datasets/bids/hub_io.py +0 -197
  95. braindecode/datasets/chb_mit.py +0 -163
  96. braindecode/datasets/siena.py +0 -162
  97. braindecode/datasets/utils.py +0 -67
  98. braindecode/models/brainmodule.py +0 -845
  99. braindecode/models/config.py +0 -233
  100. braindecode/models/reve.py +0 -843
  101. braindecode-1.3.0.dev177069446.dist-info/METADATA +0 -230
  102. braindecode-1.3.0.dev177069446.dist-info/RECORD +0 -124
  103. {braindecode-1.3.0.dev177069446.dist-info → braindecode-1.3.0.dev177628147.dist-info}/WHEEL +0 -0
  104. {braindecode-1.3.0.dev177069446.dist-info → braindecode-1.3.0.dev177628147.dist-info}/licenses/LICENSE.txt +0 -0
  105. {braindecode-1.3.0.dev177069446.dist-info → braindecode-1.3.0.dev177628147.dist-info}/licenses/NOTICE.txt +0 -0
  106. {braindecode-1.3.0.dev177069446.dist-info → braindecode-1.3.0.dev177628147.dist-info}/top_level.txt +0 -0
@@ -21,9 +21,9 @@ from braindecode.modules import MLP, DropPath
 
 
 class Labram(EEGModuleMixin, nn.Module):
- r"""Labram from Jiang, W B et al (2024) [Jiang2024]_.
+ """Labram from Jiang, W B et al (2024) [Jiang2024]_.
 
- :bdg-success:`Convolution` :bdg-danger:`Foundation Model`
+ :bdg-success:`Convolution` :bdg-danger:`Large Brain Model`
 
 .. figure:: https://arxiv.org/html/2405.18765v1/x1.png
 :align: center
@@ -64,30 +64,6 @@ class Labram(EEGModuleMixin, nn.Module):
 - LayerNorm: Apply layer normalization to the data;
 - Linear: An head linear layer to transformer the data into classes.
 
- .. important::
- **Pre-trained Weights Available**
-
- This model has pre-trained weights available on the Hugging Face Hub.
- You can load them using:
-
- .. code-block:: python
-
- from braindecode.models import Labram
-
- # Load pre-trained model from Hugging Face Hub
- model = Labram.from_pretrained("braindecode/labram-pretrained")
-
- To push your own trained model to the Hub:
-
- .. code-block:: python
-
- # After training your model
- model.push_to_hub(
- repo_id="username/my-labram-model", commit_message="Upload trained Labram model"
- )
-
- Requires installing ``braindecode[hug]`` for Hub integration.
-
 .. versionadded:: 0.9
 
 
@@ -107,15 +83,15 @@ class Labram(EEGModuleMixin, nn.Module):
 ----------
 patch_size : int
 The size of the patch to be used in the patch embedding.
- embed_dim : int
+ emb_size : int
 The dimension of the embedding.
- conv_in_channels : int
+ in_conv_channels : int
 The number of convolutional input channels.
- conv_out_channels : int
+ out_channels : int
 The number of convolutional output channels.
- num_layers : int (default=12)
+ n_layers : int (default=12)
 The number of attention layers of the model.
- num_heads : int (default=10)
+ att_num_heads : int (default=10)
 The number of attention heads.
 mlp_ratio : float (default=4.0)
 The expansion ratio of the mlp layer
@@ -179,26 +155,26 @@ class Labram(EEGModuleMixin, nn.Module):
 sfreq=None,
 input_window_seconds=None,
 patch_size=200,
- embed_dim=200,
- conv_in_channels=1,
- conv_out_channels=8,
- num_layers=12,
- num_heads=10,
+ emb_size=200,
+ in_conv_channels=1,
+ out_channels=8,
+ n_layers=12,
+ att_num_heads=10,
 mlp_ratio=4.0,
 qkv_bias=False,
- qk_norm: type[nn.Module] = nn.LayerNorm,
+ qk_norm=nn.LayerNorm,
 qk_scale=None,
 drop_prob=0.0,
 attn_drop_prob=0.0,
 drop_path_prob=0.0,
- norm_layer: type[nn.Module] = nn.LayerNorm,
+ norm_layer=nn.LayerNorm,
 init_values=0.1,
 use_abs_pos_emb=True,
 use_mean_pooling=True,
 init_scale=0.001,
 neural_tokenizer=True,
 attn_head_dim=None,
- activation: type[nn.Module] = nn.GELU,
+ activation: nn.Module = nn.GELU,
 ):
 super().__init__(
 n_outputs=n_outputs,
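For reference, a minimal instantiation sketch using the keyword arguments renamed in this hunk (the keyword values repeat the new defaults; n_outputs, n_chans, and n_times are illustrative, and the rest of the signature is assumed unchanged):

    from braindecode.models import Labram

    # Hypothetical example: 2 classes, 22 channels, 1000 time samples.
    model = Labram(
        n_outputs=2,
        n_chans=22,
        n_times=1000,
        emb_size=200,        # was embed_dim
        in_conv_channels=1,  # was conv_in_channels
        out_channels=8,      # was conv_out_channels
        n_layers=12,         # was num_layers
        att_num_heads=10,    # was num_heads
    )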
@@ -211,7 +187,7 @@ class Labram(EEGModuleMixin, nn.Module):
 del n_outputs, n_chans, chs_info, n_times, input_window_seconds, sfreq
 
 self.patch_size = patch_size
- self.num_features = self.embed_dim = embed_dim
+ self.num_features = self.emb_size = emb_size
 self.neural_tokenizer = neural_tokenizer
 self.init_scale = init_scale
 
@@ -223,20 +199,20 @@ class Labram(EEGModuleMixin, nn.Module):
 )
 self.patch_size = self.n_times
 self.num_features = None
- self.embed_dim = None
+ self.emb_size = None
 else:
 self.patch_size = patch_size
 self.n_path = self.n_times // self.patch_size
 
- if neural_tokenizer and conv_in_channels != 1:
+ if neural_tokenizer and in_conv_channels != 1:
 warn(
 "The model is in Neural Tokenizer mode, but the variable "
- + "`conv_in_channels` is different from the default values."
- + "`conv_in_channels` is only needed for the Neural Decoder mode."
- + "conv_in_channels is not used in the Neural Tokenizer mode.",
+ + "`in_conv_channels` is different from the default values."
+ + "`in_conv_channels` is only needed for the Neural Decoder mode."
+ + "in_conv_channels is not used in the Neural Tokenizer mode.",
 UserWarning,
 )
- conv_in_channels = 1
+ in_conv_channels = 1
 # If you can use the model in Neural Tokenizer mode,
 # temporal conv layer will be use over the patched dataset
 if neural_tokenizer:
@@ -255,7 +231,7 @@ class Labram(EEGModuleMixin, nn.Module):
 (
 "temporal_conv",
 _TemporalConv(
- out_channels=conv_out_channels, activation=activation
+ out_channels=out_channels, activation=activation
 ),
 ),
 ]
@@ -273,8 +249,8 @@ class Labram(EEGModuleMixin, nn.Module):
 _PatchEmbed(
 n_times=self.n_times,
 patch_size=patch_size,
- in_channels=conv_in_channels,
- emb_dim=self.embed_dim,
+ in_channels=in_conv_channels,
+ emb_dim=self.emb_size,
 ),
 )
 
@@ -283,12 +259,12 @@ class Labram(EEGModuleMixin, nn.Module):
 out = self.patch_embed(dummy)
 # out.shape for tokenizer: (1, n_chans, emb_dim)
 # for decoder: (1, n_patch, patch_size, emb_dim), but we want last dim
- self.embed_dim = out.shape[-1]
- self.num_features = self.embed_dim
+ self.emb_size = out.shape[-1]
+ self.num_features = self.emb_size
 
 # Defining the parameters
 # Creating a parameter list with cls token]
- self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dim))
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, self.emb_size))
 # Positional embedding and time embedding are complementary
 # one is for the spatial information and the other is for the temporal
 # information.
@@ -297,26 +273,26 @@ class Labram(EEGModuleMixin, nn.Module):
 # information.
 if use_abs_pos_emb:
 self.position_embedding = nn.Parameter(
- torch.zeros(1, self.n_chans + 1, self.embed_dim),
+ torch.zeros(1, self.n_chans + 1, self.emb_size),
 requires_grad=True,
 )
 else:
 self.position_embedding = None
 
 self.temporal_embedding = nn.Parameter(
- torch.zeros(1, self.patch_embed[0].n_patchs + 1, self.embed_dim),
+ torch.zeros(1, self.patch_embed[0].n_patchs + 1, self.emb_size),
 requires_grad=True,
 )
 self.pos_drop = nn.Dropout(p=drop_prob)
 
 dpr = [
- x.item() for x in torch.linspace(0, drop_path_prob, num_layers)
+ x.item() for x in torch.linspace(0, drop_path_prob, n_layers)
 ] # stochastic depth decay rule
 self.blocks = nn.ModuleList(
 [
 _WindowsAttentionBlock(
- dim=self.embed_dim,
- num_heads=num_heads,
+ dim=self.emb_size,
+ num_heads=att_num_heads,
 mlp_ratio=mlp_ratio,
 qkv_bias=qkv_bias,
 qk_norm=qk_norm,
@@ -334,14 +310,14 @@ class Labram(EEGModuleMixin, nn.Module):
 attn_head_dim=attn_head_dim,
 activation=activation,
 )
- for i in range(num_layers)
+ for i in range(n_layers)
 ]
 )
- self.norm = nn.Identity() if use_mean_pooling else norm_layer(self.embed_dim)
- self.fc_norm = norm_layer(self.embed_dim) if use_mean_pooling else None
+ self.norm = nn.Identity() if use_mean_pooling else norm_layer(self.emb_size)
+ self.fc_norm = norm_layer(self.emb_size) if use_mean_pooling else None
 
 if self.n_outputs > 0:
- self.final_layer = nn.Linear(self.embed_dim, self.n_outputs)
+ self.final_layer = nn.Linear(self.emb_size, self.n_outputs)
 else:
 self.final_layer = nn.Identity()
 
@@ -439,7 +415,7 @@ class Labram(EEGModuleMixin, nn.Module):
 x = self.patch_embed(x)
 # x shape: (batch, n_chans, emb_dim)
 n_patch = self.n_chans
- temporal = self.embed_dim
+ temporal = self.emb_size
 else:
 # For neural decoder: input is (batch, n_chans, n_times)
 # patch_embed returns (batch, n_patchs, emb_dim)
@@ -486,7 +462,7 @@ class Labram(EEGModuleMixin, nn.Module):
 # In decoder mode, we have n_patch patches and don't need to expand
 # Just broadcast the temporal embedding
 if temporal is None:
- temporal = self.embed_dim
+ temporal = self.emb_size
 
 # Get temporal embeddings for n_patch patches
 n_time_tokens = min(n_patch, self.temporal_embedding.shape[1] - 1)
@@ -661,7 +637,7 @@ class Labram(EEGModuleMixin, nn.Module):
 
 
 class _SegmentPatch(nn.Module):
- r"""Segment and Patch for EEG data.
+ """Segment and Patch for EEG data.
 
 Adapted Patch Embedding inspired in the Visual Transform approach
 to extract the learned segmentor, we expect get the input shape as:
@@ -767,7 +743,7 @@ class _SegmentPatch(nn.Module):
 
 
 class _PatchEmbed(nn.Module):
- r"""EEG to Patch Embedding for Neural Decoder mode.
+ """EEG to Patch Embedding for Neural Decoder mode.
 
 This code is used when we want to apply the patch embedding
 after the codebook layer (Neural Decoder mode).
@@ -873,7 +849,7 @@ class _PatchEmbed(nn.Module):
 
 
 class _Attention(nn.Module):
- r"""
+ """
 Attention with the options of Window-based multi-head self attention (W-MSA).
 
 This code is strong inspired by:
@@ -1071,7 +1047,7 @@ class _Attention(nn.Module):
 
 
 class _WindowsAttentionBlock(nn.Module):
- r"""Blocks of Windows Attention with Layer norm and MLP.
+ """Blocks of Windows Attention with Layer norm and MLP.
 
 Notes: This code is strong inspired by:
 BeiTv2 from Microsoft.
@@ -1130,7 +1106,7 @@ class _WindowsAttentionBlock(nn.Module):
 attn_drop=0.0,
 drop_path=0.0,
 init_values=None,
- activation: type[nn.Module] = nn.GELU,
+ activation: nn.Module = nn.GELU,
 norm_layer=nn.LayerNorm,
 window_size=None,
 attn_head_dim=None,
@@ -1206,7 +1182,7 @@ class _WindowsAttentionBlock(nn.Module):
 
 
 class _TemporalConv(nn.Module):
- r"""
+ """
 Temporal Convolutional Module inspired by Visual Transformer.
 
 In this module we apply the follow steps three times repeatedly
@@ -1253,7 +1229,7 @@ class _TemporalConv(nn.Module):
 padding_1=(0, 7),
 kernel_size_2=(1, 3),
 padding_2=(0, 1),
- activation: type[nn.Module] = nn.GELU,
+ activation: nn.Module = nn.GELU,
 ):
 super().__init__()
 
@@ -27,9 +27,9 @@ from braindecode.modules.layers import DropPath
 
 
 class LUNA(EEGModuleMixin, nn.Module):
- r"""LUNA from Döner et al [LUNA]_.
+ """LUNA from Döner et al. [LUNA]_.
 
- :bdg-success:`Convolution` :bdg-danger:`Foundation Model` :bdg-dark-line:`Channel`
+ :bdg-success:`Convolution` :bdg-danger:`Large Brain Model` :bdg-dark-line:`Channel`
 
 .. figure:: https://arxiv.org/html/2510.22257v1/x1.png
 :align: center
@@ -44,61 +44,6 @@ class LUNA(EEGModuleMixin, nn.Module):
 3. Patch-wise Temporal Encoder (RoPE-based transformer)
 4. Decoder Heads (classification or reconstruction)
 
- .. important::
- **Pre-trained Weights Available**
-
- This model has pre-trained weights available on the Hugging Face Hub
- at `thorir/LUNA <https://huggingface.co/thorir/LUNA>`_.
-
- Available model variants:
-
- - **LUNA_base.safetensors** - Base model (embed_dim=64, num_queries=4, depth=8)
- - **LUNA_large.safetensors** - Large model (embed_dim=96, num_queries=6, depth=10)
- - **LUNA_huge.safetensors** - Huge model (embed_dim=128, num_queries=8, depth=24)
-
- Example loading for fine-tuning:
-
- .. code-block:: python
-
- from huggingface_hub import hf_hub_download
- from safetensors.torch import load_file
- from braindecode.models import LUNA
-
- # Download pre-trained weights
- model_path = hf_hub_download(
- repo_id="thorir/LUNA",
- filename="LUNA_base.safetensors",
- )
-
- # Create model for classification (fine-tuning)
- model = LUNA(
- n_outputs=2, # Number of classes for your task
- n_chans=22,
- n_times=1000,
- embed_dim=64,
- num_queries=4,
- depth=8,
- )
-
- # Load pre-trained encoder weights
- state_dict = load_file(model_path)
- # Apply key mapping for pretrained weights
- mapping = model.mapping.copy()
- mapping["cross_attn.temparature"] = "cross_attn.temperature"
- mapped_state_dict = {mapping.get(k, k): v for k, v in state_dict.items()}
- model.load_state_dict(mapped_state_dict, strict=False)
-
- To push your own trained model to the Hub:
-
- .. code-block:: python
-
- # After training your model
- model.push_to_hub(
- repo_id="username/my-luna-model", commit_message="Upload trained LUNA model"
- )
-
- Requires installing ``braindecode[hug]`` for Hub integration.
-
 Parameters
 ----------
 patch_size : int
@@ -457,7 +402,7 @@ def nerf_positional_encoding(coords: torch.Tensor, embed_size: int) -> torch.Ten
 
 
 class _ChannelEmbeddings(nn.Module):
- r"""
+ """
 This class creates embeddings for each EEG channel based on a predefined
 mapping of channel names to indices.
 
@@ -485,7 +430,7 @@ class _ChannelEmbeddings(nn.Module):
 
 
 class _FrequencyFeatureEmbedder(nn.Module):
- r"""
+ """
 This class takes data that is of the form (B, C, T) and patches it
 along the time dimension (T) into patches of size P (patch_size).
 The output is of the form (B, C, S, P) where S = T // P.
@@ -861,7 +806,7 @@ class _PatchEmbedNetwork(nn.Module):
 
 
 class _Mlp(nn.Module):
- r"""MLP as used in Vision Transformer, MLP-Mixer and related networks.
+ """MLP as used in Vision Transformer, MLP-Mixer and related networks.
 
 Code copied from timm.models.mlp.Mlp
 """
@@ -18,21 +18,20 @@ from braindecode.models.base import EEGModuleMixin
 
 
 class MEDFormer(EEGModuleMixin, nn.Module):
- r"""
- Medformer from Wang et al (2024) [Medformer2024]_.
+ r"""Medformer from Wang et al. (2024) [Medformer2024]_.
 
- :bdg-success:`Convolution` :bdg-danger:`Foundation Model`
+ :bdg-success:`Convolution` :bdg-danger:`Large Brain Model`
 
 .. figure:: https://raw.githubusercontent.com/DL4mHealth/Medformer/refs/heads/main/figs/medformer_architecture.png
 :align: center
 :alt: MEDFormer Architecture.
 
- a) Workflow. b) For the input sample :math:`{x}_{\text{in}}`, the authors apply :math:`n`
+ a) Workflow. b) For the input sample :math:`{x}_{\\textrm{in}}`, the authors apply :math:`n`
 different patch lengths in parallel to create patched features :math:`{x}_p^{(i)}`, where :math:`i`
 ranges from 1 to :math:`n`. Each patch length represents a different granularity. These patched
- features are linearly transformed into :math:`{x}_e^{(i)}` and augmented into :math:`\widetilde{x}_e^{(i)}`.
- c) The final patch embedding :math:`{x}^{(i)}` fuses augmented :math:`\widetilde{x}_e^{(i)}` with the
- positional embedding :math:`{W}_{\text{pos}}` and the granularity embedding :math:`{W}_{\text{gr}}^{(i)}`.
+ features are linearly transformed into :math:`{x}_e^{(i)}` and augmented into :math:`\\widetilde{x}_e^{(i)}`.
+ c) The final patch embedding :math:`{x}^{(i)}` fuses augmented :math:`\\widetilde{{x}}_e^{(i)}` with the
+ positional embedding :math:`{W}_{\\text{pos}}` and the granularity embedding :math:`{W}_{\\text{gr}}^{(i)}`.
 Each granularity employs a router :math:`{u}^{(i)}` to capture aggregated information.
 Intra-granularity attention focuses within individual granularities, and inter-granularity attention
 leverages the routers to integrate information across granularities.
@@ -116,7 +115,6 @@ class MEDFormer(EEGModuleMixin, nn.Module):
 **Role.** Learns representations and correlations within and across temporal scales while
 reducing complexity from :math:`O((\sum_i N_i)^2)` to
 :math:`O(\sum_i N_i^2 + n^2)` through the router mechanism.
-
 .. rubric:: Temporal, Spatial, and Spectral Encoding
 
 - **Temporal:** Multiple patch lengths in :attr:`patch_len_list` capture features at several
@@ -130,7 +128,7 @@ class MEDFormer(EEGModuleMixin, nn.Module):
 .. rubric:: Additional Mechanisms
 
 - **Granularity router:** Each granularity :math:`i` receives a dedicated router token
- :math:`\mathbf{u}^{(i)}`. Intra-attention updates the token, and inter-attention exchanges
+ :math:`\\mathbf{u}^{(i)}`. Intra-attention updates the token, and inter-attention exchanges
 aggregated information across scales.
 - **Complexity:** Router-mediated two-stage attention maintains :math:`O(T^2)` complexity for
 suitable patch lengths (e.g., power series), preserving transformer-like efficiency while
@@ -141,7 +139,7 @@ class MEDFormer(EEGModuleMixin, nn.Module):
 patch_len_list : list of int, optional
 Patch lengths for multi-granularity patching; each entry selects a temporal scale.
 The default is ``[14, 44, 45]``.
- embed_dim : int, optional
+ d_model : int, optional
 Embedding dimensionality. The default is ``128``.
 num_heads : int, optional
 Number of attention heads, which must divide :attr:`d_model`. The default is ``8``.
@@ -149,7 +147,7 @@ class MEDFormer(EEGModuleMixin, nn.Module):
 Dropout probability. The default is ``0.1``.
 no_inter_attn : bool, optional
 If ``True``, disables inter-granularity attention. The default is ``False``.
- num_layers : int, optional
+ n_layers : int, optional
 Number of encoder layers. The default is ``6``.
 dim_feedforward : int, optional
 Feedforward dimensionality. The default is ``256``.
@@ -191,16 +189,16 @@ class MEDFormer(EEGModuleMixin, nn.Module):
 sfreq=None,
 # Model parameters
 patch_len_list: Optional[List[int]] = None,
- embed_dim: int = 128,
+ d_model: int = 128,
 num_heads: int = 8,
 drop_prob: float = 0.1,
 no_inter_attn: bool = False,
- num_layers: int = 6,
+ n_layers: int = 6,
 dim_feedforward: int = 256,
- activation_trans: type[nn.Module] | None = nn.ReLU,
+ activation_trans: Optional[nn.Module] = nn.ReLU,
 single_channel: bool = False,
 output_attention: bool = True,
- activation_class: type[nn.Module] | None = nn.GELU,
+ activation_class: Optional[nn.Module] = nn.GELU,
 ):
 super().__init__(
 n_outputs=n_outputs,
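Likewise, a minimal sketch of constructing MEDFormer with the keyword arguments renamed in this hunk (the import path and the n_outputs/n_chans/n_times values are assumptions; d_model and n_layers repeat the new defaults):

    from braindecode.models import MEDFormer  # assumed export path

    model = MEDFormer(
        n_outputs=2,
        n_chans=22,
        n_times=1000,
        d_model=128,  # was embed_dim
        n_layers=6,   # was num_layers
    )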
@@ -217,11 +215,11 @@ class MEDFormer(EEGModuleMixin, nn.Module):
 # - enc_in refers to the number of time points
 
 # Save model parameters as instance variables
- self.embed_dim = embed_dim
+ self.d_model = d_model
 self.num_heads = num_heads
 self.drop_prob = drop_prob
 self.no_inter_attn = no_inter_attn
- self.num_layers = num_layers
+ self.n_layers = n_layers
 self.dim_feedforward = dim_feedforward
 self.activation_trans = activation_trans
 self.output_attention = output_attention
@@ -244,7 +242,7 @@ class MEDFormer(EEGModuleMixin, nn.Module):
 # Initialize the embedding layer.
 self.enc_embedding = _ListPatchEmbedding(
 enc_in=self.n_times,
- d_model=self.embed_dim,
+ d_model=self.d_model,
 seq_len=self.n_chans,
 patch_len_list=self.patch_len_list,
 stride_list=self.stride_list,
@@ -259,22 +257,22 @@ class MEDFormer(EEGModuleMixin, nn.Module):
 _EncoderLayer(
 attention=_MedformerLayer(
 num_blocks=len(self.patch_len_list),
- d_model=self.embed_dim,
+ d_model=self.d_model,
 num_heads=self.num_heads,
 dropout=self.drop_prob,
 output_attention=self.output_attention,
 no_inter=self.no_inter_attn,
 ),
- d_model=self.embed_dim,
+ d_model=self.d_model,
 dim_feedforward=self.dim_feedforward,
 dropout=self.drop_prob,
 activation=self.activation_trans()
 if self.activation_trans is not None
 else nn.ReLU(),
 )
- for _ in range(self.num_layers)
+ for _ in range(self.n_layers)
 ],
- norm_layer=torch.nn.LayerNorm(self.embed_dim),
+ norm_layer=torch.nn.LayerNorm(self.d_model),
 )
 
 # For classification tasks, add additional layers.
@@ -283,7 +281,7 @@ class MEDFormer(EEGModuleMixin, nn.Module):
 )
 self.dropout = nn.Dropout(self.drop_prob)
 self.final_layer = nn.Linear(
- self.embed_dim
+ self.d_model
 * len(self.patch_num_list)
 * (1 if not self.single_channel else self.n_chans),
 self.n_outputs,
@@ -11,9 +11,9 @@ from braindecode.models.base import EEGModuleMixin
 
 
 class MSVTNet(EEGModuleMixin, nn.Module):
- r"""MSVTNet model from Liu K et al (2024) from [msvt2024]_.
+ """MSVTNet model from Liu K et al (2024) from [msvt2024]_.
 
- :bdg-success:`Convolution` :bdg-secondary:`Recurrent` :bdg-info:`Attention/Transformer`
+ :bdg-success:`Convolution` :bdg-secondary:`Recurrent` :bdg-info:`Small Attention`
 
 This model implements a multi-scale convolutional transformer network
 for EEG signal classification, as described in [msvt2024]_.
@@ -41,9 +41,9 @@ class MSVTNet(EEGModuleMixin, nn.Module):
 Dropout probability for convolutional layers, by default 0.3.
 num_heads : int, optional
 Number of attention heads in the transformer encoder, by default 8.
- ffn_expansion_factor : float, optional
+ feedforward_ratio : float, optional
 Ratio to compute feedforward dimension in the transformer, by default 1.
- att_drop_prob : float, optional
+ drop_prob_trans : float, optional
 Dropout probability for the transformer, by default 0.5.
 num_layers : int, optional
 Number of transformer encoder layers, by default 2.
@@ -85,8 +85,8 @@ class MSVTNet(EEGModuleMixin, nn.Module):
 pool2_size: int = 7,
 drop_prob: float = 0.3,
 num_heads: int = 8,
- ffn_expansion_factor: float = 1,
- att_drop_prob: float = 0.5,
+ feedforward_ratio: float = 1,
+ drop_prob_trans: float = 0.5,
 num_layers: int = 2,
 activation: Type[nn.Module] = nn.ELU,
 return_features: bool = False,
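And a corresponding sketch for MSVTNet with the transformer arguments renamed in this hunk (import path and shape values are assumptions; the two keyword values repeat the new defaults):

    from braindecode.models import MSVTNet  # assumed export path

    model = MSVTNet(
        n_outputs=4,
        n_chans=22,
        n_times=1000,
        feedforward_ratio=1,   # was ffn_expansion_factor
        drop_prob_trans=0.5,   # was att_drop_prob
    )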
@@ -139,8 +139,8 @@ class MSVTNet(EEGModuleMixin, nn.Module):
 seq_len,
 d_model,
 num_heads,
- ffn_expansion_factor,
- att_drop_prob,
+ feedforward_ratio,
+ drop_prob_trans,
 num_layers,
 )
 
@@ -193,7 +193,7 @@ class MSVTNet(EEGModuleMixin, nn.Module):
 
 
 class _TSConv(nn.Sequential):
- r"""
+ """
 Time-Distributed Separable Convolution block.
 
 The architecture consists of:
@@ -280,7 +280,7 @@ class _TSConv(nn.Sequential):
 
 
 class _PositionalEncoding(nn.Module):
- r"""
+ """
 Positional encoding module that adds learnable positional embeddings.
 
 Parameters
@@ -303,7 +303,7 @@ class _PositionalEncoding(nn.Module):
 
 
 class _Transformer(nn.Module):
- r"""
+ """
 Transformer encoder module with learnable class token and positional encoding.
 
 Parameters
@@ -314,7 +314,7 @@ class _Transformer(nn.Module):
 Dimensionality of the model.
 num_heads : int
 Number of heads in the multihead attention.
- ffn_expansion_factor : float
+ feedforward_ratio : float
 Ratio to compute the dimension of the feedforward network.
 drop_prob : float, optional
 Dropout probability, by default 0.5.
@@ -327,7 +327,7 @@ class _Transformer(nn.Module):
 seq_length: int,
 d_model: int,
 num_heads: int,
- ffn_expansion_factor: float,
+ feedforward_ratio: float,
 drop_prob: float = 0.5,
 num_layers: int = 4,
 ) -> None:
@@ -335,7 +335,7 @@ class _Transformer(nn.Module):
 self.cls_embedding = nn.Parameter(torch.zeros(1, 1, d_model))
 self.pos_embedding = _PositionalEncoding(seq_length + 1, d_model)
 
- dim_ff = int(d_model * ffn_expansion_factor)
+ dim_ff = int(d_model * feedforward_ratio)
 self.dropout = nn.Dropout(drop_prob)
 self.trans = nn.TransformerEncoder(
 nn.TransformerEncoderLayer(
@@ -359,7 +359,7 @@ class _Transformer(nn.Module):
 
 
 class _DenseLayers(nn.Sequential):
- r"""
+ """
 Final classification layers.
 
 Parameters