braindecode 1.3.0.dev177069446__py3-none-any.whl → 1.3.0.dev177628147__py3-none-any.whl
This diff shows the content changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- braindecode/augmentation/__init__.py +3 -5
- braindecode/augmentation/base.py +5 -8
- braindecode/augmentation/functional.py +22 -25
- braindecode/augmentation/transforms.py +42 -51
- braindecode/classifier.py +16 -11
- braindecode/datasets/__init__.py +3 -5
- braindecode/datasets/base.py +13 -17
- braindecode/datasets/bbci.py +14 -13
- braindecode/datasets/bcicomp.py +5 -4
- braindecode/datasets/{bids/datasets.py → bids.py} +18 -12
- braindecode/datasets/{bids/iterable.py → experimental.py} +6 -8
- braindecode/datasets/{bids/hub.py → hub.py} +350 -375
- braindecode/datasets/{bids/hub_validation.py → hub_validation.py} +1 -2
- braindecode/datasets/mne.py +19 -19
- braindecode/datasets/moabb.py +10 -10
- braindecode/datasets/nmt.py +56 -58
- braindecode/datasets/sleep_physio_challe_18.py +5 -3
- braindecode/datasets/sleep_physionet.py +5 -5
- braindecode/datasets/tuh.py +18 -21
- braindecode/datasets/xy.py +9 -10
- braindecode/datautil/__init__.py +3 -3
- braindecode/datautil/serialization.py +20 -22
- braindecode/datautil/util.py +7 -120
- braindecode/eegneuralnet.py +52 -22
- braindecode/functional/functions.py +10 -7
- braindecode/functional/initialization.py +2 -3
- braindecode/models/__init__.py +3 -5
- braindecode/models/atcnet.py +39 -43
- braindecode/models/attentionbasenet.py +41 -37
- braindecode/models/attn_sleep.py +24 -26
- braindecode/models/base.py +6 -6
- braindecode/models/bendr.py +26 -50
- braindecode/models/biot.py +30 -61
- braindecode/models/contrawr.py +5 -5
- braindecode/models/ctnet.py +35 -35
- braindecode/models/deep4.py +5 -5
- braindecode/models/deepsleepnet.py +7 -7
- braindecode/models/eegconformer.py +26 -31
- braindecode/models/eeginception_erp.py +2 -2
- braindecode/models/eeginception_mi.py +6 -6
- braindecode/models/eegitnet.py +5 -5
- braindecode/models/eegminer.py +1 -1
- braindecode/models/eegnet.py +3 -3
- braindecode/models/eegnex.py +2 -2
- braindecode/models/eegsimpleconv.py +2 -2
- braindecode/models/eegsym.py +7 -7
- braindecode/models/eegtcnet.py +6 -6
- braindecode/models/fbcnet.py +2 -2
- braindecode/models/fblightconvnet.py +3 -3
- braindecode/models/fbmsnet.py +3 -3
- braindecode/models/hybrid.py +2 -2
- braindecode/models/ifnet.py +5 -5
- braindecode/models/labram.py +46 -70
- braindecode/models/luna.py +5 -60
- braindecode/models/medformer.py +21 -23
- braindecode/models/msvtnet.py +15 -15
- braindecode/models/patchedtransformer.py +55 -55
- braindecode/models/sccnet.py +2 -2
- braindecode/models/shallow_fbcsp.py +3 -5
- braindecode/models/signal_jepa.py +12 -39
- braindecode/models/sinc_shallow.py +4 -3
- braindecode/models/sleep_stager_blanco_2020.py +2 -2
- braindecode/models/sleep_stager_chambon_2018.py +2 -2
- braindecode/models/sparcnet.py +8 -8
- braindecode/models/sstdpn.py +869 -869
- braindecode/models/summary.csv +17 -19
- braindecode/models/syncnet.py +2 -2
- braindecode/models/tcn.py +5 -5
- braindecode/models/tidnet.py +3 -3
- braindecode/models/tsinception.py +3 -3
- braindecode/models/usleep.py +7 -7
- braindecode/models/util.py +14 -165
- braindecode/modules/__init__.py +1 -9
- braindecode/modules/activation.py +3 -29
- braindecode/modules/attention.py +0 -123
- braindecode/modules/blocks.py +1 -53
- braindecode/modules/convolution.py +0 -53
- braindecode/modules/filter.py +0 -31
- braindecode/modules/layers.py +0 -84
- braindecode/modules/linear.py +1 -22
- braindecode/modules/stats.py +0 -10
- braindecode/modules/util.py +0 -9
- braindecode/modules/wrapper.py +0 -17
- braindecode/preprocessing/preprocess.py +0 -3
- braindecode/regressor.py +18 -15
- braindecode/samplers/ssl.py +1 -1
- braindecode/util.py +28 -38
- braindecode/version.py +1 -1
- braindecode-1.3.0.dev177628147.dist-info/METADATA +202 -0
- braindecode-1.3.0.dev177628147.dist-info/RECORD +114 -0
- braindecode/datasets/bids/__init__.py +0 -54
- braindecode/datasets/bids/format.py +0 -717
- braindecode/datasets/bids/hub_format.py +0 -717
- braindecode/datasets/bids/hub_io.py +0 -197
- braindecode/datasets/chb_mit.py +0 -163
- braindecode/datasets/siena.py +0 -162
- braindecode/datasets/utils.py +0 -67
- braindecode/models/brainmodule.py +0 -845
- braindecode/models/config.py +0 -233
- braindecode/models/reve.py +0 -843
- braindecode-1.3.0.dev177069446.dist-info/METADATA +0 -230
- braindecode-1.3.0.dev177069446.dist-info/RECORD +0 -124
- {braindecode-1.3.0.dev177069446.dist-info → braindecode-1.3.0.dev177628147.dist-info}/WHEEL +0 -0
- {braindecode-1.3.0.dev177069446.dist-info → braindecode-1.3.0.dev177628147.dist-info}/licenses/LICENSE.txt +0 -0
- {braindecode-1.3.0.dev177069446.dist-info → braindecode-1.3.0.dev177628147.dist-info}/licenses/NOTICE.txt +0 -0
- {braindecode-1.3.0.dev177069446.dist-info → braindecode-1.3.0.dev177628147.dist-info}/top_level.txt +0 -0
braindecode/models/attentionbasenet.py
CHANGED

@@ -24,15 +24,16 @@ from braindecode.modules.attention import (


 class AttentionBaseNet(EEGModuleMixin, nn.Module):
-
+"""AttentionBaseNet from Wimpff M et al. (2023) [Martin2023]_.

-:bdg-success:`Convolution` :bdg-info:`Attention
+:bdg-success:`Convolution` :bdg-info:`Small Attention`

 .. figure:: https://content.cld.iop.org/journals/1741-2552/21/3/036020/revision2/jnead48b9f2_hr.jpg
 :align: center
 :alt: AttentionBaseNet Architecture
 :width: 640px

+
 .. rubric:: Architectural Overview

 AttentionBaseNet is a *convolution-first* network with a *channel-attention* stage.
@@ -49,6 +50,7 @@ class AttentionBaseNet(EEGModuleMixin, nn.Module):
 attention unit that *re-weights channels* (and optionally temporal positions) before
 classification.

+
 .. rubric:: Macro Components

 - :class:`_FeatureExtractor` **(Shallow conv stem → condensed feature map)**
@@ -90,6 +92,7 @@ class AttentionBaseNet(EEGModuleMixin, nn.Module):
 *Operations.* :class:`torch.nn.Flatten` → :class:`torch.nn.Linear` from
 ``(B, ch_dim·T₂)`` to classes.

+
 .. rubric:: Convolutional Details

 - **Temporal (where time-domain patterns are learned).**
@@ -108,6 +111,7 @@ class AttentionBaseNet(EEGModuleMixin, nn.Module):
 emerges from learned temporal kernels. When ``attention_mode="fca"``, a frequency
 channel attention (DCT-based) summarizes frequencies to drive channel weights.

+
 .. rubric:: Attention / Sequential Modules

 - **Type.** Channel attention chosen by ``attention_mode`` (SE, ECA, CBAM, CAT, GSoP,
@@ -120,6 +124,7 @@ class AttentionBaseNet(EEGModuleMixin, nn.Module):
 - **Role.** Re-weights channels (and optionally time) to highlight informative sources
 and suppress distractors, improving SNR ahead of the linear head.

+
 .. rubric:: Additional Mechanisms

 **Attention variants at a glance:**
@@ -158,6 +163,17 @@ class AttentionBaseNet(EEGModuleMixin, nn.Module):
 only after the stem learns stable filters. For small datasets, prefer simpler modes
 (``"se"``, ``"eca"``) before heavier ones (``"gsop"``, ``"encnet"``).

+Notes
+-----
+- Sequence length after each stage is computed internally; the final classifier expects
+a flattened ``ch_dim x T₂`` vector.
+- Attention operates on *channel* dimension by design; temporal gating exists only in
+specific variants (CBAM/CAT).
+- The paper and original code with more details about the methodological
+choices are available at the [Martin2023]_ and [MartinCode]_.
+
+.. versionadded:: 0.9
+
 Parameters
 ----------
 n_temporal_filters : int, optional
@@ -219,24 +235,13 @@ class AttentionBaseNet(EEGModuleMixin, nn.Module):
 kernel_size : int, default=9
 The kernel size used in certain types of attention mechanisms for convolution
 operations.
-activation
+activation: nn.Module, default=nn.ELU
 Activation function class to apply. Should be a PyTorch activation
 module class like ``nn.ReLU`` or ``nn.ELU``. Default is ``nn.ELU``.
 extra_params : bool, default=False
 Flag to indicate whether additional, custom parameters should be passed to
 the attention mechanism.

-Notes
------
-- Sequence length after each stage is computed internally; the final classifier expects
-a flattened ``ch_dim x T₂`` vector.
-- Attention operates on *channel* dimension by design; temporal gating exists only in
-specific variants (CBAM/CAT).
-- The paper and original code with more details about the methodological
-choices are available at the [Martin2023]_ and [MartinCode]_.
-
-.. versionadded:: 0.9
-
 References
 ----------
 .. [Martin2023] Wimpff, M., Gizzi, L., Zerfowski, J. and Yang, B., 2023.
@@ -272,7 +277,7 @@ class AttentionBaseNet(EEGModuleMixin, nn.Module):
 freq_idx: int = 0,
 n_codewords: int = 4,
 kernel_size: int = 9,
-activation:
+activation: nn.Module = nn.ELU,
 extra_params: bool = False,
 ):
 super(AttentionBaseNet, self).__init__()
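In the new version, the ``activation`` keyword of ``AttentionBaseNet`` is annotated as ``nn.Module`` with an ``nn.ELU`` default. A minimal usage sketch consistent with that signature; the ``n_chans``/``n_outputs``/``n_times`` keywords follow the usual ``EEGModuleMixin`` convention and the concrete values are illustrative assumptions, not taken from this diff:

    import torch
    from torch import nn
    from braindecode.models import AttentionBaseNet

    # Pass the activation *class* (not an instance); the model instantiates it internally.
    model = AttentionBaseNet(n_chans=22, n_outputs=4, n_times=1000, activation=nn.ELU)
    out = model(torch.randn(8, 22, 1000))  # expected output shape: (8, 4)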
@@ -392,8 +397,7 @@ class AttentionBaseNet(EEGModuleMixin, nn.Module):
 pool_length: int,
 ) -> int:
 """
-Calculates the minimum n_times required for the model to work
-
+Calculates the minimum n_times required for the model to work
 with the given parameters.

 The calculation is based on reversing the pooling operations to
@@ -409,15 +413,15 @@ class AttentionBaseNet(EEGModuleMixin, nn.Module):


 class _FeatureExtractor(nn.Module):
-
-A module for feature extraction of the data with temporal and spatial
-
+"""
+A module for feature extraction of the data with temporal and spatial
 transformations.

 This module sequentially processes the input through a series of layers:
 rearrangement, temporal convolution, batch normalization, spatial convolution,
 another batch normalization, an ELU non-linearity, average pooling, and dropout.

+
 Parameters
 ----------
 n_chans : int
@@ -435,7 +439,7 @@ class _FeatureExtractor(nn.Module):
 The stride of the average pooling operation. Default is 15.
 drop_prob : float, optional
 The dropout rate for regularization. Default is 0.5.
-activation
+activation: nn.Module, default=nn.ELU
 Activation function class to apply. Should be a PyTorch activation
 module class like ``nn.ReLU`` or ``nn.ELU``. Default is ``nn.ELU``.
 """
@@ -449,7 +453,7 @@ class _FeatureExtractor(nn.Module):
 pool_length: int = 75,
 pool_stride: int = 15,
 drop_prob: float = 0.5,
-activation:
+activation: nn.Module = nn.ELU,
 ):
 super().__init__()

@@ -489,9 +493,8 @@ class _FeatureExtractor(nn.Module):


 class _ChannelAttentionBlock(nn.Module):
-
-A neural network module implementing channel-wise attention mechanisms to enhance
-
+"""
+A neural network module implementing channel-wise attention mechanisms to enhance
 feature representations by selectively emphasizing important channels and suppressing
 less useful ones. This block integrates convolutional layers, pooling, dropout, and
 an optional attention mechanism that can be customized based on the given mode.
@@ -545,7 +548,7 @@ class _ChannelAttentionBlock(nn.Module):
 extra_params : bool, default=False
 Flag to indicate whether additional, custom parameters should be passed to
 the attention mechanism.
-activation
+activation: nn.Module, default=nn.ELU
 Activation function class to apply. Should be a PyTorch activation
 module class like ``nn.ReLU`` or ``nn.ELU``. Default is ``nn.ELU``.

@@ -561,7 +564,7 @@ class _ChannelAttentionBlock(nn.Module):
 attention_block : torch.nn.Module or None
 The attention mechanism applied to the output of the convolutional layers,
 if `attention_mode` is not None. Otherwise, it's set to None.
-activation
+activation: nn.Module, default=nn.ELU
 Activation function class to apply. Should be a PyTorch activation
 module class like ``nn.ReLU`` or ``nn.ELU``. Default is ``nn.ELU``.

@@ -571,6 +574,7 @@ class _ChannelAttentionBlock(nn.Module):
 >>> x = torch.randn(1, 16, 64, 64) # Example input tensor
 >>> output = channel_attention_block(x)
 The output tensor then can be further processed or used as input to another block.
+
 """

 def __init__(
@@ -588,7 +592,7 @@ class _ChannelAttentionBlock(nn.Module):
 n_codewords: int = 4,
 kernel_size: int = 9,
 extra_params: bool = False,
-activation:
+activation: nn.Module = nn.ELU,
 ):
 super().__init__()
 self.conv = nn.Sequential(
@@ -648,31 +652,31 @@ def get_attention_block(

 Parameters
 ----------
-attention_mode
+attention_mode: str
 The type of attention mechanism to apply.
-ch_dim
+ch_dim: int
 The number of input channels to the block.
-reduction_rate
+reduction_rate: int
 The reduction rate used in the attention mechanism to reduce
 dimensionality and computational complexity.
 Used in all the methods, except for the
 encnet and eca.
-use_mlp
+use_mlp: bool
 Flag to indicate whether an MLP (Multi-Layer Perceptron) should be used
 within the attention mechanism for further processing. Used in the ge
 and srm attention mechanism.
-seq_len
+seq_len: int
 The sequence length, used in certain types of attention mechanisms to
 process temporal dimensions. Used in the ge or fca attention mechanism.
-freq_idx
+freq_idx: int
 DCT index used in fca attention mechanism.
-n_codewords
+n_codewords: int
 The number of codewords (clusters) used in attention mechanisms
 that employ quantization or clustering strategies, encnet.
-kernel_size
+kernel_size: int
 The kernel size used in certain types of attention mechanisms for convolution
 operations, used in the cbam, eca, and cat attention mechanisms.
-extra_params
+extra_params: bool
 Parameter to pass additional parameters to the GatherExcite mechanism.

 Returns
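The ``get_attention_block`` docstring now carries explicit types for each parameter. A hedged sketch of a call consistent with that parameter list, assuming the remaining arguments keep their defaults; the import path is inferred from the file being diffed and is an assumption:

    from braindecode.models.attentionbasenet import get_attention_block

    # Squeeze-and-Excitation channel attention over 16 feature channels,
    # with a reduction rate of 4 inside the bottleneck.
    se_block = get_attention_block(attention_mode="se", ch_dim=16, reduction_rate=4)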
braindecode/models/attn_sleep.py
CHANGED
@@ -16,9 +16,9 @@ from braindecode.modules import CausalConv1d


 class AttnSleep(EEGModuleMixin, nn.Module):
-
+"""Sleep Staging Architecture from Eldele et al. (2021) [Eldele2021]_.

-:bdg-success:`Convolution` :bdg-info:`Attention
+:bdg-success:`Convolution` :bdg-info:`Small Attention`

 .. figure:: https://raw.githubusercontent.com/emadeldeen24/AttnSleep/refs/heads/main/imgs/AttnSleep.png
 :align: center
@@ -63,10 +63,10 @@ class AttnSleep(EEGModuleMixin, nn.Module):
 Alias for `n_outputs`.
 input_size_s : float
 Alias for `input_window_seconds`.
-activation
+activation: nn.Module, default=nn.ReLU
 Activation function class to apply. Should be a PyTorch activation
 module class like ``nn.ReLU`` or ``nn.ELU``. Default is ``nn.ReLU``.
-activation_mrcnn
+activation_mrcnn: nn.Module, default=nn.ReLU
 Activation function class to apply in the Mask R-CNN layer.
 Should be a PyTorch activation module class like ``nn.ReLU`` or
 ``nn.GELU``. Default is ``nn.GELU``.
@@ -90,8 +90,8 @@ class AttnSleep(EEGModuleMixin, nn.Module):
 d_ff=120,
 n_attn_heads=5,
 drop_prob=0.1,
-activation_mrcnn:
-activation:
+activation_mrcnn: nn.Module = nn.GELU,
+activation: nn.Module = nn.ReLU,
 input_window_seconds=None,
 n_outputs=None,
 after_reduced_cnn_size=30,
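In the new version the two activation keywords of ``AttnSleep`` carry explicit defaults (``activation_mrcnn=nn.GELU``, ``activation=nn.ReLU``). A minimal construction sketch with those defaults spelled out; the single-channel, 100 Hz, 30 s input shape follows the class docstring, and the remaining values are illustrative assumptions:

    import torch
    from torch import nn
    from braindecode.models import AttnSleep

    model = AttnSleep(
        n_chans=1,
        n_outputs=5,                # five sleep stages
        sfreq=100.0,
        input_window_seconds=30.0,  # n_times inferred as 3000
        activation_mrcnn=nn.GELU,
        activation=nn.ReLU,
    )
    out = model(torch.randn(2, 1, 3000))  # expected output shape: (2, 5)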
@@ -175,7 +175,7 @@ class AttnSleep(EEGModuleMixin, nn.Module):

 Parameters
 ----------
-x
+x: torch.Tensor
 Batch of EEG windows of shape (batch_size, n_channels, n_times).
 """

@@ -230,7 +230,7 @@ class _SEBasicBlock(nn.Module):
 planes,
 stride=1,
 downsample=None,
-activation:
+activation: nn.Module = nn.ReLU,
 *,
 reduction=16,
 ):
@@ -278,8 +278,8 @@ class _MRCNN(nn.Module):
 self,
 after_reduced_cnn_size,
 kernel_size=7,
-activation:
-activation_se:
+activation: nn.Module = nn.GELU,
+activation_se: nn.Module = nn.ReLU,
 ):
 super(_MRCNN, self).__init__()
 drate = 0.5
@@ -325,7 +325,7 @@ class _MRCNN(nn.Module):
 )

 def _make_layer(
-self, block, planes, blocks, stride=1, activate:
+self, block, planes, blocks, stride=1, activate: nn.Module = nn.ReLU
 ): # makes residual SE block
 downsample = None
 if stride != 1 or self.inplanes != planes * block.expansion:
@@ -363,7 +363,7 @@ class _MRCNN(nn.Module):
 def _attention(
 query: torch.Tensor, key: torch.Tensor, value: torch.Tensor
 ) -> tuple[torch.Tensor, torch.Tensor]:
-"""Implementation of Scaled dot product attention
+"""Implementation of Scaled dot product attention"""
 # d_k - dimension of the query and key vectors
 d_k = query.size(-1)
 scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
@@ -392,7 +392,7 @@ class _MultiHeadedAttention(nn.Module):
 self.dropout = nn.Dropout(p=dropout)

 def forward(self, query, key, value: torch.Tensor) -> torch.Tensor:
-"""Implements Multi-head attention
+"""Implements Multi-head attention"""
 nbatches = query.size(0)

 query = query.view(nbatches, -1, self.h, self.d_per_head).transpose(1, 2)
@@ -423,7 +423,9 @@ class _MultiHeadedAttention(nn.Module):


 class _ResidualLayerNormAttn(nn.Module):
-
+"""
+A residual connection followed by a layer norm.
+"""

 def __init__(self, size, dropout, fn_attn):
 super().__init__()
@@ -462,9 +464,8 @@ class _ResidualLayerNormFF(nn.Module):


 class _TCE(nn.Module):
-
-Transformer Encoder
-
+"""
+Transformer Encoder
 It is a stack of n layers.
 """

@@ -482,9 +483,8 @@ class _TCE(nn.Module):


 class _EncoderLayer(nn.Module):
-
-An encoder layer
-
+"""
+An encoder layer
 Made up of self-attention and a feed forward layer.
 Each of these sublayers have residual and layer norm, implemented by _ResidualLayerNorm.
 """
@@ -515,7 +515,7 @@ class _EncoderLayer(nn.Module):
 )

 def forward(self, x_in: torch.Tensor) -> torch.Tensor:
-"""Transformer Encoder
+"""Transformer Encoder"""
 query = self.conv(x_in)
 # Encoder self-attention
 x = self.residual_self_attn(query, x_in, x_in)
@@ -524,11 +524,9 @@ class _EncoderLayer(nn.Module):


 class _PositionwiseFeedForward(nn.Module):
-
+"""Positionwise feed-forward network."""

-def __init__(
-self, d_model, d_ff, dropout=0.1, activation: type[nn.Module] = nn.ReLU
-):
+def __init__(self, d_model, d_ff, dropout=0.1, activation: nn.Module = nn.ReLU):
 super().__init__()
 self.w_1 = nn.Linear(d_model, d_ff)
 self.w_2 = nn.Linear(d_ff, d_model)
@@ -544,6 +542,6 @@ class _PositionwiseFeedForward(nn.Module):
 "`SleepStagerEldele2021` was renamed to `AttnSleep` in v1.12 to follow original author's name; this alias will be removed in v1.14."
 )
 class SleepStagerEldele2021(AttnSleep):
-
+"""Deprecated alias for SleepStagerEldele2021."""

 pass
braindecode/models/base.py
CHANGED
@@ -192,7 +192,7 @@ class EEGModuleMixin(_BaseHubMixin, metaclass=NumpyDocstringInheritanceInitMeta)
 n_times is not None
 and input_window_seconds is not None
 and sfreq is not None
-and n_times !=
+and n_times != int(input_window_seconds * sfreq)
 ):
 raise ValueError(
 f"{n_times=} different from {input_window_seconds=} * {sfreq=}"
@@ -236,7 +236,7 @@ class EEGModuleMixin(_BaseHubMixin, metaclass=NumpyDocstringInheritanceInitMeta)
 and self._input_window_seconds is not None
 and self._sfreq is not None
 ):
-return
+return int(self._input_window_seconds * self._sfreq)
 elif self._n_times is None:
 raise ValueError(
 "n_times could not be inferred. "
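In the new version, the consistency check compares ``n_times`` against ``int(input_window_seconds * sfreq)``, and the ``n_times`` property falls back to that same product when ``n_times`` is not given explicitly. A small sketch of that relationship, using an arbitrary model class for illustration (not part of this diff):

    from braindecode.models import ShallowFBCSPNet

    # n_times is omitted and inferred as int(4.0 * 250.0) == 1000.
    model = ShallowFBCSPNet(n_chans=22, n_outputs=4, input_window_seconds=4.0, sfreq=250.0)
    assert model.n_times == 1000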
@@ -284,7 +284,7 @@ class EEGModuleMixin(_BaseHubMixin, metaclass=NumpyDocstringInheritanceInitMeta)

 Returns
 -------
-output_shape
+output_shape: tuple[int, ...]
 shape of the network output for `batch_size==1` (1, ...)
 """
 with torch.inference_mode():
@@ -330,14 +330,13 @@ class EEGModuleMixin(_BaseHubMixin, metaclass=NumpyDocstringInheritanceInitMeta)

 def to_dense_prediction_model(self, axis: tuple[int, ...] | int = (2, 3)) -> None:
 """
-Transform a sequential model with strides to a model that outputs
-
+Transform a sequential model with strides to a model that outputs
 dense predictions by removing the strides and instead inserting dilations.
 Modifies model in-place.

 Parameters
 ----------
-axis
+axis: int or (int,int)
 Axis to transform (in terms of intermediate output axes)
 can either be 2, 3, or (2,3).

@@ -346,6 +345,7 @@ class EEGModuleMixin(_BaseHubMixin, metaclass=NumpyDocstringInheritanceInitMeta)
 Does not yet work correctly for average pooling.
 Prior to version 0.1.7, there had been a bug that could move strides
 backwards one layer.
+
 """
 if not hasattr(axis, "__iter__"):
 axis = (axis,)
braindecode/models/bendr.py
CHANGED
@@ -8,15 +8,16 @@ from braindecode.models.base import EEGModuleMixin


 class BENDR(EEGModuleMixin, nn.Module):
-
+"""BENDR (BErt-inspired Neural Data Representations) from Kostas et al. (2021) [bendr]_.

-:bdg-success:`Convolution` :bdg-danger:`
+:bdg-success:`Convolution` :bdg-danger:`Large Brain Model`

 .. figure:: https://www.frontiersin.org/files/Articles/653659/fnhum-15-653659-HTML/image_m/fnhum-15-653659-g001.jpg
 :align: center
 :alt: BENDR Architecture
 :width: 1000px

+
 The **BENDR** architecture adapts techniques used for language modeling (LM) toward the
 development of encephalography modeling (EM) [bendr]_. It utilizes a self-supervised
 training objective to learn compressed representations of raw EEG signals [bendr]_. The
@@ -78,31 +79,6 @@ class BENDR(EEGModuleMixin, nn.Module):
 prepended to the BENDR sequence before input to the transformer, serving as the aggregate
 representation token [bendr]_.

-.. important::
-**Pre-trained Weights Available**
-
-This model has pre-trained weights available on the Hugging Face Hub.
-You can load them using:
-
-.. code-block:: python
-
-from braindecode.models import BENDR
-
-# Load pre-trained model from Hugging Face Hub
-# you can specify `n_outputs` for your downstream task
-model = BENDR.from_pretrained("braindecode/braindecode-bendr", n_outputs=2)
-
-To push your own trained model to the Hub:
-
-.. code-block:: python
-
-# After training your model
-model.push_to_hub(
-repo_id="username/my-bendr-model", commit_message="Upload trained BENDR model"
-)
-
-Requires installing ``braindecode[hug]`` for Hub integration.
-
 Notes
 -----
 * The full BENDR architecture contains a large number of parameters; configuration (1)
@@ -119,27 +95,6 @@ class BENDR(EEGModuleMixin, nn.Module):
 **self-supervised pre-training** on large, unlabeled EEG datasets (like TUEG) followed
 by subsequent fine-tuning on the specific downstream classification task [bendr]_.

-References
-----------
-.. [bendr] Kostas, D., Aroca-Ouellette, S., & Rudzicz, F. (2021).
-BENDR: Using transformers and a contrastive self-supervised learning task to learn from
-massive amounts of EEG data.
-Frontiers in Human Neuroscience, 15, 653659.
-https://doi.org/10.3389/fnhum.2021.653659
-.. [wav2vec2] Baevski, A., Zhou, Y., Mohamed, A., & Auli, M. (2020).
-wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations.
-In H. Larochelle, M. Ranzato, R. Hadsell, M. F. Balcan, & H. Lin (Eds),
-Advances in Neural Information Processing Systems (Vol. 33, pp. 12449-12460).
-https://dl.acm.org/doi/10.5555/3495724.3496768
-.. [tfixup] Huang, T. K., Liang, S., Jha, A., & Salakhutdinov, R. (2020).
-Improving Transformer Optimization Through Better Initialization.
-In International Conference on Machine Learning (pp. 4475-4483). PMLR.
-https://dl.acm.org/doi/10.5555/3524938.3525354
-.. [layerdrop] Fan, A., Grave, E., & Joulin, A. (2020).
-Reducing Transformer Depth on Demand with Structured Dropout.
-International Conference on Learning Representations.
-Retrieved from https://openreview.net/forum?id=SylO2yStDr
-
 Parameters
 ----------
 encoder_h : int, default=512
@@ -183,6 +138,27 @@ class BENDR(EEGModuleMixin, nn.Module):
 final_layer : bool, default=True
 If True, includes a final linear classification layer that maps from encoder_h to
 n_outputs. If False, the model outputs the contextualized features directly.
+
+References
+----------
+.. [bendr] Kostas, D., Aroca-Ouellette, S., & Rudzicz, F. (2021).
+BENDR: Using transformers and a contrastive self-supervised learning task to learn from
+massive amounts of EEG data.
+Frontiers in Human Neuroscience, 15, 653659.
+https://doi.org/10.3389/fnhum.2021.653659
+.. [wav2vec2] Baevski, A., Zhou, Y., Mohamed, A., & Auli, M. (2020).
+wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations.
+In H. Larochelle, M. Ranzato, R. Hadsell, M. F. Balcan, & H. Lin (Eds),
+Advances in Neural Information Processing Systems (Vol. 33, pp. 12449-12460).
+https://dl.acm.org/doi/10.5555/3495724.3496768
+.. [tfixup] Huang, T. K., Liang, S., Jha, A., & Salakhutdinov, R. (2020).
+Improving Transformer Optimization Through Better Initialization.
+In International Conference on Machine Learning (pp. 4475-4483). PMLR.
+https://dl.acm.org/doi/10.5555/3524938.3525354
+.. [layerdrop] Fan, A., Grave, E., & Joulin, A. (2020).
+Reducing Transformer Depth on Demand with Structured Dropout.
+International Conference on Learning Representations.
+Retrieved from https://openreview.net/forum?id=SylO2yStDr
 """

 def __init__(
@@ -200,7 +176,7 @@ class BENDR(EEGModuleMixin, nn.Module):
 projection_head=False, # Whether encoder should project back to input feature size (unused in original fine-tuning)
 drop_prob=0.1, # General dropout probability (paper: 0.15 for pretraining, 0.0 for fine-tuning)
 layer_drop=0.0, # Probability of dropping transformer layers during training (paper: 0.01 for pretraining)
-activation
+activation=nn.GELU, # Activation function
 # Transformer specific parameters
 transformer_layers=8,
 transformer_heads=8,
|
@@ -349,7 +325,7 @@ class _ConvEncoderBENDR(nn.Module):
|
|
|
349
325
|
|
|
350
326
|
|
|
351
327
|
class _BENDRContextualizer(nn.Module):
|
|
352
|
-
|
|
328
|
+
"""Transformer-based contextualizer for BENDR."""
|
|
353
329
|
|
|
354
330
|
def __init__(
|
|
355
331
|
self,
|