braindecode 0.8.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release.

This version of braindecode might be problematic.

Files changed (108)
  1. braindecode/__init__.py +1 -2
  2. braindecode/augmentation/__init__.py +39 -19
  3. braindecode/augmentation/base.py +25 -28
  4. braindecode/augmentation/functional.py +237 -100
  5. braindecode/augmentation/transforms.py +325 -158
  6. braindecode/classifier.py +26 -24
  7. braindecode/datasets/__init__.py +28 -10
  8. braindecode/datasets/base.py +220 -134
  9. braindecode/datasets/bbci.py +43 -52
  10. braindecode/datasets/bcicomp.py +47 -32
  11. braindecode/datasets/bids.py +245 -0
  12. braindecode/datasets/mne.py +45 -24
  13. braindecode/datasets/moabb.py +87 -27
  14. braindecode/datasets/nmt.py +311 -0
  15. braindecode/datasets/sleep_physio_challe_18.py +412 -0
  16. braindecode/datasets/sleep_physionet.py +43 -26
  17. braindecode/datasets/tuh.py +324 -140
  18. braindecode/datasets/xy.py +27 -12
  19. braindecode/datautil/__init__.py +37 -18
  20. braindecode/datautil/serialization.py +110 -72
  21. braindecode/eegneuralnet.py +63 -47
  22. braindecode/functional/__init__.py +22 -0
  23. braindecode/functional/functions.py +250 -0
  24. braindecode/functional/initialization.py +47 -0
  25. braindecode/models/__init__.py +84 -14
  26. braindecode/models/atcnet.py +193 -164
  27. braindecode/models/attentionbasenet.py +599 -0
  28. braindecode/models/base.py +86 -102
  29. braindecode/models/biot.py +504 -0
  30. braindecode/models/contrawr.py +317 -0
  31. braindecode/models/ctnet.py +536 -0
  32. braindecode/models/deep4.py +116 -77
  33. braindecode/models/deepsleepnet.py +149 -119
  34. braindecode/models/eegconformer.py +112 -173
  35. braindecode/models/eeginception_erp.py +109 -118
  36. braindecode/models/eeginception_mi.py +161 -97
  37. braindecode/models/eegitnet.py +215 -152
  38. braindecode/models/eegminer.py +254 -0
  39. braindecode/models/eegnet.py +228 -161
  40. braindecode/models/eegnex.py +247 -0
  41. braindecode/models/eegresnet.py +234 -152
  42. braindecode/models/eegsimpleconv.py +199 -0
  43. braindecode/models/eegtcnet.py +335 -0
  44. braindecode/models/fbcnet.py +221 -0
  45. braindecode/models/fblightconvnet.py +313 -0
  46. braindecode/models/fbmsnet.py +324 -0
  47. braindecode/models/hybrid.py +52 -71
  48. braindecode/models/ifnet.py +441 -0
  49. braindecode/models/labram.py +1186 -0
  50. braindecode/models/msvtnet.py +375 -0
  51. braindecode/models/sccnet.py +207 -0
  52. braindecode/models/shallow_fbcsp.py +50 -56
  53. braindecode/models/signal_jepa.py +1011 -0
  54. braindecode/models/sinc_shallow.py +337 -0
  55. braindecode/models/sleep_stager_blanco_2020.py +55 -46
  56. braindecode/models/sleep_stager_chambon_2018.py +54 -53
  57. braindecode/models/sleep_stager_eldele_2021.py +247 -141
  58. braindecode/models/sparcnet.py +424 -0
  59. braindecode/models/summary.csv +41 -0
  60. braindecode/models/syncnet.py +232 -0
  61. braindecode/models/tcn.py +158 -88
  62. braindecode/models/tidnet.py +280 -167
  63. braindecode/models/tsinception.py +283 -0
  64. braindecode/models/usleep.py +190 -177
  65. braindecode/models/util.py +109 -145
  66. braindecode/modules/__init__.py +84 -0
  67. braindecode/modules/activation.py +60 -0
  68. braindecode/modules/attention.py +757 -0
  69. braindecode/modules/blocks.py +108 -0
  70. braindecode/modules/convolution.py +274 -0
  71. braindecode/modules/filter.py +628 -0
  72. braindecode/modules/layers.py +131 -0
  73. braindecode/modules/linear.py +49 -0
  74. braindecode/modules/parametrization.py +38 -0
  75. braindecode/modules/stats.py +77 -0
  76. braindecode/modules/util.py +76 -0
  77. braindecode/modules/wrapper.py +73 -0
  78. braindecode/preprocessing/__init__.py +36 -11
  79. braindecode/preprocessing/mne_preprocess.py +13 -7
  80. braindecode/preprocessing/preprocess.py +139 -75
  81. braindecode/preprocessing/windowers.py +576 -187
  82. braindecode/regressor.py +23 -12
  83. braindecode/samplers/__init__.py +16 -8
  84. braindecode/samplers/base.py +146 -32
  85. braindecode/samplers/ssl.py +162 -17
  86. braindecode/training/__init__.py +18 -10
  87. braindecode/training/callbacks.py +2 -4
  88. braindecode/training/losses.py +3 -8
  89. braindecode/training/scoring.py +76 -68
  90. braindecode/util.py +55 -59
  91. braindecode/version.py +1 -1
  92. braindecode/visualization/__init__.py +2 -3
  93. braindecode/visualization/confusion_matrices.py +117 -73
  94. braindecode/visualization/gradients.py +14 -10
  95. {braindecode-0.8.1.dist-info → braindecode-1.1.0.dist-info}/METADATA +42 -58
  96. braindecode-1.1.0.dist-info/RECORD +101 -0
  97. {braindecode-0.8.1.dist-info → braindecode-1.1.0.dist-info}/WHEEL +1 -1
  98. {braindecode-0.8.1.dist-info → braindecode-1.1.0.dist-info/licenses}/LICENSE.txt +1 -1
  99. braindecode-1.1.0.dist-info/licenses/NOTICE.txt +20 -0
  100. braindecode/datautil/mne.py +0 -9
  101. braindecode/datautil/preprocess.py +0 -12
  102. braindecode/datautil/windowers.py +0 -6
  103. braindecode/datautil/xy.py +0 -9
  104. braindecode/models/eeginception.py +0 -317
  105. braindecode/models/functions.py +0 -47
  106. braindecode/models/modules.py +0 -358
  107. braindecode-0.8.1.dist-info/RECORD +0 -68
  108. {braindecode-0.8.1.dist-info → braindecode-1.1.0.dist-info}/top_level.txt +0 -0
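
The remainder of this diff renders the changes to braindecode/models/sleep_stager_eldele_2021.py (entry 57 above). The user-visible signature changes are: the deprecated n_classes and input_size_s aliases are removed together with the deprecated_args helper, dropout is renamed to drop_prob, and two activation-class arguments (activation, activation_mrcnn) are added. Below is a minimal usage sketch against the 1.1.0 signature; it assumes the configuration the model itself expects (30 s windows at 100 Hz with the default d_model=80, single-channel input) and that the class is exported from braindecode.models:

    import torch
    from braindecode.models import SleepStagerEldele2021

    model = SleepStagerEldele2021(
        n_outputs=5,              # 0.8.1 accepted this as n_classes
        n_chans=1,
        sfreq=100,
        input_window_seconds=30,  # 0.8.1 accepted this as input_size_s
        drop_prob=0.1,            # 0.8.1 called this dropout
    )
    x = torch.randn(8, 1, 3000)   # (batch, n_chans, n_times), 30 s at 100 Hz
    logits = model(x)             # (batch, n_outputs)
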
@@ -3,18 +3,23 @@
 # License: BSD (3-clause)
 
 import math
-import copy
-from copy import deepcopy
 import warnings
+from copy import deepcopy
 
 import torch
-from torch import nn
 import torch.nn.functional as F
-from .base import EEGModuleMixin, deprecated_args
+from torch import nn
+
+from braindecode.models.base import EEGModuleMixin
+from braindecode.modules import CausalConv1d
 
 
 class SleepStagerEldele2021(EEGModuleMixin, nn.Module):
-    """Sleep Staging Architecture from Eldele et al 2021.
+    """Sleep Staging Architecture from Eldele et al. (2021) [Eldele2021]_.
+
+    .. figure:: https://raw.githubusercontent.com/emadeldeen24/AttnSleep/refs/heads/main/imgs/AttnSleep.png
+       :align: center
+       :alt: SleepStagerEldele2021 Architecture
 
     Attention based Neural Net for sleep staging as described in [Eldele2021]_.
     The code for the paper and this model is also available at [1]_.
@@ -43,7 +48,7 @@ class SleepStagerEldele2021(EEGModuleMixin, nn.Module):
         input dimension of the second FC layer in the same.
     n_attn_heads : int
         Number of attention heads. It should be a factor of d_model
-    dropout : float
+    drop_prob : float
         Dropout rate in the PositionWiseFeedforward layer and the TCE layers.
     after_reduced_cnn_size : int
         Number of output channels produced by the convolution in the AFR module.
@@ -55,6 +60,13 @@ class SleepStagerEldele2021(EEGModuleMixin, nn.Module):
         Alias for `n_outputs`.
     input_size_s : float
         Alias for `input_window_seconds`.
+    activation : nn.Module, default=nn.ReLU
+        Activation function class to apply. Should be a PyTorch activation
+        module class like ``nn.ReLU`` or ``nn.ELU``. Default is ``nn.ReLU``.
+    activation_mrcnn : nn.Module, default=nn.GELU
+        Activation function class to apply in the multi-resolution CNN
+        (MRCNN) block. Should be a PyTorch activation module class like
+        ``nn.ReLU`` or ``nn.GELU``. Default is ``nn.GELU``.
 
     References
     ----------
@@ -68,28 +80,23 @@ class SleepStagerEldele2021(EEGModuleMixin, nn.Module):
     """
 
     def __init__(
-            self,
-            sfreq=None,
-            n_tce=2,
-            d_model=80,
-            d_ff=120,
-            n_attn_heads=5,
-            dropout=0.1,
-            input_window_seconds=30,
-            n_outputs=5,
-            after_reduced_cnn_size=30,
-            return_feats=False,
-            chs_info=None,
-            n_chans=None,
-            n_times=None,
-            n_classes=None,
-            input_size_s=None,
+        self,
+        sfreq=None,
+        n_tce=2,
+        d_model=80,
+        d_ff=120,
+        n_attn_heads=5,
+        drop_prob=0.1,
+        activation_mrcnn: nn.Module = nn.GELU,
+        activation: nn.Module = nn.ReLU,
+        input_window_seconds=None,
+        n_outputs=None,
+        after_reduced_cnn_size=30,
+        return_feats=False,
+        chs_info=None,
+        n_chans=None,
+        n_times=None,
     ):
-        n_outputs, input_window_seconds, = deprecated_args(
-            self,
-            ("n_classes", "n_outputs", n_classes, n_outputs),
-            ("input_size_s", "input_window_seconds", input_size_s, input_window_seconds),
-        )
         super().__init__(
             n_outputs=n_outputs,
             n_chans=n_chans,
@@ -99,19 +106,25 @@ class SleepStagerEldele2021(EEGModuleMixin, nn.Module):
             sfreq=sfreq,
         )
         del n_outputs, n_chans, chs_info, n_times, input_window_seconds, sfreq
-        del n_classes, input_size_s
 
         self.mapping = {
             "fc.weight": "final_layer.weight",
-            "fc.bias": "final_layer.bias"
+            "fc.bias": "final_layer.bias",
         }
 
-        if not ((self.input_window_seconds == 30 and self.sfreq == 100 and d_model == 80) or
-                (self.input_window_seconds == 30 and self.sfreq == 125 and d_model == 100)):
-            warnings.warn("This model was designed originally for input windows of 30sec at 100Hz, "
-                          "with d_model at 80 or at 125Hz, with d_model at 100, to use anything "
-                          "other than this may cause errors or cause the model to perform in "
-                          "other ways than intended", UserWarning)
+        if not (
+            (self.input_window_seconds == 30 and self.sfreq == 100 and d_model == 80)
+            or (
+                self.input_window_seconds == 30 and self.sfreq == 125 and d_model == 100
+            )
+        ):
+            warnings.warn(
+                "This model was originally designed for 30 s input windows, "
+                "either at 100 Hz with d_model=80 or at 125 Hz with "
+                "d_model=100; other configurations may cause errors or make "
+                "the model behave in unintended ways.",
+                UserWarning,
+            )
 
         # the usual kernel size for the mrcnn, for sfreq 100
         kernel_size = 7
@@ -119,11 +132,20 @@ class SleepStagerEldele2021(EEGModuleMixin, nn.Module):
         if self.sfreq == 125:
             kernel_size = 6
 
-        mrcnn = _MRCNN(after_reduced_cnn_size, kernel_size)
+        mrcnn = _MRCNN(
+            after_reduced_cnn_size,
+            kernel_size,
+            activation=activation_mrcnn,
+            activation_se=activation,
+        )
         attn = _MultiHeadedAttention(n_attn_heads, d_model, after_reduced_cnn_size)
-        ff = _PositionwiseFeedForward(d_model, d_ff, dropout)
-        tce = _TCE(_EncoderLayer(d_model, deepcopy(attn), deepcopy(ff), after_reduced_cnn_size,
-                                 dropout), n_tce)
+        ff = _PositionwiseFeedForward(d_model, d_ff, drop_prob, activation=activation)
+        tce = _TCE(
+            _EncoderLayer(
+                d_model, deepcopy(attn), deepcopy(ff), after_reduced_cnn_size, drop_prob
+            ),
+            n_tce,
+        )
 
         self.feature_extractor = nn.Sequential(mrcnn, tce)
         self.len_last_layer = self._len_last_layer(self.n_times)
@@ -133,7 +155,9 @@ class SleepStagerEldele2021(EEGModuleMixin, nn.Module):
         """if return_feats:
             raise ValueError("return_feat == True is not accepted anymore")"""
         if not return_feats:
-            self.final_layer = nn.Linear(d_model * after_reduced_cnn_size, self.n_outputs)
+            self.final_layer = nn.Linear(
+                d_model * after_reduced_cnn_size, self.n_outputs
+            )
 
     def _len_last_layer(self, input_size):
         self.feature_extractor.eval()
@@ -142,7 +166,7 @@ class SleepStagerEldele2021(EEGModuleMixin, nn.Module):
         self.feature_extractor.train()
         return len(out.flatten())
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Forward pass.
 
@@ -153,27 +177,41 @@ class SleepStagerEldele2021(EEGModuleMixin, nn.Module):
         """
 
         encoded_features = self.feature_extractor(x)
-        encoded_features = encoded_features.contiguous().view(encoded_features.shape[0], -1)
+        encoded_features = encoded_features.contiguous().view(
+            encoded_features.shape[0], -1
+        )
 
         if self.return_feats:
             return encoded_features
-        else:
-            final_output = self.final_layer(encoded_features)
-            return final_output
+
+        return self.final_layer(encoded_features)
 
 
 class _SELayer(nn.Module):
-    def __init__(self, channel, reduction=16):
+    def __init__(self, channel, reduction=16, activation=nn.ReLU):
         super(_SELayer, self).__init__()
         self.avg_pool = nn.AdaptiveAvgPool1d(1)
         self.fc = nn.Sequential(
             nn.Linear(channel, channel // reduction, bias=False),
-            nn.ReLU(inplace=True),
+            activation(inplace=True),
             nn.Linear(channel // reduction, channel, bias=False),
-            nn.Sigmoid()
+            nn.Sigmoid(),
         )
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Forward pass of the SE layer.
+
+        Parameters
+        ----------
+        x : torch.Tensor
+            Input tensor of shape (batch_size, channel, length).
+
+        Returns
+        -------
+        torch.Tensor
+            Output tensor after applying the SE recalibration.
+        """
         b, c, _ = x.size()
         y = self.avg_pool(x).view(b, c)
         y = self.fc(y).view(b, c, 1)
@@ -183,22 +221,43 @@ class _SELayer(nn.Module):
 class _SEBasicBlock(nn.Module):
     expansion = 1
 
-    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
-                 base_width=64, dilation=1, norm_layer=None,
-                 *, reduction=16):
+    def __init__(
+        self,
+        inplanes,
+        planes,
+        stride=1,
+        downsample=None,
+        activation: nn.Module = nn.ReLU,
+        *,
+        reduction=16,
+    ):
         super(_SEBasicBlock, self).__init__()
         self.conv1 = nn.Conv1d(inplanes, planes, stride)
         self.bn1 = nn.BatchNorm1d(planes)
-        self.relu = nn.ReLU(inplace=True)
+        self.relu = activation(inplace=True)
         self.conv2 = nn.Conv1d(planes, planes, 1)
         self.bn2 = nn.BatchNorm1d(planes)
         self.se = _SELayer(planes, reduction)
         self.downsample = downsample
         self.stride = stride
-        self.features = nn.Sequential(self.conv1, self.bn1, self.relu, self.conv2, self.bn2,
-                                      self.se)
+        self.features = nn.Sequential(
+            self.conv1, self.bn1, self.relu, self.conv2, self.bn2, self.se
+        )
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Forward pass of the SE basic block.
+
+        Parameters
+        ----------
+        x : torch.Tensor
+            Input tensor of shape (batch_size, n_chans, n_times).
+
+        Returns
+        -------
+        torch.Tensor
+            Output tensor after applying the SE recalibration.
+        """
         residual = x
         out = self.features(x)
 
@@ -212,26 +271,29 @@ class _SEBasicBlock(nn.Module):
 
 
 class _MRCNN(nn.Module):
-    def __init__(self, after_reduced_cnn_size, kernel_size=7):
+    def __init__(
+        self,
+        after_reduced_cnn_size,
+        kernel_size=7,
+        activation: nn.Module = nn.GELU,
+        activation_se: nn.Module = nn.ReLU,
+    ):
         super(_MRCNN, self).__init__()
         drate = 0.5
-        self.GELU = nn.GELU()
+        self.GELU = activation()
         self.features1 = nn.Sequential(
             nn.Conv1d(1, 64, kernel_size=50, stride=6, bias=False, padding=24),
             nn.BatchNorm1d(64),
             self.GELU,
             nn.MaxPool1d(kernel_size=8, stride=2, padding=4),
             nn.Dropout(drate),
-
             nn.Conv1d(64, 128, kernel_size=8, stride=1, bias=False, padding=4),
             nn.BatchNorm1d(128),
             self.GELU,
-
             nn.Conv1d(128, 128, kernel_size=8, stride=1, bias=False, padding=4),
             nn.BatchNorm1d(128),
             self.GELU,
-
-            nn.MaxPool1d(kernel_size=4, stride=4, padding=2)
+            nn.MaxPool1d(kernel_size=4, stride=4, padding=2),
         )
 
         self.features2 = nn.Sequential(
@@ -240,28 +302,38 @@ class _MRCNN(nn.Module):
             self.GELU,
             nn.MaxPool1d(kernel_size=4, stride=2, padding=2),
             nn.Dropout(drate),
-
-            nn.Conv1d(64, 128, kernel_size=kernel_size, stride=1, bias=False, padding=3),
+            nn.Conv1d(
+                64, 128, kernel_size=kernel_size, stride=1, bias=False, padding=3
+            ),
             nn.BatchNorm1d(128),
             self.GELU,
-
-            nn.Conv1d(128, 128, kernel_size=kernel_size, stride=1, bias=False, padding=3),
+            nn.Conv1d(
+                128, 128, kernel_size=kernel_size, stride=1, bias=False, padding=3
+            ),
             nn.BatchNorm1d(128),
             self.GELU,
-
-            nn.MaxPool1d(kernel_size=2, stride=2, padding=1)
+            nn.MaxPool1d(kernel_size=2, stride=2, padding=1),
         )
 
         self.dropout = nn.Dropout(drate)
         self.inplanes = 128
-        self.AFR = self._make_layer(_SEBasicBlock, after_reduced_cnn_size, 1)
+        self.AFR = self._make_layer(
+            _SEBasicBlock, after_reduced_cnn_size, 1, activate=activation_se
+        )
 
-    def _make_layer(self, block, planes, blocks, stride=1):  # makes residual SE block
+    def _make_layer(
+        self, block, planes, blocks, stride=1, activate: nn.Module = nn.ReLU
+    ):  # makes residual SE block
         downsample = None
         if stride != 1 or self.inplanes != planes * block.expansion:
             downsample = nn.Sequential(
-                nn.Conv1d(self.inplanes, planes * block.expansion,
-                          kernel_size=1, stride=stride, bias=False),
+                nn.Conv1d(
+                    self.inplanes,
+                    planes * block.expansion,
+                    kernel_size=1,
+                    stride=stride,
+                    bias=False,
+                ),
                 nn.BatchNorm1d(planes * block.expansion),
             )
 
@@ -269,11 +341,11 @@ class _MRCNN(nn.Module):
         layers.append(block(self.inplanes, planes, stride, downsample))
         self.inplanes = planes * block.expansion
         for i in range(1, blocks):
-            layers.append(block(self.inplanes, planes))
+            layers.append(block(self.inplanes, planes, activation=activate))
 
         return nn.Sequential(*layers)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         x1 = self.features1(x)
         x2 = self.features2(x)
         x_concat = torch.cat((x1, x2), dim=2)
@@ -285,93 +357,107 @@ class _MRCNN(nn.Module):
 ##########################################################################################
 
 
-def _attention(query, key, value, dropout=None):
+def _attention(
+    query: torch.Tensor, key: torch.Tensor, value: torch.Tensor
+) -> tuple[torch.Tensor, torch.Tensor]:
     """Implementation of Scaled dot product attention"""
     # d_k - dimension of the query and key vectors
     d_k = query.size(-1)
     scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
-
-    p_attn = F.softmax(scores, dim=-1)
-    if dropout is not None:
-        p_attn = dropout(p_attn)
-    return torch.matmul(p_attn, value), p_attn
-
-
-class _CausalConv1d(torch.nn.Conv1d):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride=1,
-                 dilation=1,
-                 groups=1,
-                 bias=True):
-        self.__padding = (kernel_size - 1) * dilation
-
-        super(_CausalConv1d, self).__init__(
-            in_channels,
-            out_channels,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=self.__padding,
-            dilation=dilation,
-            groups=groups,
-            bias=bias)
-
-    def forward(self, input):
-        result = super(_CausalConv1d, self).forward(input)
-        if self.__padding != 0:
-            return result[:, :, :-self.__padding]
-        return result
+    p_attn = F.softmax(scores, dim=-1)  # attention weights
+    output = torch.matmul(p_attn, value)  # (B, h, T, d_k)
+    return output, p_attn
 
 
 class _MultiHeadedAttention(nn.Module):
     def __init__(self, h, d_model, after_reduced_cnn_size, dropout=0.1):
         """Take in model size and number of heads."""
-        super(_MultiHeadedAttention, self).__init__()
+        super().__init__()
         assert d_model % h == 0
         self.d_per_head = d_model // h
         self.h = h
 
-        self.convs = _clones(_CausalConv1d(after_reduced_cnn_size, after_reduced_cnn_size,
-                                           kernel_size=7, stride=1), 3)
+        base_conv = CausalConv1d(
+            in_channels=after_reduced_cnn_size,
+            out_channels=after_reduced_cnn_size,
+            kernel_size=7,
+            stride=1,
+        )
+        self.convs = nn.ModuleList([deepcopy(base_conv) for _ in range(3)])
+
         self.linear = nn.Linear(d_model, d_model)
         self.dropout = nn.Dropout(p=dropout)
 
-    def forward(self, query, key, value):
+    def forward(self, query, key, value: torch.Tensor) -> torch.Tensor:
         """Implements Multi-head attention"""
         nbatches = query.size(0)
 
         query = query.view(nbatches, -1, self.h, self.d_per_head).transpose(1, 2)
-        key = self.convs[1](key).view(nbatches, -1, self.h, self.d_per_head).transpose(1, 2)
-        value = self.convs[2](value).view(nbatches, -1, self.h, self.d_per_head).transpose(1, 2)
+        key = (
+            self.convs[1](key)
+            .view(nbatches, -1, self.h, self.d_per_head)
+            .transpose(1, 2)
+        )
+        value = (
+            self.convs[2](value)
+            .view(nbatches, -1, self.h, self.d_per_head)
+            .transpose(1, 2)
+        )
+
+        x_raw, attn_weights = _attention(query, key, value)
+        # apply dropout to the *weights*
+        attn = self.dropout(attn_weights)
+        # recompute the weighted sum with dropped weights
+        x = torch.matmul(attn, value)
 
-        x, self.attn = _attention(query, key, value, dropout=self.dropout)
+        # stash the pre-dropout weights if you need them
+        self.attn = attn_weights
 
-        x = x.transpose(1, 2).contiguous() \
-            .view(nbatches, -1, self.h * self.d_per_head)
+        # merge heads and project
+        x = x.transpose(1, 2).contiguous().view(nbatches, -1, self.h * self.d_per_head)
 
         return self.linear(x)
 
 
-class _SublayerOutput(nn.Module):
+class _ResidualLayerNormAttn(nn.Module):
     """
     A residual connection followed by a layer norm.
     """
 
-    def __init__(self, size, dropout):
-        super(_SublayerOutput, self).__init__()
+    def __init__(self, size, dropout, fn_attn):
+        super().__init__()
+        self.norm = nn.LayerNorm(size, eps=1e-6)
+        self.dropout = nn.Dropout(dropout)
+        self.fn_attn = fn_attn
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        key: torch.Tensor,
+        value: torch.Tensor,
+    ) -> torch.Tensor:
+        """Apply residual connection to any sublayer with the same size."""
+        x_norm = self.norm(x)
+
+        out = self.fn_attn(x_norm, key, value)
+
+        return x + self.dropout(out)
+
+
+class _ResidualLayerNormFF(nn.Module):
+    def __init__(self, size, dropout, fn_ff):
+        super().__init__()
         self.norm = nn.LayerNorm(size, eps=1e-6)
         self.dropout = nn.Dropout(dropout)
+        self.fn_ff = fn_ff
 
-    def forward(self, x, sublayer):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply residual connection to any sublayer with the same size."""
-        return x + self.dropout(sublayer(self.norm(x)))
+        x_norm = self.norm(x)
 
+        out = self.fn_ff(x_norm)
 
-def _clones(module, n):
-    """Produce n identical layers."""
-    return nn.ModuleList([copy.deepcopy(module) for _ in range(n)])
+        return x + self.dropout(out)
 
 
 class _TCE(nn.Module):
@@ -381,11 +467,13 @@ class _TCE(nn.Module):
     """
 
     def __init__(self, layer, n):
-        super(_TCE, self).__init__()
-        self.layers = _clones(layer, n)
+        super().__init__()
+
+        self.layers = nn.ModuleList([deepcopy(layer) for _ in range(n)])
+
         self.norm = nn.LayerNorm(layer.size, eps=1e-6)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
        for layer in self.layers:
            x = layer(x)
        return self.norm(x)
@@ -395,35 +483,53 @@ class _EncoderLayer(nn.Module):
     """
     An encoder layer
     Made up of self-attention and a feed forward layer.
-    Each of these sublayers have residual and layer norm, implemented by _SublayerOutput.
+    Each sublayer has a residual connection and layer norm, via _ResidualLayerNormAttn/_ResidualLayerNormFF.
     """
 
     def __init__(self, size, self_attn, feed_forward, after_reduced_cnn_size, dropout):
-        super(_EncoderLayer, self).__init__()
+        super().__init__()
+        self.size = size
         self.self_attn = self_attn
         self.feed_forward = feed_forward
-        self.sublayer_output = _clones(_SublayerOutput(size, dropout), 2)
-        self.size = size
-        self.conv = _CausalConv1d(after_reduced_cnn_size, after_reduced_cnn_size, kernel_size=7,
-                                  stride=1, dilation=1)
 
-    def forward(self, x_in):
+        self.residual_self_attn = _ResidualLayerNormAttn(
+            size=size,
+            dropout=dropout,
+            fn_attn=self_attn,
+        )
+        self.residual_ff = _ResidualLayerNormFF(
+            size=size,
+            dropout=dropout,
+            fn_ff=feed_forward,
+        )
+
+        self.conv = CausalConv1d(
+            in_channels=after_reduced_cnn_size,
+            out_channels=after_reduced_cnn_size,
+            kernel_size=7,
+            stride=1,
+            dilation=1,
+        )
+
+    def forward(self, x_in: torch.Tensor) -> torch.Tensor:
         """Transformer Encoder"""
         query = self.conv(x_in)
         # Encoder self-attention
-        x = self.sublayer_output[0](query, lambda x: self.self_attn(query, x_in, x_in))
-        return self.sublayer_output[1](x, self.feed_forward)
+        x = self.residual_self_attn(query, x_in, x_in)
+        x_ff = self.residual_ff(x)
+        return x_ff
 
 
 class _PositionwiseFeedForward(nn.Module):
     """Positionwise feed-forward network."""
 
-    def __init__(self, d_model, d_ff, dropout=0.1):
-        super(_PositionwiseFeedForward, self).__init__()
+    def __init__(self, d_model, d_ff, dropout=0.1, activation: nn.Module = nn.ReLU):
+        super().__init__()
         self.w_1 = nn.Linear(d_model, d_ff)
         self.w_2 = nn.Linear(d_ff, d_model)
         self.dropout = nn.Dropout(dropout)
+        self.activate = activation()
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Implements FFN equation."""
-        return self.w_2(self.dropout(F.relu(self.w_1(x))))
+        return self.w_2(self.dropout(self.activate(self.w_1(x))))
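
The private _CausalConv1d deleted above is superseded by the public CausalConv1d imported from braindecode.modules (see the new braindecode/modules/ package in the file list). The mechanism is visible in the removed lines: pad by (kernel_size - 1) * dilation, then trim that many samples from the right so no output step depends on future inputs. A self-contained sketch of the same idea, using a hypothetical TinyCausalConv1d name for illustration (the shipped class may differ in details):

    import torch
    from torch import nn

    class TinyCausalConv1d(nn.Conv1d):
        """Conv1d that cannot see future samples: left context only."""

        def __init__(self, in_channels, out_channels, kernel_size, dilation=1, **kwargs):
            self.trim = (kernel_size - 1) * dilation
            super().__init__(
                in_channels,
                out_channels,
                kernel_size,
                padding=self.trim,  # symmetric padding; the right-hand excess is cut below
                dilation=dilation,
                **kwargs,
            )

        def forward(self, x):
            out = super().forward(x)
            # drop the rightmost `trim` steps so output length equals input length
            return out[:, :, : -self.trim] if self.trim else out

    conv = TinyCausalConv1d(30, 30, kernel_size=7)  # mirrors the 7-tap convs in the TCE
    x = torch.randn(4, 30, 80)  # (batch, after_reduced_cnn_size, d_model)
    assert conv(x).shape == x.shape  # length preserved, no future leakage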