PyPI - braindecode - Versions diffs - 1.3.0.dev177069446__py3-none-any.whl - Mend

braindecode 1.3.0.dev177069446__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124) hide show

braindecode/__init__.py +9 -0
braindecode/augmentation/__init__.py +52 -0
braindecode/augmentation/base.py +225 -0
braindecode/augmentation/functional.py +1300 -0
braindecode/augmentation/transforms.py +1356 -0
braindecode/classifier.py +258 -0
braindecode/datasets/__init__.py +44 -0
braindecode/datasets/base.py +823 -0
braindecode/datasets/bbci.py +693 -0
braindecode/datasets/bcicomp.py +193 -0
braindecode/datasets/bids/__init__.py +54 -0
braindecode/datasets/bids/datasets.py +239 -0
braindecode/datasets/bids/format.py +717 -0
braindecode/datasets/bids/hub.py +987 -0
braindecode/datasets/bids/hub_format.py +717 -0
braindecode/datasets/bids/hub_io.py +197 -0
braindecode/datasets/bids/hub_validation.py +114 -0
braindecode/datasets/bids/iterable.py +220 -0
braindecode/datasets/chb_mit.py +163 -0
braindecode/datasets/mne.py +170 -0
braindecode/datasets/moabb.py +219 -0
braindecode/datasets/nmt.py +313 -0
braindecode/datasets/registry.py +120 -0
braindecode/datasets/siena.py +162 -0
braindecode/datasets/sleep_physio_challe_18.py +411 -0
braindecode/datasets/sleep_physionet.py +125 -0
braindecode/datasets/tuh.py +591 -0
braindecode/datasets/utils.py +67 -0
braindecode/datasets/xy.py +96 -0
braindecode/datautil/__init__.py +62 -0
braindecode/datautil/channel_utils.py +114 -0
braindecode/datautil/hub_formats.py +180 -0
braindecode/datautil/serialization.py +359 -0
braindecode/datautil/util.py +154 -0
braindecode/eegneuralnet.py +372 -0
braindecode/functional/__init__.py +22 -0
braindecode/functional/functions.py +251 -0
braindecode/functional/initialization.py +47 -0
braindecode/models/__init__.py +117 -0
braindecode/models/atcnet.py +830 -0
braindecode/models/attentionbasenet.py +727 -0
braindecode/models/attn_sleep.py +549 -0
braindecode/models/base.py +574 -0
braindecode/models/bendr.py +493 -0
braindecode/models/biot.py +537 -0
braindecode/models/brainmodule.py +845 -0
braindecode/models/config.py +233 -0
braindecode/models/contrawr.py +319 -0
braindecode/models/ctnet.py +541 -0
braindecode/models/deep4.py +376 -0
braindecode/models/deepsleepnet.py +417 -0
braindecode/models/eegconformer.py +475 -0
braindecode/models/eeginception_erp.py +379 -0
braindecode/models/eeginception_mi.py +379 -0
braindecode/models/eegitnet.py +302 -0
braindecode/models/eegminer.py +256 -0
braindecode/models/eegnet.py +359 -0
braindecode/models/eegnex.py +354 -0
braindecode/models/eegsimpleconv.py +201 -0
braindecode/models/eegsym.py +917 -0
braindecode/models/eegtcnet.py +337 -0
braindecode/models/fbcnet.py +225 -0
braindecode/models/fblightconvnet.py +315 -0
braindecode/models/fbmsnet.py +338 -0
braindecode/models/hybrid.py +126 -0
braindecode/models/ifnet.py +443 -0
braindecode/models/labram.py +1316 -0
braindecode/models/luna.py +891 -0
braindecode/models/medformer.py +760 -0
braindecode/models/msvtnet.py +377 -0
braindecode/models/patchedtransformer.py +640 -0
braindecode/models/reve.py +843 -0
braindecode/models/sccnet.py +280 -0
braindecode/models/shallow_fbcsp.py +212 -0
braindecode/models/signal_jepa.py +1122 -0
braindecode/models/sinc_shallow.py +339 -0
braindecode/models/sleep_stager_blanco_2020.py +169 -0
braindecode/models/sleep_stager_chambon_2018.py +159 -0
braindecode/models/sparcnet.py +426 -0
braindecode/models/sstdpn.py +869 -0
braindecode/models/summary.csv +47 -0
braindecode/models/syncnet.py +234 -0
braindecode/models/tcn.py +275 -0
braindecode/models/tidnet.py +397 -0
braindecode/models/tsinception.py +295 -0
braindecode/models/usleep.py +439 -0
braindecode/models/util.py +369 -0
braindecode/modules/__init__.py +92 -0
braindecode/modules/activation.py +86 -0
braindecode/modules/attention.py +883 -0
braindecode/modules/blocks.py +160 -0
braindecode/modules/convolution.py +330 -0
braindecode/modules/filter.py +654 -0
braindecode/modules/layers.py +216 -0
braindecode/modules/linear.py +70 -0
braindecode/modules/parametrization.py +38 -0
braindecode/modules/stats.py +87 -0
braindecode/modules/util.py +85 -0
braindecode/modules/wrapper.py +90 -0
braindecode/preprocessing/__init__.py +271 -0
braindecode/preprocessing/eegprep_preprocess.py +1317 -0
braindecode/preprocessing/mne_preprocess.py +240 -0
braindecode/preprocessing/preprocess.py +579 -0
braindecode/preprocessing/util.py +177 -0
braindecode/preprocessing/windowers.py +1037 -0
braindecode/regressor.py +234 -0
braindecode/samplers/__init__.py +18 -0
braindecode/samplers/base.py +399 -0
braindecode/samplers/ssl.py +263 -0
braindecode/training/__init__.py +23 -0
braindecode/training/callbacks.py +23 -0
braindecode/training/losses.py +105 -0
braindecode/training/scoring.py +477 -0
braindecode/util.py +419 -0
braindecode/version.py +1 -0
braindecode/visualization/__init__.py +8 -0
braindecode/visualization/confusion_matrices.py +289 -0
braindecode/visualization/gradients.py +62 -0
braindecode-1.3.0.dev177069446.dist-info/METADATA +230 -0
braindecode-1.3.0.dev177069446.dist-info/RECORD +124 -0
braindecode-1.3.0.dev177069446.dist-info/WHEEL +5 -0
braindecode-1.3.0.dev177069446.dist-info/licenses/LICENSE.txt +31 -0
braindecode-1.3.0.dev177069446.dist-info/licenses/NOTICE.txt +20 -0
braindecode-1.3.0.dev177069446.dist-info/top_level.txt +1 -0

braindecode/models/fblightconvnet.py ADDED Viewed

@@ -0,0 +1,315 @@
+from __future__ import annotations
+from typing import Optional
+import torch
+import torch.nn.functional as F
+from einops.layers.torch import Rearrange
+from mne.utils import warn
+from torch import nn
+from braindecode.models.base import EEGModuleMixin
+from braindecode.modules import (
+    FilterBankLayer,
+    LogVarLayer,
+)
+class FBLightConvNet(EEGModuleMixin, nn.Module):
+    r"""LightConvNet from Ma, X et al (2023) [lightconvnet]_.
+    :bdg-success:`Convolution` :bdg-primary:`Filterbank`
+    .. figure:: https://raw.githubusercontent.com/Ma-Xinzhi/LightConvNet/refs/heads/main/network_architecture.png
+        :align: center
+        :alt: LightConvNet Neural Network
+    A lightweight convolutional neural network incorporating temporal
+    dependency learning and attention mechanisms. The architecture is
+    designed to efficiently capture spatial and temporal features through
+    specialized convolutional layers and **multi-head attention**.
+    The network architecture consists of four main modules:
+    1. **Spatial and Spectral Information Learning**:
+        Applies filterbank and spatial convolutions.
+        This module is followed by batch normalization and
+        an activation function to enhance feature representation.
+    2. **Temporal Segmentation and Feature Extraction**:
+        Divides the processed data into non-overlapping temporal windows
+        of ``win_len`` samples. Within each window, a variance-based layer
+        extracts discriminative features, which are then log-transformed
+        to stabilize variance before being passed to the attention module.
+    3. **Temporal Attention Module**: Utilizes a multi-head attention
+        mechanism with depthwise separable convolutions to capture dependencies
+        across different temporal segments. The attention weights are normalized
+        using softmax and aggregated to form a comprehensive temporal
+        representation.
+    4. **Final Layer**: Flattens the aggregated features and passes them
+        through a linear layer that integrates the ``n_filters_spat``
+        features to generate the final output predictions.
+    Notes
+    -----
+    This implementation is not guaranteed to be correct and has not been checked
+    by the original authors; it is a braindecode adaptation from the Pytorch
+    source-code [lightconvnetcode]_.
+    Parameters
+    ----------
+    n_bands : int or None or list of tuple of int, default=9
+        Number of frequency bands or a list of frequency band tuples. If a list of tuples is provided,
+        each tuple defines the lower and upper bounds of a frequency band.
+        The value is forwarded to ``FilterBankLayer`` as ``band_filters`` and
+        ``self.n_bands`` is then overwritten with the number of bands the
+        filterbank reports.
+    n_filters_spat : int, default=32
+        Number of output channels of the spatial convolution (kernel
+        ``(n_chans, 1)``). Must be compatible with ``heads`` because the
+        attention convolution groups these channels by head.
+    n_dim : int, default=3
+        Value passed as the first argument of ``LogVarLayer``
+        (presumably the axis that is reduced; with the 4-D input built in
+        ``forward`` axis 3 is the within-window time axis).
+    stride_factor : int, default=4
+        Stored on the instance; it is not used by any layer built in this
+        class (the temporal segmentation is driven by ``win_len``).
+    win_len : int, default=250
+        Length, in samples, of each non-overlapping temporal window. If
+        ``n_times`` is not a multiple of ``win_len`` a warning is issued and
+        the trailing ``n_times % win_len`` samples are dropped in ``forward``.
+    heads : int, default=8
+        Number of attention heads in the multi-head attention mechanism.
+    weight_softmax : bool, default=True
+        If True, applies softmax to the attention weights.
+    bias : bool, default=False
+        If True, includes a bias term in the attention convolution
+        (``_LightweightConv1d``).
+    activation : nn.Module, default=nn.ELU
+        Activation function class to apply after the spatial convolution.
+    verbose : bool, default=False
+        If True, enables verbose output during filter creation using mne.
+    filter_parameters : dict or None, default=None
+        Additional keyword arguments for the FilterBankLayer. ``None`` is
+        treated as an empty dict.
+    References
+    ----------
+    .. [lightconvnet] Ma, X., Chen, W., Pei, Z., Liu, J., Huang, B., & Chen, J.
+        (2023). A temporal dependency learning CNN with attention mechanism
+        for MI-EEG decoding. IEEE Transactions on Neural Systems and
+        Rehabilitation Engineering.
+    .. [lightconvnetcode] Link to source-code:
+        https://github.com/Ma-Xinzhi/LightConvNet
+    """
+    def __init__(
+        self,
+        # Braindecode parameters
+        n_chans=None,
+        n_outputs=None,
+        chs_info=None,
+        n_times=None,
+        input_window_seconds=None,
+        sfreq=None,
+        # models parameters
+        n_bands=9,
+        n_filters_spat: int = 32,
+        n_dim: int = 3,
+        stride_factor: int = 4,
+        win_len: int = 250,
+        heads: int = 8,
+        weight_softmax: bool = True,
+        bias: bool = False,
+        activation: type[nn.Module] = nn.ELU,
+        verbose: bool = False,
+        filter_parameters: Optional[dict] = None,
+    ):
+        super().__init__(
+            n_chans=n_chans,
+            n_outputs=n_outputs,
+            chs_info=chs_info,
+            n_times=n_times,
+            input_window_seconds=input_window_seconds,
+            sfreq=sfreq,
+        )
+        # Drop the raw arguments so only the values exposed on ``self`` by the
+        # mixin are used from here on.
+        del n_outputs, n_chans, chs_info, n_times, input_window_seconds, sfreq
+        # Parameters
+        self.n_bands = n_bands
+        self.n_filters_spat = n_filters_spat
+        self.n_dim = n_dim
+        self.stride_factor = stride_factor
+        self.win_len = win_len
+        self.activation = activation
+        self.heads = heads
+        self.weight_softmax = weight_softmax
+        self.bias = bias
+        self.filter_parameters = filter_parameters or {}
+        # Checkers
+        # The time axis must split into whole windows of ``win_len`` samples;
+        # otherwise the trailing remainder is dropped in ``forward``.
+        self.n_times_truncated = self.n_times
+        if self.n_times % self.win_len != 0:
+            warn(
+                f"Time dimension ({self.n_times}) is not divisible by"
+                f" win_len ({self.win_len}). Input will be "
+                f"truncated in {self.n_times % self.win_len} temporal points ",
+                UserWarning,
+            )
+            self.n_times_truncated = self.n_times - (self.n_times % self.win_len)
+        # Layers
+        # Following paper nomenclature
+        self.spectral_filtering = FilterBankLayer(
+            n_chans=self.n_chans,
+            sfreq=self.sfreq,
+            band_filters=self.n_bands,
+            verbose=verbose,
+            **self.filter_parameters,
+        )
+        # As we have an internal process to create the bands,
+        # we get the values from the filterbank
+        self.n_bands = self.spectral_filtering.n_bands
+        # Regular (non-grouped) convolution: the bands are the input channels
+        # and the kernel spans all EEG channels, collapsing that axis to 1.
+        self.spatial_conv = nn.Sequential(
+            nn.Conv2d(
+                in_channels=self.n_bands,
+                out_channels=self.n_filters_spat,
+                kernel_size=(self.n_chans, 1),
+            ),
+            nn.BatchNorm2d(self.n_filters_spat),
+            self.activation(),
+        )
+        # Temporal aggregator
+        self.temporal_layer = LogVarLayer(self.n_dim, False)
+        self.flatten_layer = Rearrange("batch ... -> batch (...)")
+        # LightWeightConv1D
+        # The kernel size equals the number of temporal windows, so with the
+        # default zero padding the convolution reduces the window axis to 1.
+        self.attn_conv = _LightweightConv1d(
+            self.n_filters_spat,
+            (self.n_times // self.win_len),
+            heads=self.heads,
+            weight_softmax=weight_softmax,
+            bias=bias,
+        )
+        self.final_layer = nn.Linear(
+            in_features=self.n_filters_spat,
+            out_features=self.n_outputs,
+        )
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Forward pass of the FBLightConvNet model.
+        Parameters
+        ----------
+        x : torch.Tensor
+            Input tensor with shape (batch_size, n_chans, n_times).
+        Returns
+        -------
+        torch.Tensor
+            Output tensor with shape (batch_size, n_outputs).
+        """
+        batch_size, _, _ = x.shape
+        # x.shape: batch, n_chans, n_times
+        x = self.spectral_filtering(x)
+        # x.shape: batch, nbands, n_chans, n_times
+        x = self.spatial_conv(x)
+        # x.shape: batch, n_filters_spat, 1, n_times
+        # Drop the trailing samples that do not fill a whole window.
+        x = x[:, :, :, : self.n_times_truncated]
+        # x.shape: batch, n_filters_spat, 1, n_times_truncated
+        x = x.reshape([batch_size, self.n_filters_spat, -1, self.win_len])
+        # batch, n_filters_spat, n_windows, win_len
+        # where the n_windows = n_times_truncated / win_len
+        # and win_len = 250 by default
+        x = self.temporal_layer(x)
+        # x.shape : batch, n_filters_spat, n_windows
+        x = self.attn_conv(x)
+        # x.shape : batch, n_filters_spat, 1
+        x = self.flatten_layer(x)
+        # x.shape : batch, n_filters_spat
+        x = self.final_layer(x)
+        # x.shape : batch, n_outputs
+        return x
+class _LightweightConv1d(nn.Module):
+    r"""Lightweight 1D Convolution Module.
+    Applies a convolution operation with multiple heads, allowing for
+    parallel filter applications. Optionally applies a softmax normalization
+    to the convolution weights.
+    The ``input_size`` channels share only ``heads`` distinct kernels:
+    channel ``c`` is convolved with the kernel of head ``c % heads``.
+    Parameters
+    ----------
+    input_size : int
+        Number of channels of the input and output. Must be a multiple of
+        ``heads``.
+    kernel_size : int, optional
+        Size of the convolution kernel. Default is `1`.
+    padding : int, optional
+        Amount of zero-padding added to both sides of the input. Default is `0`.
+    heads : int, optional
+        Number of attention heads used. The weight has shape `(heads, 1, kernel_size)`.
+        Default is `1`.
+    weight_softmax : bool, optional
+        If `True`, normalizes the convolution weights with softmax before applying the convolution.
+        Default is `False`.
+    bias : bool, optional
+        If `True`, adds a learnable bias to the output. Default is `False`.
+    Raises
+    ------
+    ValueError
+        If ``heads`` is not positive or ``input_size`` is not divisible by
+        ``heads``.
+    """
+    def __init__(
+        self,
+        input_size: int,
+        kernel_size: int = 1,
+        padding: int = 0,
+        heads: int = 1,
+        weight_softmax: bool = False,
+        bias: bool = False,
+    ):
+        super().__init__()
+        # The forward pass regroups channels into blocks of ``heads``; without
+        # this guarantee the regrouping would mix channels of different
+        # samples or fail with an opaque shape error.
+        if heads <= 0 or input_size % heads != 0:
+            raise ValueError(
+                f"input_size ({input_size}) must be a positive multiple of "
+                f"heads ({heads})."
+            )
+        self.input_size = input_size
+        self.kernel_size = kernel_size
+        self.heads = heads
+        self.padding = padding
+        self.weight_softmax = weight_softmax
+        # Allocated uninitialised; values are set in ``_init_parameters``.
+        self.weight = nn.Parameter(torch.empty(heads, 1, kernel_size))
+        if bias:
+            self.bias = nn.Parameter(torch.empty(input_size))
+        else:
+            self.bias = None
+        self._init_parameters()
+    def _init_parameters(self):
+        """Xavier-initialise the kernels and zero the optional bias."""
+        nn.init.xavier_uniform_(self.weight)
+        if self.bias is not None:
+            nn.init.constant_(self.bias, 0.0)
+    def forward(self, input):
+        """Apply the head-shared convolution.
+        Parameters
+        ----------
+        input : torch.Tensor
+            Tensor of shape (batch, channels, time); ``channels`` must be a
+            multiple of ``heads``.
+        Returns
+        -------
+        torch.Tensor
+            Tensor of shape (batch, channels, time_out), where ``time_out``
+            is the output length of the 1D convolution.
+        """
+        B, C, T = input.size()
+        H = self.heads
+        if C % H != 0:
+            raise ValueError(
+                f"Number of input channels ({C}) must be divisible by heads ({H})."
+            )
+        weight = self.weight
+        if self.weight_softmax:
+            # Normalise each head's kernel over the kernel axis.
+            # shape: (heads, 1, kernel_size)
+            weight = F.softmax(weight, dim=-1)
+        # Fold the C // H channel blocks into the batch axis so that a grouped
+        # convolution with one kernel per head can process every block:
+        # (B, C, T) -> (B * C // H, H, T). ``reshape`` (rather than ``view``)
+        # also accepts non-contiguous inputs.
+        input = input.reshape(-1, H, T)
+        output = F.conv1d(input, weight, padding=self.padding, groups=H)
+        # (B * C // H, H, T_out) -> (B, C, T_out)
+        output = output.reshape(B, C, -1)
+        if self.bias is not None:
+            # One bias value per channel, broadcast over batch and time.
+            output = output + self.bias.view(1, -1, 1)
+        return output

braindecode/models/fbmsnet.py ADDED Viewed

@@ -0,0 +1,338 @@
+from __future__ import annotations
+from typing import Any, Sequence
+import torch
+from einops.layers.torch import Rearrange
+from mne.utils import warn
+from torch import nn
+from braindecode.models.base import EEGModuleMixin
+from braindecode.models.fbcnet import _valid_layers
+from braindecode.modules import (
+    Conv2dWithConstraint,
+    FilterBankLayer,
+    LinearWithConstraint,
+)
+class FBMSNet(EEGModuleMixin, nn.Module):
+    r"""FBMSNet from Liu et al (2022) [fbmsnet]_.
+    :bdg-success:`Convolution` :bdg-primary:`Filterbank`
+    .. figure:: https://raw.githubusercontent.com/Want2Vanish/FBMSNet/refs/heads/main/FBMSNet.png
+        :align: center
+        :alt: FBMSNet Architecture
+    0. **FilterBank Layer**: Applying filterbank to transform the input.
+    1. **Temporal Convolution Block**: Utilizes mixed depthwise convolution
+       (MixConv) to extract multiscale temporal features from multiview EEG
+       representations. The input is split into groups corresponding to different
+       views each convolved with kernels of varying sizes.
+       Kernel sizes are set relative to the EEG
+       sampling rate, with ratio coefficients [0.5, 0.25, 0.125, 0.0625],
+       dividing the input into four groups.
+    2. **Spatial Convolution Block**: Applies depthwise convolution with a kernel
+       size of (n_chans, 1) to span all EEG channels, effectively learning spatial
+       filters. This is followed by batch normalization and the Swish activation
+       function. A maximum norm constraint of 2 is imposed on the convolution
+       weights to regularize the model.
+    3. **Temporal Log-Variance Block**: Computes the log-variance.
+    4. **Classification Layer**: A fully connected layer with weight constraint.
+    Notes
+    -----
+    This implementation is not guaranteed to be correct and has not been checked
+    by the original authors; it has only been reimplemented from the paper
+    description and source code [fbmsnetcode]_. There is an extra layer here to
+    compute the filterbank during batch time and not on data time. This avoids
+    data-leak, and allows the model to follow the braindecode convention.
+    Parameters
+    ----------
+    n_bands : int, default=9
+        Number of frequency bands, forwarded to ``FilterBankLayer`` as
+        ``band_filters``. ``self.n_bands`` is then overwritten with the number
+        of bands reported by the filterbank and used as the number of input
+        channels of the mixed convolution.
+    n_filters_spat : int, default=36
+        Number of output channels from the MixedConv2d layer.
+    temporal_layer : str, default='LogVarLayer'
+        Temporal aggregation layer to use. Must be a key of ``_valid_layers``;
+        otherwise ``NotImplementedError`` is raised.
+    n_dim: int, default=3
+        Dimension of the temporal reduction layer.
+    stride_factor : int, default=4
+        Stride factor for temporal segmentation: the time axis is reshaped
+        into ``stride_factor`` segments. If ``n_times`` is not divisible by
+        it, the input is zero-padded at the end and a warning is issued.
+    dilatability : int, default=8
+        Expansion factor for the spatial convolution block; its number of
+        output channels is ``n_filters_spat * dilatability``.
+    activation : nn.Module, default=nn.SiLU
+        Activation function class to apply.
+    kernels_weights : Sequence[int], default=(15, 31, 63, 125)
+        Kernel sizes for the MixedConv2d layer.
+    cnn_max_norm : float, default=2
+        Maximum norm constraint for the convolutional layers.
+    linear_max_norm : float, default=0.5
+        Maximum norm constraint for the linear layers.
+    filter_parameters : dict, default=None
+        Dictionary of extra keyword arguments for the FilterBankLayer.
+        If None, no extra arguments are forwarded and the FilterBankLayer
+        defaults apply.
+    verbose: bool, default False
+        Verbose parameter to create the filter using mne.
+    References
+    ----------
+    .. [fbmsnet] Liu, K., Yang, M., Yu, Z., Wang, G., & Wu, W. (2022).
+        FBMSNet: A filter-bank multi-scale convolutional neural network for
+        EEG-based motor imagery decoding. IEEE Transactions on Biomedical
+        Engineering, 70(2), 436-445.
+    .. [fbmsnetcode] Liu, K., Yang, M., Yu, Z., Wang, G., & Wu, W. (2022).
+        FBMSNet: A filter-bank multi-scale convolutional neural network for
+        EEG-based motor imagery decoding.
+        https://github.com/Want2Vanish/FBMSNet
+    """
+    def __init__(
+        self,
+        # Braindecode parameters
+        n_chans=None,
+        n_outputs=None,
+        chs_info=None,
+        n_times=None,
+        input_window_seconds=None,
+        sfreq=None,
+        # models parameters
+        n_bands: int = 9,
+        n_filters_spat: int = 36,
+        temporal_layer: str = "LogVarLayer",
+        n_dim: int = 3,
+        stride_factor: int = 4,
+        dilatability: int = 8,
+        activation: type[nn.Module] = nn.SiLU,
+        kernels_weights: Sequence[int] = (15, 31, 63, 125),
+        cnn_max_norm: float = 2,
+        linear_max_norm: float = 0.5,
+        verbose: bool = False,
+        filter_parameters: dict[Any, Any] | None = None,
+    ):
+        super().__init__(
+            n_chans=n_chans,
+            n_outputs=n_outputs,
+            chs_info=chs_info,
+            n_times=n_times,
+            input_window_seconds=input_window_seconds,
+            sfreq=sfreq,
+        )
+        # Drop the raw arguments so only the values exposed on ``self`` by the
+        # mixin are used from here on.
+        del n_outputs, n_chans, chs_info, n_times, input_window_seconds, sfreq
+        # Parameters
+        self.n_bands = n_bands
+        self.n_filters_spat = n_filters_spat
+        self.n_dim = n_dim
+        self.stride_factor = stride_factor
+        self.activation = activation
+        self.dilatability = dilatability
+        self.kernels_weights = kernels_weights
+        self.filter_parameters = filter_parameters or {}
+        self.out_channels_spatial = self.n_filters_spat * self.dilatability
+        # Checkers
+        if temporal_layer not in _valid_layers:
+            raise NotImplementedError(
+                f"Temporal layer '{temporal_layer}' is not implemented."
+            )
+        if self.n_times % self.stride_factor != 0:
+            warn(
+                f"Time dimension ({self.n_times}) is not divisible by"
+                f" stride_factor ({self.stride_factor}). Input will be padded.",
+                UserWarning,
+            )
+        # Layers
+        # Following paper nomenclature
+        self.spectral_filtering = FilterBankLayer(
+            n_chans=self.n_chans,
+            sfreq=self.sfreq,
+            band_filters=self.n_bands,
+            verbose=verbose,
+            **self.filter_parameters,
+        )
+        # As we have an internal process to create the bands,
+        # we get the values from the filterbank
+        self.n_bands = self.spectral_filtering.n_bands
+        # MixedConv2d Layer
+        # Note: built with depthwise=False, i.e. each branch is a regular
+        # (non-grouped) temporal convolution over its share of the bands.
+        self.mix_conv = nn.Sequential(
+            _MixedConv2d(
+                in_channels=self.n_bands,
+                out_channels=self.n_filters_spat,
+                stride=1,
+                dilation=1,
+                depthwise=False,
+                kernels_weights=kernels_weights,
+            ),
+            nn.BatchNorm2d(self.n_filters_spat),
+        )
+        # Spatial Convolution Block (SCB)
+        # Grouped convolution (one group per mixed-conv output channel) whose
+        # kernel spans all EEG channels, collapsing that axis to 1.
+        self.spatial_conv = nn.Sequential(
+            Conv2dWithConstraint(
+                in_channels=self.n_filters_spat,
+                out_channels=self.out_channels_spatial,
+                kernel_size=(self.n_chans, 1),
+                groups=self.n_filters_spat,
+                max_norm=cnn_max_norm,
+                padding=0,
+            ),
+            nn.BatchNorm2d(self.out_channels_spatial),
+            self.activation(),
+        )
+        # Padding layer
+        # Right-pad the time axis with zeros up to the next multiple of
+        # stride_factor so it can be reshaped into equal segments.
+        if self.n_times % self.stride_factor != 0:
+            self.padding_size = stride_factor - (self.n_times % stride_factor)
+            self.n_times_padded = self.n_times + self.padding_size
+            self.padding_layer = nn.ConstantPad1d((0, self.padding_size), 0.0)
+        else:
+            self.padding_layer = nn.Identity()
+            self.n_times_padded = self.n_times
+        # Temporal Aggregation Layer
+        self.temporal_layer = _valid_layers[temporal_layer](dim=self.n_dim)  # type: ignore
+        self.flatten_layer = Rearrange("batch ... -> batch (...)")
+        # Final fully connected layer
+        # One feature per (spatial output channel, temporal segment) pair.
+        self.final_layer = LinearWithConstraint(
+            in_features=self.out_channels_spatial * self.stride_factor,
+            out_features=self.n_outputs,
+            max_norm=linear_max_norm,
+        )
+    def forward(self, x):
+        """
+        Forward pass of the FBMSNet model.
+        Parameters
+        ----------
+        x : torch.Tensor
+            Input tensor with shape (batch_size, n_chans, n_times).
+        Returns
+        -------
+        torch.Tensor
+            Output tensor with shape (batch_size, n_outputs).
+        """
+        batch, _, _ = x.shape
+        # shape: (batch, n_chans, n_times)
+        x = self.spectral_filtering(x)
+        # shape: (batch, n_bands, n_chans, n_times)
+        # Mixed convolution
+        x = self.mix_conv(x)
+        # shape: (batch, self.n_filters_spat, n_chans, n_times)
+        # Spatial convolution block
+        x = self.spatial_conv(x)
+        # shape: (batch, self.out_channels_spatial, 1, n_times)
+        # Apply some padding to the input to make it divisible by the stride factor
+        x = self.padding_layer(x)
+        # shape: (batch, self.out_channels_spatial, 1, n_times_padded)
+        # Reshape for temporal layer
+        x = x.view(batch, self.out_channels_spatial, self.stride_factor, -1)
+        # shape: (batch, self.out_channels_spatial, self.stride_factor, n_times_padded/self.stride_factor)
+        # Temporal aggregation
+        x = self.temporal_layer(x)
+        # shape: (batch, self.out_channels_spatial, self.stride_factor, 1)
+        # Flatten and classify
+        x = self.flatten_layer(x)
+        # shape: (batch, self.out_channels_spatial*self.stride_factor)
+        x = self.final_layer(x)
+        # shape: (batch, n_outputs)
+        return x
+class _MixedConv2d(nn.Module):
+    r"""Mixed Grouped Convolution for multiscale feature extraction.
+    The input channels are partitioned into one chunk per entry of
+    ``kernels_weights``; each chunk is processed by its own bias-free
+    ``nn.Conv2d`` with kernel ``(1, k)`` and ``"same"`` padding, and the
+    branch outputs are concatenated along the channel axis.
+    """
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernels_weights=(15, 31, 63, 125),
+        stride=1,
+        dilation=1,
+        depthwise=False,
+    ):
+        super().__init__()
+        n_branches = len(kernels_weights)
+        branch_in = self._split_channels(in_channels, n_branches)
+        branch_out = self._split_channels(out_channels, n_branches)
+        # Kept for ``forward``: sizes used to split the input channel axis.
+        self.splits = branch_in
+        # One temporal convolution per kernel width, in the given order.
+        self.convs = nn.ModuleList(
+            [
+                nn.Conv2d(
+                    in_channels=c_in,
+                    out_channels=c_out,
+                    kernel_size=(1, width),
+                    stride=stride,
+                    padding="same",
+                    dilation=dilation,
+                    groups=c_out if depthwise else 1,
+                    bias=False,
+                )
+                for width, c_in, c_out in zip(kernels_weights, branch_in, branch_out)
+            ]
+        )
+    @staticmethod
+    def _split_channels(num_chan, num_groups):
+        """
+        Distribute ``num_chan`` channels over ``num_groups`` groups as
+        evenly as possible.
+        Parameters
+        ----------
+        num_chan : int
+            The total number of channels to split.
+        num_groups : int
+            The number of groups to split the channels into.
+        Returns
+        -------
+        list of int
+            Channel count of each group. Every group gets the floor
+            share; the remainder is added to the first group.
+        """
+        per_group = [num_chan // num_groups for _ in range(num_groups)]
+        leftover = num_chan - sum(per_group)
+        per_group[0] = per_group[0] + leftover
+        return per_group
+    def forward(self, x):
+        # Cut the channel axis (dim=1) into the chunk sizes computed at
+        # construction time, one chunk per convolution branch.
+        chunks = torch.split(x, self.splits, 1)
+        # Run each chunk through its own branch, in the order the branches
+        # were registered in ``self.convs``.
+        branch_outputs = [conv(chunk) for conv, chunk in zip(self.convs, chunks)]
+        # Stack the branch outputs back together along the channel axis.
+        return torch.cat(branch_outputs, 1)