braindecode 1.3.0.dev177069446__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- braindecode/__init__.py +9 -0
- braindecode/augmentation/__init__.py +52 -0
- braindecode/augmentation/base.py +225 -0
- braindecode/augmentation/functional.py +1300 -0
- braindecode/augmentation/transforms.py +1356 -0
- braindecode/classifier.py +258 -0
- braindecode/datasets/__init__.py +44 -0
- braindecode/datasets/base.py +823 -0
- braindecode/datasets/bbci.py +693 -0
- braindecode/datasets/bcicomp.py +193 -0
- braindecode/datasets/bids/__init__.py +54 -0
- braindecode/datasets/bids/datasets.py +239 -0
- braindecode/datasets/bids/format.py +717 -0
- braindecode/datasets/bids/hub.py +987 -0
- braindecode/datasets/bids/hub_format.py +717 -0
- braindecode/datasets/bids/hub_io.py +197 -0
- braindecode/datasets/bids/hub_validation.py +114 -0
- braindecode/datasets/bids/iterable.py +220 -0
- braindecode/datasets/chb_mit.py +163 -0
- braindecode/datasets/mne.py +170 -0
- braindecode/datasets/moabb.py +219 -0
- braindecode/datasets/nmt.py +313 -0
- braindecode/datasets/registry.py +120 -0
- braindecode/datasets/siena.py +162 -0
- braindecode/datasets/sleep_physio_challe_18.py +411 -0
- braindecode/datasets/sleep_physionet.py +125 -0
- braindecode/datasets/tuh.py +591 -0
- braindecode/datasets/utils.py +67 -0
- braindecode/datasets/xy.py +96 -0
- braindecode/datautil/__init__.py +62 -0
- braindecode/datautil/channel_utils.py +114 -0
- braindecode/datautil/hub_formats.py +180 -0
- braindecode/datautil/serialization.py +359 -0
- braindecode/datautil/util.py +154 -0
- braindecode/eegneuralnet.py +372 -0
- braindecode/functional/__init__.py +22 -0
- braindecode/functional/functions.py +251 -0
- braindecode/functional/initialization.py +47 -0
- braindecode/models/__init__.py +117 -0
- braindecode/models/atcnet.py +830 -0
- braindecode/models/attentionbasenet.py +727 -0
- braindecode/models/attn_sleep.py +549 -0
- braindecode/models/base.py +574 -0
- braindecode/models/bendr.py +493 -0
- braindecode/models/biot.py +537 -0
- braindecode/models/brainmodule.py +845 -0
- braindecode/models/config.py +233 -0
- braindecode/models/contrawr.py +319 -0
- braindecode/models/ctnet.py +541 -0
- braindecode/models/deep4.py +376 -0
- braindecode/models/deepsleepnet.py +417 -0
- braindecode/models/eegconformer.py +475 -0
- braindecode/models/eeginception_erp.py +379 -0
- braindecode/models/eeginception_mi.py +379 -0
- braindecode/models/eegitnet.py +302 -0
- braindecode/models/eegminer.py +256 -0
- braindecode/models/eegnet.py +359 -0
- braindecode/models/eegnex.py +354 -0
- braindecode/models/eegsimpleconv.py +201 -0
- braindecode/models/eegsym.py +917 -0
- braindecode/models/eegtcnet.py +337 -0
- braindecode/models/fbcnet.py +225 -0
- braindecode/models/fblightconvnet.py +315 -0
- braindecode/models/fbmsnet.py +338 -0
- braindecode/models/hybrid.py +126 -0
- braindecode/models/ifnet.py +443 -0
- braindecode/models/labram.py +1316 -0
- braindecode/models/luna.py +891 -0
- braindecode/models/medformer.py +760 -0
- braindecode/models/msvtnet.py +377 -0
- braindecode/models/patchedtransformer.py +640 -0
- braindecode/models/reve.py +843 -0
- braindecode/models/sccnet.py +280 -0
- braindecode/models/shallow_fbcsp.py +212 -0
- braindecode/models/signal_jepa.py +1122 -0
- braindecode/models/sinc_shallow.py +339 -0
- braindecode/models/sleep_stager_blanco_2020.py +169 -0
- braindecode/models/sleep_stager_chambon_2018.py +159 -0
- braindecode/models/sparcnet.py +426 -0
- braindecode/models/sstdpn.py +869 -0
- braindecode/models/summary.csv +47 -0
- braindecode/models/syncnet.py +234 -0
- braindecode/models/tcn.py +275 -0
- braindecode/models/tidnet.py +397 -0
- braindecode/models/tsinception.py +295 -0
- braindecode/models/usleep.py +439 -0
- braindecode/models/util.py +369 -0
- braindecode/modules/__init__.py +92 -0
- braindecode/modules/activation.py +86 -0
- braindecode/modules/attention.py +883 -0
- braindecode/modules/blocks.py +160 -0
- braindecode/modules/convolution.py +330 -0
- braindecode/modules/filter.py +654 -0
- braindecode/modules/layers.py +216 -0
- braindecode/modules/linear.py +70 -0
- braindecode/modules/parametrization.py +38 -0
- braindecode/modules/stats.py +87 -0
- braindecode/modules/util.py +85 -0
- braindecode/modules/wrapper.py +90 -0
- braindecode/preprocessing/__init__.py +271 -0
- braindecode/preprocessing/eegprep_preprocess.py +1317 -0
- braindecode/preprocessing/mne_preprocess.py +240 -0
- braindecode/preprocessing/preprocess.py +579 -0
- braindecode/preprocessing/util.py +177 -0
- braindecode/preprocessing/windowers.py +1037 -0
- braindecode/regressor.py +234 -0
- braindecode/samplers/__init__.py +18 -0
- braindecode/samplers/base.py +399 -0
- braindecode/samplers/ssl.py +263 -0
- braindecode/training/__init__.py +23 -0
- braindecode/training/callbacks.py +23 -0
- braindecode/training/losses.py +105 -0
- braindecode/training/scoring.py +477 -0
- braindecode/util.py +419 -0
- braindecode/version.py +1 -0
- braindecode/visualization/__init__.py +8 -0
- braindecode/visualization/confusion_matrices.py +289 -0
- braindecode/visualization/gradients.py +62 -0
- braindecode-1.3.0.dev177069446.dist-info/METADATA +230 -0
- braindecode-1.3.0.dev177069446.dist-info/RECORD +124 -0
- braindecode-1.3.0.dev177069446.dist-info/WHEEL +5 -0
- braindecode-1.3.0.dev177069446.dist-info/licenses/LICENSE.txt +31 -0
- braindecode-1.3.0.dev177069446.dist-info/licenses/NOTICE.txt +20 -0
- braindecode-1.3.0.dev177069446.dist-info/top_level.txt +1 -0
braindecode/models/eegminer.py
@@ -0,0 +1,256 @@
"""
* Copyright (C) Cogitat, Ltd.
* Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)
* Patent GB2609265 - Learnable filters for EEG classification
* https://www.ipo.gov.uk/p-ipsum/Case/ApplicationNumber/GB2113420.0
"""

from functools import partial

import torch
from einops.layers.torch import Rearrange
from torch import nn

import braindecode.functional as F
from braindecode.models.base import EEGModuleMixin
from braindecode.modules import GeneralizedGaussianFilter

_eeg_miner_methods = ["mag", "corr", "plv"]


class EEGMiner(EEGModuleMixin, nn.Module):
    r"""EEGMiner from Ludwig et al. (2024) [eegminer]_.

    :bdg-success:`Convolution` :bdg-warning:`Interpretability`

    .. figure:: https://content.cld.iop.org/journals/1741-2552/21/3/036010/revision2/jnead44d7f1_hr.jpg
       :align: center
       :alt: EEGMiner Architecture

    EEGMiner is a neural network model for EEG signal classification built on
    learnable generalized Gaussian filters. The model combines frequency-domain
    filtering with connectivity metrics or feature extraction, such as the
    Phase Locking Value (PLV), to extract meaningful features from EEG data,
    enabling effective classification.

    The model has the following steps:

    - **Generalized Gaussian filters** applied to the input EEG signals in the
      frequency domain.
    - **Connectivity estimators** (``corr``, ``plv``) or **electrode-wise band
      power** (``mag``); the default is ``plv``.

      - ``'corr'``: computes the correlation of the filtered signals.
      - ``'plv'``: computes the phase locking value of the filtered signals.
      - ``'mag'``: computes the magnitude of the filtered signals.

    - **Feature normalization**

      - Applies batch normalization.

    - **Final layer**

      - Feeds the batch-normalized features into a final linear layer for
        classification.

    Depending on the selected method (``mag``, ``corr``, or ``plv``), the model
    computes the filtered signals' magnitude, correlation, or phase locking
    value. These features are passed through a batch normalization layer before
    being fed into the final linear layer for classification.

    The input to EEGMiner should be a three-dimensional tensor representing EEG
    signals:

    ``(batch_size, n_channels, n_timesteps)``.

    Notes
    -----
    EEGMiner incorporates learnable parameters for the filter characteristics,
    allowing the model to adaptively learn optimal frequency bands and phase
    delays for the classification task. With the default PLV connectivity
    metric, EEGMiner is well suited to tasks that require analyzing phase
    relationships between EEG channels.

    The model and module are covered by patent [eegminercode]_, and the code is
    licensed under CC BY-NC 4.0.

    .. versionadded:: 0.9

    Parameters
    ----------
    method : str, default="plv"
        The method used for feature extraction. Options are:

        - "mag": electrode-wise band power of the filtered signals.
        - "corr": correlation between filtered channels.
        - "plv": Phase Locking Value connectivity metric.
    filter_f_mean : tuple of float, default=(23.0, 23.0)
        Mean frequencies for the generalized Gaussian filters.
    filter_bandwidth : tuple of float, default=(44.0, 44.0)
        Bandwidths for the generalized Gaussian filters.
    filter_shape : tuple of float, default=(2.0, 2.0)
        Shape parameters for the generalized Gaussian filters.
    group_delay : tuple of float, default=(20.0, 20.0)
        Group delay values for the filters in milliseconds.
    clamp_f_mean : tuple of float, default=(1.0, 45.0)
        Clamping range for the mean frequency parameters.

    References
    ----------
    .. [eegminer] Ludwig, S., Bakas, S., Adamos, D. A., Laskaris, N., Panagakis,
       Y., & Zafeiriou, S. (2024). EEGMiner: discovering interpretable features
       of brain activity with learnable filters. Journal of Neural Engineering,
       21(3), 036010.
    .. [eegminercode] Ludwig, S., Bakas, S., Adamos, D. A., Laskaris, N., Panagakis,
       Y., & Zafeiriou, S. (2024). EEGMiner: discovering interpretable features
       of brain activity with learnable filters.
       https://github.com/SMLudwig/EEGminer/.
       Cogitat, Ltd. "Learnable filters for EEG classification."
       Patent GB2609265.
       https://www.ipo.gov.uk/p-ipsum/Case/ApplicationNumber/GB2113420.0
    """

    def __init__(
        self,  # Signal related parameters
        method: str = "plv",
        n_chans=None,
        n_outputs=None,
        n_times=None,
        chs_info=None,
        input_window_seconds=None,
        sfreq=None,
        # model related
        filter_f_mean=(23.0, 23.0),
        filter_bandwidth=(44.0, 44.0),
        filter_shape=(2.0, 2.0),
        group_delay=(20.0, 20.0),
        clamp_f_mean=(1.0, 45.0),
    ):
        super().__init__(
            n_outputs=n_outputs,
            n_chans=n_chans,
            chs_info=chs_info,
            n_times=n_times,
            input_window_seconds=input_window_seconds,
            sfreq=sfreq,
        )
        del n_outputs, n_chans, chs_info, n_times, input_window_seconds, sfreq

        # Initialize filter parameters
        self.filter_f_mean = filter_f_mean
        self.filter_bandwidth = filter_bandwidth
        self.filter_shape = filter_shape
        self.n_filters = len(self.filter_f_mean)
        self.group_delay = group_delay
        self.clamp_f_mean = clamp_f_mean
        self.method = method.lower()

        if self.method not in _eeg_miner_methods:
            raise ValueError(
                f"The method {self.method} is not one of the valid options"
                f" {_eeg_miner_methods}"
            )

        if self.method == "mag" or self.method == "corr":
            inverse_fourier = True
            in_channels = self.n_chans
            out_channels = self.n_chans * self.n_filters
        else:
            inverse_fourier = False
            in_channels = 1
            out_channels = 1 * self.n_filters

        # Generalized Gaussian Filter
        self.filter = GeneralizedGaussianFilter(
            in_channels=in_channels,
            out_channels=out_channels,
            sequence_length=self.n_times,
            sample_rate=self.sfreq,
            f_mean=self.filter_f_mean,
            bandwidth=self.filter_bandwidth,
            shape=self.filter_shape,
            affine_group_delay=False,
            inverse_fourier=inverse_fourier,
            group_delay=self.group_delay,
            clamp_f_mean=self.clamp_f_mean,
        )

        # Forward method
        if self.method == "mag":
            self.method_forward = self._apply_mag_forward
            self.n_features = self.n_chans * self.n_filters
            self.ensure_dim = nn.Identity()
        elif self.method == "corr":
            self.method_forward = partial(
                self._apply_corr_forward,
                n_chans=self.n_chans,
                n_filters=self.n_filters,
                n_times=self.n_times,
            )
            self.n_features = self.n_filters * self.n_chans * (self.n_chans - 1) // 2
            self.ensure_dim = nn.Identity()
        elif self.method == "plv":
            self.method_forward = partial(self._apply_plv, n_chans=self.n_chans)
            self.ensure_dim = Rearrange("... d -> ... 1 d")
            self.n_features = (self.n_filters * self.n_chans * (self.n_chans - 1)) // 2

        self.flatten_layer = nn.Flatten()
        # Classifier
        self.batch_layer = nn.BatchNorm1d(self.n_features, affine=False)
        self.final_layer = nn.Linear(self.n_features, self.n_outputs)
        nn.init.zeros_(self.final_layer.bias)

    def forward(self, x):
        """x: (batch, electrodes, time)"""
        batch = x.shape[0]
        x = self.ensure_dim(x)
        # Apply Gaussian filters in the frequency domain
        # x -> (batch, electrodes * filters, time)
        x = self.filter(x)

        x = self.method_forward(x=x, batch=batch)
        # Classifier
        # Note that the order of dimensions before flattening the feature vector
        # is important for attributing feature weights during interpretation.
        x = x.reshape(batch, self.n_features)
        x = self.batch_layer(x)
        x = self.final_layer(x)

        return x

    @staticmethod
    def _apply_mag_forward(x, batch=None):
        # Signal magnitude (root mean square over time)
        x = x * x
        x = x.mean(dim=-1)
        x = torch.sqrt(x)
        return x

    @staticmethod
    def _apply_corr_forward(
        x, batch, n_chans, n_filters, n_times, epsilon: float = 1e-6
    ):
        x = x.reshape(batch, n_chans, n_filters, n_times).transpose(-3, -2)
        x = (x - x.mean(dim=-1, keepdim=True)) / torch.sqrt(
            x.var(dim=-1, keepdim=True) + epsilon
        )
        x = torch.matmul(x, x.transpose(-2, -1)) / x.shape[-1]
        # Original tensor shape: [batch, n_filters, chans, chans]
        x = x.permute(0, 2, 3, 1)
        # New tensor shape: [batch, chans, chans, n_filters]
        # move filter channels to the end
        x = x.abs()

        # Get upper triangle of the symmetric connectivity matrix
        triu = torch.triu_indices(n_chans, n_chans, 1)
        x = x[:, triu[0], triu[1], :]

        return x

    @staticmethod
    def _apply_plv(x, n_chans, batch=None):
        # Compute PLV connectivity
        # x -> (batch, electrodes, electrodes, filters)
        x = x.transpose(-4, -3)  # swap electrodes and filters
        # adjusting to compute the plv
        x = F.plv_time(x, forward_fourier=False)
        # batch, number of filters, connectivity matrix
        # [batch, n_filters, chans, chans]
        x = x.permute(0, 2, 3, 1)
        # [batch, chans, chans, n_filters]

        # Get upper triangle of the symmetric connectivity matrix
        triu = torch.triu_indices(n_chans, n_chans, 1)
        x = x[:, triu[0], triu[1], :]
        return x
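For orientation, a minimal usage sketch of EEGMiner follows. It is not part of the packaged files: it assumes EEGMiner is re-exported from braindecode.models (consistent with the braindecode/models/__init__.py entry in the manifest above), and the channel count, window length, and sampling rate are illustrative values only.

# Hypothetical usage sketch, not part of the wheel contents.
import torch

from braindecode.models import EEGMiner  # assumed re-export

model = EEGMiner(
    method="plv",   # one of "mag", "corr", "plv"
    n_chans=22,     # illustrative values
    n_outputs=4,
    n_times=1000,
    sfreq=250.0,
)

x = torch.randn(8, 22, 1000)  # (batch_size, n_channels, n_timesteps)
logits = model(x)             # -> shape (8, 4)

# With the default two filters and method="plv", the features are the upper
# triangle of a 22x22 connectivity matrix per filter:
# n_filters * n_chans * (n_chans - 1) // 2 = 2 * 22 * 21 // 2 = 462
assert model.n_features == 462

As a reminder of the underlying quantity: the PLV between channels a and b is |mean_t exp(i * (phi_a(t) - phi_b(t)))|, the magnitude of the time-averaged unit phasor of their phase difference, so values near 1 indicate stable phase coupling within the learned frequency band.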
braindecode/models/eegnet.py
@@ -0,0 +1,359 @@
# Authors: Robin Schirrmeister <robintibor@gmail.com>
#
# License: BSD (3-clause)
from __future__ import annotations

from typing import Dict, Optional

from einops.layers.torch import Rearrange
from mne.utils import deprecated, warn
from torch import nn

from braindecode.functional import glorot_weight_zero_bias
from braindecode.models.base import EEGModuleMixin
from braindecode.modules import (
    Conv2dWithConstraint,
    Ensure4d,
    LinearWithConstraint,
    SqueezeFinalOutput,
)


class EEGNet(EEGModuleMixin, nn.Sequential):
    r"""EEGNet model from Lawhern et al. (2018) [Lawhern2018]_.

    :bdg-success:`Convolution`

    .. figure:: https://content.cld.iop.org/journals/1741-2552/15/5/056013/revision2/jneaace8cf01_hr.jpg
       :align: center
       :alt: EEGNet Architecture
       :width: 600px

    .. rubric:: Architectural Overview

    EEGNet is a compact convolutional network designed for EEG decoding whose
    pipeline mirrors classical EEG processing:

    - (i) learn temporal frequency-selective filters,
    - (ii) learn spatial filters for those frequencies, and
    - (iii) condense features with depthwise-separable convolutions before a
      lightweight classifier.

    The architecture is deliberately small, built around learned temporal
    convolutions and spatial patterns [Lawhern2018]_.

    .. rubric:: Macro Components

    - **Temporal convolution.**
      Temporal convolution applied per channel; learns ``F1`` kernels that act
      as data-driven band-pass filters.
    - **Depthwise spatial filtering.**
      Depthwise convolution spanning the channel dimension with ``groups=F1``,
      yielding ``D`` spatial filters for each temporal filter (no cross-filter
      mixing).
    - **Norm-nonlinearity-pooling (+ dropout).**
      Batch normalization → ELU → temporal pooling, with dropout.
    - **Depthwise-separable convolution block.**
      (a) depthwise temporal conv to refine temporal structure;
      (b) pointwise 1x1 conv to mix feature maps into ``F2`` combinations.
    - **Classifier head.**
      Lightweight 1x1 conv or dense layer (often with a max-norm constraint).

    .. rubric:: Convolutional Details

    - **Temporal.** The initial temporal convs serve as a *learned filter bank*:
      long 1-D kernels (implemented as 2-D with singleton spatial extent)
      emphasize oscillatory bands and transients. Because this stage is linear
      prior to BN/ELU, the kernels can be analyzed as FIR filters to reveal each
      feature's spectrum [Lawhern2018]_.

    - **Spatial.** The depthwise spatial conv spans the full channel axis
      (kernel height = #electrodes; temporal size = 1). With ``groups=F1``,
      each temporal filter learns its own set of ``D`` spatial projections,
      akin to CSP, learned end-to-end and typically regularized with max-norm.

    - **Spectral.** No explicit Fourier/wavelet transform is used. Frequency
      structure is captured implicitly by the temporal filter bank; later
      depthwise temporal kernels act as short-time integrators/refiners.

    .. rubric:: Additional Comments

    - **Filter-bank structure:** parallel temporal kernels (``F1``) emulate
      classical filter banks; pairing them with frequency-specific spatial
      filters yields features mappable to rhythms and topographies.
    - **Depthwise & separable convs:** the parameter-efficient decomposition
      (depthwise + pointwise) retains expressive power while limiting
      overfitting [Chollet2017]_ and keeps the temporal and mixing steps
      interpretable.
    - **Regularization:** batch norm, dropout, pooling, and an optional
      max-norm on the spatial kernels aid stability on small EEG datasets.
    - The "v4" in the legacy name ``EEGNetv4`` refers to version 4 of the arXiv
      paper [Lawhern2018]_.

    Parameters
    ----------
    final_conv_length : int or "auto", default="auto"
        Length of the final convolution layer. If "auto", it is set based on
        ``n_times``.
    pool_mode : {"mean", "max"}, default="mean"
        Pooling method to use in the pooling layers.
    F1 : int, default=8
        Number of temporal filters in the first convolutional layer.
    D : int, default=2
        Depth multiplier for the depthwise convolution.
    F2 : int or None, default=None
        Number of pointwise filters in the separable convolution. Usually set
        to ``F1 * D``.
    depthwise_kernel_length : int, default=16
        Length of the depthwise convolution kernel in the separable convolution.
    pool1_kernel_size : int, default=4
        Kernel size of the first pooling layer.
    pool2_kernel_size : int, default=8
        Kernel size of the second pooling layer.
    kernel_length : int, default=64
        Length of the temporal convolution kernel.
    conv_spatial_max_norm : float, default=1
        Maximum norm constraint for the spatial (depthwise) convolution.
    activation : nn.Module, default=nn.ELU
        Non-linear activation function to be used in the layers.
    batch_norm_momentum : float, default=0.01
        Momentum for the running statistics of the batch norm layers.
    batch_norm_affine : bool, default=True
        If True, the batch norm layers have learnable affine parameters.
    batch_norm_eps : float, default=1e-3
        Epsilon for numerical stability in the batch norm layers.
    drop_prob : float, default=0.25
        Dropout probability.
    final_layer_with_constraint : bool, default=False
        If ``False``, uses a convolution-based classification layer. If
        ``True``, applies a flattened linear layer with a max-norm constraint
        on the weights as the final classification step.
    norm_rate : float, default=0.25
        Max-norm constraint value for the linear layer (used if
        ``final_layer_with_constraint=True``).

    References
    ----------
    .. [Lawhern2018] Lawhern, V. J., Solon, A. J., Waytowich, N. R., Gordon, S. M.,
       Hung, C. P., & Lance, B. J. (2018). EEGNet: a compact convolutional
       neural network for EEG-based brain–computer interfaces. Journal of
       neural engineering, 15(5), 056013.
    .. [Chollet2017] Chollet, F., *Xception: Deep Learning with Depthwise Separable
       Convolutions*, CVPR, 2017.
    """

    def __init__(
        self,
        # signal's parameters
        n_chans: Optional[int] = None,
        n_outputs: Optional[int] = None,
        n_times: Optional[int] = None,
        # model's parameters
        final_conv_length: str | int = "auto",
        pool_mode: str = "mean",
        F1: int = 8,
        D: int = 2,
        F2: Optional[int] = None,
        kernel_length: int = 64,
        *,
        depthwise_kernel_length: int = 16,
        pool1_kernel_size: int = 4,
        pool2_kernel_size: int = 8,
        conv_spatial_max_norm: int = 1,
        activation: type[nn.Module] = nn.ELU,
        batch_norm_momentum: float = 0.01,
        batch_norm_affine: bool = True,
        batch_norm_eps: float = 1e-3,
        drop_prob: float = 0.25,
        final_layer_with_constraint: bool = False,
        norm_rate: float = 0.25,
        # Other ways to construct the signal related parameters
        chs_info: Optional[list[Dict]] = None,
        input_window_seconds=None,
        sfreq=None,
        **kwargs,
    ):
        super().__init__(
            n_outputs=n_outputs,
            n_chans=n_chans,
            chs_info=chs_info,
            n_times=n_times,
            input_window_seconds=input_window_seconds,
            sfreq=sfreq,
        )
        del n_outputs, n_chans, chs_info, n_times, input_window_seconds, sfreq
        if final_conv_length == "auto":
            assert self.n_times is not None

        if not final_layer_with_constraint:
            warn(
                "Parameter 'final_layer_with_constraint=False' is deprecated and will be "
                "removed in a future release. Please use "
                "`final_layer_with_constraint=True`.",
                DeprecationWarning,
            )

        if "third_kernel_size" in kwargs:
            warn(
                "The parameter `third_kernel_size` is deprecated "
                "and will be removed in a future version.",
            )
        unexpected_kwargs = set(kwargs) - {"third_kernel_size"}
        if unexpected_kwargs:
            raise TypeError(f"Unexpected keyword arguments: {unexpected_kwargs}")

        self.final_conv_length = final_conv_length
        self.pool_mode = pool_mode
        self.F1 = F1
        self.D = D

        if F2 is None:
            F2 = self.F1 * self.D
        self.F2 = F2

        self.kernel_length = kernel_length
        self.depthwise_kernel_length = depthwise_kernel_length
        self.pool1_kernel_size = pool1_kernel_size
        self.pool2_kernel_size = pool2_kernel_size
        self.drop_prob = drop_prob
        self.activation = activation
        self.batch_norm_momentum = batch_norm_momentum
        self.batch_norm_affine = batch_norm_affine
        self.batch_norm_eps = batch_norm_eps
        self.conv_spatial_max_norm = conv_spatial_max_norm
        self.norm_rate = norm_rate

        # For load_state_dict: when standardizing all layers,
        # add the old parameter names here.
        self.mapping = {
            "conv_classifier.weight": "final_layer.conv_classifier.weight",
            "conv_classifier.bias": "final_layer.conv_classifier.bias",
        }

        pool_class = dict(max=nn.MaxPool2d, mean=nn.AvgPool2d)[self.pool_mode]
        self.add_module("ensuredims", Ensure4d())

        self.add_module("dimshuffle", Rearrange("batch ch t 1 -> batch 1 ch t"))
        self.add_module(
            "conv_temporal",
            nn.Conv2d(
                1,
                self.F1,
                (1, self.kernel_length),
                bias=False,
                padding=(0, self.kernel_length // 2),
            ),
        )
        self.add_module(
            "bnorm_temporal",
            nn.BatchNorm2d(
                self.F1,
                momentum=self.batch_norm_momentum,
                affine=self.batch_norm_affine,
                eps=self.batch_norm_eps,
            ),
        )
        self.add_module(
            "conv_spatial",
            Conv2dWithConstraint(
                in_channels=self.F1,
                out_channels=self.F1 * self.D,
                kernel_size=(self.n_chans, 1),
                max_norm=self.conv_spatial_max_norm,
                bias=False,
                groups=self.F1,
            ),
        )

        self.add_module(
            "bnorm_1",
            nn.BatchNorm2d(
                self.F1 * self.D,
                momentum=self.batch_norm_momentum,
                affine=self.batch_norm_affine,
                eps=self.batch_norm_eps,
            ),
        )
        self.add_module("elu_1", activation())

        self.add_module(
            "pool_1",
            pool_class(
                kernel_size=(1, self.pool1_kernel_size),
            ),
        )
        self.add_module("drop_1", nn.Dropout(p=self.drop_prob))

        # https://discuss.pytorch.org/t/how-to-modify-a-conv2d-to-depthwise-separable-convolution/15843/7
        self.add_module(
            "conv_separable_depth",
            nn.Conv2d(
                self.F1 * self.D,
                self.F1 * self.D,
                (1, self.depthwise_kernel_length),
                bias=False,
                groups=self.F1 * self.D,
                padding=(0, self.depthwise_kernel_length // 2),
            ),
        )
        self.add_module(
            "conv_separable_point",
            nn.Conv2d(
                self.F1 * self.D,
                self.F2,
                kernel_size=(1, 1),
                bias=False,
            ),
        )

        self.add_module(
            "bnorm_2",
            nn.BatchNorm2d(
                self.F2,
                momentum=self.batch_norm_momentum,
                affine=self.batch_norm_affine,
                eps=self.batch_norm_eps,
            ),
        )
        self.add_module("elu_2", self.activation())
        self.add_module(
            "pool_2",
            pool_class(
                kernel_size=(1, self.pool2_kernel_size),
            ),
        )
        self.add_module("drop_2", nn.Dropout(p=self.drop_prob))

        output_shape = self.get_output_shape()
        n_out_virtual_chans = output_shape[2]

        if self.final_conv_length == "auto":
            n_out_time = output_shape[3]
            self.final_conv_length = n_out_time

        # Incorporate the classification module and subsequent ones in one final layer
        module = nn.Sequential()
        if not final_layer_with_constraint:
            module.add_module(
                "conv_classifier",
                nn.Conv2d(
                    self.F2,
                    self.n_outputs,
                    (n_out_virtual_chans, self.final_conv_length),
                    bias=True,
                ),
            )

            # Transpose back to the braindecode convention,
            # so time is in the third dimension (axis=2)
            module.add_module(
                "permute_back",
                Rearrange("batch x y z -> batch x z y"),
            )

            module.add_module("squeeze", SqueezeFinalOutput())
        else:
            module.add_module("flatten", nn.Flatten())
            module.add_module(
                "linearconstraint",
                LinearWithConstraint(
                    in_features=self.F2 * self.final_conv_length,
                    out_features=self.n_outputs,
                    max_norm=norm_rate,
                ),
            )
        self.add_module("final_layer", module)

        glorot_weight_zero_bias(self)


@deprecated(
    "`EEGNetv4` was renamed to `EEGNet` in v1.12; this alias will be removed in v1.14."
)
class EEGNetv4(EEGNet):
    r"""Deprecated alias for EEGNet."""

    pass
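As above, a minimal usage sketch of EEGNet; it is not part of the packaged files. It assumes EEGNet is re-exported from braindecode.models, and the shape values are illustrative. The shape-flow comments are derived from the kernel and pooling sizes in the module definitions above.

# Hypothetical usage sketch, not part of the wheel contents.
import torch

from braindecode.models import EEGNet  # assumed re-export

model = EEGNet(
    n_chans=22,    # illustrative values
    n_outputs=4,
    n_times=1000,
    F1=8,
    D=2,           # F2 defaults to F1 * D = 16
)

x = torch.randn(8, 22, 1000)  # (batch, channels, time)
out = model(x)                # -> shape (8, 4)

# Approximate shape flow for this input, following the layer definitions:
#   (8, 1, 22, 1000)   after dimshuffle
#   (8, 8, 22, 1001)   after conv_temporal       (1x64 kernel, padding 32)
#   (8, 16, 1, 1001)   after conv_spatial        (depthwise over all 22 channels)
#   (8, 16, 1, 250)    after pool_1              (1x4 average pooling)
#   (8, 16, 1, 251)    after the separable block (1x16 depthwise, padding 8)
#   (8, 16, 1, 31)     after pool_2              (1x8 average pooling)
#   (8, 4)             after final_layer         (1x31 conv + squeeze)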