PyPI - braindecode - Versions diffs - 1.3.0.dev177069446__py3-none-any.whl - Mend

braindecode 1.3.0.dev177069446__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124) hide show

braindecode/__init__.py +9 -0
braindecode/augmentation/__init__.py +52 -0
braindecode/augmentation/base.py +225 -0
braindecode/augmentation/functional.py +1300 -0
braindecode/augmentation/transforms.py +1356 -0
braindecode/classifier.py +258 -0
braindecode/datasets/__init__.py +44 -0
braindecode/datasets/base.py +823 -0
braindecode/datasets/bbci.py +693 -0
braindecode/datasets/bcicomp.py +193 -0
braindecode/datasets/bids/__init__.py +54 -0
braindecode/datasets/bids/datasets.py +239 -0
braindecode/datasets/bids/format.py +717 -0
braindecode/datasets/bids/hub.py +987 -0
braindecode/datasets/bids/hub_format.py +717 -0
braindecode/datasets/bids/hub_io.py +197 -0
braindecode/datasets/bids/hub_validation.py +114 -0
braindecode/datasets/bids/iterable.py +220 -0
braindecode/datasets/chb_mit.py +163 -0
braindecode/datasets/mne.py +170 -0
braindecode/datasets/moabb.py +219 -0
braindecode/datasets/nmt.py +313 -0
braindecode/datasets/registry.py +120 -0
braindecode/datasets/siena.py +162 -0
braindecode/datasets/sleep_physio_challe_18.py +411 -0
braindecode/datasets/sleep_physionet.py +125 -0
braindecode/datasets/tuh.py +591 -0
braindecode/datasets/utils.py +67 -0
braindecode/datasets/xy.py +96 -0
braindecode/datautil/__init__.py +62 -0
braindecode/datautil/channel_utils.py +114 -0
braindecode/datautil/hub_formats.py +180 -0
braindecode/datautil/serialization.py +359 -0
braindecode/datautil/util.py +154 -0
braindecode/eegneuralnet.py +372 -0
braindecode/functional/__init__.py +22 -0
braindecode/functional/functions.py +251 -0
braindecode/functional/initialization.py +47 -0
braindecode/models/__init__.py +117 -0
braindecode/models/atcnet.py +830 -0
braindecode/models/attentionbasenet.py +727 -0
braindecode/models/attn_sleep.py +549 -0
braindecode/models/base.py +574 -0
braindecode/models/bendr.py +493 -0
braindecode/models/biot.py +537 -0
braindecode/models/brainmodule.py +845 -0
braindecode/models/config.py +233 -0
braindecode/models/contrawr.py +319 -0
braindecode/models/ctnet.py +541 -0
braindecode/models/deep4.py +376 -0
braindecode/models/deepsleepnet.py +417 -0
braindecode/models/eegconformer.py +475 -0
braindecode/models/eeginception_erp.py +379 -0
braindecode/models/eeginception_mi.py +379 -0
braindecode/models/eegitnet.py +302 -0
braindecode/models/eegminer.py +256 -0
braindecode/models/eegnet.py +359 -0
braindecode/models/eegnex.py +354 -0
braindecode/models/eegsimpleconv.py +201 -0
braindecode/models/eegsym.py +917 -0
braindecode/models/eegtcnet.py +337 -0
braindecode/models/fbcnet.py +225 -0
braindecode/models/fblightconvnet.py +315 -0
braindecode/models/fbmsnet.py +338 -0
braindecode/models/hybrid.py +126 -0
braindecode/models/ifnet.py +443 -0
braindecode/models/labram.py +1316 -0
braindecode/models/luna.py +891 -0
braindecode/models/medformer.py +760 -0
braindecode/models/msvtnet.py +377 -0
braindecode/models/patchedtransformer.py +640 -0
braindecode/models/reve.py +843 -0
braindecode/models/sccnet.py +280 -0
braindecode/models/shallow_fbcsp.py +212 -0
braindecode/models/signal_jepa.py +1122 -0
braindecode/models/sinc_shallow.py +339 -0
braindecode/models/sleep_stager_blanco_2020.py +169 -0
braindecode/models/sleep_stager_chambon_2018.py +159 -0
braindecode/models/sparcnet.py +426 -0
braindecode/models/sstdpn.py +869 -0
braindecode/models/summary.csv +47 -0
braindecode/models/syncnet.py +234 -0
braindecode/models/tcn.py +275 -0
braindecode/models/tidnet.py +397 -0
braindecode/models/tsinception.py +295 -0
braindecode/models/usleep.py +439 -0
braindecode/models/util.py +369 -0
braindecode/modules/__init__.py +92 -0
braindecode/modules/activation.py +86 -0
braindecode/modules/attention.py +883 -0
braindecode/modules/blocks.py +160 -0
braindecode/modules/convolution.py +330 -0
braindecode/modules/filter.py +654 -0
braindecode/modules/layers.py +216 -0
braindecode/modules/linear.py +70 -0
braindecode/modules/parametrization.py +38 -0
braindecode/modules/stats.py +87 -0
braindecode/modules/util.py +85 -0
braindecode/modules/wrapper.py +90 -0
braindecode/preprocessing/__init__.py +271 -0
braindecode/preprocessing/eegprep_preprocess.py +1317 -0
braindecode/preprocessing/mne_preprocess.py +240 -0
braindecode/preprocessing/preprocess.py +579 -0
braindecode/preprocessing/util.py +177 -0
braindecode/preprocessing/windowers.py +1037 -0
braindecode/regressor.py +234 -0
braindecode/samplers/__init__.py +18 -0
braindecode/samplers/base.py +399 -0
braindecode/samplers/ssl.py +263 -0
braindecode/training/__init__.py +23 -0
braindecode/training/callbacks.py +23 -0
braindecode/training/losses.py +105 -0
braindecode/training/scoring.py +477 -0
braindecode/util.py +419 -0
braindecode/version.py +1 -0
braindecode/visualization/__init__.py +8 -0
braindecode/visualization/confusion_matrices.py +289 -0
braindecode/visualization/gradients.py +62 -0
braindecode-1.3.0.dev177069446.dist-info/METADATA +230 -0
braindecode-1.3.0.dev177069446.dist-info/RECORD +124 -0
braindecode-1.3.0.dev177069446.dist-info/WHEEL +5 -0
braindecode-1.3.0.dev177069446.dist-info/licenses/LICENSE.txt +31 -0
braindecode-1.3.0.dev177069446.dist-info/licenses/NOTICE.txt +20 -0
braindecode-1.3.0.dev177069446.dist-info/top_level.txt +1 -0

braindecode/models/sccnet.py ADDED Viewed

@@ -0,0 +1,280 @@
+# Authors: Chun-Shu Wei
+#         Bruno Aristimunha <b.aristimunha@gmail.com> (braindecode adaptation)
+#
+# License: BSD (3-clause)
+import math
+from warnings import warn
+import torch
+from einops.layers.torch import Rearrange
+from torch import nn
+from braindecode.models.base import EEGModuleMixin
+from braindecode.modules import LogActivation
+class SCCNet(EEGModuleMixin, nn.Module):
+    r"""SCCNet from Wei, C S (2019) [sccnet]_.
+    :bdg-success:`Convolution`
+    Spatial component-wise convolutional network (SCCNet) for motor-imagery EEG
+    classification.
+    .. figure:: https://dt5vp8kor0orz.cloudfront.net/6e3ec5d729cd51fe8acc5a978db27d02a5df9e05/2-Figure1-1.png
+       :align: center
+       :alt:  Spatial component-wise convolutional network
+       :width: 680px
+    .. rubric:: Architectural Overview
+    SCCNet is a spatial-first convolutional layer that fixes temporal kernels in seconds
+    to make its filters correspond to neurophysiologically aligned windows. The model
+    comprises four stages:
+    1. **Spatial Component Analysis**: Performs convolution spatial filtering
+        across all EEG channels to extract spatial components, effectively
+        reducing the channel dimension.
+    2. **Spatio-Temporal Filtering**: Applies convolution across the spatial
+        components and temporal domain to capture spatio-temporal patterns.
+    3. **Temporal Smoothing (Pooling)**: Uses average pooling over time to smooth the
+       features and reduce the temporal dimension, focusing on longer-term patterns.
+    4. **Classification**: Flattens the features and applies a fully connected
+       layer.
+    .. rubric:: Macro Components
+    - `SCCNet.spatial_conv` **(spatial component analysis)**
+        - *Operations.*
+        - :class:`~torch.nn.Conv2d` with kernel `(n_chans, N_t)` and stride `(1, 1)` on an input reshaped to `(B, 1, n_chans, T)`; typical choice `N_t=1` yields a pure across-channel projection (montage-wide linear spatial filter).
+        - Zero padding to preserve time, :class:`~torch.nn.BatchNorm2d`; output has `N_u` component signals shaped `(B, 1, N_u, T)` after a permute step.
+    *Interpretability/robustness.* Mimics CSP-like spatial filtering: each learned filter is a channel-weighted component, easing inspection and reducing channel noise.
+    - `SCCNet.spatial_filt_conv` **(spatio-temporal filtering)**
+        - *Operations.*
+        - :class:`~torch.nn.Conv2d` with kernel `(N_u, 12)` over components and time (12 samples ~ 0.1 s at 125 Hz),
+        - :class:`~torch.nn.BatchNorm2d`;
+        - Nonlinearity is **power-like**: the original paper uses **square** like :class:`~braindecode.models.ShallowFBCSPNet` with the class :class:`~braindecode.modules.LogActivation` as default.
+        - :class:`~torch.nn.Dropout` with rate `p=0.5`.
+    - *Role.* Learns frequency-selective energy features and inter-component interactions within a 0.1 s context (beta/alpha cycle scale).
+    - `SCCNet.temporal_smoothing` **(aggregation + readout)**
+        - *Operations.*
+        - :class:`~torch.nn.AvgPool2d` with size `(1, 62)` (~ 0.5 s) for temporal smoothing and downsampling
+        - :class:`~torch.nn.Flatten`
+        - :class:`~torch.nn.Linear` to `n_outputs`.
+    .. rubric:: Convolutional Details
+    * **Temporal (where time-domain patterns are learned).**
+        The second block's kernel length is fixed to 12 samples (≈ 100 ms) and slides with
+        stride 1; average pooling `(1, 62)` (≈ 500 ms) integrates power over longer spans.
+        These choices bake in short-cycle detection followed by half-second trend smoothing.
+    * **Spatial (how electrodes are processed).**
+        The first block's kernel spans **all electrodes** `(n_chans, N_t)`. With `N_t=1`,
+        it reduces to a montage-wide linear projection, mapping channels → `N_u` components.
+        The second block mixes **across components** via kernel height `N_u`.
+    * **Spectral (how frequency information is captured).**
+        No explicit transform is used; learned **temporal kernels** serve as bandpass-like
+        filters, and the **square/log power** nonlinearity plus 0.5 s averaging approximate
+        band-power estimation (ERD/ERS-style features).
+    .. rubric:: Attention / Sequential Modules
+    This model contains **no attention** and **no recurrent units**.
+    .. rubric:: Additional Mechanisms
+    - :class:`~torch.nn.BatchNorm2d` and zero-padding are applied to both convolutions;
+      L2 weight decay was used in the original paper; dropout `p=0.5` combats overfitting.
+    - Contrasting with other compact neural network, in EEGNet performs a temporal depthwise conv
+      followed by a **depthwise spatial** conv (separable), learning temporal filters first.
+      SCCNet inverts this order: it performs a **full spatial projection first** (CSP-like),
+      then a short **spatio-temporal** conv with an explicit 0.1 s kernel, followed by
+      **power-like** nonlinearity and longer temporal averaging. EEGNet's ELU and
+      separable design favor parameter efficiency; SCCNet's second-scale kernels and
+      square/log emphasize interpretable **band-power** features.
+    - Reference implementation: see [sccnetcode]_.
+    .. rubric:: Usage and Configuration
+    * **Training from the original authors.**
+    * Match window length so that `T` is comfortably larger than pooling length
+        (e.g., > 1.5-2 s for MI).
+    * Start with standard MI augmentations (channel dropout/shuffle, time reverse)
+        and tune `n_spatial_filters` before deeper changes.
+    Parameters
+    ----------
+    n_spatial_filters : int, optional
+        Number of spatial filters in the first convolutional layer, variable `N_u` from the
+        original paper. Default is 22.
+    n_spatial_filters_smooth : int, optional
+        Number of spatial filters used as filter in the second convolutional
+        layer. Default is 20.
+    drop_prob : float, optional
+        Dropout probability. Default is 0.5.
+    activation : nn.Module, optional
+        Activation function after the second convolutional layer. Default is
+        logarithm activation.
+    References
+    ----------
+    .. [sccnet] Wei, C. S., Koike-Akino, T., & Wang, Y. (2019, March). Spatial
+        component-wise convolutional network (SCCNet) for motor-imagery EEG
+        classification. In 2019 9th International IEEE/EMBS Conference on
+        Neural Engineering (NER) (pp. 328-331). IEEE.
+    .. [sccnetcode] Hsieh, C. Y., Chou, J. L., Chang, Y. H., & Wei, C. S.
+        XBrainLab: An Open-Source Software for Explainable Artificial
+        Intelligence-Based EEG Analysis. In NeurIPS 2023 AI for
+        Science Workshop.
+    """
+    def __init__(
+        self,
+        # Signal related parameters
+        n_chans=None,
+        n_outputs=None,
+        n_times=None,
+        chs_info=None,
+        input_window_seconds=None,
+        sfreq=None,
+        # Model related parameters
+        n_spatial_filters: int = 22,
+        n_spatial_filters_smooth: int = 20,
+        drop_prob: float = 0.5,
+        activation: type[nn.Module] = LogActivation,
+        batch_norm_momentum: float = 0.1,
+    ):
+        super().__init__(
+            n_outputs=n_outputs,
+            n_chans=n_chans,
+            chs_info=chs_info,
+            n_times=n_times,
+            input_window_seconds=input_window_seconds,
+            sfreq=sfreq,
+        )
+        del n_outputs, n_chans, chs_info, n_times, input_window_seconds, sfreq
+        # Parameters
+        self.n_spatial_filters = n_spatial_filters
+        self.n_spatial_filters_smooth = n_spatial_filters_smooth
+        self.drop_prob = drop_prob
+        # Original logical for SCCNet
+        conv_kernel_time = 0.1  # 100ms
+        pool_kernel_time = 0.5  # 500ms
+        # Calculate sample-based sizes from time durations
+        conv_kernel_samples = int(math.floor(self.sfreq * conv_kernel_time))
+        pool_kernel_samples = int(math.floor(self.sfreq * pool_kernel_time))
+        # If the input window is too short for the default kernel sizes,
+        # scale them down proportionally.
+        total_kernel_samples = conv_kernel_samples + pool_kernel_samples
+        if self.n_times < total_kernel_samples:
+            warning_msg = (
+                f"Input window seconds ({self.input_window_seconds:.2f}s) is smaller than the "
+                f"model's combined kernel sizes ({(total_kernel_samples / self.sfreq):.2f}s). "
+                "Scaling temporal parameters down proportionally."
+            )
+            warn(warning_msg, UserWarning, stacklevel=2)
+            scaling_factor = self.n_times / total_kernel_samples
+            conv_kernel_samples = int(math.floor(conv_kernel_samples * scaling_factor))
+            pool_kernel_samples = int(math.floor(pool_kernel_samples * scaling_factor))
+        # Ensure kernels are at least 1 sample wide
+        self.samples_100ms = max(1, conv_kernel_samples)
+        self.kernel_size_pool = max(1, pool_kernel_samples)
+        num_features = self._calc_num_features()
+        # Layers
+        self.ensure_dim = Rearrange("batch nchan times -> batch 1 nchan times")
+        self.activation = LogActivation() if activation is None else activation()
+        self.spatial_conv = nn.Conv2d(
+            in_channels=1,
+            out_channels=self.n_spatial_filters,
+            kernel_size=(self.n_chans, 1),
+        )
+        self.spatial_batch_norm = nn.BatchNorm2d(
+            self.n_spatial_filters, momentum=batch_norm_momentum
+        )
+        self.permute = Rearrange(
+            "batch filspat nchans time -> batch nchans filspat time"
+        )
+        self.spatial_filt_conv = nn.Conv2d(
+            in_channels=1,
+            out_channels=self.n_spatial_filters_smooth,
+            kernel_size=(self.n_spatial_filters, self.samples_100ms),
+            bias=False,
+        )
+        self.batch_norm = nn.BatchNorm2d(
+            self.n_spatial_filters_smooth, momentum=batch_norm_momentum
+        )
+        self.dropout = nn.Dropout(self.drop_prob)
+        self.temporal_smoothing = nn.AvgPool2d(
+            kernel_size=(1, self.kernel_size_pool),
+            stride=(1, self.samples_100ms),
+        )
+        self.final_layer = nn.Linear(num_features, self.n_outputs)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # Shape: (batch_size, n_chans, n_times)
+        x = self.ensure_dim(x)
+        # Shape: (batch_size, 1, n_chans, n_times)
+        x = self.spatial_conv(x)
+        # Shape: (batch_size, n_filters, 1, n_times)
+        x = self.spatial_batch_norm(x)
+        # Shape: (batch_size, n_filters, 1, n_times)
+        x = self.permute(x)
+        # Shape: (batch_size, 1, n_filters, n_times)
+        x = self.spatial_filt_conv(x)
+        # Shape: (batch_size, n_filters_filt, 1, n_times_reduced)
+        x = self.batch_norm(x)
+        # Shape: (batch_size, n_filters_filt, 1, n_times_reduced)
+        x = torch.pow(x, 2)
+        # Shape: (batch_size, n_filters_filt, 1, n_times_reduced)
+        x = self.dropout(x)
+        # Shape: (batch_size, n_filters_filt, 1, n_times_reduced)
+        x = self.temporal_smoothing(x)
+        # Shape: (batch_size, n_filters_filt, 1, n_times_reduced_avg_pool)
+        x = self.activation(x)
+        # Shape: (batch_size, n_filters_filt, 1, n_times_reduced_avg_pool)
+        x = x.view(x.size(0), -1)
+        # Shape: (batch_size, n_filters_filt*n_times_reduced_avg_pool)
+        x = self.final_layer(x)
+        # Shape: (batch_size, n_outputs)
+        return x
+    def _calc_num_features(self) -> int:
+        # Compute the number of features for the final linear layer
+        w_out_conv2 = (
+            self.n_times - self.samples_100ms + 1  # After second conv layer
+        )
+        w_out_pool = (
+            (w_out_conv2 - self.kernel_size_pool) // self.samples_100ms + 1
+            # After pooling layer
+        )
+        num_features = self.n_spatial_filters_smooth * w_out_pool
+        return num_features

braindecode/models/shallow_fbcsp.py ADDED Viewed

@@ -0,0 +1,212 @@
+# Authors: Robin Schirrmeister <robintibor@gmail.com>
+#
+# License: BSD (3-clause)
+from typing import Callable
+from einops.layers.torch import Rearrange
+from torch import nn
+from torch.nn import init
+from braindecode.functional import square
+from braindecode.models.base import EEGModuleMixin
+from braindecode.modules import (
+    CombinedConv,
+    Ensure4d,
+    Expression,
+    SafeLog,
+    SqueezeFinalOutput,
+)
+class ShallowFBCSPNet(EEGModuleMixin, nn.Sequential):
+    r"""Shallow ConvNet model from Schirrmeister et al (2017) [Schirrmeister2017]_.
+    :bdg-success:`Convolution`
+    .. figure:: https://onlinelibrary.wiley.com/cms/asset/221ea375-6701-40d3-ab3f-e411aad62d9e/hbm23730-fig-0002-m.jpg
+        :align: center
+        :alt: ShallowNet Architecture
+    Model described in [Schirrmeister2017]_.
+    Parameters
+    ----------
+    n_filters_time: int
+        Number of temporal filters.
+    filter_time_length: int
+        Length of the temporal filter.
+    n_filters_spat: int
+        Number of spatial filters.
+    pool_time_length: int
+        Length of temporal pooling filter.
+    pool_time_stride: int
+        Length of stride between temporal pooling filters.
+    final_conv_length: int | str
+        Length of the final convolution layer.
+        If set to "auto", length of the input signal must be specified.
+    conv_nonlin: callable
+        Non-linear function to be used after convolution layers.
+    pool_mode: str
+        Method to use on pooling layers. "max" or "mean".
+    activation_pool_nonlin: callable
+        Non-linear function to be used after pooling layers.
+    split_first_layer: bool
+        Split first layer into temporal and spatial layers (True) or just use temporal (False).
+        There would be no non-linearity between the split layers.
+    batch_norm: bool
+        Whether to use batch normalisation.
+    batch_norm_alpha: float
+        Momentum for BatchNorm2d.
+    drop_prob: float
+        Dropout probability.
+    References
+    ----------
+    .. [Schirrmeister2017] Schirrmeister, R. T., Springenberg, J. T., Fiederer,
+       L. D. J., Glasstetter, M., Eggensperger, K., Tangermann, M., Hutter, F.
+       & Ball, T. (2017).
+       Deep learning with convolutional neural networks for EEG decoding and
+       visualization.
+       Human Brain Mapping , Aug. 2017.
+       Online: http://dx.doi.org/10.1002/hbm.23730
+    """
+    def __init__(
+        self,
+        n_chans=None,
+        n_outputs=None,
+        n_times=None,
+        n_filters_time=40,
+        filter_time_length=25,
+        n_filters_spat=40,
+        pool_time_length=75,
+        pool_time_stride=15,
+        final_conv_length="auto",
+        conv_nonlin: Callable = square,
+        pool_mode="mean",
+        activation_pool_nonlin: type[nn.Module] = SafeLog,
+        split_first_layer=True,
+        batch_norm=True,
+        batch_norm_alpha=0.1,
+        drop_prob=0.5,
+        chs_info=None,
+        input_window_seconds=None,
+        sfreq=None,
+    ):
+        super().__init__(
+            n_outputs=n_outputs,
+            n_chans=n_chans,
+            chs_info=chs_info,
+            n_times=n_times,
+            input_window_seconds=input_window_seconds,
+            sfreq=sfreq,
+        )
+        del n_outputs, n_chans, chs_info, n_times, input_window_seconds, sfreq
+        if final_conv_length == "auto":
+            assert self.n_times is not None
+        self.n_filters_time = n_filters_time
+        self.filter_time_length = filter_time_length
+        self.n_filters_spat = n_filters_spat
+        self.pool_time_length = pool_time_length
+        self.pool_time_stride = pool_time_stride
+        self.final_conv_length = final_conv_length
+        self.conv_nonlin = conv_nonlin
+        self.pool_mode = pool_mode
+        self.pool_nonlin = activation_pool_nonlin
+        self.split_first_layer = split_first_layer
+        self.batch_norm = batch_norm
+        self.batch_norm_alpha = batch_norm_alpha
+        self.drop_prob = drop_prob
+        self.mapping = {
+            "conv_time.weight": "conv_time_spat.conv_time.weight",
+            "conv_spat.weight": "conv_time_spat.conv_spat.weight",
+            "conv_time.bias": "conv_time_spat.conv_time.bias",
+            "conv_spat.bias": "conv_time_spat.conv_spat.bias",
+            "conv_classifier.weight": "final_layer.conv_classifier.weight",
+            "conv_classifier.bias": "final_layer.conv_classifier.bias",
+        }
+        self.add_module("ensuredims", Ensure4d())
+        pool_class = dict(max=nn.MaxPool2d, mean=nn.AvgPool2d)[self.pool_mode]
+        if self.split_first_layer:
+            self.add_module("dimshuffle", Rearrange("batch C T 1 -> batch 1 T C"))
+            self.add_module(
+                "conv_time_spat",
+                CombinedConv(
+                    in_chans=self.n_chans,
+                    n_filters_time=self.n_filters_time,
+                    n_filters_spat=self.n_filters_spat,
+                    filter_time_length=filter_time_length,
+                    bias_time=True,
+                    bias_spat=not self.batch_norm,
+                ),
+            )
+            n_filters_conv = self.n_filters_spat
+        else:
+            self.add_module(
+                "conv_time",
+                nn.Conv2d(
+                    self.n_chans,
+                    self.n_filters_time,
+                    (self.filter_time_length, 1),
+                    stride=1,
+                    bias=not self.batch_norm,
+                ),
+            )
+            n_filters_conv = self.n_filters_time
+        if self.batch_norm:
+            self.add_module(
+                "bnorm",
+                nn.BatchNorm2d(
+                    n_filters_conv, momentum=self.batch_norm_alpha, affine=True
+                ),
+            )
+        self.add_module("conv_nonlin_exp", Expression(self.conv_nonlin))
+        self.add_module(
+            "pool",
+            pool_class(
+                kernel_size=(self.pool_time_length, 1),
+                stride=(self.pool_time_stride, 1),
+            ),
+        )
+        self.add_module("pool_nonlin_exp", self.pool_nonlin())
+        self.add_module("drop", nn.Dropout(p=self.drop_prob))
+        self.eval()
+        if self.final_conv_length == "auto":
+            self.final_conv_length = self.get_output_shape()[2]
+        # Incorporating classification module and subsequent ones in one final layer
+        module = nn.Sequential()
+        module.add_module(
+            "conv_classifier",
+            nn.Conv2d(
+                n_filters_conv,
+                self.n_outputs,
+                (self.final_conv_length, 1),
+                bias=True,
+            ),
+        )
+        module.add_module("squeeze", SqueezeFinalOutput())
+        self.add_module("final_layer", module)
+        # Initialization, xavier is same as in paper...
+        init.xavier_uniform_(self.conv_time_spat.conv_time.weight, gain=1)
+        # maybe no bias in case of no split layer and batch norm
+        if self.split_first_layer or (not self.batch_norm):
+            init.constant_(self.conv_time_spat.conv_time.bias, 0)
+        if self.split_first_layer:
+            init.xavier_uniform_(self.conv_time_spat.conv_spat.weight, gain=1)
+            if not self.batch_norm:
+                init.constant_(self.conv_time_spat.conv_spat.bias, 0)
+        if self.batch_norm:
+            init.constant_(self.bnorm.weight, 1)
+            init.constant_(self.bnorm.bias, 0)
+        init.xavier_uniform_(self.final_layer.conv_classifier.weight, gain=1)
+        init.constant_(self.final_layer.conv_classifier.bias, 0)
+        self.train()