brainstate 0.2.1-py2.py3-none-any.whl → 0.2.2-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. brainstate/__init__.py +167 -169
  2. brainstate/_compatible_import.py +340 -340
  3. brainstate/_compatible_import_test.py +681 -681
  4. brainstate/_deprecation.py +210 -210
  5. brainstate/_deprecation_test.py +2297 -2319
  6. brainstate/_error.py +45 -45
  7. brainstate/_state.py +2157 -1652
  8. brainstate/_state_test.py +1129 -52
  9. brainstate/_utils.py +47 -47
  10. brainstate/environ.py +1495 -1495
  11. brainstate/environ_test.py +1223 -1223
  12. brainstate/graph/__init__.py +22 -22
  13. brainstate/graph/_node.py +240 -240
  14. brainstate/graph/_node_test.py +589 -589
  15. brainstate/graph/_operation.py +1620 -1624
  16. brainstate/graph/_operation_test.py +1147 -1147
  17. brainstate/mixin.py +1447 -1433
  18. brainstate/mixin_test.py +1017 -1017
  19. brainstate/nn/__init__.py +146 -137
  20. brainstate/nn/_activations.py +1100 -1100
  21. brainstate/nn/_activations_test.py +354 -354
  22. brainstate/nn/_collective_ops.py +635 -633
  23. brainstate/nn/_collective_ops_test.py +774 -774
  24. brainstate/nn/_common.py +226 -226
  25. brainstate/nn/_common_test.py +134 -154
  26. brainstate/nn/_conv.py +2010 -2010
  27. brainstate/nn/_conv_test.py +849 -849
  28. brainstate/nn/_delay.py +575 -575
  29. brainstate/nn/_delay_test.py +243 -243
  30. brainstate/nn/_dropout.py +618 -618
  31. brainstate/nn/_dropout_test.py +480 -477
  32. brainstate/nn/_dynamics.py +870 -1267
  33. brainstate/nn/_dynamics_test.py +53 -67
  34. brainstate/nn/_elementwise.py +1298 -1298
  35. brainstate/nn/_elementwise_test.py +829 -829
  36. brainstate/nn/_embedding.py +408 -408
  37. brainstate/nn/_embedding_test.py +156 -156
  38. brainstate/nn/_event_fixedprob.py +233 -233
  39. brainstate/nn/_event_fixedprob_test.py +115 -115
  40. brainstate/nn/_event_linear.py +83 -83
  41. brainstate/nn/_event_linear_test.py +121 -121
  42. brainstate/nn/_exp_euler.py +254 -254
  43. brainstate/nn/_exp_euler_test.py +377 -377
  44. brainstate/nn/_linear.py +744 -744
  45. brainstate/nn/_linear_test.py +475 -475
  46. brainstate/nn/_metrics.py +1070 -1070
  47. brainstate/nn/_metrics_test.py +611 -611
  48. brainstate/nn/_module.py +391 -384
  49. brainstate/nn/_module_test.py +40 -40
  50. brainstate/nn/_normalizations.py +1334 -1334
  51. brainstate/nn/_normalizations_test.py +699 -699
  52. brainstate/nn/_paddings.py +1020 -1020
  53. brainstate/nn/_paddings_test.py +722 -722
  54. brainstate/nn/_poolings.py +2239 -2239
  55. brainstate/nn/_poolings_test.py +952 -952
  56. brainstate/nn/_rnns.py +946 -946
  57. brainstate/nn/_rnns_test.py +592 -592
  58. brainstate/nn/_utils.py +216 -216
  59. brainstate/nn/_utils_test.py +401 -401
  60. brainstate/nn/init.py +809 -809
  61. brainstate/nn/init_test.py +180 -180
  62. brainstate/random/__init__.py +270 -270
  63. brainstate/random/{_rand_funs.py → _fun.py} +3938 -3938
  64. brainstate/random/{_rand_funs_test.py → _fun_test.py} +638 -640
  65. brainstate/random/_impl.py +672 -0
  66. brainstate/random/{_rand_seed.py → _seed.py} +675 -675
  67. brainstate/random/{_rand_seed_test.py → _seed_test.py} +48 -48
  68. brainstate/random/{_rand_state.py → _state.py} +1320 -1617
  69. brainstate/random/{_rand_state_test.py → _state_test.py} +551 -551
  70. brainstate/transform/__init__.py +56 -59
  71. brainstate/transform/_ad_checkpoint.py +176 -176
  72. brainstate/transform/_ad_checkpoint_test.py +49 -49
  73. brainstate/transform/_autograd.py +1025 -1025
  74. brainstate/transform/_autograd_test.py +1289 -1289
  75. brainstate/transform/_conditions.py +316 -316
  76. brainstate/transform/_conditions_test.py +220 -220
  77. brainstate/transform/_error_if.py +94 -94
  78. brainstate/transform/_error_if_test.py +52 -52
  79. brainstate/transform/_find_state.py +200 -0
  80. brainstate/transform/_find_state_test.py +84 -0
  81. brainstate/transform/_jit.py +399 -399
  82. brainstate/transform/_jit_test.py +143 -143
  83. brainstate/transform/_loop_collect_return.py +675 -675
  84. brainstate/transform/_loop_collect_return_test.py +58 -58
  85. brainstate/transform/_loop_no_collection.py +283 -283
  86. brainstate/transform/_loop_no_collection_test.py +50 -50
  87. brainstate/transform/_make_jaxpr.py +2176 -2016
  88. brainstate/transform/_make_jaxpr_test.py +1634 -1510
  89. brainstate/transform/_mapping.py +607 -529
  90. brainstate/transform/_mapping_test.py +104 -194
  91. brainstate/transform/_progress_bar.py +255 -255
  92. brainstate/transform/_unvmap.py +256 -256
  93. brainstate/transform/_util.py +286 -286
  94. brainstate/typing.py +837 -837
  95. brainstate/typing_test.py +780 -780
  96. brainstate/util/__init__.py +27 -27
  97. brainstate/util/_others.py +1024 -1024
  98. brainstate/util/_others_test.py +962 -962
  99. brainstate/util/_pretty_pytree.py +1301 -1301
  100. brainstate/util/_pretty_pytree_test.py +675 -675
  101. brainstate/util/_pretty_repr.py +462 -462
  102. brainstate/util/_pretty_repr_test.py +696 -696
  103. brainstate/util/filter.py +945 -945
  104. brainstate/util/filter_test.py +911 -911
  105. brainstate/util/struct.py +910 -910
  106. brainstate/util/struct_test.py +602 -602
  107. {brainstate-0.2.1.dist-info → brainstate-0.2.2.dist-info}/METADATA +108 -108
  108. brainstate-0.2.2.dist-info/RECORD +111 -0
  109. {brainstate-0.2.1.dist-info → brainstate-0.2.2.dist-info}/licenses/LICENSE +202 -202
  110. brainstate/transform/_eval_shape.py +0 -145
  111. brainstate/transform/_eval_shape_test.py +0 -38
  112. brainstate/transform/_random.py +0 -171
  113. brainstate-0.2.1.dist-info/RECORD +0 -111
  114. {brainstate-0.2.1.dist-info → brainstate-0.2.2.dist-info}/WHEEL +0 -0
  115. {brainstate-0.2.1.dist-info → brainstate-0.2.2.dist-info}/top_level.txt +0 -0
brainstate/nn/_dropout.py CHANGED
@@ -1,618 +1,618 @@
1
- # Copyright 2024 BrainX Ecosystem Limited. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
-
16
-
17
- from functools import partial
18
- from typing import Optional, Sequence
19
-
20
- import brainunit as u
21
- import jax.numpy as jnp
22
-
23
- from brainstate import random, environ
24
- from brainstate._state import ShortTermState
25
- from brainstate.typing import Size
26
- from . import init as init
27
- from ._module import ElementWiseBlock
28
-
29
- __all__ = [
30
- 'Dropout',
31
- 'Dropout1d',
32
- 'Dropout2d',
33
- 'Dropout3d',
34
- 'AlphaDropout',
35
- 'FeatureAlphaDropout',
36
- 'DropoutFixed',
37
- ]
38
-
39
-
40
- class Dropout(ElementWiseBlock):
41
- """A layer that stochastically ignores a subset of inputs each training step.
42
-
43
-     In training, to compensate for the fraction of input values dropped (`1 - prob`),
44
-     all surviving values are multiplied by `1 / prob`.
45
-
46
- This layer is active only during training (``mode=brainstate.mixin.Training``). In other
47
- circumstances it is a no-op.
48
-
49
- Parameters
50
- ----------
51
- prob : float
52
- Probability to keep element of the tensor. Default is 0.5.
53
- broadcast_dims : Sequence[int]
54
- Dimensions that will share the same dropout mask. Default is ().
55
- name : str, optional
56
- The name of the dynamic system.
57
-
58
- References
59
- ----------
60
- .. [1] Srivastava, Nitish, et al. "Dropout: a simple way to prevent
61
- neural networks from overfitting." The journal of machine learning
62
- research 15.1 (2014): 1929-1958.
63
-
64
- Examples
65
- --------
66
- .. code-block:: python
67
-
68
- >>> import brainstate
69
- >>> layer = brainstate.nn.Dropout(prob=0.8)
70
- >>> x = brainstate.random.randn(10, 20)
71
- >>> with brainstate.environ.context(fit=True):
72
- ... output = layer(x)
73
- >>> output.shape
74
- (10, 20)
75
-
76
- """
77
- __module__ = 'brainstate.nn'
78
-
79
- def __init__(
80
- self,
81
- prob: float = 0.5,
82
- broadcast_dims: Sequence[int] = (),
83
- name: Optional[str] = None
84
- ) -> None:
85
- super().__init__(name=name)
86
- assert 0. <= prob <= 1., f"Dropout probability must be in the range [0, 1]. But got {prob}."
87
- self.prob = prob
88
- self.broadcast_dims = broadcast_dims
89
-
90
- def __call__(self, x):
91
- dtype = u.math.get_dtype(x)
92
- fit_phase = environ.get('fit', desc='Whether this is a fitting process. Bool.')
93
- if fit_phase and self.prob < 1.:
94
- broadcast_shape = list(x.shape)
95
- for dim in self.broadcast_dims:
96
- broadcast_shape[dim] = 1
97
- keep_mask = random.bernoulli(self.prob, broadcast_shape)
98
- keep_mask = u.math.broadcast_to(keep_mask, x.shape)
99
- return u.math.where(
100
- keep_mask,
101
- u.math.asarray(x / self.prob, dtype=dtype),
102
- u.math.asarray(0., dtype=dtype)
103
- )
104
- else:
105
- return x
106
-
107
-
108
- class _DropoutNd(ElementWiseBlock):
109
- __module__ = 'brainstate.nn'
110
- prob: float
111
- channel_axis: int
112
- minimal_dim: int
113
-
114
- def __init__(
115
- self,
116
- prob: float = 0.5,
117
- channel_axis: int = -1,
118
- name: Optional[str] = None
119
- ) -> None:
120
- super().__init__(name=name)
121
- assert 0. <= prob <= 1., f"Dropout probability must be in the range [0, 1]. But got {prob}."
122
- self.prob = prob
123
- self.channel_axis = channel_axis
124
-
125
- def __call__(self, x):
126
- # check input shape
127
- inp_dim = u.math.ndim(x)
128
- if inp_dim not in (self.minimal_dim, self.minimal_dim + 1):
129
- raise RuntimeError(f"dropout1d: Expected {self.minimal_dim}D or {self.minimal_dim + 1}D input, "
130
- f"but received a {inp_dim}D input. {self._get_msg(x)}")
131
- is_not_batched = self.minimal_dim
132
- if is_not_batched:
133
- channel_axis = self.channel_axis if self.channel_axis >= 0 else (x.ndim + self.channel_axis)
134
- mask_shape = [(dim if i == channel_axis else 1) for i, dim in enumerate(x.shape)]
135
- else:
136
- channel_axis = (self.channel_axis + 1) if self.channel_axis >= 0 else (x.ndim + self.channel_axis)
137
- assert channel_axis != 0, f"Channel axis must not be 0. But got {self.channel_axis}."
138
- mask_shape = [(dim if i in (channel_axis, 0) else 1) for i, dim in enumerate(x.shape)]
139
-
140
- # get fit phase
141
- fit_phase = environ.get('fit', desc='Whether this is a fitting process. Bool.')
142
-
143
- # generate mask
144
- if fit_phase and self.prob < 1.:
145
- dtype = u.math.get_dtype(x)
146
- keep_mask = random.bernoulli(self.prob, mask_shape)
147
- keep_mask = jnp.broadcast_to(keep_mask, x.shape)
148
- return jnp.where(
149
- keep_mask,
150
- jnp.asarray(x / self.prob, dtype=dtype),
151
- jnp.asarray(0., dtype=dtype)
152
- )
153
- else:
154
- return x
155
-
156
- def _get_msg(self, x):
157
- return ''
158
-
159
-
160
- class Dropout1d(_DropoutNd):
161
- r"""Randomly zero out entire channels (a channel is a 1D feature map).
162
-
163
- Each channel will be zeroed out independently on every forward call with
164
-     probability `1 - prob` using samples from a Bernoulli distribution. The channel is
165
- a 1D feature map, e.g., the :math:`j`-th channel of the :math:`i`-th sample
166
- in the batched input is a 1D tensor :math:`\text{input}[i, j]`.
167
-
168
- Usually the input comes from :class:`Conv1d` modules.
169
-
170
- As described in the paper [1]_, if adjacent pixels within feature maps are
171
- strongly correlated (as is normally the case in early convolution layers)
172
- then i.i.d. dropout will not regularize the activations and will otherwise
173
- just result in an effective learning rate decrease.
174
-
175
- In this case, :class:`Dropout1d` will help promote independence between
176
- feature maps and should be used instead.
177
-
178
- Parameters
179
- ----------
180
- prob : float
181
- Probability of an element to be kept. Default is 0.5.
182
- channel_axis : int
183
- The axis representing the channel dimension. Default is -1.
184
- name : str, optional
185
- The name of the dynamic system.
186
-
187
- Notes
188
- -----
189
- Input shape: :math:`(N, C, L)` or :math:`(C, L)`.
190
-
191
- Output shape: :math:`(N, C, L)` or :math:`(C, L)` (same shape as input).
192
-
193
- References
194
- ----------
195
-     .. [1] Tompson et al., "Efficient Object Localization Using Convolutional Networks"
196
- https://arxiv.org/abs/1411.4280
197
-
198
- Examples
199
- --------
200
- .. code-block:: python
201
-
202
- >>> import brainstate
203
- >>> m = brainstate.nn.Dropout1d(prob=0.8)
204
- >>> x = brainstate.random.randn(20, 32, 16)
205
- >>> with brainstate.environ.context(fit=True):
206
- ... output = m(x)
207
- >>> output.shape
208
- (20, 32, 16)
209
-
210
- """
211
- __module__ = 'brainstate.nn'
212
- minimal_dim: int = 2
213
-
214
- def _get_msg(self, x):
215
- return ("Note that dropout1d exists to provide channel-wise dropout on inputs with 1 "
216
- "spatial dimension, a channel dimension, and an optional batch dimension "
217
- "(i.e. 2D or 3D inputs).")
218
-
219
-
220
- class Dropout2d(_DropoutNd):
221
- r"""Randomly zero out entire channels (a channel is a 2D feature map).
222
-
223
- Each channel will be zeroed out independently on every forward call with
224
-     probability `1 - prob` using samples from a Bernoulli distribution. The channel is
225
- a 2D feature map, e.g., the :math:`j`-th channel of the :math:`i`-th sample
226
- in the batched input is a 2D tensor :math:`\text{input}[i, j]`.
227
-
228
- Usually the input comes from :class:`Conv2d` modules.
229
-
230
- As described in the paper [1]_, if adjacent pixels within feature maps are
231
- strongly correlated (as is normally the case in early convolution layers)
232
- then i.i.d. dropout will not regularize the activations and will otherwise
233
- just result in an effective learning rate decrease.
234
-
235
- In this case, :class:`Dropout2d` will help promote independence between
236
- feature maps and should be used instead.
237
-
238
- Parameters
239
- ----------
240
- prob : float
241
- Probability of an element to be kept. Default is 0.5.
242
- channel_axis : int
243
- The axis representing the channel dimension. Default is -1.
244
- name : str, optional
245
- The name of the dynamic system.
246
-
247
- Notes
248
- -----
249
- Input shape: :math:`(N, C, H, W)` or :math:`(C, H, W)`.
250
-
251
- Output shape: :math:`(N, C, H, W)` or :math:`(C, H, W)` (same shape as input).
252
-
253
- References
254
- ----------
255
-     .. [1] Tompson et al., "Efficient Object Localization Using Convolutional Networks"
256
- https://arxiv.org/abs/1411.4280
257
-
258
- Examples
259
- --------
260
- .. code-block:: python
261
-
262
- >>> import brainstate
263
- >>> m = brainstate.nn.Dropout2d(prob=0.8)
264
- >>> x = brainstate.random.randn(20, 32, 32, 16)
265
- >>> with brainstate.environ.context(fit=True):
266
- ... output = m(x)
267
- >>> output.shape
268
- (20, 32, 32, 16)
269
-
270
- """
271
- __module__ = 'brainstate.nn'
272
- minimal_dim: int = 3
273
-
274
- def _get_msg(self, x):
275
- return ("Note that dropout2d exists to provide channel-wise dropout on inputs with 2 "
276
- "spatial dimensions, a channel dimension, and an optional batch dimension "
277
- "(i.e. 3D or 4D inputs).")
278
-
279
-
280
- class Dropout3d(_DropoutNd):
281
- r"""Randomly zero out entire channels (a channel is a 3D feature map).
282
-
283
- Each channel will be zeroed out independently on every forward call with
284
-     probability `1 - prob` using samples from a Bernoulli distribution. The channel is
285
- a 3D feature map, e.g., the :math:`j`-th channel of the :math:`i`-th sample
286
- in the batched input is a 3D tensor :math:`\text{input}[i, j]`.
287
-
288
- Usually the input comes from :class:`Conv3d` modules.
289
-
290
- As described in the paper [1]_, if adjacent pixels within feature maps are
291
- strongly correlated (as is normally the case in early convolution layers)
292
- then i.i.d. dropout will not regularize the activations and will otherwise
293
- just result in an effective learning rate decrease.
294
-
295
- In this case, :class:`Dropout3d` will help promote independence between
296
- feature maps and should be used instead.
297
-
298
- Parameters
299
- ----------
300
- prob : float
301
- Probability of an element to be kept. Default is 0.5.
302
- channel_axis : int
303
- The axis representing the channel dimension. Default is -1.
304
- name : str, optional
305
- The name of the dynamic system.
306
-
307
- Notes
308
- -----
309
- Input shape: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`.
310
-
311
- Output shape: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input).
312
-
313
- References
314
- ----------
315
-     .. [1] Tompson et al., "Efficient Object Localization Using Convolutional Networks"
316
- https://arxiv.org/abs/1411.4280
317
-
318
- Examples
319
- --------
320
- .. code-block:: python
321
-
322
- >>> import brainstate
323
- >>> m = brainstate.nn.Dropout3d(prob=0.8)
324
- >>> x = brainstate.random.randn(20, 16, 4, 32, 32)
325
- >>> with brainstate.environ.context(fit=True):
326
- ... output = m(x)
327
- >>> output.shape
328
- (20, 16, 4, 32, 32)
329
-
330
- """
331
- __module__ = 'brainstate.nn'
332
- minimal_dim: int = 4
333
-
334
- def _get_msg(self, x):
335
- return ("Note that dropout3d exists to provide channel-wise dropout on inputs with 3 "
336
- "spatial dimensions, a channel dimension, and an optional batch dimension "
337
- "(i.e. 4D or 5D inputs).")
338
-
339
-
340
- class AlphaDropout(_DropoutNd):
341
- r"""Applies Alpha Dropout over the input.
342
-
343
- Alpha Dropout is a type of Dropout that maintains the self-normalizing
344
- property. For an input with zero mean and unit standard deviation, the output of
345
- Alpha Dropout maintains the original mean and standard deviation of the
346
- input.
347
-
348
- Alpha Dropout goes hand-in-hand with SELU activation function, which ensures
349
- that the outputs have zero mean and unit standard deviation.
350
-
351
- During training, it randomly masks some of the elements of the input
352
-     tensor with probability `1 - prob` using samples from a Bernoulli distribution.
353
- The elements to be masked are randomized on every forward call, and scaled
354
- and shifted to maintain zero mean and unit standard deviation.
355
-
356
- During evaluation the module simply computes an identity function.
357
-
358
- Parameters
359
- ----------
360
- prob : float
361
- Probability of an element to be kept. Default is 0.5.
362
- name : str, optional
363
- The name of the dynamic system.
364
-
365
- Notes
366
- -----
367
- Input shape: :math:`(*)`. Input can be of any shape.
368
-
369
- Output shape: :math:`(*)`. Output is of the same shape as input.
370
-
371
- References
372
- ----------
373
- .. [1] Klambauer et al., "Self-Normalizing Neural Networks"
374
- https://arxiv.org/abs/1706.02515
375
-
376
- Examples
377
- --------
378
- .. code-block:: python
379
-
380
- >>> import brainstate
381
- >>> m = brainstate.nn.AlphaDropout(prob=0.8)
382
- >>> x = brainstate.random.randn(20, 16)
383
- >>> with brainstate.environ.context(fit=True):
384
- ... output = m(x)
385
- >>> output.shape
386
- (20, 16)
387
-
388
- """
389
- __module__ = 'brainstate.nn'
390
-
391
- def __init__(
392
- self,
393
- prob: float = 0.5,
394
- name: Optional[str] = None
395
- ) -> None:
396
- super().__init__(name=name)
397
- assert 0. <= prob <= 1., f"Dropout probability must be in the range [0, 1]. But got {prob}."
398
- self.prob = prob
399
-
400
- # SELU parameters
401
- alpha = -1.7580993408473766
402
- self.alpha = alpha
403
-
404
- # Affine transformation parameters to maintain mean and variance
405
- self.a = ((1 - prob) * (1 + prob * alpha ** 2)) ** -0.5
406
- self.b = -self.a * alpha * prob
407
-
408
- def __call__(self, x):
409
- dtype = u.math.get_dtype(x)
410
- fit_phase = environ.get('fit', desc='Whether this is a fitting process. Bool.')
411
- if fit_phase and self.prob < 1.:
412
- keep_mask = random.bernoulli(self.prob, x.shape)
413
- return u.math.where(
414
- keep_mask,
415
- u.math.asarray(x, dtype=dtype),
416
- u.math.asarray(self.alpha, dtype=dtype)
417
- ) * self.a + self.b
418
- else:
419
- return x
420
-
421
-
422
- class FeatureAlphaDropout(ElementWiseBlock):
423
- r"""Randomly masks out entire channels with Alpha Dropout properties.
424
-
425
- Instead of setting activations to zero as in regular Dropout, the activations
426
- are set to the negative saturation value of the SELU activation function to
427
- maintain self-normalizing properties.
428
-
429
- Each channel (e.g., the :math:`j`-th channel of the :math:`i`-th sample in
430
- the batch input is a tensor :math:`\text{input}[i, j]`) will be masked
431
-     independently for each sample on every forward call with probability `1 - prob` using
432
- samples from a Bernoulli distribution. The elements to be masked are randomized
433
- on every forward call, and scaled and shifted to maintain zero mean and unit
434
- variance.
435
-
436
- Usually the input comes from convolutional layers with SELU activation.
437
-
438
- As described in the paper [2]_, if adjacent pixels within feature maps are
439
- strongly correlated (as is normally the case in early convolution layers)
440
- then i.i.d. dropout will not regularize the activations and will otherwise
441
- just result in an effective learning rate decrease.
442
-
443
- In this case, :class:`FeatureAlphaDropout` will help promote independence between
444
- feature maps and should be used instead.
445
-
446
- Parameters
447
- ----------
448
- prob : float
449
- Probability of an element to be kept. Default is 0.5.
450
- channel_axis : int
451
- The axis representing the channel dimension. Default is -1.
452
- name : str, optional
453
- The name of the dynamic system.
454
-
455
- Notes
456
- -----
457
- Input shape: :math:`(N, C, *)` where C is the channel dimension.
458
-
459
- Output shape: Same shape as input.
460
-
461
- References
462
- ----------
463
- .. [1] Klambauer et al., "Self-Normalizing Neural Networks"
464
- https://arxiv.org/abs/1706.02515
465
-     .. [2] Tompson et al., "Efficient Object Localization Using Convolutional Networks"
466
- https://arxiv.org/abs/1411.4280
467
-
468
- Examples
469
- --------
470
- .. code-block:: python
471
-
472
- >>> import brainstate
473
- >>> m = brainstate.nn.FeatureAlphaDropout(prob=0.8)
474
- >>> x = brainstate.random.randn(20, 16, 4, 32, 32)
475
- >>> with brainstate.environ.context(fit=True):
476
- ... output = m(x)
477
- >>> output.shape
478
- (20, 16, 4, 32, 32)
479
-
480
- """
481
- __module__ = 'brainstate.nn'
482
-
483
- def __init__(
484
- self,
485
- prob: float = 0.5,
486
- channel_axis: int = -1,
487
- name: Optional[str] = None
488
- ) -> None:
489
- super().__init__(name=name)
490
- assert 0. <= prob <= 1., f"Dropout probability must be in the range [0, 1]. But got {prob}."
491
- self.prob = prob
492
- self.channel_axis = channel_axis
493
-
494
- # SELU parameters
495
- alpha = -1.7580993408473766
496
- self.alpha = alpha
497
-
498
- # Affine transformation parameters to maintain mean and variance
499
- self.a = ((1 - prob) * (1 + prob * alpha ** 2)) ** -0.5
500
- self.b = -self.a * alpha * prob
501
-
502
- def __call__(self, x):
503
- dtype = u.math.get_dtype(x)
504
- fit_phase = environ.get('fit', desc='Whether this is a fitting process. Bool.')
505
- if fit_phase and self.prob < 1.:
506
- # Create mask shape with 1s except for batch and channel dimensions
507
- channel_axis = self.channel_axis if self.channel_axis >= 0 else (x.ndim + self.channel_axis)
508
- mask_shape = [1] * x.ndim
509
- mask_shape[0] = x.shape[0] # batch dimension
510
- mask_shape[channel_axis] = x.shape[channel_axis] # channel dimension
511
-
512
- keep_mask = random.bernoulli(self.prob, mask_shape)
513
- keep_mask = u.math.broadcast_to(keep_mask, x.shape)
514
- return u.math.where(
515
- keep_mask,
516
- u.math.asarray(x, dtype=dtype),
517
- u.math.asarray(self.alpha, dtype=dtype)
518
- ) * self.a + self.b
519
- else:
520
- return x
521
-
522
-
523
- class DropoutFixed(ElementWiseBlock):
524
- """A dropout layer with a fixed dropout mask along the time axis.
525
-
526
- In training, to compensate for the fraction of input values dropped,
527
-     all surviving values are multiplied by `1 / prob`.
528
-
529
- This layer is active only during training (``mode=brainstate.mixin.Training``). In other
530
- circumstances it is a no-op.
531
-
532
- This kind of Dropout is particularly useful for spiking neural networks (SNNs) where
533
- the same dropout mask needs to be applied across multiple time steps within a single
534
- mini-batch iteration.
535
-
536
- Parameters
537
- ----------
538
- in_size : tuple or int
539
- The size of the input tensor.
540
- prob : float
541
- Probability to keep element of the tensor. Default is 0.5.
542
- name : str, optional
543
- The name of the dynamic system.
544
-
545
- Notes
546
- -----
547
- As described in [2]_, there is a subtle difference in the way dropout is applied in
548
- SNNs compared to ANNs. In ANNs, each epoch of training has several iterations of
549
- mini-batches. In each iteration, randomly selected units (with dropout ratio of
550
- :math:`p`) are disconnected from the network while weighting by its posterior
551
- probability (:math:`1-p`).
552
-
553
- However, in SNNs, each iteration has more than one forward propagation depending on
554
- the time length of the spike train. We back-propagate the output error and modify
555
- the network parameters only at the last time step. For dropout to be effective in
556
- our training method, it has to be ensured that the set of connected units within an
557
- iteration of mini-batch data is not changed, such that the neural network is
558
- constituted by the same random subset of units during each forward propagation within
559
- a single iteration.
560
-
561
- On the other hand, if the units are randomly connected at each time-step, the effect
562
- of dropout will be averaged out over the entire forward propagation time within an
563
- iteration. Then, the dropout effect would fade-out once the output error is propagated
564
- backward and the parameters are updated at the last time step. Therefore, we need to
565
- keep the set of randomly connected units for the entire time window within an iteration.
566
-
567
- References
568
- ----------
569
- .. [1] Srivastava, Nitish, et al. "Dropout: a simple way to prevent
570
- neural networks from overfitting." The journal of machine learning
571
- research 15.1 (2014): 1929-1958.
572
- .. [2] Lee et al., "Enabling Spike-based Backpropagation for Training Deep Neural
573
- Network Architectures" https://arxiv.org/abs/1903.06379
574
-
575
- Examples
576
- --------
577
- .. code-block:: python
578
-
579
- >>> import brainstate
580
- >>> layer = brainstate.nn.DropoutFixed(in_size=(20,), prob=0.8)
581
- >>> layer.init_state(batch_size=10)
582
- >>> x = brainstate.random.randn(10, 20)
583
- >>> with brainstate.environ.context(fit=True):
584
- ... output = layer.update(x)
585
- >>> output.shape
586
- (10, 20)
587
-
588
- """
589
- __module__ = 'brainstate.nn'
590
-
591
- def __init__(
592
- self,
593
- in_size: Size,
594
- prob: float = 0.5,
595
- name: Optional[str] = None
596
- ) -> None:
597
- super().__init__(name=name)
598
- assert 0. <= prob <= 1., f"Dropout probability must be in the range [0, 1]. But got {prob}."
599
- self.prob = prob
600
- self.in_size = in_size
601
- self.out_size = in_size
602
-
603
- def init_state(self, batch_size=None, **kwargs):
604
- if self.prob < 1.:
605
- self.mask = ShortTermState(init.param(partial(random.bernoulli, self.prob), self.in_size, batch_size))
606
-
607
- def update(self, x):
608
- dtype = u.math.get_dtype(x)
609
- fit_phase = environ.get('fit', desc='Whether this is a fitting process. Bool.')
610
- if fit_phase and self.prob < 1.:
611
- if self.mask.value.shape != x.shape:
612
- raise ValueError(f"Input shape {x.shape} does not match the mask shape {self.mask.value.shape}. "
613
- f"Please call `init_state()` method first.")
614
- return u.math.where(self.mask.value,
615
- u.math.asarray(x / self.prob, dtype=dtype),
616
- u.math.asarray(0., dtype=dtype) * u.get_unit(x))
617
- else:
618
- return x
1
+ # Copyright 2024 BrainX Ecosystem Limited. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+
17
+ from functools import partial
18
+ from typing import Optional, Sequence
19
+
20
+ import brainunit as u
21
+ import jax.numpy as jnp
22
+
23
+ from brainstate import random, environ
24
+ from brainstate._state import ShortTermState
25
+ from brainstate.typing import Size
26
+ from . import init as init
27
+ from ._module import ElementWiseBlock
28
+
29
+ __all__ = [
30
+ 'Dropout',
31
+ 'Dropout1d',
32
+ 'Dropout2d',
33
+ 'Dropout3d',
34
+ 'AlphaDropout',
35
+ 'FeatureAlphaDropout',
36
+ 'DropoutFixed',
37
+ ]
38
+
39
+
40
+ class Dropout(ElementWiseBlock):
41
+ """A layer that stochastically ignores a subset of inputs each training step.
42
+
43
+     In training, to compensate for the fraction of input values dropped (`1 - prob`),
44
+     all surviving values are multiplied by `1 / prob`.
45
+
46
+ This layer is active only during training (``mode=brainstate.mixin.Training``). In other
47
+ circumstances it is a no-op.
48
+
49
+ Parameters
50
+ ----------
51
+ prob : float
52
+ Probability to keep element of the tensor. Default is 0.5.
53
+ broadcast_dims : Sequence[int]
54
+ Dimensions that will share the same dropout mask. Default is ().
55
+ name : str, optional
56
+ The name of the dynamic system.
57
+
58
+ References
59
+ ----------
60
+ .. [1] Srivastava, Nitish, et al. "Dropout: a simple way to prevent
61
+ neural networks from overfitting." The journal of machine learning
62
+ research 15.1 (2014): 1929-1958.
63
+
64
+ Examples
65
+ --------
66
+ .. code-block:: python
67
+
68
+ >>> import brainstate
69
+ >>> layer = brainstate.nn.Dropout(prob=0.8)
70
+ >>> x = brainstate.random.randn(10, 20)
71
+ >>> with brainstate.environ.context(fit=True):
72
+ ... output = layer(x)
73
+ >>> output.shape
74
+ (10, 20)
75
+
76
+ """
77
+ __module__ = 'brainstate.nn'
78
+
79
+ def __init__(
80
+ self,
81
+ prob: float = 0.5,
82
+ broadcast_dims: Sequence[int] = (),
83
+ name: Optional[str] = None
84
+ ) -> None:
85
+ super().__init__(name=name)
86
+ assert 0. <= prob <= 1., f"Dropout probability must be in the range [0, 1]. But got {prob}."
87
+ self.prob = prob
88
+ self.broadcast_dims = broadcast_dims
89
+
90
+ def __call__(self, x):
91
+ dtype = u.math.get_dtype(x)
92
+ fit_phase = environ.get('fit', desc='Whether this is a fitting process. Bool.')
93
+ if fit_phase and self.prob < 1.:
94
+ broadcast_shape = list(x.shape)
95
+ for dim in self.broadcast_dims:
96
+ broadcast_shape[dim] = 1
97
+ keep_mask = random.bernoulli(self.prob, broadcast_shape)
98
+ keep_mask = u.math.broadcast_to(keep_mask, x.shape)
99
+ return u.math.where(
100
+ keep_mask,
101
+ u.math.asarray(x / self.prob, dtype=dtype),
102
+ u.math.asarray(0., dtype=dtype)
103
+ )
104
+ else:
105
+ return x
106
+
107
+
108
+ class _DropoutNd(ElementWiseBlock):
109
+ __module__ = 'brainstate.nn'
110
+ prob: float
111
+ channel_axis: int
112
+ minimal_dim: int
113
+
114
+ def __init__(
115
+ self,
116
+ prob: float = 0.5,
117
+ channel_axis: int = -1,
118
+ name: Optional[str] = None
119
+ ) -> None:
120
+ super().__init__(name=name)
121
+ assert 0. <= prob <= 1., f"Dropout probability must be in the range [0, 1]. But got {prob}."
122
+ self.prob = prob
123
+ self.channel_axis = channel_axis
124
+
125
+ def __call__(self, x):
126
+ # check input shape
127
+ inp_dim = u.math.ndim(x)
128
+ if inp_dim not in (self.minimal_dim, self.minimal_dim + 1):
129
+ raise RuntimeError(f"dropout1d: Expected {self.minimal_dim}D or {self.minimal_dim + 1}D input, "
130
+ f"but received a {inp_dim}D input. {self._get_msg(x)}")
131
+ is_not_batched = self.minimal_dim
132
+ if is_not_batched:
133
+ channel_axis = self.channel_axis if self.channel_axis >= 0 else (x.ndim + self.channel_axis)
134
+ mask_shape = [(dim if i == channel_axis else 1) for i, dim in enumerate(x.shape)]
135
+ else:
136
+ channel_axis = (self.channel_axis + 1) if self.channel_axis >= 0 else (x.ndim + self.channel_axis)
137
+ assert channel_axis != 0, f"Channel axis must not be 0. But got {self.channel_axis}."
138
+ mask_shape = [(dim if i in (channel_axis, 0) else 1) for i, dim in enumerate(x.shape)]
139
+
140
+ # get fit phase
141
+ fit_phase = environ.get('fit', desc='Whether this is a fitting process. Bool.')
142
+
143
+ # generate mask
144
+ if fit_phase and self.prob < 1.:
145
+ dtype = u.math.get_dtype(x)
146
+ keep_mask = random.bernoulli(self.prob, mask_shape)
147
+ keep_mask = jnp.broadcast_to(keep_mask, x.shape)
148
+ return jnp.where(
149
+ keep_mask,
150
+ jnp.asarray(x / self.prob, dtype=dtype),
151
+ jnp.asarray(0., dtype=dtype)
152
+ )
153
+ else:
154
+ return x
155
+
156
+ def _get_msg(self, x):
157
+ return ''
158
+
159
+
160
+ class Dropout1d(_DropoutNd):
161
+ r"""Randomly zero out entire channels (a channel is a 1D feature map).
162
+
163
+ Each channel will be zeroed out independently on every forward call with
164
+     probability `1 - prob` using samples from a Bernoulli distribution. The channel is
165
+ a 1D feature map, e.g., the :math:`j`-th channel of the :math:`i`-th sample
166
+ in the batched input is a 1D tensor :math:`\text{input}[i, j]`.
167
+
168
+ Usually the input comes from :class:`Conv1d` modules.
169
+
170
+ As described in the paper [1]_, if adjacent pixels within feature maps are
171
+ strongly correlated (as is normally the case in early convolution layers)
172
+ then i.i.d. dropout will not regularize the activations and will otherwise
173
+ just result in an effective learning rate decrease.
174
+
175
+ In this case, :class:`Dropout1d` will help promote independence between
176
+ feature maps and should be used instead.
177
+
178
+ Parameters
179
+ ----------
180
+ prob : float
181
+ Probability of an element to be kept. Default is 0.5.
182
+ channel_axis : int
183
+ The axis representing the channel dimension. Default is -1.
184
+ name : str, optional
185
+ The name of the dynamic system.
186
+
187
+ Notes
188
+ -----
189
+ Input shape: :math:`(N, C, L)` or :math:`(C, L)`.
190
+
191
+ Output shape: :math:`(N, C, L)` or :math:`(C, L)` (same shape as input).
192
+
193
+ References
194
+ ----------
195
+     .. [1] Tompson et al., "Efficient Object Localization Using Convolutional Networks"
196
+ https://arxiv.org/abs/1411.4280
197
+
198
+ Examples
199
+ --------
200
+ .. code-block:: python
201
+
202
+ >>> import brainstate
203
+ >>> m = brainstate.nn.Dropout1d(prob=0.8)
204
+ >>> x = brainstate.random.randn(20, 32, 16)
205
+ >>> with brainstate.environ.context(fit=True):
206
+ ... output = m(x)
207
+ >>> output.shape
208
+ (20, 32, 16)
209
+
210
+ """
211
+ __module__ = 'brainstate.nn'
212
+ minimal_dim: int = 2
213
+
214
+ def _get_msg(self, x):
215
+ return ("Note that dropout1d exists to provide channel-wise dropout on inputs with 1 "
216
+ "spatial dimension, a channel dimension, and an optional batch dimension "
217
+ "(i.e. 2D or 3D inputs).")
218
+
219
+
220
+ class Dropout2d(_DropoutNd):
221
+ r"""Randomly zero out entire channels (a channel is a 2D feature map).
222
+
223
+ Each channel will be zeroed out independently on every forward call with
224
+     probability `1 - prob` using samples from a Bernoulli distribution. The channel is
225
+ a 2D feature map, e.g., the :math:`j`-th channel of the :math:`i`-th sample
226
+ in the batched input is a 2D tensor :math:`\text{input}[i, j]`.
227
+
228
+ Usually the input comes from :class:`Conv2d` modules.
229
+
230
+ As described in the paper [1]_, if adjacent pixels within feature maps are
231
+ strongly correlated (as is normally the case in early convolution layers)
232
+ then i.i.d. dropout will not regularize the activations and will otherwise
233
+ just result in an effective learning rate decrease.
234
+
235
+ In this case, :class:`Dropout2d` will help promote independence between
236
+ feature maps and should be used instead.
237
+
238
+ Parameters
239
+ ----------
240
+ prob : float
241
+ Probability of an element to be kept. Default is 0.5.
242
+ channel_axis : int
243
+ The axis representing the channel dimension. Default is -1.
244
+ name : str, optional
245
+ The name of the dynamic system.
246
+
247
+ Notes
248
+ -----
249
+ Input shape: :math:`(N, C, H, W)` or :math:`(C, H, W)`.
250
+
251
+ Output shape: :math:`(N, C, H, W)` or :math:`(C, H, W)` (same shape as input).
252
+
253
+ References
254
+ ----------
255
+     .. [1] Tompson et al., "Efficient Object Localization Using Convolutional Networks"
256
+ https://arxiv.org/abs/1411.4280
257
+
258
+ Examples
259
+ --------
260
+ .. code-block:: python
261
+
262
+ >>> import brainstate
263
+ >>> m = brainstate.nn.Dropout2d(prob=0.8)
264
+ >>> x = brainstate.random.randn(20, 32, 32, 16)
265
+ >>> with brainstate.environ.context(fit=True):
266
+ ... output = m(x)
267
+ >>> output.shape
268
+ (20, 32, 32, 16)
269
+
270
+ """
271
+ __module__ = 'brainstate.nn'
272
+ minimal_dim: int = 3
273
+
274
+ def _get_msg(self, x):
275
+ return ("Note that dropout2d exists to provide channel-wise dropout on inputs with 2 "
276
+ "spatial dimensions, a channel dimension, and an optional batch dimension "
277
+ "(i.e. 3D or 4D inputs).")
278
+
279
+
280
+ class Dropout3d(_DropoutNd):
281
+ r"""Randomly zero out entire channels (a channel is a 3D feature map).
282
+
283
+ Each channel will be zeroed out independently on every forward call with
284
+     probability `1 - prob` using samples from a Bernoulli distribution. The channel is
285
+ a 3D feature map, e.g., the :math:`j`-th channel of the :math:`i`-th sample
286
+ in the batched input is a 3D tensor :math:`\text{input}[i, j]`.
287
+
288
+ Usually the input comes from :class:`Conv3d` modules.
289
+
290
+ As described in the paper [1]_, if adjacent pixels within feature maps are
291
+ strongly correlated (as is normally the case in early convolution layers)
292
+ then i.i.d. dropout will not regularize the activations and will otherwise
293
+ just result in an effective learning rate decrease.
294
+
295
+ In this case, :class:`Dropout3d` will help promote independence between
296
+ feature maps and should be used instead.
297
+
298
+ Parameters
299
+ ----------
300
+ prob : float
301
+ Probability of an element to be kept. Default is 0.5.
302
+ channel_axis : int
303
+ The axis representing the channel dimension. Default is -1.
304
+ name : str, optional
305
+ The name of the dynamic system.
306
+
307
+ Notes
308
+ -----
309
+ Input shape: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`.
310
+
311
+ Output shape: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input).
312
+
313
+ References
314
+ ----------
315
+     .. [1] Tompson et al., "Efficient Object Localization Using Convolutional Networks"
316
+ https://arxiv.org/abs/1411.4280
317
+
318
+ Examples
319
+ --------
320
+ .. code-block:: python
321
+
322
+ >>> import brainstate
323
+ >>> m = brainstate.nn.Dropout3d(prob=0.8)
324
+ >>> x = brainstate.random.randn(20, 16, 4, 32, 32)
325
+ >>> with brainstate.environ.context(fit=True):
326
+ ... output = m(x)
327
+ >>> output.shape
328
+ (20, 16, 4, 32, 32)
329
+
330
+ """
331
+ __module__ = 'brainstate.nn'
332
+ minimal_dim: int = 4
333
+
334
+ def _get_msg(self, x):
335
+ return ("Note that dropout3d exists to provide channel-wise dropout on inputs with 3 "
336
+ "spatial dimensions, a channel dimension, and an optional batch dimension "
337
+ "(i.e. 4D or 5D inputs).")
338
+
339
+
340
+ class AlphaDropout(_DropoutNd):
341
+ r"""Applies Alpha Dropout over the input.
342
+
343
+ Alpha Dropout is a type of Dropout that maintains the self-normalizing
344
+ property. For an input with zero mean and unit standard deviation, the output of
345
+ Alpha Dropout maintains the original mean and standard deviation of the
346
+ input.
347
+
348
+ Alpha Dropout goes hand-in-hand with SELU activation function, which ensures
349
+ that the outputs have zero mean and unit standard deviation.
350
+
351
+ During training, it randomly masks some of the elements of the input
352
+     tensor with probability `1 - prob` using samples from a Bernoulli distribution.
353
+ The elements to be masked are randomized on every forward call, and scaled
354
+ and shifted to maintain zero mean and unit standard deviation.
355
+
356
+ During evaluation the module simply computes an identity function.
357
+
358
+ Parameters
359
+ ----------
360
+ prob : float
361
+ Probability of an element to be kept. Default is 0.5.
362
+ name : str, optional
363
+ The name of the dynamic system.
364
+
365
+ Notes
366
+ -----
367
+ Input shape: :math:`(*)`. Input can be of any shape.
368
+
369
+ Output shape: :math:`(*)`. Output is of the same shape as input.
370
+
371
+ References
372
+ ----------
373
+ .. [1] Klambauer et al., "Self-Normalizing Neural Networks"
374
+ https://arxiv.org/abs/1706.02515
375
+
376
+ Examples
377
+ --------
378
+ .. code-block:: python
379
+
380
+ >>> import brainstate
381
+ >>> m = brainstate.nn.AlphaDropout(prob=0.8)
382
+ >>> x = brainstate.random.randn(20, 16)
383
+ >>> with brainstate.environ.context(fit=True):
384
+ ... output = m(x)
385
+ >>> output.shape
386
+ (20, 16)
387
+
388
+ """
389
+ __module__ = 'brainstate.nn'
390
+
391
+ def __init__(
392
+ self,
393
+ prob: float = 0.5,
394
+ name: Optional[str] = None
395
+ ) -> None:
396
+ super().__init__(name=name)
397
+ assert 0. <= prob <= 1., f"Dropout probability must be in the range [0, 1]. But got {prob}."
398
+ self.prob = prob
399
+
400
+ # SELU parameters
401
+ alpha = -1.7580993408473766
402
+ self.alpha = alpha
403
+
404
+ # Affine transformation parameters to maintain mean and variance
405
+ self.a = ((1 - prob) * (1 + prob * alpha ** 2)) ** -0.5
406
+ self.b = -self.a * alpha * prob
407
+
408
+ def __call__(self, x):
409
+ dtype = u.math.get_dtype(x)
410
+ fit_phase = environ.get('fit', desc='Whether this is a fitting process. Bool.')
411
+ if fit_phase and self.prob < 1.:
412
+ keep_mask = random.bernoulli(self.prob, x.shape)
413
+ return u.math.where(
414
+ keep_mask,
415
+ u.math.asarray(x, dtype=dtype),
416
+ u.math.asarray(self.alpha, dtype=dtype)
417
+ ) * self.a + self.b
418
+ else:
419
+ return x
420
+
421
+
422
+ class FeatureAlphaDropout(ElementWiseBlock):
423
+ r"""Randomly masks out entire channels with Alpha Dropout properties.
424
+
425
+ Instead of setting activations to zero as in regular Dropout, the activations
426
+ are set to the negative saturation value of the SELU activation function to
427
+ maintain self-normalizing properties.
428
+
429
+ Each channel (e.g., the :math:`j`-th channel of the :math:`i`-th sample in
430
+ the batch input is a tensor :math:`\text{input}[i, j]`) will be masked
431
+     independently for each sample on every forward call with probability `1 - prob` using
432
+ samples from a Bernoulli distribution. The elements to be masked are randomized
433
+ on every forward call, and scaled and shifted to maintain zero mean and unit
434
+ variance.
435
+
436
+ Usually the input comes from convolutional layers with SELU activation.
437
+
438
+ As described in the paper [2]_, if adjacent pixels within feature maps are
439
+ strongly correlated (as is normally the case in early convolution layers)
440
+ then i.i.d. dropout will not regularize the activations and will otherwise
441
+ just result in an effective learning rate decrease.
442
+
443
+ In this case, :class:`FeatureAlphaDropout` will help promote independence between
444
+ feature maps and should be used instead.
445
+
446
+ Parameters
447
+ ----------
448
+ prob : float
449
+ Probability of an element to be kept. Default is 0.5.
450
+ channel_axis : int
451
+ The axis representing the channel dimension. Default is -1.
452
+ name : str, optional
453
+ The name of the dynamic system.
454
+
455
+ Notes
456
+ -----
457
+ Input shape: :math:`(N, C, *)` where C is the channel dimension.
458
+
459
+ Output shape: Same shape as input.
460
+
461
+ References
462
+ ----------
463
+ .. [1] Klambauer et al., "Self-Normalizing Neural Networks"
464
+ https://arxiv.org/abs/1706.02515
465
+     .. [2] Tompson et al., "Efficient Object Localization Using Convolutional Networks"
466
+ https://arxiv.org/abs/1411.4280
467
+
468
+ Examples
469
+ --------
470
+ .. code-block:: python
471
+
472
+ >>> import brainstate
473
+ >>> m = brainstate.nn.FeatureAlphaDropout(prob=0.8)
474
+ >>> x = brainstate.random.randn(20, 16, 4, 32, 32)
475
+ >>> with brainstate.environ.context(fit=True):
476
+ ... output = m(x)
477
+ >>> output.shape
478
+ (20, 16, 4, 32, 32)
479
+
480
+ """
481
+ __module__ = 'brainstate.nn'
482
+
483
+ def __init__(
484
+ self,
485
+ prob: float = 0.5,
486
+ channel_axis: int = -1,
487
+ name: Optional[str] = None
488
+ ) -> None:
489
+ super().__init__(name=name)
490
+ assert 0. <= prob <= 1., f"Dropout probability must be in the range [0, 1]. But got {prob}."
491
+ self.prob = prob
492
+ self.channel_axis = channel_axis
493
+
494
+ # SELU parameters
495
+ alpha = -1.7580993408473766
496
+ self.alpha = alpha
497
+
498
+ # Affine transformation parameters to maintain mean and variance
499
+ self.a = ((1 - prob) * (1 + prob * alpha ** 2)) ** -0.5
500
+ self.b = -self.a * alpha * prob
501
+
502
+ def __call__(self, x):
503
+ dtype = u.math.get_dtype(x)
504
+ fit_phase = environ.get('fit', desc='Whether this is a fitting process. Bool.')
505
+ if fit_phase and self.prob < 1.:
506
+ # Create mask shape with 1s except for batch and channel dimensions
507
+ channel_axis = self.channel_axis if self.channel_axis >= 0 else (x.ndim + self.channel_axis)
508
+ mask_shape = [1] * x.ndim
509
+ mask_shape[0] = x.shape[0] # batch dimension
510
+ mask_shape[channel_axis] = x.shape[channel_axis] # channel dimension
511
+
512
+ keep_mask = random.bernoulli(self.prob, mask_shape)
513
+ keep_mask = u.math.broadcast_to(keep_mask, x.shape)
514
+ return u.math.where(
515
+ keep_mask,
516
+ u.math.asarray(x, dtype=dtype),
517
+ u.math.asarray(self.alpha, dtype=dtype)
518
+ ) * self.a + self.b
519
+ else:
520
+ return x
521
+
522
+
523
+ class DropoutFixed(ElementWiseBlock):
524
+ """A dropout layer with a fixed dropout mask along the time axis.
525
+
526
+ In training, to compensate for the fraction of input values dropped,
527
+     all surviving values are multiplied by `1 / prob`.
528
+
529
+ This layer is active only during training (``mode=brainstate.mixin.Training``). In other
530
+ circumstances it is a no-op.
531
+
532
+ This kind of Dropout is particularly useful for spiking neural networks (SNNs) where
533
+ the same dropout mask needs to be applied across multiple time steps within a single
534
+ mini-batch iteration.
535
+
536
+ Parameters
537
+ ----------
538
+ in_size : tuple or int
539
+ The size of the input tensor.
540
+ prob : float
541
+ Probability to keep element of the tensor. Default is 0.5.
542
+ name : str, optional
543
+ The name of the dynamic system.
544
+
545
+ Notes
546
+ -----
547
+ As described in [2]_, there is a subtle difference in the way dropout is applied in
548
+ SNNs compared to ANNs. In ANNs, each epoch of training has several iterations of
549
+ mini-batches. In each iteration, randomly selected units (with dropout ratio of
550
+ :math:`p`) are disconnected from the network while weighting by its posterior
551
+ probability (:math:`1-p`).
552
+
553
+ However, in SNNs, each iteration has more than one forward propagation depending on
554
+ the time length of the spike train. We back-propagate the output error and modify
555
+ the network parameters only at the last time step. For dropout to be effective in
556
+ our training method, it has to be ensured that the set of connected units within an
557
+ iteration of mini-batch data is not changed, such that the neural network is
558
+ constituted by the same random subset of units during each forward propagation within
559
+ a single iteration.
560
+
561
+ On the other hand, if the units are randomly connected at each time-step, the effect
562
+ of dropout will be averaged out over the entire forward propagation time within an
563
+ iteration. Then, the dropout effect would fade-out once the output error is propagated
564
+ backward and the parameters are updated at the last time step. Therefore, we need to
565
+ keep the set of randomly connected units for the entire time window within an iteration.
566
+
567
+ References
568
+ ----------
569
+ .. [1] Srivastava, Nitish, et al. "Dropout: a simple way to prevent
570
+ neural networks from overfitting." The journal of machine learning
571
+ research 15.1 (2014): 1929-1958.
572
+ .. [2] Lee et al., "Enabling Spike-based Backpropagation for Training Deep Neural
573
+ Network Architectures" https://arxiv.org/abs/1903.06379
574
+
575
+ Examples
576
+ --------
577
+ .. code-block:: python
578
+
579
+ >>> import brainstate
580
+ >>> layer = brainstate.nn.DropoutFixed(in_size=(20,), prob=0.8)
581
+ >>> layer.init_state(batch_size=10)
582
+ >>> x = brainstate.random.randn(10, 20)
583
+ >>> with brainstate.environ.context(fit=True):
584
+ ... output = layer.update(x)
585
+ >>> output.shape
586
+ (10, 20)
587
+
588
+ """
589
+ __module__ = 'brainstate.nn'
590
+
591
+ def __init__(
592
+ self,
593
+ in_size: Size,
594
+ prob: float = 0.5,
595
+ name: Optional[str] = None
596
+ ) -> None:
597
+ super().__init__(name=name)
598
+ assert 0. <= prob <= 1., f"Dropout probability must be in the range [0, 1]. But got {prob}."
599
+ self.prob = prob
600
+ self.in_size = in_size
601
+ self.out_size = in_size
602
+
603
+ def init_state(self, batch_size=None, **kwargs):
604
+ if self.prob < 1.:
605
+ self.mask = ShortTermState(init.param(partial(random.bernoulli, self.prob), self.in_size, batch_size))
606
+
607
+ def update(self, x):
608
+ dtype = u.math.get_dtype(x)
609
+ fit_phase = environ.get('fit', desc='Whether this is a fitting process. Bool.')
610
+ if fit_phase and self.prob < 1.:
611
+ if self.mask.value.shape != x.shape:
612
+ raise ValueError(f"Input shape {x.shape} does not match the mask shape {self.mask.value.shape}. "
613
+ f"Please call `init_state()` method first.")
614
+ return u.math.where(self.mask.value,
615
+ u.math.asarray(x / self.prob, dtype=dtype),
616
+ u.math.asarray(0., dtype=dtype) * u.get_unit(x))
617
+ else:
618
+ return x
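
For orientation, the sketch below strings together the docstring examples from `_dropout.py` into one usage sample. It is a minimal sketch, assuming brainstate 0.2.2 is installed and that, as documented above, the `fit` environment flag controls whether the dropout masks are applied; the variable names are illustrative only.

import brainstate

# Element-wise dropout: each element is kept with probability `prob`
# and survivors are rescaled by 1 / prob while fitting.
layer = brainstate.nn.Dropout(prob=0.8)
x = brainstate.random.randn(10, 20)
with brainstate.environ.context(fit=True):
    y_train = layer(x)   # mask sampled and applied
with brainstate.environ.context(fit=False):
    y_eval = layer(x)    # identity: no-op outside the fitting phase

# Channel-wise dropout, channels-last input as in the Dropout2d example.
m = brainstate.nn.Dropout2d(prob=0.8, channel_axis=-1)
img = brainstate.random.randn(20, 32, 32, 16)
with brainstate.environ.context(fit=True):
    out = m(img)

# Fixed-mask dropout for SNNs: init_state() samples one mask per
# mini-batch iteration and update() reuses it at every time step.
snn_drop = brainstate.nn.DropoutFixed(in_size=(20,), prob=0.8)
snn_drop.init_state(batch_size=10)
with brainstate.environ.context(fit=True):
    out_t = snn_drop.update(brainstate.random.randn(10, 20))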