brainstate-0.1.9-py2.py3-none-any.whl → brainstate-0.2.0-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163)
  1. brainstate/__init__.py +130 -19
  2. brainstate/_compatible_import.py +201 -9
  3. brainstate/_compatible_import_test.py +681 -0
  4. brainstate/_deprecation.py +210 -0
  5. brainstate/_deprecation_test.py +2319 -0
  6. brainstate/{util/error.py → _error.py} +10 -20
  7. brainstate/_state.py +94 -47
  8. brainstate/_state_test.py +1 -1
  9. brainstate/_utils.py +1 -1
  10. brainstate/environ.py +1279 -347
  11. brainstate/environ_test.py +1187 -26
  12. brainstate/graph/__init__.py +6 -13
  13. brainstate/graph/_node.py +240 -0
  14. brainstate/graph/_node_test.py +589 -0
  15. brainstate/graph/{_graph_operation.py → _operation.py} +632 -746
  16. brainstate/graph/_operation_test.py +1147 -0
  17. brainstate/mixin.py +1209 -141
  18. brainstate/mixin_test.py +991 -51
  19. brainstate/nn/__init__.py +74 -72
  20. brainstate/nn/_activations.py +587 -295
  21. brainstate/nn/_activations_test.py +109 -86
  22. brainstate/nn/_collective_ops.py +393 -274
  23. brainstate/nn/_collective_ops_test.py +746 -15
  24. brainstate/nn/_common.py +114 -66
  25. brainstate/nn/_common_test.py +154 -0
  26. brainstate/nn/_conv.py +1652 -143
  27. brainstate/nn/_conv_test.py +838 -227
  28. brainstate/nn/_delay.py +95 -29
  29. brainstate/nn/_delay_test.py +25 -20
  30. brainstate/nn/_dropout.py +359 -167
  31. brainstate/nn/_dropout_test.py +429 -52
  32. brainstate/nn/_dynamics.py +14 -90
  33. brainstate/nn/_dynamics_test.py +1 -12
  34. brainstate/nn/_elementwise.py +492 -313
  35. brainstate/nn/_elementwise_test.py +806 -145
  36. brainstate/nn/_embedding.py +369 -19
  37. brainstate/nn/_embedding_test.py +156 -0
  38. brainstate/nn/{_fixedprob.py → _event_fixedprob.py} +10 -16
  39. brainstate/nn/{_fixedprob_test.py → _event_fixedprob_test.py} +6 -5
  40. brainstate/nn/{_linear_mv.py → _event_linear.py} +2 -2
  41. brainstate/nn/{_linear_mv_test.py → _event_linear_test.py} +6 -5
  42. brainstate/nn/_exp_euler.py +200 -38
  43. brainstate/nn/_exp_euler_test.py +350 -8
  44. brainstate/nn/_linear.py +391 -71
  45. brainstate/nn/_linear_test.py +427 -59
  46. brainstate/nn/_metrics.py +1070 -0
  47. brainstate/nn/_metrics_test.py +611 -0
  48. brainstate/nn/_module.py +10 -3
  49. brainstate/nn/_module_test.py +1 -1
  50. brainstate/nn/_normalizations.py +688 -329
  51. brainstate/nn/_normalizations_test.py +663 -37
  52. brainstate/nn/_paddings.py +1020 -0
  53. brainstate/nn/_paddings_test.py +723 -0
  54. brainstate/nn/_poolings.py +1404 -342
  55. brainstate/nn/_poolings_test.py +828 -92
  56. brainstate/nn/{_rate_rnns.py → _rnns.py} +446 -54
  57. brainstate/nn/_rnns_test.py +593 -0
  58. brainstate/nn/_utils.py +132 -5
  59. brainstate/nn/_utils_test.py +402 -0
  60. brainstate/{init/_random_inits.py → nn/init.py} +301 -45
  61. brainstate/{init/_random_inits_test.py → nn/init_test.py} +51 -20
  62. brainstate/random/__init__.py +247 -1
  63. brainstate/random/_rand_funs.py +668 -346
  64. brainstate/random/_rand_funs_test.py +74 -1
  65. brainstate/random/_rand_seed.py +541 -76
  66. brainstate/random/_rand_seed_test.py +1 -1
  67. brainstate/random/_rand_state.py +601 -393
  68. brainstate/random/_rand_state_test.py +551 -0
  69. brainstate/transform/__init__.py +59 -0
  70. brainstate/transform/_ad_checkpoint.py +176 -0
  71. brainstate/{compile → transform}/_ad_checkpoint_test.py +1 -1
  72. brainstate/{augment → transform}/_autograd.py +360 -113
  73. brainstate/{augment → transform}/_autograd_test.py +2 -2
  74. brainstate/transform/_conditions.py +316 -0
  75. brainstate/{compile → transform}/_conditions_test.py +11 -11
  76. brainstate/{compile → transform}/_error_if.py +22 -20
  77. brainstate/{compile → transform}/_error_if_test.py +1 -1
  78. brainstate/transform/_eval_shape.py +145 -0
  79. brainstate/{augment → transform}/_eval_shape_test.py +1 -1
  80. brainstate/{compile → transform}/_jit.py +99 -46
  81. brainstate/{compile → transform}/_jit_test.py +3 -3
  82. brainstate/{compile → transform}/_loop_collect_return.py +219 -80
  83. brainstate/{compile → transform}/_loop_collect_return_test.py +1 -1
  84. brainstate/{compile → transform}/_loop_no_collection.py +133 -34
  85. brainstate/{compile → transform}/_loop_no_collection_test.py +2 -2
  86. brainstate/transform/_make_jaxpr.py +2016 -0
  87. brainstate/transform/_make_jaxpr_test.py +1510 -0
  88. brainstate/transform/_mapping.py +529 -0
  89. brainstate/transform/_mapping_test.py +194 -0
  90. brainstate/{compile → transform}/_progress_bar.py +78 -25
  91. brainstate/{augment → transform}/_random.py +65 -45
  92. brainstate/{compile → transform}/_unvmap.py +102 -5
  93. brainstate/transform/_util.py +286 -0
  94. brainstate/typing.py +594 -61
  95. brainstate/typing_test.py +780 -0
  96. brainstate/util/__init__.py +9 -32
  97. brainstate/util/_others.py +1025 -0
  98. brainstate/util/_others_test.py +962 -0
  99. brainstate/util/_pretty_pytree.py +1301 -0
  100. brainstate/util/_pretty_pytree_test.py +675 -0
  101. brainstate/util/{pretty_repr.py → _pretty_repr.py} +161 -27
  102. brainstate/util/_pretty_repr_test.py +696 -0
  103. brainstate/util/filter.py +557 -81
  104. brainstate/util/filter_test.py +912 -0
  105. brainstate/util/struct.py +769 -382
  106. brainstate/util/struct_test.py +602 -0
  107. {brainstate-0.1.9.dist-info → brainstate-0.2.0.dist-info}/METADATA +34 -17
  108. brainstate-0.2.0.dist-info/RECORD +111 -0
  109. brainstate/augment/__init__.py +0 -30
  110. brainstate/augment/_eval_shape.py +0 -99
  111. brainstate/augment/_mapping.py +0 -1060
  112. brainstate/augment/_mapping_test.py +0 -597
  113. brainstate/compile/__init__.py +0 -38
  114. brainstate/compile/_ad_checkpoint.py +0 -204
  115. brainstate/compile/_conditions.py +0 -256
  116. brainstate/compile/_make_jaxpr.py +0 -888
  117. brainstate/compile/_make_jaxpr_test.py +0 -156
  118. brainstate/compile/_util.py +0 -147
  119. brainstate/functional/__init__.py +0 -27
  120. brainstate/graph/_graph_node.py +0 -244
  121. brainstate/graph/_graph_node_test.py +0 -73
  122. brainstate/graph/_graph_operation_test.py +0 -563
  123. brainstate/init/__init__.py +0 -26
  124. brainstate/init/_base.py +0 -52
  125. brainstate/init/_generic.py +0 -244
  126. brainstate/init/_regular_inits.py +0 -105
  127. brainstate/init/_regular_inits_test.py +0 -50
  128. brainstate/nn/_inputs.py +0 -608
  129. brainstate/nn/_ltp.py +0 -28
  130. brainstate/nn/_neuron.py +0 -705
  131. brainstate/nn/_neuron_test.py +0 -161
  132. brainstate/nn/_others.py +0 -46
  133. brainstate/nn/_projection.py +0 -486
  134. brainstate/nn/_rate_rnns_test.py +0 -63
  135. brainstate/nn/_readout.py +0 -209
  136. brainstate/nn/_readout_test.py +0 -53
  137. brainstate/nn/_stp.py +0 -236
  138. brainstate/nn/_synapse.py +0 -505
  139. brainstate/nn/_synapse_test.py +0 -131
  140. brainstate/nn/_synaptic_projection.py +0 -423
  141. brainstate/nn/_synouts.py +0 -162
  142. brainstate/nn/_synouts_test.py +0 -57
  143. brainstate/nn/metrics.py +0 -388
  144. brainstate/optim/__init__.py +0 -38
  145. brainstate/optim/_base.py +0 -64
  146. brainstate/optim/_lr_scheduler.py +0 -448
  147. brainstate/optim/_lr_scheduler_test.py +0 -50
  148. brainstate/optim/_optax_optimizer.py +0 -152
  149. brainstate/optim/_optax_optimizer_test.py +0 -53
  150. brainstate/optim/_sgd_optimizer.py +0 -1104
  151. brainstate/random/_random_for_unit.py +0 -52
  152. brainstate/surrogate.py +0 -1957
  153. brainstate/transform.py +0 -23
  154. brainstate/util/caller.py +0 -98
  155. brainstate/util/others.py +0 -540
  156. brainstate/util/pretty_pytree.py +0 -945
  157. brainstate/util/pretty_pytree_test.py +0 -159
  158. brainstate/util/pretty_table.py +0 -2954
  159. brainstate/util/scaling.py +0 -258
  160. brainstate-0.1.9.dist-info/RECORD +0 -130
  161. {brainstate-0.1.9.dist-info → brainstate-0.2.0.dist-info}/WHEEL +0 -0
  162. {brainstate-0.1.9.dist-info → brainstate-0.2.0.dist-info}/licenses/LICENSE +0 -0
  163. {brainstate-0.1.9.dist-info → brainstate-0.2.0.dist-info}/top_level.txt +0 -0
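The rename entries above reshuffle several top-level modules: brainstate.compile and brainstate.augment are folded into brainstate.transform, the initializers move from brainstate.init to brainstate.nn.init, and util/error.py becomes the private _error.py. The sketch below is only an illustration read off those rename entries; the helper name and the mapping table are hypothetical and not part of the package, and dotted paths beginning with an underscore are internal modules, so check the public 0.2.0 API before relying on any of them. Deleted entries such as brainstate/optim/ and brainstate/surrogate.py have no rename target in this listing, so this diff does not say where (or whether) they resurface.

    # Illustrative only: an old-to-new module map read off the rename entries in the
    # file list above. The helper and table names are hypothetical, not brainstate API.
    OLD_TO_NEW_MODULES = {
        "brainstate.compile": "brainstate.transform",       # {compile -> transform}/_jit.py, _conditions.py, ...
        "brainstate.augment": "brainstate.transform",       # {augment -> transform}/_autograd.py, _random.py, ...
        "brainstate.init": "brainstate.nn.init",            # {init/_random_inits.py -> nn/init.py}
        "brainstate.nn._rate_rnns": "brainstate.nn._rnns",  # {_rate_rnns.py -> _rnns.py}
        "brainstate.util.error": "brainstate._error",       # {util/error.py -> _error.py}
    }

    def suggest_new_module(old_path: str) -> str:
        # Rewrite a dotted module path according to the table; leave unknown paths untouched.
        for old, new in OLD_TO_NEW_MODULES.items():
            if old_path == old or old_path.startswith(old + "."):
                return new + old_path[len(old):]
        return old_path

    print(suggest_new_module("brainstate.compile._jit"))  # -> brainstate.transform._jit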
brainstate/nn/_dropout.py CHANGED
@@ -1,4 +1,4 @@
- # Copyright 2024 BDP Ecosystem Limited. All Rights Reserved.
+ # Copyright 2024 BrainX Ecosystem Limited. All Rights Reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -20,13 +20,20 @@ from typing import Optional, Sequence
  import brainunit as u
  import jax.numpy as jnp

- from brainstate import random, environ, init
+ from brainstate import random, environ
  from brainstate._state import ShortTermState
  from brainstate.typing import Size
+ from . import init as init
  from ._module import ElementWiseBlock

  __all__ = [
- 'DropoutFixed', 'Dropout', 'Dropout1d', 'Dropout2d', 'Dropout3d',
+ 'Dropout',
+ 'Dropout1d',
+ 'Dropout2d',
+ 'Dropout3d',
+ 'AlphaDropout',
+ 'FeatureAlphaDropout',
+ 'DropoutFixed',
  ]


@@ -39,14 +46,32 @@ class Dropout(ElementWiseBlock):
  This layer is active only during training (``mode=brainstate.mixin.Training``). In other
  circumstances it is a no-op.

+ Parameters
+ ----------
+ prob : float
+ Probability to keep element of the tensor. Default is 0.5.
+ broadcast_dims : Sequence[int]
+ Dimensions that will share the same dropout mask. Default is ().
+ name : str, optional
+ The name of the dynamic system.
+
+ References
+ ----------
  .. [1] Srivastava, Nitish, et al. "Dropout: a simple way to prevent
  neural networks from overfitting." The journal of machine learning
  research 15.1 (2014): 1929-1958.

- Args:
- prob: Probability to keep element of the tensor.
- broadcast_dims: dimensions that will share the same dropout mask.
- name: str. The name of the dynamic system.
+ Examples
+ --------
+ .. code-block:: python
+
+ >>> import brainstate
+ >>> layer = brainstate.nn.Dropout(prob=0.8)
+ >>> x = brainstate.random.randn(10, 20)
+ >>> with brainstate.environ.context(fit=True):
+ ... output = layer(x)
+ >>> output.shape
+ (10, 20)

  """
  __module__ = 'brainstate.nn'
@@ -133,41 +158,55 @@ class _DropoutNd(ElementWiseBlock):


  class Dropout1d(_DropoutNd):
- r"""Randomly zero out entire channels (a channel is a 1D feature map,
- e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
- batched input is a 1D tensor :math:`\text{input}[i, j]`).
+ r"""Randomly zero out entire channels (a channel is a 1D feature map).
+
  Each channel will be zeroed out independently on every forward call with
- probability :attr:`p` using samples from a Bernoulli distribution.
+ probability using samples from a Bernoulli distribution. The channel is
+ a 1D feature map, e.g., the :math:`j`-th channel of the :math:`i`-th sample
+ in the batched input is a 1D tensor :math:`\text{input}[i, j]`.

- Usually the input comes from :class:`nn.Conv1d` modules.
+ Usually the input comes from :class:`Conv1d` modules.

- As described in the paper
- `Efficient Object Localization Using Convolutional Networks`_ ,
- if adjacent pixels within feature maps are strongly correlated
- (as is normally the case in early convolution layers) then i.i.d. dropout
- will not regularize the activations and will otherwise just result
- in an effective learning rate decrease.
+ As described in the paper [1]_, if adjacent pixels within feature maps are
+ strongly correlated (as is normally the case in early convolution layers)
+ then i.i.d. dropout will not regularize the activations and will otherwise
+ just result in an effective learning rate decrease.

- In this case, :func:`nn.Dropout1d` will help promote independence between
+ In this case, :class:`Dropout1d` will help promote independence between
  feature maps and should be used instead.

- Args:
- prob: float. probability of an element to be zero-ed.
-
- Shape:
- - Input: :math:`(N, C, L)` or :math:`(C, L)`.
- - Output: :math:`(N, C, L)` or :math:`(C, L)` (same shape as input).
-
- Examples::
-
- >>> m = Dropout1d(p=0.2)
- >>> x = random.randn(20, 32, 16)
- >>> output = m(x)
+ Parameters
+ ----------
+ prob : float
+ Probability of an element to be kept. Default is 0.5.
+ channel_axis : int
+ The axis representing the channel dimension. Default is -1.
+ name : str, optional
+ The name of the dynamic system.
+
+ Notes
+ -----
+ Input shape: :math:`(N, C, L)` or :math:`(C, L)`.
+
+ Output shape: :math:`(N, C, L)` or :math:`(C, L)` (same shape as input).
+
+ References
+ ----------
+ .. [1] Springenberg et al., "Striving for Simplicity: The All Convolutional Net"
+ https://arxiv.org/abs/1411.4280
+
+ Examples
+ --------
+ .. code-block:: python
+
+ >>> import brainstate
+ >>> m = brainstate.nn.Dropout1d(prob=0.8)
+ >>> x = brainstate.random.randn(20, 32, 16)
+ >>> with brainstate.environ.context(fit=True):
+ ... output = m(x)
  >>> output.shape
  (20, 32, 16)

- .. _Efficient Object Localization Using Convolutional Networks:
- https://arxiv.org/abs/1411.4280
  """
  __module__ = 'brainstate.nn'
  minimal_dim: int = 2
@@ -179,39 +218,55 @@ class Dropout1d(_DropoutNd):


  class Dropout2d(_DropoutNd):
- r"""Randomly zero out entire channels (a channel is a 2D feature map,
- e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
- batched input is a 2D tensor :math:`\text{input}[i, j]`).
+ r"""Randomly zero out entire channels (a channel is a 2D feature map).
+
  Each channel will be zeroed out independently on every forward call with
- probability :attr:`p` using samples from a Bernoulli distribution.
+ probability using samples from a Bernoulli distribution. The channel is
+ a 2D feature map, e.g., the :math:`j`-th channel of the :math:`i`-th sample
+ in the batched input is a 2D tensor :math:`\text{input}[i, j]`.

- Usually the input comes from :class:`nn.Conv2d` modules.
+ Usually the input comes from :class:`Conv2d` modules.

- As described in the paper
- `Efficient Object Localization Using Convolutional Networks`_ ,
- if adjacent pixels within feature maps are strongly correlated
- (as is normally the case in early convolution layers) then i.i.d. dropout
- will not regularize the activations and will otherwise just result
- in an effective learning rate decrease.
+ As described in the paper [1]_, if adjacent pixels within feature maps are
+ strongly correlated (as is normally the case in early convolution layers)
+ then i.i.d. dropout will not regularize the activations and will otherwise
+ just result in an effective learning rate decrease.

- In this case, :func:`nn.Dropout2d` will help promote independence between
+ In this case, :class:`Dropout2d` will help promote independence between
  feature maps and should be used instead.

- Args:
- prob: float. probability of an element to be kept.
-
- Shape:
- - Input: :math:`(N, C, H, W)` or :math:`(N, C, L)`.
- - Output: :math:`(N, C, H, W)` or :math:`(N, C, L)` (same shape as input).
-
- Examples::
-
- >>> m = Dropout2d(p=0.2)
- >>> x = random.randn(20, 32, 32, 16)
- >>> output = m(x)
+ Parameters
+ ----------
+ prob : float
+ Probability of an element to be kept. Default is 0.5.
+ channel_axis : int
+ The axis representing the channel dimension. Default is -1.
+ name : str, optional
+ The name of the dynamic system.
+
+ Notes
+ -----
+ Input shape: :math:`(N, C, H, W)` or :math:`(C, H, W)`.
+
+ Output shape: :math:`(N, C, H, W)` or :math:`(C, H, W)` (same shape as input).
+
+ References
+ ----------
+ .. [1] Springenberg et al., "Striving for Simplicity: The All Convolutional Net"
+ https://arxiv.org/abs/1411.4280
+
+ Examples
+ --------
+ .. code-block:: python
+
+ >>> import brainstate
+ >>> m = brainstate.nn.Dropout2d(prob=0.8)
+ >>> x = brainstate.random.randn(20, 32, 32, 16)
+ >>> with brainstate.environ.context(fit=True):
+ ... output = m(x)
+ >>> output.shape
+ (20, 32, 32, 16)

- .. _Efficient Object Localization Using Convolutional Networks:
- https://arxiv.org/abs/1411.4280
  """
  __module__ = 'brainstate.nn'
  minimal_dim: int = 3
@@ -223,39 +278,55 @@ class Dropout2d(_DropoutNd):


  class Dropout3d(_DropoutNd):
- r"""Randomly zero out entire channels (a channel is a 3D feature map,
- e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
- batched input is a 3D tensor :math:`\text{input}[i, j]`).
+ r"""Randomly zero out entire channels (a channel is a 3D feature map).
+
  Each channel will be zeroed out independently on every forward call with
- probability :attr:`p` using samples from a Bernoulli distribution.
+ probability using samples from a Bernoulli distribution. The channel is
+ a 3D feature map, e.g., the :math:`j`-th channel of the :math:`i`-th sample
+ in the batched input is a 3D tensor :math:`\text{input}[i, j]`.

- Usually the input comes from :class:`nn.Conv3d` modules.
+ Usually the input comes from :class:`Conv3d` modules.

- As described in the paper
- `Efficient Object Localization Using Convolutional Networks`_ ,
- if adjacent pixels within feature maps are strongly correlated
- (as is normally the case in early convolution layers) then i.i.d. dropout
- will not regularize the activations and will otherwise just result
- in an effective learning rate decrease.
+ As described in the paper [1]_, if adjacent pixels within feature maps are
+ strongly correlated (as is normally the case in early convolution layers)
+ then i.i.d. dropout will not regularize the activations and will otherwise
+ just result in an effective learning rate decrease.

- In this case, :func:`nn.Dropout3d` will help promote independence between
+ In this case, :class:`Dropout3d` will help promote independence between
  feature maps and should be used instead.

- Args:
- prob: float. probability of an element to be kept.
-
- Shape:
- - Input: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`.
- - Output: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input).
-
- Examples::
-
- >>> m = Dropout3d(p=0.2)
- >>> x = random.randn(20, 16, 4, 32, 32)
- >>> output = m(x)
+ Parameters
+ ----------
+ prob : float
+ Probability of an element to be kept. Default is 0.5.
+ channel_axis : int
+ The axis representing the channel dimension. Default is -1.
+ name : str, optional
+ The name of the dynamic system.
+
+ Notes
+ -----
+ Input shape: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`.
+
+ Output shape: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input).
+
+ References
+ ----------
+ .. [1] Springenberg et al., "Striving for Simplicity: The All Convolutional Net"
+ https://arxiv.org/abs/1411.4280
+
+ Examples
+ --------
+ .. code-block:: python
+
+ >>> import brainstate
+ >>> m = brainstate.nn.Dropout3d(prob=0.8)
+ >>> x = brainstate.random.randn(20, 16, 4, 32, 32)
+ >>> with brainstate.environ.context(fit=True):
+ ... output = m(x)
+ >>> output.shape
+ (20, 16, 4, 32, 32)

- .. _Efficient Object Localization Using Convolutional Networks:
- https://arxiv.org/abs/1411.4280
  """
  __module__ = 'brainstate.nn'
  minimal_dim: int = 4
@@ -270,129 +341,250 @@ class AlphaDropout(_DropoutNd):
  r"""Applies Alpha Dropout over the input.

  Alpha Dropout is a type of Dropout that maintains the self-normalizing
- property.
- For an input with zero mean and unit standard deviation, the output of
+ property. For an input with zero mean and unit standard deviation, the output of
  Alpha Dropout maintains the original mean and standard deviation of the
  input.
+
  Alpha Dropout goes hand-in-hand with SELU activation function, which ensures
  that the outputs have zero mean and unit standard deviation.

  During training, it randomly masks some of the elements of the input
- tensor with probability *p* using samples from a bernoulli distribution.
- The elements to masked are randomized on every forward call, and scaled
+ tensor with probability using samples from a Bernoulli distribution.
+ The elements to be masked are randomized on every forward call, and scaled
  and shifted to maintain zero mean and unit standard deviation.

  During evaluation the module simply computes an identity function.

- More details can be found in the paper `Self-Normalizing Neural Networks`_ .
+ Parameters
+ ----------
+ prob : float
+ Probability of an element to be kept. Default is 0.5.
+ name : str, optional
+ The name of the dynamic system.
+
+ Notes
+ -----
+ Input shape: :math:`(*)`. Input can be of any shape.
+
+ Output shape: :math:`(*)`. Output is of the same shape as input.
+
+ References
+ ----------
+ .. [1] Klambauer et al., "Self-Normalizing Neural Networks"
+ https://arxiv.org/abs/1706.02515
+
+ Examples
+ --------
+ .. code-block:: python
+
+ >>> import brainstate
+ >>> m = brainstate.nn.AlphaDropout(prob=0.8)
+ >>> x = brainstate.random.randn(20, 16)
+ >>> with brainstate.environ.context(fit=True):
+ ... output = m(x)
+ >>> output.shape
+ (20, 16)

- Args:
- prob: float. probability of an element to be kept.
+ """
+ __module__ = 'brainstate.nn'

- Shape:
- - Input: :math:`(*)`. Input can be of any shape
- - Output: :math:`(*)`. Output is of the same shape as input
+ def __init__(
+ self,
+ prob: float = 0.5,
+ name: Optional[str] = None
+ ) -> None:
+ super().__init__(name=name)
+ assert 0. <= prob <= 1., f"Dropout probability must be in the range [0, 1]. But got {prob}."
+ self.prob = prob

- Examples::
+ # SELU parameters
+ alpha = -1.7580993408473766
+ self.alpha = alpha

- >>> m = AlphaDropout(p=0.2)
- >>> x = random.randn(20, 16)
- >>> output = m(x)
+ # Affine transformation parameters to maintain mean and variance
+ self.a = ((1 - prob) * (1 + prob * alpha ** 2)) ** -0.5
+ self.b = -self.a * alpha * prob

- .. _Self-Normalizing Neural Networks: https://arxiv.org/abs/1706.02515
- """
- __module__ = 'brainstate.nn'
+ def __call__(self, x):
+ dtype = u.math.get_dtype(x)
+ fit_phase = environ.get('fit', desc='Whether this is a fitting process. Bool.')
+ if fit_phase and self.prob < 1.:
+ keep_mask = random.bernoulli(self.prob, x.shape)
+ return u.math.where(
+ keep_mask,
+ u.math.asarray(x, dtype=dtype),
+ u.math.asarray(self.alpha, dtype=dtype)
+ ) * self.a + self.b
+ else:
+ return x

- def update(self, *args, **kwargs):
- raise NotImplementedError("AlphaDropout is not supported in the current version.")

+ class FeatureAlphaDropout(ElementWiseBlock):
+ r"""Randomly masks out entire channels with Alpha Dropout properties.

- class FeatureAlphaDropout(_DropoutNd):
- r"""Randomly masks out entire channels (a channel is a feature map,
- e.g. the :math:`j`-th channel of the :math:`i`-th sample in the batch input
- is a tensor :math:`\text{input}[i, j]`) of the input tensor). Instead of
- setting activations to zero, as in regular Dropout, the activations are set
- to the negative saturation value of the SELU activation function. More details
- can be found in the paper `Self-Normalizing Neural Networks`_ .
+ Instead of setting activations to zero as in regular Dropout, the activations
+ are set to the negative saturation value of the SELU activation function to
+ maintain self-normalizing properties.

- Each element will be masked independently for each sample on every forward
- call with probability :attr:`p` using samples from a Bernoulli distribution.
- The elements to be masked are randomized on every forward call, and scaled
- and shifted to maintain zero mean and unit variance.
+ Each channel (e.g., the :math:`j`-th channel of the :math:`i`-th sample in
+ the batch input is a tensor :math:`\text{input}[i, j]`) will be masked
+ independently for each sample on every forward call with probability using
+ samples from a Bernoulli distribution. The elements to be masked are randomized
+ on every forward call, and scaled and shifted to maintain zero mean and unit
+ variance.

- Usually the input comes from :class:`nn.AlphaDropout` modules.
+ Usually the input comes from convolutional layers with SELU activation.

- As described in the paper
- `Efficient Object Localization Using Convolutional Networks`_ ,
- if adjacent pixels within feature maps are strongly correlated
- (as is normally the case in early convolution layers) then i.i.d. dropout
- will not regularize the activations and will otherwise just result
- in an effective learning rate decrease.
+ As described in the paper [2]_, if adjacent pixels within feature maps are
+ strongly correlated (as is normally the case in early convolution layers)
+ then i.i.d. dropout will not regularize the activations and will otherwise
+ just result in an effective learning rate decrease.

- In this case, :func:`nn.AlphaDropout` will help promote independence between
+ In this case, :class:`FeatureAlphaDropout` will help promote independence between
  feature maps and should be used instead.

- Args:
- prob: float. probability of an element to be kept.
+ Parameters
+ ----------
+ prob : float
+ Probability of an element to be kept. Default is 0.5.
+ channel_axis : int
+ The axis representing the channel dimension. Default is -1.
+ name : str, optional
+ The name of the dynamic system.
+
+ Notes
+ -----
+ Input shape: :math:`(N, C, *)` where C is the channel dimension.
+
+ Output shape: Same shape as input.
+
+ References
+ ----------
+ .. [1] Klambauer et al., "Self-Normalizing Neural Networks"
+ https://arxiv.org/abs/1706.02515
+ .. [2] Springenberg et al., "Striving for Simplicity: The All Convolutional Net"
+ https://arxiv.org/abs/1411.4280
+
+ Examples
+ --------
+ .. code-block:: python
+
+ >>> import brainstate
+ >>> m = brainstate.nn.FeatureAlphaDropout(prob=0.8)
+ >>> x = brainstate.random.randn(20, 16, 4, 32, 32)
+ >>> with brainstate.environ.context(fit=True):
+ ... output = m(x)
+ >>> output.shape
+ (20, 16, 4, 32, 32)

- Shape:
- - Input: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`.
- - Output: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input).
+ """
+ __module__ = 'brainstate.nn'

- Examples::
+ def __init__(
+ self,
+ prob: float = 0.5,
+ channel_axis: int = -1,
+ name: Optional[str] = None
+ ) -> None:
+ super().__init__(name=name)
+ assert 0. <= prob <= 1., f"Dropout probability must be in the range [0, 1]. But got {prob}."
+ self.prob = prob
+ self.channel_axis = channel_axis

- >>> m = FeatureAlphaDropout(p=0.2)
- >>> x = random.randn(20, 16, 4, 32, 32)
- >>> output = m(x)
+ # SELU parameters
+ alpha = -1.7580993408473766
+ self.alpha = alpha

- .. _Self-Normalizing Neural Networks: https://arxiv.org/abs/1706.02515
- .. _Efficient Object Localization Using Convolutional Networks:
- https://arxiv.org/abs/1411.4280
- """
- __module__ = 'brainstate.nn'
+ # Affine transformation parameters to maintain mean and variance
+ self.a = ((1 - prob) * (1 + prob * alpha ** 2)) ** -0.5
+ self.b = -self.a * alpha * prob

- def update(self, *args, **kwargs):
- raise NotImplementedError("FeatureAlphaDropout is not supported in the current version.")
+ def __call__(self, x):
+ dtype = u.math.get_dtype(x)
+ fit_phase = environ.get('fit', desc='Whether this is a fitting process. Bool.')
+ if fit_phase and self.prob < 1.:
+ # Create mask shape with 1s except for batch and channel dimensions
+ channel_axis = self.channel_axis if self.channel_axis >= 0 else (x.ndim + self.channel_axis)
+ mask_shape = [1] * x.ndim
+ mask_shape[0] = x.shape[0] # batch dimension
+ mask_shape[channel_axis] = x.shape[channel_axis] # channel dimension
+
+ keep_mask = random.bernoulli(self.prob, mask_shape)
+ keep_mask = u.math.broadcast_to(keep_mask, x.shape)
+ return u.math.where(
+ keep_mask,
+ u.math.asarray(x, dtype=dtype),
+ u.math.asarray(self.alpha, dtype=dtype)
+ ) * self.a + self.b
+ else:
+ return x


  class DropoutFixed(ElementWiseBlock):
- """
- A dropout layer with the fixed dropout mask along the time axis once after initialized.
+ """A dropout layer with a fixed dropout mask along the time axis.

- In training, to compensate for the fraction of input values dropped (`rate`),
- all surviving values are multiplied by `1 / (1 - rate)`.
+ In training, to compensate for the fraction of input values dropped,
+ all surviving values are multiplied by `1 / (1 - prob)`.

  This layer is active only during training (``mode=brainstate.mixin.Training``). In other
  circumstances it is a no-op.

+ This kind of Dropout is particularly useful for spiking neural networks (SNNs) where
+ the same dropout mask needs to be applied across multiple time steps within a single
+ mini-batch iteration.
+
+ Parameters
+ ----------
+ in_size : tuple or int
+ The size of the input tensor.
+ prob : float
+ Probability to keep element of the tensor. Default is 0.5.
+ name : str, optional
+ The name of the dynamic system.
+
+ Notes
+ -----
+ As described in [2]_, there is a subtle difference in the way dropout is applied in
+ SNNs compared to ANNs. In ANNs, each epoch of training has several iterations of
+ mini-batches. In each iteration, randomly selected units (with dropout ratio of
+ :math:`p`) are disconnected from the network while weighting by its posterior
+ probability (:math:`1-p`).
+
+ However, in SNNs, each iteration has more than one forward propagation depending on
+ the time length of the spike train. We back-propagate the output error and modify
+ the network parameters only at the last time step. For dropout to be effective in
+ our training method, it has to be ensured that the set of connected units within an
+ iteration of mini-batch data is not changed, such that the neural network is
+ constituted by the same random subset of units during each forward propagation within
+ a single iteration.
+
+ On the other hand, if the units are randomly connected at each time-step, the effect
+ of dropout will be averaged out over the entire forward propagation time within an
+ iteration. Then, the dropout effect would fade-out once the output error is propagated
+ backward and the parameters are updated at the last time step. Therefore, we need to
+ keep the set of randomly connected units for the entire time window within an iteration.
+
+ References
+ ----------
  .. [1] Srivastava, Nitish, et al. "Dropout: a simple way to prevent
  neural networks from overfitting." The journal of machine learning
  research 15.1 (2014): 1929-1958.
+ .. [2] Lee et al., "Enabling Spike-based Backpropagation for Training Deep Neural
+ Network Architectures" https://arxiv.org/abs/1903.06379
+
+ Examples
+ --------
+ .. code-block:: python
+
+ >>> import brainstate
+ >>> layer = brainstate.nn.DropoutFixed(in_size=(20,), prob=0.8)
+ >>> layer.init_state(batch_size=10)
+ >>> x = brainstate.random.randn(10, 20)
+ >>> with brainstate.environ.context(fit=True):
+ ... output = layer.update(x)
+ >>> output.shape
+ (10, 20)

- .. admonition:: Tip
- :class: tip
-
- This kind of Dropout is firstly described in `Enabling Spike-based Backpropagation for Training Deep Neural
- Network Architectures <https://arxiv.org/abs/1903.06379>`_:
-
- There is a subtle difference in the way dropout is applied in SNNs compared to ANNs. In ANNs, each epoch of
- training has several iterations of mini-batches. In each iteration, randomly selected units (with dropout ratio of :math:`p`)
- are disconnected from the network while weighting by its posterior probability (:math:`1-p`). However, in SNNs, each
- iteration has more than one forward propagation depending on the time length of the spike train. We back-propagate
- the output error and modify the network parameters only at the last time step. For dropout to be effective in
- our training method, it has to be ensured that the set of connected units within an iteration of mini-batch
- data is not changed, such that the neural network is constituted by the same random subset of units during
- each forward propagation within a single iteration. On the other hand, if the units are randomly connected at
- each time-step, the effect of dropout will be averaged out over the entire forward propagation time within an
- iteration. Then, the dropout effect would fade-out once the output error is propagated backward and the parameters
- are updated at the last time step. Therefore, we need to keep the set of randomly connected units for the entire
- time window within an iteration.
-
- Args:
- in_size: The size of the input tensor.
- prob: Probability to keep element of the tensor.
- mode: Mode. The computation mode of the object.
- name: str. The name of the dynamic system.
  """
  __module__ = 'brainstate.nn'
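The new AlphaDropout and FeatureAlphaDropout classes both precompute affine parameters a and b from the SELU saturation value alpha. The snippet below is a minimal NumPy check (not brainstate code) of that rescaling under the convention of the SELU paper, where p denotes the drop probability and entries are kept with probability 1 - p; how that convention maps onto the prob argument in the code above is an assumption to verify against the 0.2.0 sources.

    # Standalone NumPy check of the alpha-dropout affine rescaling shown above.
    # Assumption: p is the *drop* probability (SELU-paper convention), so entries are
    # kept with probability 1 - p; alpha_prime is the SELU saturation value from the diff.
    import numpy as np

    alpha_prime = -1.7580993408473766
    p = 0.2
    a = ((1 - p) * (1 + p * alpha_prime ** 2)) ** -0.5
    b = -a * alpha_prime * p

    rng = np.random.default_rng(0)
    x = rng.standard_normal(1_000_000)          # zero-mean, unit-variance input
    keep = rng.random(x.shape) >= p             # keep each entry with probability 1 - p
    y = np.where(keep, x, alpha_prime) * a + b  # dropped entries saturate at alpha_prime

    print(round(float(y.mean()), 3), round(float(y.var()), 3))  # both stay close to 0.0 and 1.0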