brainstate 0.1.0.post20241122__py2.py3-none-any.whl → 0.1.0.post20241129__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
brainstate/_state.py CHANGED
@@ -679,7 +679,7 @@ class StateTraceStack(Generic[A]):
         """
         for st, val in zip(self.states, self._original_state_values):
             # internal use
-            st._value = val
+            st.restore_value(val)
 
     def merge(self, *traces) -> 'StateTraceStack':
         """
@@ -29,15 +29,11 @@ The wrapped gradient transformations here are made possible by using the followi
 
 from __future__ import annotations
 
-import inspect
-from functools import partial, wraps
+from functools import wraps, partial
 from typing import Union, Callable, Dict, Sequence, Optional, Any, Tuple, TypeVar, Iterator
 
+import brainunit as u
 import jax
-from jax import numpy as jnp
-from jax._src.api import _vjp
-from jax.api_util import argnums_partial
-from jax.extend import linear_util
 
 from brainstate._state import State, StateTraceStack
 from brainstate._utils import set_module_as
@@ -54,54 +50,15 @@ LossValue = PyTree
 AuxData = PyTree
 
 
-def _isgeneratorfunction(fun):
-    # re-implemented here because of https://bugs.python.org/issue33261
-    while inspect.ismethod(fun):
-        fun = fun.__func__
-    while isinstance(fun, partial):
-        fun = fun.func
-    return inspect.isfunction(fun) and bool(fun.__code__.co_flags & inspect.CO_GENERATOR)
-
-
-def _check_callable(fun):
-    # In Python 3.10+, the only thing stopping us from supporting staticmethods
-    # is that we can't take weak references to them, which the C++ JIT requires.
-    if isinstance(fun, staticmethod):
-        raise TypeError(f"staticmethod arguments are not supported, got {fun}")
-    if not callable(fun):
-        raise TypeError(f"Expected a callable value, got {fun}")
-    if _isgeneratorfunction(fun):
-        raise TypeError(f"Expected a function, got a generator function: {fun}")
-
-
-def functional_vector_grad(func, argnums=0, return_value: bool = False, has_aux: bool = False):
-    """
-    Compute the gradient of a vector with respect to the input.
-    """
-    _check_callable(func)
-
-    @wraps(func)
-    def grad_fun(*args, **kwargs):
-        f = linear_util.wrap_init(func, kwargs)
-        f_partial, dyn_args = argnums_partial(f, argnums, args, require_static_args_hashable=False)
-        if has_aux:
-            y, vjp_fn, aux = _vjp(f_partial, *dyn_args, has_aux=True)
-        else:
-            y, vjp_fn = _vjp(f_partial, *dyn_args, has_aux=False)
-        leaves, tree = jax.tree.flatten(y)
-        tangents = jax.tree.unflatten(tree, [jnp.ones(l.shape, dtype=l.dtype) for l in leaves])
-        grads = vjp_fn(tangents)
-        if isinstance(argnums, int):
-            grads = grads[0]
-        if has_aux:
-            return (grads, y, aux) if return_value else (grads, aux)
-        else:
-            return (grads, y) if return_value else grads
-
-    return grad_fun
-
-
-def _jacrev(fun, argnums=0, holomorphic=False, allow_int=False, has_aux=False, return_value=False):
+def _jacrev(
+    fun,
+    argnums=0,
+    holomorphic=False,
+    allow_int=False,
+    has_aux=False,
+    return_value=False,
+    unit_aware=False,
+):
     @wraps(fun)
     def fun_wrapped(*args, **kwargs):
         if has_aux:
@@ -117,7 +74,18 @@ def _jacrev(fun, argnums=0, holomorphic=False, allow_int=False, has_aux=False, r
         else:
             return y, None
 
-    transform = jax.jacrev(fun_wrapped, argnums=argnums, holomorphic=holomorphic, allow_int=allow_int, has_aux=True)
+    if unit_aware:
+        transform = u.autograd.jacrev(fun_wrapped,
+                                      argnums=argnums,
+                                      holomorphic=holomorphic,
+                                      allow_int=allow_int,
+                                      has_aux=True)
+    else:
+        transform = jax.jacrev(fun_wrapped,
+                               argnums=argnums,
+                               holomorphic=holomorphic,
+                               allow_int=allow_int,
+                               has_aux=True)
 
     @wraps(fun)
     def jacfun(*args, **kwargs):
@@ -130,7 +98,14 @@ def _jacrev(fun, argnums=0, holomorphic=False, allow_int=False, has_aux=False, r
     return jacfun
 
 
-def _jacfwd(fun, argnums=0, holomorphic=False, has_aux=False, return_value=False):
+def _jacfwd(
+    fun,
+    argnums=0,
+    holomorphic=False,
+    has_aux=False,
+    return_value=False,
+    unit_aware=False,
+):
     @wraps(fun)
     def fun_wrapped(*args, **kwargs):
         if has_aux:
@@ -146,7 +121,16 @@ def _jacfwd(fun, argnums=0, holomorphic=False, has_aux=False, return_value=False
         else:
            return y, None
 
-    transform = jax.jacfwd(fun_wrapped, argnums=argnums, holomorphic=holomorphic, has_aux=True)
+    if unit_aware:
+        transform = u.autograd.jacfwd(fun_wrapped,
+                                      argnums=argnums,
+                                      holomorphic=holomorphic,
+                                      has_aux=True)
+    else:
+        transform = jax.jacfwd(fun_wrapped,
+                               argnums=argnums,
+                               holomorphic=holomorphic,
+                               has_aux=True)
 
     @wraps(fun)
     def jacfun(*args, **kwargs):
@@ -323,9 +307,9 @@ def grad(
     argnums: Optional[Union[int, Sequence[int]]] = None,
     holomorphic: Optional[bool] = False,
     allow_int: Optional[bool] = False,
-    reduce_axes: Optional[Sequence[str]] = (),
     has_aux: Optional[bool] = None,
     return_value: Optional[bool] = False,
+    unit_aware: bool = False,
 ) -> GradientTransform | Callable[[Callable], GradientTransform]:
     """
     Compute the gradient of a scalar-valued function with respect to its arguments.
@@ -333,27 +317,24 @@ def grad(
     %s
 
     Args:
-      fun: callable. the scalar-valued function to be differentiated.
-      reduce_axes: (Sequence[str]) optional. Specifies the axes to reduce over when
-        differentiating with respect to array-valued arguments. The default, (),
-        means to differentiate each element of the output with respect to each
-        element of the argument. If the argument is an array, this argument controls
-        how many axes the output of grad has.
-      allow_int: (bool) optional. Whether to allow differentiating with respect to
-        integer valued inputs. The gradient of an integer input will have a trivial
-        vector-space dtype (float0). Default False.
-      holomorphic: (bool) optional. Whether fun is promised to be holomorphic.
-        Default False.
-      grad_states: (State, Sequence[State], Dict[str, State]) optional. The variables
-        in fun to take their gradients.
-      fun: the scalar-valued function to be differentiated.
-      argnums: (int or tuple of ints) optional. Specifies which positional
-        argument(s) to differentiate with respect to.
-      has_aux: (bool) optional. Indicates whether fun returns a pair where the
-        first element is considered the output of the mathematical function to be
-        differentiated and the second element is auxiliary data. Default False.
-      return_value: (bool) optional. Indicates whether to return the value of the
-        function along with the gradient. Default False.
+        fun: callable. the scalar-valued function to be differentiated.
+        allow_int: (bool) optional. Whether to allow differentiating with respect to
+            integer valued inputs. The gradient of an integer input will have a trivial
+            vector-space dtype (float0). Default False.
+        holomorphic: (bool) optional. Whether fun is promised to be holomorphic.
+            Default False.
+        grad_states: (State, Sequence[State], Dict[str, State]) optional. The variables
+            in fun to take their gradients.
+        fun: the scalar-valued function to be differentiated.
+        argnums: (int or tuple of ints) optional. Specifies which positional
+            argument(s) to differentiate with respect to.
+        has_aux: (bool) optional. Indicates whether fun returns a pair where the
+            first element is considered the output of the mathematical function to be
+            differentiated and the second element is auxiliary data. Default False.
+        return_value: (bool) optional. Indicates whether to return the value of the
+            function along with the gradient. Default False.
+        unit_aware: (bool) optional. Whether to return the gradient in the unit-aware
+            mode. Default False.
 
     Returns:
       A function which computes the gradient of fun. The function takes the same
@@ -367,26 +348,24 @@ def grad(
     if isinstance(fun, Missing):
         def transform(fun) -> GradientTransform:
             return GradientTransform(target=fun,
-                                     transform=jax.grad,
+                                     transform=u.autograd.grad if unit_aware else jax.grad,
                                      grad_states=grad_states,
                                      argnums=argnums,
                                      return_value=return_value,
                                      has_aux=False if has_aux is None else has_aux,
                                      transform_params=dict(holomorphic=holomorphic,
-                                                           allow_int=allow_int,
-                                                           reduce_axes=reduce_axes))
+                                                           allow_int=allow_int))
 
         return transform
 
     return GradientTransform(target=fun,
-                             transform=jax.grad,
+                             transform=u.autograd.grad if unit_aware else jax.grad,
                              grad_states=grad_states,
                              argnums=argnums,
                              return_value=return_value,
                              has_aux=False if has_aux is None else has_aux,
                              transform_params=dict(holomorphic=holomorphic,
-                                                   allow_int=allow_int,
-                                                   reduce_axes=reduce_axes))
+                                                   allow_int=allow_int))
 
 
 grad.__doc__ = grad.__doc__ % _doc_of_return
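
Taken together with the tests added further down in this diff, the new `unit_aware` flag lets the gradient transform operate on `brainunit` quantities; a short usage sketch mirroring the new `test_grad1`:

    import jax.numpy as jnp
    import brainunit as u
    import brainstate as bst

    def f(x):
        return u.math.sum(x ** 2)

    x = jnp.array([1., 2., 3.]) * u.ms
    g = bst.augment.grad(f, unit_aware=True)(x)  # gradient is 2 * x and keeps the ms unit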
@@ -399,6 +378,7 @@ def vector_grad(
     argnums: Optional[Union[int, Sequence[int]]] = None,
     return_value: bool = False,
     has_aux: Optional[bool] = None,
+    unit_aware: bool = False,
 ) -> GradientTransform | Callable[[Callable], GradientTransform]:
     """Take vector-valued gradients for function ``func``.
 
@@ -410,28 +390,30 @@ def vector_grad(
     Parameters
     ----------
     func: Callable
-      Function whose gradient is to be computed.
+        Function whose gradient is to be computed.
     grad_states : optional, ArrayType, sequence of ArrayType, dict
-      The variables in ``func`` to take their gradients.
+        The variables in ``func`` to take their gradients.
     has_aux: optional, bool
-      Indicates whether ``fun`` returns a pair where the
-      first element is considered the output of the mathematical function to be
-      differentiated and the second element is auxiliary data. Default False.
+        Indicates whether ``fun`` returns a pair where the
+        first element is considered the output of the mathematical function to be
+        differentiated and the second element is auxiliary data. Default False.
     return_value : bool
-      Whether return the loss value.
+        Whether return the loss value.
     argnums: Optional, integer or sequence of integers. Specifies which
-      positional argument(s) to differentiate with respect to (default ``0``).
+        positional argument(s) to differentiate with respect to (default ``0``).
+    unit_aware: (bool) optional. Whether to return the gradient in the unit-aware
+        mode. Default False.
 
     Returns
     -------
     func : GradientTransform
-      The vector gradient function.
+        The vector gradient function.
     """
 
     if isinstance(func, Missing):
         def transform(fun) -> GradientTransform:
             return GradientTransform(target=fun,
-                                     transform=functional_vector_grad,
+                                     transform=partial(u.autograd.vector_grad, unit_aware=unit_aware),
                                      grad_states=grad_states,
                                      argnums=argnums,
                                      return_value=return_value,
@@ -441,7 +423,7 @@ def vector_grad(
 
     else:
         return GradientTransform(target=func,
-                                 transform=functional_vector_grad,
+                                 transform=partial(u.autograd.vector_grad, unit_aware=unit_aware),
                                  grad_states=grad_states,
                                  argnums=argnums,
                                  return_value=return_value,
@@ -460,6 +442,7 @@ def jacrev(
     return_value: bool = False,
     holomorphic: bool = False,
     allow_int: bool = False,
+    unit_aware: bool = False,
 ) -> GradientTransform:
     """
     Extending automatic Jacobian (reverse-mode) of ``func`` to classes.
@@ -473,25 +456,28 @@ def jacrev(
 
     Parameters
     ----------
-    fun: Function whose Jacobian is to be computed.
+    fun: Callable
+        Function whose Jacobian is to be computed.
     grad_states : optional, ArrayType, sequence of ArrayType, dict
-      The variables in ``func`` to take their gradients.
+        The variables in ``func`` to take their gradients.
     has_aux: optional, bool
-      Indicates whether ``fun`` returns a pair where the
-      first element is considered the output of the mathematical function to be
-      differentiated and the second element is auxiliary data. Default False.
+        Indicates whether ``fun`` returns a pair where the
+        first element is considered the output of the mathematical function to be
+        differentiated and the second element is auxiliary data. Default False.
     return_value : bool
-      Whether return the loss value.
+        Whether return the loss value.
     argnums: Optional, integer or sequence of integers.
-      Specifies which
-      positional argument(s) to differentiate with respect to (default ``0``).
+        Specifies which
+        positional argument(s) to differentiate with respect to (default ``0``).
     holomorphic: Optional, bool.
-      Indicates whether ``fun`` is promised to be
-      holomorphic. Default False.
+        Indicates whether ``fun`` is promised to be
+        holomorphic. Default False.
     allow_int: Optional, bool.
-      Whether to allow differentiating with
-      respect to integer valued inputs. The gradient of an integer input will
-      have a trivial vector-space dtype (float0). Default False.
+        Whether to allow differentiating with
+        respect to integer valued inputs. The gradient of an integer input will
+        have a trivial vector-space dtype (float0). Default False.
+    unit_aware: (bool) optional. Whether to return the gradient in the unit-aware
+        mode. Default False.
 
     Returns
     -------
@@ -505,7 +491,8 @@ def jacrev(
                              return_value=return_value,
                              has_aux=False if has_aux is None else has_aux,
                              transform_params=dict(holomorphic=holomorphic,
-                                                   allow_int=allow_int))
+                                                   allow_int=allow_int,
+                                                   unit_aware=unit_aware, ))
 
 
 jacrev.__doc__ = jacrev.__doc__ % _doc_of_return
@@ -521,6 +508,7 @@ def jacfwd(
     has_aux: Optional[bool] = None,
     return_value: bool = False,
     holomorphic: bool = False,
+    unit_aware: bool = False,
 ) -> GradientTransform:
     """Extending automatic Jacobian (forward-mode) of ``func`` to classes.
 
@@ -542,9 +530,11 @@ def jacfwd(
     return_value : bool
       Whether return the loss value.
     argnums: Optional, integer or sequence of integers. Specifies which
-      positional argument(s) to differentiate with respect to (default ``0``).
+        positional argument(s) to differentiate with respect to (default ``0``).
     holomorphic: Optional, bool. Indicates whether ``fun`` is promised to be
-      holomorphic. Default False.
+        holomorphic. Default False.
+    unit_aware: (bool) optional. Whether to return the gradient in the unit-aware
+        mode. Default False.
 
     Returns
     -------
@@ -558,7 +548,8 @@ def jacfwd(
                              argnums=argnums,
                              return_value=return_value,
                              has_aux=False if has_aux is None else has_aux,
-                             transform_params=dict(holomorphic=holomorphic))
+                             transform_params=dict(holomorphic=holomorphic,
+                                                   unit_aware=unit_aware))
 
 
 jacfwd.__doc__ = jacfwd.__doc__ % _doc_of_return
@@ -569,9 +560,10 @@ def hessian(
     func: Callable,
     grad_states: Optional[Union[State, Sequence[State], Dict[str, State]]] = None,
     argnums: Optional[Union[int, Sequence[int]]] = None,
-    has_aux: bool = False,
     return_value: bool = False,
     holomorphic: bool = False,
+    has_aux: Optional[bool] = None,
+    unit_aware: bool = False,
 ) -> GradientTransform:
     """
     Hessian of ``func`` as a dense array.
@@ -593,6 +585,12 @@ def hessian(
       Indicates whether ``fun`` is promised to be holomorphic. Default False.
     return_value : bool
       Whether return the hessian values.
+    has_aux: Optional, bool
+        Indicates whether ``fun`` returns a pair where the first element is considered
+        the output of the mathematical function to be differentiated and the second
+        element is auxiliary data. Default False.
+    unit_aware: (bool) optional. Whether to return the gradient in the unit-aware
+        mode. Default False.
 
     Returns
     -------
@@ -600,7 +598,7 @@ def hessian(
       The transformed object.
     """
     return GradientTransform(target=func,
-                             transform=jax.hessian,
+                             transform=u.autograd.hessian if unit_aware else jax.hessian,
                              grad_states=grad_states,
                              argnums=argnums,
                              return_value=return_value,
@@ -19,6 +19,7 @@ from __future__ import annotations
 import unittest
 from pprint import pprint
 
+import brainunit as u
 import jax
 import jax.numpy as jnp
 import pytest
@@ -608,6 +609,8 @@ class TestClassFuncJacobian(unittest.TestCase):
         br = bst.augment.jacrev(t, grad_states=[t.x, t.y])()
         self.assertTrue((br[0] == _jr[0]).all())
         self.assertTrue((br[1] == _jr[1]).all())
+
+
     #
     # def test_jacfwd1(self):
     #   def f1(x, y):
@@ -1191,3 +1194,97 @@ class TestClassFuncJacobian(unittest.TestCase):
     #     self.assertTrue(file.read().strip() == expect_res.strip())
     #
     #
+
+
+class TestUnitAwareGrad(unittest.TestCase):
+    def test_grad1(self):
+        def f(x):
+            return u.math.sum(x ** 2)
+
+        x = jnp.array([1., 2., 3.]) * u.ms
+        g = bst.augment.grad(f, unit_aware=True)(x)
+        self.assertTrue(u.math.allclose(g, 2 * x))
+
+    def test_vector_grad1(self):
+        def f(x):
+            return x ** 3
+
+        x = jnp.array([1., 2., 3.]) * u.ms
+        g = bst.augment.vector_grad(f, unit_aware=True)(x)
+        self.assertTrue(u.math.allclose(g, 3 * x ** 2))
+
+    def test_jacrev1(self):
+        def f(x, y):
+            return u.math.asarray([x[0] * y[0],
+                                   5 * x[2] * y[1],
+                                   4 * x[1] ** 2, ])
+
+        _x = jnp.array([1., 2., 3.]) * u.ms
+        _y = jnp.array([10., 5.]) * u.ms
+
+        g = bst.augment.jacrev(f, unit_aware=True, argnums=(0, 1))(_x, _y)
+        self.assertTrue(
+            u.math.allclose(
+                g[0],
+                u.math.asarray([
+                    [10., 0., 0.],
+                    [0., 0., 25.],
+                    [0., 16., 0.]
+                ]) * u.ms
+            )
+        )
+
+        self.assertTrue(
+            u.math.allclose(
+                g[1],
+                u.math.asarray([
+                    [1., 0.],
+                    [0., 15.],
+                    [0., 0.]
+                ]) * u.ms
+            )
+        )
+
+    def test_jacfwd1(self):
+        def f(x, y):
+            return u.math.asarray([x[0] * y[0],
+                                   5 * x[2] * y[1],
+                                   4 * x[1] ** 2, ])
+
+        _x = jnp.array([1., 2., 3.]) * u.ms
+        _y = jnp.array([10., 5.]) * u.ms
+
+        g = bst.augment.jacfwd(f, unit_aware=True, argnums=(0, 1))(_x, _y)
+        self.assertTrue(
+            u.math.allclose(
+                g[0],
+                u.math.asarray([
+                    [10., 0., 0.],
+                    [0., 0., 25.],
+                    [0., 16., 0.]
+                ]) * u.ms
+            )
+        )
+
+        self.assertTrue(
+            u.math.allclose(
+                g[1],
+                u.math.asarray([
+                    [1., 0.],
+                    [0., 15.],
+                    [0., 0.]
+                ]) * u.ms
+            )
+        )
+
+    def test_hessian(self):
+        unit = u.ms
+
+        def scalar_function(x):
+            return x ** 3 + 3 * x * unit * unit + 2 * unit * unit * unit
+
+        hess = bst.augment.hessian(scalar_function, unit_aware=True)
+        x = jnp.array(1.0) * unit
+        res = hess(x)
+        expected_hessian = jnp.array([[6.0]]) * unit
+        assert u.math.allclose(res, expected_hessian)
@@ -14,14 +14,14 @@
 # ==============================================================================
 
 
-from ._csr import *
-from ._csr import __all__ as __all_csr
-from ._fixed_probability import *
-from ._fixed_probability import __all__ as __all_fixed_probability
-from ._linear import *
+from ._csr_mv import *
+from ._csr_mv import __all__ as __all_csr
+from ._fixedprob_mv import *
+from ._fixedprob_mv import __all__ as __all_fixed_probability
+from ._linear_mv import *
 from ._xla_custom_op import *
 from ._xla_custom_op import __all__ as __all_xla_custom_op
-from ._linear import __all__ as __all_linear
+from ._linear_mv import __all__ as __all_linear
 
 __all__ = __all_fixed_probability + __all_linear + __all_csr + __all_xla_custom_op
 del __all_fixed_probability, __all_linear, __all_csr, __all_xla_custom_op
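
The event kernels move from `_csr`/`_fixed_probability`/`_linear` to `_csr_mv`/`_fixedprob_mv`/`_linear_mv`. Code that imported the private modules directly has to follow the rename; importing through the package namespace is unaffected, since `__all__` is still aggregated and re-exported. A sketch, assuming `Linear` stays exported from `brainstate.event`:

    import brainstate as bst

    # stable: the public name is re-exported via brainstate.event.__all__
    Linear = bst.event.Linear

    # brittle: the private module path changed in this release
    # old: from brainstate.event._linear import Linear
    # new: from brainstate.event._linear_mv import Linear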
@@ -0,0 +1,14 @@
+# Copyright 2024 BDP Ecosystem Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
@@ -20,7 +20,7 @@ import jax.numpy as jnp
 from absl.testing import parameterized
 
 import brainstate as bst
-from brainstate.event._linear import Linear
+from brainstate.event._linear_mv import Linear
 
 
 class TestEventLinear(parameterized.TestCase):
@@ -17,14 +17,8 @@ from jaxlib.hlo_helpers import custom_call
 
 numba_installed = importlib.util.find_spec('numba') is not None
 
-if numba_installed:
-    import numba  # pylint: disable=import-error
-    from numba import types, carray, cfunc  # pylint: disable=import-error
-    from numba.core.dispatcher import Dispatcher  # pylint: disable=import-error
-else:
-    numba = None
-
 __all__ = [
+    'defjvp',
     'XLACustomOp',
 ]
 
@@ -93,9 +87,12 @@ def _numba_mlir_cpu_translation_rule(
     *ins,
     **kwargs
 ):
-    if numba is None:
+    if not numba_installed:
         raise ImportError('Numba is required to compile the CPU kernel for the custom operator.')
 
+    from numba import types, carray, cfunc  # pylint: disable=import-error
+    from numba.core.dispatcher import Dispatcher  # pylint: disable=import-error
+
     if not isinstance(kernel, Dispatcher):
         kernel = kernel(**kwargs)
     assert isinstance(kernel, Dispatcher), f'The kernel should be a Numba dispatcher. But we got {kernel}'
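
The two hunks above replace the module-level Numba import with a deferred import inside the CPU translation rule, so importing the package no longer requires Numba; it is only needed when a CPU kernel is actually compiled. The pattern in isolation (illustrative names, not the library's code):

    import importlib.util

    # cheap availability check that does not import numba itself
    numba_installed = importlib.util.find_spec('numba') is not None

    def lower_cpu_kernel(py_kernel):
        if not numba_installed:
            raise ImportError('Numba is required to compile the CPU kernel.')
        # deferred: only executed when a CPU lowering is requested
        from numba import cfunc, carray, types  # noqa: F401
        return py_kernel  # placeholder: a real rule would compile py_kernel with numba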
@@ -88,7 +88,7 @@ class _DropoutNd(ElementWiseBlock):
         name: Optional[str] = None
     ) -> None:
         super().__init__(name=name)
-        assert 0. <= prob < 1., f"Dropout probability must be in the range [0, 1). But got {prob}."
+        assert 0. <= prob <= 1., f"Dropout probability must be in the range [0, 1]. But got {prob}."
         self.prob = prob
         self.channel_axis = channel_axis
 
@@ -112,7 +112,7 @@ class _DropoutNd(ElementWiseBlock):
         fit_phase = environ.get('fit', desc='Whether this is a fitting process. Bool.')
 
         # generate mask
-        if fit_phase:
+        if fit_phase and self.prob < 1.:
             dtype = u.math.get_dtype(x)
             keep_mask = jnp.broadcast_to(random.bernoulli(self.prob, mask_shape), x.shape)
             return jnp.where(keep_mask,
@@ -396,7 +396,7 @@ class DropoutFixed(ElementWiseBlock):
         name: Optional[str] = None
     ) -> None:
         super().__init__(name=name)
-        assert 0. <= prob < 1., f"Dropout probability must be in the range [0, 1). But got {prob}."
+        assert 0. <= prob <= 1., f"Dropout probability must be in the range [0, 1]. But got {prob}."
         self.prob = prob
         self.in_size = in_size
         self.out_size = in_size
@@ -407,7 +407,7 @@ class DropoutFixed(ElementWiseBlock):
     def update(self, x):
         dtype = u.math.get_dtype(x)
         fit_phase = environ.get('fit', desc='Whether this is a fitting process. Bool.')
-        if fit_phase:
+        if fit_phase and self.prob < 1.:
            if self.mask.value.shape != x.shape:
                 raise ValueError(f"Input shape {x.shape} does not match the mask shape {self.mask.value.shape}. "
                                  f"Please call `init_state()` method first.")
@@ -79,7 +79,7 @@ class Linear(Module):
         weight = params['weight']
         if self.w_mask is not None:
             weight = weight * self.w_mask
-        y = u.math.dot(x, weight)
+        y = u.linalg.dot(x, weight)
         if 'bias' in params:
             y = y + params['bias']
         return y
@@ -192,7 +192,7 @@ class ScaledWSLinear(Module):
         w = functional.weight_standardization(w, self.eps, params.get('gain', None))
         if self.w_mask is not None:
             w = w * self.w_mask
-        y = u.math.dot(x, w)
+        y = u.linalg.dot(x, w)
         if 'bias' in params:
             y = y + params['bias']
         return y
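
The last two hunks swap `u.math.dot` for `u.linalg.dot`; presumably this is the same unit-aware dot product relocated to brainunit's `linalg` namespace, so units still propagate through the matrix product. A quick sketch (unit choice is arbitrary, for illustration only):

    import jax.numpy as jnp
    import brainunit as u

    x = jnp.array([1., 2., 3.]) * u.ms   # shape (3,)
    w = jnp.ones((3, 2)) * u.ms          # shape (3, 2)
    y = u.linalg.dot(x, w)               # shape (2,), carries ms ** 2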