brainstate-0.0.2.post20241009-py2.py3-none-any.whl → brainstate-0.1.0-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. brainstate/__init__.py +31 -11
  2. brainstate/_state.py +760 -316
  3. brainstate/_state_test.py +41 -12
  4. brainstate/_utils.py +31 -4
  5. brainstate/augment/__init__.py +40 -0
  6. brainstate/augment/_autograd.py +608 -0
  7. brainstate/augment/_autograd_test.py +1193 -0
  8. brainstate/augment/_eval_shape.py +102 -0
  9. brainstate/augment/_eval_shape_test.py +40 -0
  10. brainstate/augment/_mapping.py +525 -0
  11. brainstate/augment/_mapping_test.py +210 -0
  12. brainstate/augment/_random.py +99 -0
  13. brainstate/{transform → compile}/__init__.py +25 -13
  14. brainstate/compile/_ad_checkpoint.py +204 -0
  15. brainstate/compile/_ad_checkpoint_test.py +51 -0
  16. brainstate/compile/_conditions.py +259 -0
  17. brainstate/compile/_conditions_test.py +221 -0
  18. brainstate/compile/_error_if.py +94 -0
  19. brainstate/compile/_error_if_test.py +54 -0
  20. brainstate/compile/_jit.py +314 -0
  21. brainstate/compile/_jit_test.py +143 -0
  22. brainstate/compile/_loop_collect_return.py +516 -0
  23. brainstate/compile/_loop_collect_return_test.py +59 -0
  24. brainstate/compile/_loop_no_collection.py +185 -0
  25. brainstate/compile/_loop_no_collection_test.py +51 -0
  26. brainstate/compile/_make_jaxpr.py +756 -0
  27. brainstate/compile/_make_jaxpr_test.py +134 -0
  28. brainstate/compile/_progress_bar.py +111 -0
  29. brainstate/compile/_unvmap.py +159 -0
  30. brainstate/compile/_util.py +147 -0
  31. brainstate/environ.py +408 -381
  32. brainstate/environ_test.py +34 -32
  33. brainstate/{nn/event → event}/__init__.py +6 -6
  34. brainstate/event/_csr.py +308 -0
  35. brainstate/event/_csr_test.py +118 -0
  36. brainstate/event/_fixed_probability.py +271 -0
  37. brainstate/event/_fixed_probability_test.py +128 -0
  38. brainstate/event/_linear.py +219 -0
  39. brainstate/event/_linear_test.py +112 -0
  40. brainstate/{nn/event → event}/_misc.py +7 -7
  41. brainstate/functional/_activations.py +521 -511
  42. brainstate/functional/_activations_test.py +300 -300
  43. brainstate/functional/_normalization.py +43 -43
  44. brainstate/functional/_others.py +15 -15
  45. brainstate/functional/_spikes.py +49 -49
  46. brainstate/graph/__init__.py +33 -0
  47. brainstate/graph/_graph_context.py +443 -0
  48. brainstate/graph/_graph_context_test.py +65 -0
  49. brainstate/graph/_graph_convert.py +246 -0
  50. brainstate/graph/_graph_node.py +300 -0
  51. brainstate/graph/_graph_node_test.py +75 -0
  52. brainstate/graph/_graph_operation.py +1746 -0
  53. brainstate/graph/_graph_operation_test.py +724 -0
  54. brainstate/init/_base.py +28 -10
  55. brainstate/init/_generic.py +175 -172
  56. brainstate/init/_random_inits.py +470 -415
  57. brainstate/init/_random_inits_test.py +150 -0
  58. brainstate/init/_regular_inits.py +66 -69
  59. brainstate/init/_regular_inits_test.py +51 -0
  60. brainstate/mixin.py +236 -244
  61. brainstate/mixin_test.py +44 -46
  62. brainstate/nn/__init__.py +26 -51
  63. brainstate/nn/_collective_ops.py +199 -0
  64. brainstate/nn/_dyn_impl/__init__.py +46 -0
  65. brainstate/nn/_dyn_impl/_dynamics_neuron.py +290 -0
  66. brainstate/nn/_dyn_impl/_dynamics_neuron_test.py +162 -0
  67. brainstate/nn/_dyn_impl/_dynamics_synapse.py +320 -0
  68. brainstate/nn/_dyn_impl/_dynamics_synapse_test.py +132 -0
  69. brainstate/nn/_dyn_impl/_inputs.py +154 -0
  70. brainstate/nn/{_projection/__init__.py → _dyn_impl/_projection_alignpost.py} +6 -13
  71. brainstate/nn/_dyn_impl/_rate_rnns.py +400 -0
  72. brainstate/nn/_dyn_impl/_rate_rnns_test.py +64 -0
  73. brainstate/nn/_dyn_impl/_readout.py +128 -0
  74. brainstate/nn/_dyn_impl/_readout_test.py +54 -0
  75. brainstate/nn/_dynamics/__init__.py +37 -0
  76. brainstate/nn/_dynamics/_dynamics_base.py +631 -0
  77. brainstate/nn/_dynamics/_dynamics_base_test.py +79 -0
  78. brainstate/nn/_dynamics/_projection_base.py +346 -0
  79. brainstate/nn/_dynamics/_state_delay.py +453 -0
  80. brainstate/nn/_dynamics/_synouts.py +161 -0
  81. brainstate/nn/_dynamics/_synouts_test.py +58 -0
  82. brainstate/nn/_elementwise/__init__.py +22 -0
  83. brainstate/nn/_elementwise/_dropout.py +418 -0
  84. brainstate/nn/_elementwise/_dropout_test.py +100 -0
  85. brainstate/nn/_elementwise/_elementwise.py +1122 -0
  86. brainstate/nn/_elementwise/_elementwise_test.py +171 -0
  87. brainstate/nn/_exp_euler.py +97 -0
  88. brainstate/nn/_exp_euler_test.py +36 -0
  89. brainstate/nn/_interaction/__init__.py +32 -0
  90. brainstate/nn/_interaction/_connections.py +726 -0
  91. brainstate/nn/_interaction/_connections_test.py +254 -0
  92. brainstate/nn/_interaction/_embedding.py +59 -0
  93. brainstate/nn/_interaction/_normalizations.py +388 -0
  94. brainstate/nn/_interaction/_normalizations_test.py +75 -0
  95. brainstate/nn/_interaction/_poolings.py +1179 -0
  96. brainstate/nn/_interaction/_poolings_test.py +219 -0
  97. brainstate/nn/_module.py +328 -0
  98. brainstate/nn/_module_test.py +211 -0
  99. brainstate/nn/metrics.py +309 -309
  100. brainstate/optim/__init__.py +14 -2
  101. brainstate/optim/_base.py +66 -0
  102. brainstate/optim/_lr_scheduler.py +363 -400
  103. brainstate/optim/_lr_scheduler_test.py +25 -24
  104. brainstate/optim/_optax_optimizer.py +103 -176
  105. brainstate/optim/_optax_optimizer_test.py +41 -1
  106. brainstate/optim/_sgd_optimizer.py +950 -1025
  107. brainstate/random/_rand_funs.py +3269 -3268
  108. brainstate/random/_rand_funs_test.py +568 -0
  109. brainstate/random/_rand_seed.py +149 -117
  110. brainstate/random/_rand_seed_test.py +50 -0
  111. brainstate/random/_rand_state.py +1360 -1318
  112. brainstate/random/_random_for_unit.py +13 -13
  113. brainstate/surrogate.py +1262 -1243
  114. brainstate/{nn/_projection/_utils.py → transform.py} +1 -2
  115. brainstate/typing.py +157 -130
  116. brainstate/util/__init__.py +52 -0
  117. brainstate/util/_caller.py +100 -0
  118. brainstate/util/_dict.py +734 -0
  119. brainstate/util/_dict_test.py +160 -0
  120. brainstate/util/_error.py +28 -0
  121. brainstate/util/_filter.py +178 -0
  122. brainstate/util/_others.py +497 -0
  123. brainstate/util/_pretty_repr.py +208 -0
  124. brainstate/util/_scaling.py +260 -0
  125. brainstate/util/_struct.py +524 -0
  126. brainstate/util/_tracers.py +75 -0
  127. brainstate/{_visualization.py → util/_visualization.py} +16 -16
  128. {brainstate-0.0.2.post20241009.dist-info → brainstate-0.1.0.dist-info}/METADATA +11 -11
  129. brainstate-0.1.0.dist-info/RECORD +135 -0
  130. brainstate/_module.py +0 -1637
  131. brainstate/_module_test.py +0 -207
  132. brainstate/nn/_base.py +0 -251
  133. brainstate/nn/_connections.py +0 -686
  134. brainstate/nn/_dynamics.py +0 -426
  135. brainstate/nn/_elementwise.py +0 -1438
  136. brainstate/nn/_embedding.py +0 -66
  137. brainstate/nn/_misc.py +0 -133
  138. brainstate/nn/_normalizations.py +0 -389
  139. brainstate/nn/_others.py +0 -101
  140. brainstate/nn/_poolings.py +0 -1229
  141. brainstate/nn/_poolings_test.py +0 -231
  142. brainstate/nn/_projection/_align_post.py +0 -546
  143. brainstate/nn/_projection/_align_pre.py +0 -599
  144. brainstate/nn/_projection/_delta.py +0 -241
  145. brainstate/nn/_projection/_vanilla.py +0 -101
  146. brainstate/nn/_rate_rnns.py +0 -410
  147. brainstate/nn/_readout.py +0 -136
  148. brainstate/nn/_synouts.py +0 -166
  149. brainstate/nn/event/csr.py +0 -312
  150. brainstate/nn/event/csr_test.py +0 -118
  151. brainstate/nn/event/fixed_probability.py +0 -276
  152. brainstate/nn/event/fixed_probability_test.py +0 -127
  153. brainstate/nn/event/linear.py +0 -220
  154. brainstate/nn/event/linear_test.py +0 -111
  155. brainstate/random/random_test.py +0 -593
  156. brainstate/transform/_autograd.py +0 -585
  157. brainstate/transform/_autograd_test.py +0 -1181
  158. brainstate/transform/_conditions.py +0 -334
  159. brainstate/transform/_conditions_test.py +0 -220
  160. brainstate/transform/_error_if.py +0 -94
  161. brainstate/transform/_error_if_test.py +0 -55
  162. brainstate/transform/_jit.py +0 -265
  163. brainstate/transform/_jit_test.py +0 -118
  164. brainstate/transform/_loop_collect_return.py +0 -502
  165. brainstate/transform/_loop_no_collection.py +0 -170
  166. brainstate/transform/_make_jaxpr.py +0 -739
  167. brainstate/transform/_make_jaxpr_test.py +0 -131
  168. brainstate/transform/_mapping.py +0 -109
  169. brainstate/transform/_progress_bar.py +0 -111
  170. brainstate/transform/_unvmap.py +0 -143
  171. brainstate/util.py +0 -746
  172. brainstate-0.0.2.post20241009.dist-info/RECORD +0 -87
  173. {brainstate-0.0.2.post20241009.dist-info → brainstate-0.1.0.dist-info}/LICENSE +0 -0
  174. {brainstate-0.0.2.post20241009.dist-info → brainstate-0.1.0.dist-info}/WHEEL +0 -0
  175. {brainstate-0.0.2.post20241009.dist-info → brainstate-0.1.0.dist-info}/top_level.txt +0 -0
brainstate/augment/_mapping_test.py
@@ -0,0 +1,210 @@
+ # Copyright 2024 BDP Ecosystem Limited. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ from __future__ import annotations
+
+ import unittest
+
+ import jax.core
+ import jax.numpy as jnp
+
+ import brainstate as bst
+
+
+ class TestVmap(unittest.TestCase):
+     def test_vmap_return_keep_reference_return(self):
+         @bst.augment.vmap(in_axes=0, out_axes=0)
+         def create_model(key):
+             bst.random.set_key(key)
+             m1 = bst.nn.Linear(2, 3)
+
+             m2 = bst.nn.Linear(3, 4)
+             m2.a = m1
+             m3 = bst.nn.Linear(3, 5)
+             m3.a = m1
+             self.assertTrue(id(m2.a) == id(m3.a))
+             return m2, m3
+
+         m2, m3 = create_model(bst.random.split_key(10))
+         self.assertTrue(id(m2.a) == id(m3.a))
+         jax.core.concrete_or_error(None, bst.random.DEFAULT.value)
+
+     def test_vmap_return_keep_reference_pass_into_fun(self):
+         @bst.augment.vmap(in_axes=(None, None, 0), out_axes=0)
+         def run_model(m2, m3, x):
+             self.assertTrue(id(m2.a) == id(m3.a))
+             self.assertTrue(id(m2) != m2_id)
+             self.assertTrue(id(m3) != m3_id)
+             return m2(x), m3(x)
+
+         m1 = bst.nn.Linear(2, 3)
+         m2 = bst.nn.Linear(4, 3)
+         m2.a = m1
+         m3 = bst.nn.Linear(4, 5)
+         m3.a = m1
+         m3_id = id(m3)
+         m2_id = id(m2)
+         r1, r2 = run_model(m2, m3, jnp.ones((4, 3, 4)))
+
+     def test_vmap_set_key(self):
+         @bst.augment.vmap(in_axes=0, out_axes=0)
+         def create_model(key):
+             bst.random.set_key(key)
+             return bst.nn.Linear(2, 3)
+
+         model = create_model(bst.random.split_keys(10))
+         print(model.weight.value_call(jnp.shape))
+         model.weight.value_call(lambda x: jax.core.concrete_or_error(None, x))
+         bst.random.seed()
+
+     def test_vmap_input(self):
+         model = bst.nn.Linear(2, 3)
+         print(id(model), id(model.weight))
+         model_id = id(model)
+         weight_id = id(model.weight)
+
+         x = jnp.ones((5, 2))
+
+         @bst.augment.vmap
+         def forward(x):
+             self.assertTrue(id(model) == model_id)
+             self.assertTrue(id(model.weight) == weight_id)
+             return model(x)
+
+         y = forward(x)
+         self.assertTrue(y.shape == (5, 3))
+         print(y.shape)
+         print(model.weight.value_call(jnp.shape))
+         print(model.weight.value)
+
+     def test_vmap_model(self):
+         model = bst.nn.Linear(2, 3)
+         model_id = id(model)
+         weight_id = id(model.weight)
+         print(id(model), id(model.weight))
+         x = jnp.ones((5, 2))
+
+         @bst.augment.vmap(in_axes=(None, 0), out_axes=0)
+         def forward(model, x):
+             self.assertTrue(id(model) != model_id)
+             self.assertTrue(id(model.weight) != weight_id)
+             print(id(model), id(model.weight))
+             return model(x)
+
+         y = forward(model, x)
+         print(y.shape)
+         print(model.weight.value_call(jnp.shape))
+         print(model.weight.value)
+
+     def test_vmap1(self):
+         model = bst.nn.Linear(2, 3)
+         x = jnp.ones((5, 2))
+
+         @bst.augment.vmap(in_axes=(None, 0), out_axes=0)
+         def forward(model, x):
+             return model(x)
+
+         y = forward(model, x)
+         print(y.shape)
+
+     def test_vmap2(self):
+         class LinearEnsemble(bst.nn.Module):
+             def __init__(self, num):
+                 super().__init__()
+                 self.w = bst.ParamState(bst.random.random((num, 2, 3)))
+
+         model = LinearEnsemble(5)
+         x = jnp.ones((2,))
+
+         @bst.augment.vmap(in_axes=(0, None), out_axes=0)
+         def forward(model, x):
+             return jnp.dot(x, model.w.value)
+
+         y = forward(model, x)
+         print(y.shape)
+
+     def test_vmap3(self):
+         class Foo(bst.nn.Module):
+             def __init__(self):
+                 super().__init__()
+                 self.a = bst.ParamState(jnp.arange(4))
+                 self.b = bst.ShortTermState(jnp.arange(4))
+
+         state_axes = bst.augment.StateAxes({bst.ParamState: 0, bst.ShortTermState: None})
+
+         @bst.augment.vmap(in_axes=(state_axes,), out_axes=0)
+         def mul(foo):
+             return foo.a.value * foo.b.value
+
+         foo = Foo()
+         y = mul(foo)
+         print(y.shape)
+
+     def test_vmap4(self):
+         class Foo(bst.nn.Module):
+             def __init__(self):
+                 super().__init__()
+                 self.a = bst.ParamState(jnp.arange(4))
+                 self.b = bst.ShortTermState(jnp.arange(4))
+
+             def __call__(self):
+                 self.b.value = self.a.value * self.b.value
+
+         @bst.augment.vmap
+         def mul(foo):
+             foo()
+             return foo
+
+         foo = Foo()
+         with bst.StateTraceStack() as trace:
+             m = mul(foo)
+
+         self.assertTrue(m is foo)
+         print(m.a.value, foo.a.value)
+         self.assertTrue(jnp.allclose(m.a.value, foo.a.value))
+         print(m.b.value, foo.b.value)
+         self.assertTrue(jnp.allclose(m.b.value, foo.b.value))
+         print(trace.get_write_states())
+         self.assertTrue(len(trace.get_write_states()) == 1)
+         print(trace.get_read_states())
+         self.assertTrue(len(trace.get_read_states()) == 2)
+
+     def test_vmap5(self):
+         class Foo(bst.nn.Module):
+             def __init__(self):
+                 super().__init__()
+                 self.a = bst.ParamState(jnp.arange(4))
+                 self.b = bst.ShortTermState(jnp.arange(4))
+
+             def __call__(self):
+                 self.b.value = self.a.value * self.b.value
+
+         @bst.augment.vmap
+         def mul(foo):
+             foo()
+
+         foo = Foo()
+         with bst.StateTraceStack() as trace:
+             mul(foo)
+
+         print(foo.a.value)
+         print(foo.b.value)
+         self.assertTrue(jnp.allclose(foo.a.value, jnp.arange(4)))
+         self.assertTrue(jnp.allclose(foo.b.value, jnp.arange(4) * jnp.arange(4)))
+
+         print(trace.get_write_states())
+         print(trace.get_read_states())
+
+
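The tests above exercise the new state-aware `vmap` from `brainstate.augment`. As a recap of the `StateAxes` pattern they rely on, here is a minimal standalone sketch (illustrative only, reusing the names from `test_vmap3`; it is not part of the diff):

    import jax.numpy as jnp
    import brainstate as bst

    class Foo(bst.nn.Module):
        def __init__(self):
            super().__init__()
            self.a = bst.ParamState(jnp.arange(4))
            self.b = bst.ShortTermState(jnp.arange(4))

    # StateAxes picks a vmap axis per state type: ParamState is mapped
    # over axis 0, ShortTermState is shared across all mapped instances.
    state_axes = bst.augment.StateAxes({bst.ParamState: 0, bst.ShortTermState: None})

    @bst.augment.vmap(in_axes=(state_axes,), out_axes=0)
    def mul(foo):
        return foo.a.value * foo.b.value

    y = mul(Foo())  # `a` is vectorized, `b` is broadcast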
brainstate/augment/_random.py
@@ -0,0 +1,99 @@
+ # Copyright 2024 BDP Ecosystem Limited. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ from __future__ import annotations
+
+ import functools
+ from typing import Callable, Sequence, Union
+
+ from brainstate.random import DEFAULT, RandomState
+ from brainstate.typing import Missing
+
+ __all__ = [
+     'restore_rngs'
+ ]
+
+
+ class RngRestore:
+     """
+     Back up and restore the random state of a sequence of RandomState instances.
+     """
+
+     def __init__(self, rngs: Sequence[RandomState]):
+         self.rngs: Sequence[RandomState] = rngs
+         self.rng_keys = []
+
+     def backup(self):
+         """
+         Back up the current random key of the RandomState instances.
+         """
+         self.rng_keys = [rng.value for rng in self.rngs]
+
+     def restore(self):
+         """
+         Restore the random key of the RandomState instances.
+         """
+         for rng, key in zip(self.rngs, self.rng_keys):
+             rng.restore_value(key)
+         self.rng_keys = []
+
+
+ def _rng_backup(
+     fn: Callable,
+     rngs: Union[RandomState, Sequence[RandomState]]
+ ) -> Callable:
+     rng_restorer = RngRestore(rngs)
+
+     @functools.wraps(fn)
+     def wrapper(*args, **kwargs):
+         # back up the random state
+         rng_restorer.backup()
+         # call the function
+         out = fn(*args, **kwargs)
+         # restore the random state
+         rng_restorer.restore()
+         return out
+
+     return wrapper
+
+
+ def restore_rngs(
+     fn: Callable = Missing(),
+     rngs: Union[RandomState, Sequence[RandomState]] = DEFAULT,
+ ) -> Callable:
+     """
+     Back up the current random state and restore it after the function call.
+
+     Parameters
+     ----------
+     fn : Callable, optional
+         The function to be wrapped.
+     rngs : Union[RandomState, Sequence[RandomState]]
+         The random state to be backed up and restored. If not provided, the
+         default RandomState instance will be used.
+
+     Returns
+     -------
+     Callable
+         The wrapped function.
+     """
+     if isinstance(fn, Missing):
+         return functools.partial(restore_rngs, rngs=rngs)
+
+     if isinstance(rngs, RandomState):
+         rngs = [rngs]
+     assert isinstance(rngs, Sequence), 'rngs must be a RandomState or a sequence of RandomState instances.'
+     for rng in rngs:
+         assert isinstance(rng, RandomState), 'rngs must be a RandomState or a sequence of RandomState instances.'
+     return _rng_backup(fn, rngs=rngs)
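A usage sketch for the new `restore_rngs` helper (assuming it is re-exported as `brainstate.augment.restore_rngs`; `noisy_init` is a hypothetical function, not from the diff):

    import brainstate as bst

    @bst.augment.restore_rngs  # defaults to the global DEFAULT RandomState
    def noisy_init(shape):
        return bst.random.normal(size=shape)

    key_before = bst.random.DEFAULT.value
    w = noisy_init((2, 3))
    # Samples were drawn inside noisy_init, but the global key was restored
    # afterwards, so bst.random.DEFAULT.value equals key_before again.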
brainstate/{transform → compile}/__init__.py
@@ -14,11 +14,11 @@
  # ==============================================================================

  """
- This module contains the functions for the transformation of the brain data.
+ This module contains the functions for the compilation of JAX code.
  """

- from ._autograd import *
- from ._autograd import __all__ as _gradients_all
+ from ._ad_checkpoint import *
+ from ._ad_checkpoint import __all__ as _ad_checkpoint_all
  from ._conditions import *
  from ._conditions import __all__ as _conditions_all
  from ._error_if import *
@@ -26,20 +26,32 @@ from ._error_if import __all__ as _jit_error_all
  from ._jit import *
  from ._jit import __all__ as _jit_all
  from ._loop_collect_return import *
- from ._loop_collect_return import __all__ as _loops_all
+ from ._loop_collect_return import __all__ as _loops_collection
  from ._loop_no_collection import *
- from ._loop_no_collection import __all__ as _loops_no_collection_all
+ from ._loop_no_collection import __all__ as _loops_no_collection
  from ._make_jaxpr import *
  from ._make_jaxpr import __all__ as _make_jaxpr_all
- from ._mapping import *
- from ._mapping import __all__ as _mapping_all
  from ._progress_bar import *
  from ._progress_bar import __all__ as _progress_bar_all

- __all__ = (_gradients_all + _jit_error_all + _conditions_all + _loops_all +
-            _make_jaxpr_all + _jit_all + _progress_bar_all + _loops_no_collection_all +
-            _mapping_all)
+ __all__ = (
+     _jit_error_all
+     + _conditions_all
+     + _make_jaxpr_all
+     + _jit_all
+     + _progress_bar_all
+     + _loops_collection
+     + _loops_no_collection
+     + _ad_checkpoint_all
+ )

- del (_gradients_all, _jit_error_all, _conditions_all, _loops_all,
-      _make_jaxpr_all, _jit_all, _progress_bar_all, _loops_no_collection_all,
-      _mapping_all)
+ del (
+     _jit_error_all,
+     _conditions_all,
+     _loops_collection,
+     _make_jaxpr_all,
+     _jit_all,
+     _progress_bar_all,
+     _loops_no_collection,
+     _ad_checkpoint_all
+ )
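This rename is part of the 0.1.0 split of the old `brainstate.transform` namespace, visible in the file list above: compilation-oriented transforms (`jit`, `cond`, `scan`, `remat`, ...) now live in `brainstate.compile`, while functional augmentations (`grad`, `vmap`, ...) moved to `brainstate.augment`. A brief orientation sketch (illustrative only):

    import brainstate as bst

    # was: bst.transform.jit / bst.transform.scan / bst.transform.grad
    double = bst.compile.jit(lambda x: x * 2)  # compilation transforms
    y = double(3.0)                            # -> 6.0
    # functional augmentations now live under bst.augment,
    # e.g. bst.augment.grad and bst.augment.vmap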
brainstate/compile/_ad_checkpoint.py
@@ -0,0 +1,204 @@
+ # Copyright 2024 BDP Ecosystem Limited. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ from __future__ import annotations
+
+ import functools
+ from typing import Callable, Tuple, Union
+
+ import jax
+
+ from brainstate.typing import Missing
+ from ._make_jaxpr import StatefulFunction, _ensure_index_tuple
+ from ._util import write_back_state_values
+
+ __all__ = [
+     'checkpoint',
+     'remat'
+ ]
+
+
+ def checkpoint(
+     fun: Callable = Missing(),
+     *,
+     prevent_cse: bool = True,
+     policy: Callable[..., bool] | None = None,
+     static_argnums: int | Tuple[int, ...] = (),
+ ) -> Union[Callable, Callable[[Callable], Callable]]:
+     """Make ``fun`` recompute internal linearization points when differentiated.
+
+     The :func:`jax.checkpoint` decorator, aliased to :func:`jax.remat`, provides a
+     way to trade off computation time and memory cost in the context of automatic
+     differentiation, especially with reverse-mode autodiff like :func:`jax.grad`
+     and :func:`jax.vjp` but also with :func:`jax.linearize`.
+
+     When differentiating a function in reverse-mode, by default all the
+     linearization points (e.g. inputs to elementwise nonlinear primitive
+     operations) are stored when evaluating the forward pass so that they can be
+     reused on the backward pass. This evaluation strategy can lead to a high
+     memory cost, or even to poor performance on hardware accelerators where memory
+     access is much more expensive than FLOPs.
+
+     An alternative evaluation strategy is for some of the linearization points to
+     be recomputed (i.e. rematerialized) rather than stored. This approach can
+     reduce memory usage at the cost of increased computation.
+
+     This function decorator produces a new version of ``fun`` which follows
+     the rematerialization strategy rather than the default store-everything
+     strategy. That is, it returns a new version of ``fun`` which, when
+     differentiated, doesn't store any of its intermediate linearization points.
+     Instead, these linearization points are recomputed from the function's saved
+     inputs.
+
+     See the examples below.
+
+     Args:
+       fun: Function for which the autodiff evaluation strategy is to be changed
+         from the default of storing all intermediate linearization points to
+         recomputing them. Its arguments and return value should be arrays,
+         scalars, or (nested) standard Python containers (tuple/list/dict) thereof.
+       prevent_cse: Optional, boolean keyword-only argument indicating whether to
+         prevent common subexpression elimination (CSE) optimizations in the HLO
+         generated from differentiation. This CSE prevention has costs because it
+         can foil other optimizations, and because it can incur high overheads on
+         some backends, especially GPU. The default is True because otherwise,
+         under a :func:`~jax.jit` or :func:`~jax.pmap`, CSE can defeat the purpose
+         of this decorator.
+         But in some settings, like when used inside a :func:`~jax.lax.scan`, this
+         CSE prevention mechanism is unnecessary, in which case ``prevent_cse`` can
+         be set to False.
+       static_argnums: Optional, int or sequence of ints, a keyword-only argument
+         indicating which argument values on which to specialize for tracing and
+         caching purposes. Specifying arguments as static can avoid
+         ConcretizationTypeErrors when tracing, but at the cost of more retracing
+         overheads. See the example below.
+       policy: Optional, callable keyword-only argument. It should be one of the
+         attributes of ``jax.checkpoint_policies``. The callable takes as input a
+         type-level specification of a first-order primitive application and
+         returns a boolean indicating whether the corresponding output value(s) can
+         be saved as residuals (or instead must be recomputed in the (co)tangent
+         computation if needed).
+
+     Returns:
+       A function (callable) with the same input/output behavior as ``fun`` but
+       which, when differentiated using e.g. :func:`jax.grad`, :func:`jax.vjp`, or
+       :func:`jax.linearize`, recomputes rather than stores intermediate
+       linearization points, thus potentially saving memory at the cost of extra
+       computation.
+
+     Here is a simple example:
+
+     >>> import jax
+     >>> import jax.numpy as jnp
+
+     >>> @jax.checkpoint
+     ... def g(x):
+     ...     y = jnp.sin(x)
+     ...     z = jnp.sin(y)
+     ...     return z
+     ...
+     >>> jax.value_and_grad(g)(2.0)
+     (Array(0.78907233, dtype=float32, weak_type=True), Array(-0.2556391, dtype=float32, weak_type=True))
+
+     Here, the same value is produced whether or not the :func:`jax.checkpoint`
+     decorator is present. When the decorator is not present, the values
+     ``jnp.cos(2.0)`` and ``jnp.cos(jnp.sin(2.0))`` are computed on the forward
+     pass and are stored for use in the backward pass, because they are needed
+     on the backward pass and depend only on the primal inputs. When using
+     :func:`jax.checkpoint`, the forward pass will compute only the primal outputs
+     and only the primal inputs (``2.0``) will be stored for the backward pass.
+     At that time, the value ``jnp.sin(2.0)`` is recomputed, along with the values
+     ``jnp.cos(2.0)`` and ``jnp.cos(jnp.sin(2.0))``.
+
+     While :func:`jax.checkpoint` controls what values are stored from the
+     forward-pass to be used on the backward pass, the total amount of memory
+     required to evaluate a function or its VJP depends on many additional internal
+     details of that function. Those details include which numerical primitives are
+     used, how they're composed, where jit and control flow primitives like scan
+     are used, and other factors.
+
+     The :func:`jax.checkpoint` decorator can be applied recursively to express
+     sophisticated autodiff rematerialization strategies. For example:
+
+     >>> def recursive_checkpoint(funs):
+     ...     if len(funs) == 1:
+     ...         return funs[0]
+     ...     elif len(funs) == 2:
+     ...         f1, f2 = funs
+     ...         return lambda x: f1(f2(x))
+     ...     else:
+     ...         f1 = recursive_checkpoint(funs[:len(funs)//2])
+     ...         f2 = recursive_checkpoint(funs[len(funs)//2:])
+     ...         return lambda x: f1(jax.checkpoint(f2)(x))
+     ...
+
+     If ``fun`` involves Python control flow that depends on argument values,
+     it may be necessary to use the ``static_argnums`` parameter. For example,
+     consider a boolean flag argument::
+
+         from functools import partial
+
+         @partial(jax.checkpoint, static_argnums=(1,))
+         def foo(x, is_training):
+             if is_training:
+                 ...
+             else:
+                 ...
+
+     Here, the use of ``static_argnums`` allows the ``if`` statement's condition
+     to depend on the value of ``is_training``. The cost of using
+     ``static_argnums`` is that it introduces re-tracing overheads across calls:
+     in the example, ``foo`` is re-traced every time it is called with a new value
+     of ``is_training``. In some situations, ``jax.ensure_compile_time_eval``
+     is needed as well::
+
+         @partial(jax.checkpoint, static_argnums=(1,))
+         def foo(x, y):
+             with jax.ensure_compile_time_eval():
+                 y_pos = y > 0
+             if y_pos:
+                 ...
+             else:
+                 ...
+
+     As an alternative to using ``static_argnums`` (and
+     ``jax.ensure_compile_time_eval``), it may be easier to compute some values
+     outside the :func:`jax.checkpoint`-decorated function and then close over them.
+     """
+     if isinstance(fun, Missing):
+         return lambda f: checkpoint(f, prevent_cse=prevent_cse, policy=policy, static_argnums=static_argnums)
+
+     static_argnums = _ensure_index_tuple(tuple() if static_argnums is None else static_argnums)
+     fun = StatefulFunction(fun, static_argnums=static_argnums)
+     checkpointed_fun = jax.checkpoint(fun.jaxpr_call,
+                                       prevent_cse=prevent_cse,
+                                       policy=policy,
+                                       static_argnums=tuple(i + 1 for i in static_argnums))
+
+     @functools.wraps(fun.fun)
+     def remat_fun(*args, **params):
+         # compile the function and get the state trace
+         state_trace = fun.compile_function_and_get_state_trace(*args, **params, return_only_write=True)
+         read_state_vals = state_trace.get_read_state_values()
+         # call the checkpointed function
+         write_state_vals, outs = checkpointed_fun(state_trace.get_state_values(), *args, **params)
+         # write the state values back to the states
+         write_back_state_values(state_trace, read_state_vals, write_state_vals)
+         return outs
+
+     return remat_fun
+
+
+ remat = checkpoint
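Unlike plain :func:`jax.checkpoint`, this wrapper threads brainstate `State` values through `StatefulFunction`, so stateful functions can be rematerialized as well. A minimal sketch (the loss function below is a made-up example, not from the diff):

    import jax.numpy as jnp
    import brainstate as bst

    w = bst.ParamState(jnp.ones((3,)))

    @bst.compile.checkpoint  # equivalently: bst.compile.remat
    def loss(x):
        # reads `w` through the traced state; intermediates such as the
        # tanh activations are recomputed, not stored, under reverse-mode AD
        return jnp.sum(jnp.tanh(x * w.value) ** 2)

    y = loss(jnp.arange(3.0))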
brainstate/compile/_ad_checkpoint_test.py
@@ -0,0 +1,51 @@
+ # Copyright 2024 BDP Ecosystem Limited. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ from __future__ import annotations
+
+ import jax
+ import jax.numpy as jnp
+ from absl.testing import absltest
+
+ import brainstate as bst
+
+
+ class TestRemat(absltest.TestCase):
+     def test_basic_remat(self):
+         module = bst.compile.remat(bst.nn.Linear(2, 3))
+         y = module(jnp.ones((1, 2)))
+         assert y.shape == (1, 3)
+
+     def test_remat_with_scan(self):
+         class ScanLinear(bst.nn.Module):
+             def __init__(self):
+                 super().__init__()
+                 self.linear = bst.nn.Linear(3, 3)
+
+             def __call__(self, x: jax.Array):
+                 @bst.compile.remat
+                 def fun(x: jax.Array, _):
+                     x = self.linear(x)
+                     return x, None
+
+                 return bst.compile.scan(fun, x, None, length=10)[0]
+
+         m = ScanLinear()
+
+         assert m.linear.weight.value['weight'].shape == (3, 3)
+         assert m.linear.weight.value['bias'].shape == (3,)
+
+         y = m(jnp.ones((10, 3)))
+         assert y.shape == (10, 3)