brainstate 0.1.0__py2.py3-none-any.whl → 0.1.0.post20241122__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. benchmark/COBA_2005.py +125 -0
  2. benchmark/CUBA_2005.py +149 -0
  3. brainstate/augment/_autograd.py +9 -6
  4. brainstate/event/__init__.py +4 -2
  5. brainstate/event/_csr.py +26 -18
  6. brainstate/event/_csr_benchmark.py +14 -0
  7. brainstate/event/_fixed_probability.py +589 -152
  8. brainstate/event/_fixed_probability_benchmark.py +128 -0
  9. brainstate/event/_fixed_probability_test.py +13 -10
  10. brainstate/event/_linear.py +267 -127
  11. brainstate/event/_linear_benckmark.py +82 -0
  12. brainstate/event/_linear_test.py +8 -3
  13. brainstate/event/_xla_custom_op.py +312 -0
  14. brainstate/event/_xla_custom_op_test.py +55 -0
  15. brainstate/nn/_dyn_impl/_dynamics_synapse.py +6 -11
  16. brainstate/nn/_dyn_impl/_rate_rnns.py +1 -1
  17. brainstate/nn/_dynamics/_projection_base.py +1 -1
  18. brainstate/nn/_exp_euler.py +1 -1
  19. brainstate/nn/_interaction/__init__.py +13 -4
  20. brainstate/nn/_interaction/{_connections.py → _conv.py} +0 -227
  21. brainstate/nn/_interaction/{_connections_test.py → _conv_test.py} +0 -15
  22. brainstate/nn/_interaction/_linear.py +582 -0
  23. brainstate/nn/_interaction/_linear_test.py +42 -0
  24. brainstate/optim/_lr_scheduler.py +1 -1
  25. brainstate/optim/_optax_optimizer.py +18 -0
  26. {brainstate-0.1.0.dist-info → brainstate-0.1.0.post20241122.dist-info}/METADATA +1 -1
  27. {brainstate-0.1.0.dist-info → brainstate-0.1.0.post20241122.dist-info}/RECORD +30 -21
  28. {brainstate-0.1.0.dist-info → brainstate-0.1.0.post20241122.dist-info}/top_level.txt +1 -0
  29. {brainstate-0.1.0.dist-info → brainstate-0.1.0.post20241122.dist-info}/LICENSE +0 -0
  30. {brainstate-0.1.0.dist-info → brainstate-0.1.0.post20241122.dist-info}/WHEEL +0 -0
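The headline change in this post-release is the event-driven rewrite of `brainstate.event` (a new `_xla_custom_op.py`, reworked `_fixed_probability.py` and `_linear.py`, plus the benchmark scripts listed above). As orientation before the hunks below, here is a minimal sketch of the `FixedProb` API as it is exercised by the benchmark and test code in this diff; the concrete sizes and the seed value are illustrative, not taken from the package:

    import brainstate as bst

    # Sparse, event-driven projection: 1000 pre -> 2000 post at 1% connection probability.
    layer = bst.event.FixedProb(1000, 2000, prob=0.01, weight=bst.init.Normal(), seed=0)
    spikes = bst.random.rand(1000) < 0.01   # boolean spike vector
    post = layer(spikes)                    # event-driven matvec, expected shape (2000,)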
@@ -0,0 +1,128 @@
+ # Copyright 2024 BDP Ecosystem Limited. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+
+ # n_pre: 1000, n_post: 1000, conn_prob: 0.01, spk_prob: 0.01, Linear: 0.004549980163574219 s
+ # n_pre: 1000, n_post: 1000, conn_prob: 0.01, spk_prob: 0.01, Matmul: 0.04318690299987793 s
+ # Acceleration ratio: 8.491668413330538
+ #
+ # n_pre: 1000, n_post: 10000, conn_prob: 0.01, spk_prob: 0.01, Linear: 0.005620718002319336 s
+ # n_pre: 1000, n_post: 10000, conn_prob: 0.01, spk_prob: 0.01, Matmul: 1.3311548233032227 s
+ # Acceleration ratio: 235.83003181336161
+ #
+ # n_pre: 10000, n_post: 10000, conn_prob: 0.01, spk_prob: 0.01, Linear: 0.015388727188110352 s
+ # n_pre: 10000, n_post: 10000, conn_prob: 0.01, spk_prob: 0.01, Matmul: 10.791011333465576 s
+ # Acceleration ratio: 700.2283213262065
+ #
+ # n_pre: 10000, n_post: 1000, conn_prob: 0.01, spk_prob: 0.01, Linear: 0.01043844223022461 s
+ # n_pre: 10000, n_post: 1000, conn_prob: 0.01, spk_prob: 0.01, Matmul: 0.8944694995880127 s
+ # Acceleration ratio: 84.68994107167329
+ #
+ # n_pre: 10000, n_post: 20000, conn_prob: 0.01, spk_prob: 0.01, Linear: 0.021282196044921875 s
+ # n_pre: 10000, n_post: 20000, conn_prob: 0.01, spk_prob: 0.01, Matmul: 21.388156414031982 s
+ # Acceleration ratio: 1003.9788268506901
+ #
+ # n_pre: 20000, n_post: 10000, conn_prob: 0.01, spk_prob: 0.01, Linear: 0.025498151779174805 s
+ # n_pre: 20000, n_post: 10000, conn_prob: 0.01, spk_prob: 0.01, Matmul: 21.211663246154785 s
+ # Acceleration ratio: 830.8902259997943
+ #
+ # n_pre: 20000, n_post: 20000, conn_prob: 0.01, spk_prob: 0.01, Linear: 0.044051408767700195 s
+ # n_pre: 20000, n_post: 20000, conn_prob: 0.01, spk_prob: 0.01, Matmul: 42.31502842903137 s
+ # Acceleration ratio: 959.5828647200498
+ #
+ # n_pre: 20000, n_post: 30000, conn_prob: 0.01, spk_prob: 0.01, Linear: 0.06666803359985352 s
+ # n_pre: 20000, n_post: 30000, conn_prob: 0.01, spk_prob: 0.01, Matmul: 62.46805453300476 s
+ # Acceleration ratio: 936.0016057162067
+ #
+ # n_pre: 30000, n_post: 20000, conn_prob: 0.01, spk_prob: 0.01, Linear: 0.08313393592834473 s
+ # n_pre: 30000, n_post: 20000, conn_prob: 0.01, spk_prob: 0.01, Matmul: 63.61667847633362 s
+ # Acceleration ratio: 764.231163013459
+ #
+ #
+
+
+ import os
+
+ # os.environ['XLA_FLAGS'] = '--xla_cpu_use_thunk_runtime=false'
+ os.environ['JAX_TRACEBACK_FILTERING'] = 'off'
+
+ import jax
+ #
+ # jax.config.update('jax_cpu_enable_async_dispatch', False)
+
+ import time
+ import brainstate as bst
+
+
+ def forward(n_pre, n_post, conn_prob, spk_prob, as_float: bool):
+     linear = bst.event.FixedProb(n_pre, n_post, prob=conn_prob, weight=bst.init.Normal())
+     spike = (bst.random.rand(n_pre) < spk_prob)
+
+     if as_float:
+         spike = spike.astype(float)
+
+     @jax.jit
+     def f1(spike):
+         return linear(spike)
+
+     weight = bst.init.Normal()([n_pre, n_post])
+
+     @jax.jit
+     def f2(spike):
+         return spike @ weight
+
+     y1 = jax.block_until_ready(f1(spike))
+     y2 = jax.block_until_ready(f2(spike))
+     # print('max difference:', jax.numpy.abs(y1 - y2).max())
+
+     n = 1000
+     t0 = time.time()
+     for _ in range(n):
+         jax.block_until_ready(f1(spike))
+     r1 = time.time() - t0
+     print(f"n_pre: {n_pre}, n_post: {n_post}, conn_prob: {conn_prob}, spk_prob: {spk_prob}, Linear: {r1} s")
+
+     t0 = time.time()
+     for _ in range(n):
+         jax.block_until_ready(f2(spike))
+     r2 = time.time() - t0
+     print(f"n_pre: {n_pre}, n_post: {n_post}, conn_prob: {conn_prob}, spk_prob: {spk_prob}, Matmul: {r2} s")
+     print('Acceleration ratio:', r2 / r1 - 1.)
+
+     print()
+     bst.util.clear_buffer_memory()
+
+
+ def benchmark_forward():
+     for n_pre, n_post in [
+         (1000, 1000),
+         (1000, 10000),
+         (10000, 10000),
+         (10000, 1000),
+         (10000, 20000),
+         (20000, 10000),
+         (20000, 20000),
+         (20000, 30000),
+         (30000, 20000),
+     ]:
+         forward(n_pre, n_post, 0.01, 0.01, False)
+
+
+ if __name__ == '__main__':
+     pass
+     # forward(1000, 6400, 0.01, 0.01, False)
+     # forward(10000, 12800, 0.01, 0.01, False)
+
+     benchmark_forward()
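One detail worth keeping when reusing this benchmark is its timing discipline for JIT-compiled JAX code: each function is called once before timing (so compilation is excluded) and every timed call is wrapped in `jax.block_until_ready`, so JAX's asynchronous dispatch does not make the kernels look faster than they are. A stripped-down version of that pattern, with a plain matmul as a stand-in workload and a helper name (`time_jitted`) that is ours, not brainstate's:

    import time

    import jax
    import jax.numpy as jnp

    def time_jitted(fn, *args, n=1000):
        jax.block_until_ready(fn(*args))       # warm-up call: triggers compilation
        t0 = time.time()
        for _ in range(n):
            jax.block_until_ready(fn(*args))   # block so async dispatch is not hidden
        return time.time() - t0

    w = jnp.ones((1000, 2000))
    f = jax.jit(lambda v: v @ w)
    print('matmul:', time_jitted(f, jnp.ones((1000,))), 's')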
@@ -15,12 +15,12 @@

  from __future__ import annotations

+
  import jax.numpy
  import jax.numpy as jnp
  from absl.testing import parameterized

  import brainstate as bst
- from brainstate.event._fixed_probability import FixedProb


  class TestFixedProbCSR(parameterized.TestCase):
@@ -30,18 +30,18 @@ class TestFixedProbCSR(parameterized.TestCase):
      def test1(self, allow_multi_conn):
          x = bst.random.rand(20) < 0.1
          # x = bst.random.rand(20)
-         m = FixedProb(20, 40, 0.1, 1.0, seed=123, allow_multi_conn=allow_multi_conn)
+         m = bst.event.FixedProb(20, 40, 0.1, 1.0, seed=123, allow_multi_conn=allow_multi_conn)
          y = m(x)
          print(y)

-         m2 = FixedProb(20, 40, 0.1, bst.init.KaimingUniform(), seed=123)
+         m2 = bst.event.FixedProb(20, 40, 0.1, bst.init.KaimingUniform(), seed=123)
          print(m2(x))

      def test_grad_bool(self):
          n_in = 20
          n_out = 30
          x = bst.random.rand(n_in) < 0.3
-         fn = FixedProb(n_in, n_out, 0.1, bst.init.KaimingUniform(), seed=123)
+         fn = bst.event.FixedProb(n_in, n_out, 0.1, bst.init.KaimingUniform(), seed=123)

          def f(x):
              return fn(x).sum()
@@ -62,16 +62,16 @@ class TestFixedProbCSR(parameterized.TestCase):
          x = bst.random.rand(n_in)

          if homo_w:
-             fn = FixedProb(n_in, n_out, 0.1, 1.5, seed=123)
+             fn = bst.event.FixedProb(n_in, n_out, 0.1, 1.5, seed=123, float_as_event=bool_x)
          else:
-             fn = FixedProb(n_in, n_out, 0.1, bst.init.KaimingUniform(), seed=123)
+             fn = bst.event.FixedProb(n_in, n_out, 0.1, bst.init.KaimingUniform(), seed=123, float_as_event=bool_x)
          w = fn.weight.value

          def f(x, w):
              fn.weight.value = w
              return fn(x).sum()

-         r = bst.transform.grad(f, argnums=(0, 1))(x, w)
+         r = bst.augment.grad(f, argnums=(0, 1))(x, w)

          # -------------------
          # TRUE gradients
@@ -88,7 +88,6 @@ class TestFixedProbCSR(parameterized.TestCase):
          r2 = jax.grad(f2, argnums=(0, 1))(x, w)
          self.assertTrue(jnp.allclose(r[0], r2[0]))
          self.assertTrue(jnp.allclose(r[1], r2[1]))
-         print(r[1])

      @parameterized.product(
          bool_x=[True, False],
@@ -102,7 +101,11 @@ class TestFixedProbCSR(parameterized.TestCase):
          else:
              x = bst.random.rand(n_in)

-         fn = FixedProb(n_in, n_out, 0.1, 1.5 if homo_w else bst.init.KaimingUniform(), seed=123, grad_mode='jvp')
+         fn = bst.event.FixedProb(
+             n_in, n_out, 0.1, 1.5 if homo_w else bst.init.KaimingUniform(),
+             seed=123,
+             float_as_event=bool_x
+         )
          w = fn.weight.value

          def f(x, w):
@@ -124,5 +127,5 @@ class TestFixedProbCSR(parameterized.TestCase):
              return true_fn(x, w, fn.indices, n_out)

          o2, r2 = jax.jvp(f2, (x, w), (jnp.ones_like(x), jnp.ones_like(w)))
-         self.assertTrue(jnp.allclose(r1, r2))
          self.assertTrue(jnp.allclose(o1, o2))
+         self.assertTrue(jnp.allclose(r1, r2))
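The test updates keep the original cross-checking strategy: outputs and gradients of the event-driven `FixedProb` layer are compared against an explicit dense reference, via `bst.augment.grad` for reverse mode and `jax.jvp` for forward mode. The same `jax.jvp` consistency check in isolation, with two toy implementations of one function standing in for the event-driven path and the dense reference (everything below is illustrative, not brainstate API):

    import jax
    import jax.numpy as jnp

    w = jnp.arange(12.0).reshape(3, 4)

    def f_event(x):   # stand-in for the event-driven implementation
        return (x @ w).sum()

    def f_dense(x):   # stand-in for the dense reference implementation
        return jnp.einsum('i,ij->', x, w)

    x = jnp.array([1.0, 0.0, 2.0])
    o1, t1 = jax.jvp(f_event, (x,), (jnp.ones_like(x),))
    o2, t2 = jax.jvp(f_dense, (x,), (jnp.ones_like(x),))
    assert jnp.allclose(o1, o2) and jnp.allclose(t1, t2)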
@@ -12,21 +12,23 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
+
  from __future__ import annotations

  from typing import Union, Callable, Optional

  import brainunit as u
  import jax
+ import jax.experimental.pallas as pl
  import jax.numpy as jnp
  import numpy as np
+ from jax.interpreters import ad

  from brainstate._state import ParamState, State
- from brainstate._utils import set_module_as
  from brainstate.init import param
  from brainstate.nn._module import Module
- from brainstate.typing import ArrayLike
- from ._misc import IntScalar
+ from brainstate.typing import ArrayLike, Size
+ from ._xla_custom_op import XLACustomOp

  __all__ = [
      'Linear',
@@ -39,12 +41,16 @@ class Linear(Module):

      Parameters
      ----------
-     n_pre : int
-         Number of pre-synaptic neurons.
-     n_post : int
-         Number of post-synaptic neurons.
+     in_size : Size
+         Number of pre-synaptic neurons, i.e., input size.
+     out_size : Size
+         Number of post-synaptic neurons, i.e., output size.
      weight : float or callable or jax.Array or brainunit.Quantity
          Maximum synaptic conductance.
+     block_size : int, optional
+         Block size for parallel computation.
+     float_as_event : bool, optional
+         Whether to treat float as event.
      name : str, optional
          Name of the module.
      """
@@ -53,167 +59,301 @@ class Linear(Module):

      def __init__(
          self,
-         n_pre: IntScalar,
-         n_post: IntScalar,
+         in_size: Size,
+         out_size: Size,
          weight: Union[Callable, ArrayLike],
+         float_as_event: bool = True,
+         block_size: int = 64,
          name: Optional[str] = None,
-         grad_mode: str = 'vjp'
      ):
          super().__init__(name=name)
-         self.n_pre = n_pre
-         self.n_post = n_post
-         self.in_size = n_pre
-         self.out_size = n_post

-         assert grad_mode in ['vjp', 'jvp'], f"Unsupported grad_mode: {grad_mode}"
-         self.grad_mode = grad_mode
+         # network parameters
+         self.in_size = in_size
+         self.out_size = out_size
+         self.float_as_event = float_as_event
+         self.block_size = block_size

          # maximum synaptic conductance
-         weight = param(weight, (self.n_pre, self.n_post), allow_none=False)
+         weight = param(weight, (self.in_size[-1], self.out_size[-1]), allow_none=False)
          self.weight = ParamState(weight)

      def update(self, spk: jax.Array) -> Union[jax.Array, u.Quantity]:
          weight = self.weight.value if isinstance(self.weight, State) else self.weight
          if u.math.size(weight) == 1:
-             return u.math.ones(self.n_post) * (u.math.sum(spk) * weight)
-
-         device_kind = jax.devices()[0].platform  # spk.device.device_kind
-         if device_kind == 'cpu':
-             return cpu_event_linear(u.math.asarray(weight),
-                                     u.math.asarray(spk),
-                                     n_post=self.n_post,
-                                     grad_mode=self.grad_mode)
-         elif device_kind in ['gpu', 'tpu']:
-             raise NotImplementedError()
-         else:
-             raise ValueError(f"Unsupported device: {device_kind}")
-
-
- @set_module_as('brainstate.event')
- def cpu_event_linear(
-     g_max: Union[u.Quantity, jax.Array],
-     spk: jax.Array,
-     *,
-     n_post: int = None,
-     grad_mode: str = 'vjp'
- ) -> Union[u.Quantity, jax.Array]:
+             return u.math.ones(self.out_size) * (u.math.sum(spk) * weight)
+
+         return event_linear(spk, weight, block_size=self.block_size, float_as_event=self.float_as_event)
+
+
+ def event_linear(spk, weight, *, block_size, float_as_event) -> jax.Array | u.Quantity:
      """
-     The FixedProb module implements a fixed probability connection with CSR sparse data structure.
+     The event-driven linear computation.

      Parameters
      ----------
-     n_post : int
-         Number of post-synaptic neurons.
-     g_max : brainunit.Quantity or jax.Array
+     weight : brainunit.Quantity or jax.Array
          Maximum synaptic conductance.
      spk : jax.Array
          Spike events.
-     grad_mode : str, optional
-         Gradient mode. Default is 'vjp'. Can be 'vjp' or 'jvp'.
+     block_size : int
+         Block size for parallel computation.
+     float_as_event : bool
+         Whether to treat float as event.

      Returns
      -------
      post_data : brainunit.Quantity or jax.Array
          Post synaptic data.
      """
-     unit = u.get_unit(g_max)
-     g_max = u.get_mantissa(g_max)
-     spk = jnp.asarray(spk)
+     with jax.ensure_compile_time_eval():
+         weight = u.math.asarray(weight)
+         unit = u.get_unit(weight)
+         weight = u.get_mantissa(weight)
+         spk = jnp.asarray(spk)

      def mv(spk_vector):
          assert spk_vector.ndim == 1, f"spk must be 1D. Got: {spk.ndim}"
-         if jnp.size(g_max) == 1:
-             assert isinstance(n_post, int), f"n_post must be an integer when weight is homogenous. Got: {n_post}"
-             # return jnp.full((n_post,), fill_value=jnp.sum(spk_vector) * weight)
-             return jnp.ones((n_post,), dtype=g_max.dtype) * (jnp.sum(spk_vector) * g_max)
-
-         if grad_mode == 'vjp':
-             post = _cpu_event_linear_mv_vjp(g_max, spk_vector)
-         elif grad_mode == 'jvp':
-             post = _cpu_event_linear_mv_jvp(g_max, spk_vector)
-         else:
-             raise ValueError(f"Unsupported grad_mode: {grad_mode}")
-         return post
+         return event_liner_p_call(
+             spk,
+             weight,
+             block_size=block_size,
+             float_as_event=float_as_event,
+         )

      assert spk.ndim >= 1, f"spk must be at least 1D. Got: {spk.ndim}"
-     assert g_max.ndim in [2, 0], f"weight must be 2D or 0D. Got: {g_max.ndim}"
+     assert weight.ndim in [2, 0], f"weight must be 2D or 0D. Got: {weight.ndim}"

      if spk.ndim == 1:
-         post_data = mv(spk)
+         [post_data] = mv(spk)
      else:
-         shape = spk.shape[:-1]
-         post_data = jax.vmap(mv)(u.math.reshape(spk, (-1, spk.shape[-1])))
-         post_data = u.math.reshape(post_data, shape + post_data.shape[-1:])
+         [post_data] = jax.vmap(mv)(u.math.reshape(spk, (-1, spk.shape[-1])))
+         post_data = u.math.reshape(post_data, spk.shape[:-1] + post_data.shape[-1:])
      return u.maybe_decimal(u.Quantity(post_data, unit=unit))


- # --------------
- # Implementation
- # --------------
-
-
- def _cpu_event_linear_mv(g_max, spk) -> jax.Array:
-     def scan_fn(post, i):
-         sp = spk[i]
-         if spk.dtype == jnp.bool_:
-             post = jax.lax.cond(sp, lambda: post + g_max[i], lambda: post)
-         else:
-             post = jax.lax.cond(sp == 0., lambda: post, lambda: post + g_max[i] * sp)
-         return post, None
-
-     return jax.lax.scan(scan_fn, jnp.zeros(g_max.shape[1], dtype=g_max.dtype), np.arange(len(spk)))[0]
-
-
- # --------------
- # VJP
- # --------------
-
- def _cpu_event_linear_mv_fwd(g_max, spk):
-     return _cpu_event_linear_mv(g_max, spk), (g_max, spk)
+ Kernel = Callable


- def _cpu_event_linear_mv_bwd(res, ct):
-     g_max, spk = res
+ def cpu_kernel_generator(
+     float_as_event: bool,
+     spk_info: jax.ShapeDtypeStruct,
+     **kwargs
+ ) -> Kernel:
+     import numba  # pylint: disable=import-outside-toplevel

-     # ∂L/∂spk = ∂L/∂y * ∂y/∂spk
-     ct_spk = jnp.matmul(g_max, ct)
+     if spk_info.dtype == jnp.bool_:

-     # ∂L/∂w = ∂L/∂y * ∂y/∂w
-     def map_fn(sp):
-         if spk.dtype == jnp.bool_:
-             d_gmax = jax.lax.cond(sp, lambda: ct, lambda: jnp.zeros_like(ct))
-         else:
-             d_gmax = jax.lax.cond(sp == 0., lambda: jnp.zeros_like(ct), lambda: ct * sp)
-         return d_gmax
+         @numba.njit
+         def _kernel(spikes, weights, posts):
+             r = np.zeros((weights.shape[1],), dtype=weights.dtype)
+             for i in range(spikes.shape[0]):
+                 if spikes[i]:
+                     r = r + weights[i]
+             posts[:] = r

-     ct_gmax = jax.vmap(map_fn)(spk)
-     return ct_gmax, ct_spk
+     elif float_as_event:
+         @numba.njit
+         def _kernel(spikes, weights, posts):
+             r = np.zeros((weights.shape[1],), dtype=weights.dtype)
+             for i in range(spikes.shape[0]):
+                 if spikes[i] != 0.:
+                     r = r + weights[i]
+             posts[:] = r

+     else:
+         @numba.njit
+         def _kernel(spikes, weights, posts):
+             r = np.zeros((weights.shape[1],), dtype=weights.dtype)
+             for i in range(spikes.shape[0]):
+                 sp = spikes[i]
+                 if sp != 0.:
+                     r = r + weights[i] * sp
+             posts[:] = r
+
+     return _kernel
+
+
+ def gpu_kernel_generator(
+     block_size: int,
+     float_as_event: bool,
+     weight_info: jax.ShapeDtypeStruct,
+     **kwargs
+ ) -> Kernel:
+     # # Each block handles one [block_size,] slice of post
+     # # Each block handles one [n_pre] slice of pre
+     # # Each block handles one [n_pre, block_size] slice of w
+     # def _mv_kernel(sp_ref, w_ref, post_ref):
+     #
+     #     pid = pl.program_id(0)
+     #
+     #     def scan_fn(i, post_):
+     #         if sp_ref.dtype == jnp.bool_:
+     #             post_ = jax.lax.cond(
+     #                 sp_ref[i],
+     #                 lambda: post_ + w_ref[i, ...],
+     #                 lambda: post_
+     #             )
+     #         else:
+     #             if float_as_event:
+     #                 post_ = jax.lax.cond(
+     #                     sp_ref[i] != 0.,
+     #                     lambda: post_ + w_ref[i, ...],
+     #                     lambda: post_
+     #                 )
+     #             else:
+     #                 sp = sp_ref[i]
+     #                 post_ = jax.lax.cond(
+     #                     sp != 0.,
+     #                     lambda: post_ + w_ref[i, ...] * sp,
+     #                     lambda: post_
+     #                 )
+     #         return post_
+     #
+     #     post = jax.lax.fori_loop(0, n_pre, scan_fn, jnp.zeros(post_ref.shape, dtype=post_ref.dtype))
+     #     mask = jnp.arange(block_size) + pid * block_size < n_pre
+     #     pl.store(post_ref, pl.dslice(None, None), post, mask=mask)
+     #
+     # n_pre = weight_info.shape[0]
+     # n_post = weight_info.shape[1]
+     # kernel = pl.pallas_call(
+     #     _mv_kernel,
+     #     out_shape=[
+     #         jax.ShapeDtypeStruct([weight_info.shape[1]], weight_info.dtype),
+     #     ],
+     #     out_specs=[
+     #         pl.BlockSpec((block_size,), lambda i: i),
+     #     ],
+     #     in_specs=[
+     #         pl.BlockSpec((n_pre,), lambda i: 0),
+     #         pl.BlockSpec((n_pre, block_size), lambda i: (0, i)),
+     #     ],
+     #     grid=(
+     #         pl.cdiv(n_post, block_size),
+     #     ),
+     #     interpret=False,
+     # )
+     # return kernel
+
+     # Each block handles one [block_size,] slice of post
+     # Each block handles one [block_size] slice of pre
+     # Each block handles one [block_size, block_size] slice of w
+     def _mv_kernel(
+         sp_ref,  # [block_size]
+         w_ref,  # [block_size, block_size]
+         post_ref,  # [block_size]
+     ):

- _cpu_event_linear_mv_vjp = jax.custom_vjp(_cpu_event_linear_mv)
- _cpu_event_linear_mv_vjp.defvjp(_cpu_event_linear_mv_fwd, _cpu_event_linear_mv_bwd)
-
-
- # --------------
- # JVP
- # --------------
-
-
- def _cpu_event_linear_mv_jvp_rule(primals, tangents):
-     # forward pass
-     g_max, spk = primals
-     y = _cpu_event_linear_mv(g_max, spk)
-
-     # forward gradients
-     gmax_dot, spk_dot = tangents
-
-     # ∂y/∂gmax
-     dgmax = _cpu_event_linear_mv(gmax_dot, spk)
-
-     # ∂y/∂gspk
-     dspk = spk_dot @ g_max
-     return y, dgmax + dspk
-
+         r_pid = pl.program_id(0)
+         c_start = pl.program_id(1) * block_size
+         row_length = jnp.minimum(n_pre - r_pid * block_size, block_size)
+         mask = jnp.arange(block_size) + c_start < weight_info.shape[1]
+
+         def scan_fn(i, post_):
+             if sp_ref.dtype == jnp.bool_:
+                 post_ = jax.lax.cond(
+                     sp_ref[i],
+                     lambda: post_ + w_ref[i, ...],
+                     lambda: post_
+                 )
+             else:
+                 if float_as_event:
+                     post_ = jax.lax.cond(
+                         sp_ref[i] != 0.,
+                         lambda: post_ + w_ref[i, ...],
+                         lambda: post_
+                     )
+                 else:
+                     sp = sp_ref[i]
+                     post_ = jax.lax.cond(
+                         sp != 0.,
+                         lambda: post_ + w_ref[i, ...] * sp,
+                         lambda: post_
+                     )
+             return post_
+
+         post = jax.lax.fori_loop(0, row_length, scan_fn, jnp.zeros(post_ref.shape, dtype=post_ref.dtype))
+         pl.atomic_add(post_ref, pl.dslice(None, None), post, mask=mask)
+
+     n_pre = weight_info.shape[0]
+     n_post = weight_info.shape[1]
+     kernel = pl.pallas_call(
+         _mv_kernel,
+         out_shape=[
+             jax.ShapeDtypeStruct([weight_info.shape[1]], weight_info.dtype),
+         ],
+         out_specs=[
+             pl.BlockSpec((block_size,), lambda i, j: j),
+         ],
+         in_specs=[
+             pl.BlockSpec((block_size,), lambda i, j: i),
+             pl.BlockSpec((block_size, block_size), lambda i, j: (i, j)),
+         ],
+         grid=(
+             pl.cdiv(n_pre, block_size),
+             pl.cdiv(n_post, block_size),
+         ),
+         interpret=False,
+     )
+     return kernel
+
+
+ def jvp_spikes(spk_dot, spikes, weights, **kwargs):
+     return [spk_dot @ weights]
+
+
+ def jvp_weights(w_dot, spikes, weights, *, float_as_event, block_size, **kwargs):
+     return event_liner_p_call(
+         spikes,
+         w_dot,
+         block_size=block_size,
+         float_as_event=float_as_event,
+     )
+
+
+ def transpose_rule(ct, spikes, weights, *, float_as_event, **kwargs):
+     if ad.is_undefined_primal(spikes):
+         ct_events = jnp.matmul(weights, ct[0])
+         return (ad.Zero(spikes) if type(ct[0]) is ad.Zero else ct_events), weights

- _cpu_event_linear_mv_jvp = jax.custom_jvp(_cpu_event_linear_mv)
- _cpu_event_linear_mv_jvp.defjvp(_cpu_event_linear_mv_jvp_rule)
+     else:
+         def map_fn(sp):
+             if spikes.dtype == jnp.bool_:
+                 d_gmax = jnp.where(sp, ct[0], jnp.zeros_like(ct[0]))
+             else:
+                 if float_as_event:
+                     d_gmax = jnp.where(sp == 0., jnp.zeros_like(ct[0]), ct[0])
+                 else:
+                     d_gmax = jnp.where(sp == 0., jnp.zeros_like(ct[0]), ct[0] * sp)
+                     # d_gmax = jax.lax.cond(sp == 0., lambda: jnp.zeros_like(ct[0]), lambda: ct[0] * sp)
+             return d_gmax
+
+         ct_weights = jax.vmap(map_fn)(spikes)
+         return spikes, (ad.Zero(weights) if type(ct[0]) is ad.Zero else ct_weights)
+
+
+ event_linear_p = XLACustomOp(
+     'event_linear',
+     cpu_kernel_generator=cpu_kernel_generator,
+     gpu_kernel_generator=gpu_kernel_generator,
+ )
+ event_linear_p.defjvp(jvp_spikes, jvp_weights)
+ event_linear_p.def_transpose_rule(transpose_rule)
+
+
+ def event_liner_p_call(
+     spikes,
+     weights,
+     *,
+     block_size,
+     float_as_event,
+ ):
+     return event_linear_p(
+         spikes,
+         weights,
+         outs=[jax.ShapeDtypeStruct([weights.shape[1]], weights.dtype)],
+         block_size=block_size,
+         float_as_event=float_as_event,
+         spk_info=jax.ShapeDtypeStruct(spikes.shape, spikes.dtype),
+         weight_info=jax.ShapeDtypeStruct(weights.shape, weights.dtype),
+     )
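All three Numba CPU kernels above (and the Pallas GPU kernel) compute the same thing: for each active pre-synaptic index i, the row weights[i] is accumulated into the post-synaptic vector, scaled by the spike value only when float inputs are not treated as events; for 0/1 inputs this coincides with the dense spk @ weight. A small self-contained check of that equivalence, written against plain jax.numpy rather than the event_linear_p primitive:

    import jax.numpy as jnp

    n_pre, n_post = 8, 5
    weight = jnp.arange(n_pre * n_post, dtype=jnp.float32).reshape(n_pre, n_post)
    spk_bool = jnp.array([1, 0, 0, 1, 0, 1, 0, 0], dtype=bool)
    spk_val = spk_bool.astype(jnp.float32) * 2.0   # graded "spikes"

    # bool input (or float_as_event=True): sum the rows of active pre-neurons
    post_event = jnp.where(spk_bool[:, None], weight, 0.0).sum(axis=0)
    assert jnp.allclose(post_event, spk_bool.astype(jnp.float32) @ weight)

    # float_as_event=False: scale each active row by its spike value
    post_scaled = (spk_val[:, None] * weight).sum(axis=0)
    assert jnp.allclose(post_scaled, spk_val @ weight)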