brainstate-0.2.0-py2.py3-none-any.whl → brainstate-0.2.1-py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brainstate/__init__.py +169 -169
- brainstate/_compatible_import.py +340 -340
- brainstate/_compatible_import_test.py +681 -681
- brainstate/_deprecation.py +210 -210
- brainstate/_deprecation_test.py +2319 -2319
- brainstate/_error.py +45 -45
- brainstate/_state.py +1652 -1652
- brainstate/_state_test.py +52 -52
- brainstate/_utils.py +47 -47
- brainstate/environ.py +1495 -1495
- brainstate/environ_test.py +1223 -1223
- brainstate/graph/__init__.py +22 -22
- brainstate/graph/_node.py +240 -240
- brainstate/graph/_node_test.py +589 -589
- brainstate/graph/_operation.py +1624 -1624
- brainstate/graph/_operation_test.py +1147 -1147
- brainstate/mixin.py +1433 -1433
- brainstate/mixin_test.py +1017 -1017
- brainstate/nn/__init__.py +137 -137
- brainstate/nn/_activations.py +1100 -1100
- brainstate/nn/_activations_test.py +354 -354
- brainstate/nn/_collective_ops.py +633 -633
- brainstate/nn/_collective_ops_test.py +774 -774
- brainstate/nn/_common.py +226 -226
- brainstate/nn/_common_test.py +154 -154
- brainstate/nn/_conv.py +2010 -2010
- brainstate/nn/_conv_test.py +849 -849
- brainstate/nn/_delay.py +575 -575
- brainstate/nn/_delay_test.py +243 -243
- brainstate/nn/_dropout.py +618 -618
- brainstate/nn/_dropout_test.py +477 -477
- brainstate/nn/_dynamics.py +1267 -1267
- brainstate/nn/_dynamics_test.py +67 -67
- brainstate/nn/_elementwise.py +1298 -1298
- brainstate/nn/_elementwise_test.py +829 -829
- brainstate/nn/_embedding.py +408 -408
- brainstate/nn/_embedding_test.py +156 -156
- brainstate/nn/_event_fixedprob.py +233 -233
- brainstate/nn/_event_fixedprob_test.py +115 -115
- brainstate/nn/_event_linear.py +83 -83
- brainstate/nn/_event_linear_test.py +121 -121
- brainstate/nn/_exp_euler.py +254 -254
- brainstate/nn/_exp_euler_test.py +377 -377
- brainstate/nn/_linear.py +744 -744
- brainstate/nn/_linear_test.py +475 -475
- brainstate/nn/_metrics.py +1070 -1070
- brainstate/nn/_metrics_test.py +611 -611
- brainstate/nn/_module.py +384 -384
- brainstate/nn/_module_test.py +40 -40
- brainstate/nn/_normalizations.py +1334 -1334
- brainstate/nn/_normalizations_test.py +699 -699
- brainstate/nn/_paddings.py +1020 -1020
- brainstate/nn/_paddings_test.py +722 -722
- brainstate/nn/_poolings.py +2239 -2239
- brainstate/nn/_poolings_test.py +952 -952
- brainstate/nn/_rnns.py +946 -946
- brainstate/nn/_rnns_test.py +592 -592
- brainstate/nn/_utils.py +216 -216
- brainstate/nn/_utils_test.py +401 -401
- brainstate/nn/init.py +809 -809
- brainstate/nn/init_test.py +180 -180
- brainstate/random/__init__.py +270 -270
- brainstate/random/_rand_funs.py +3938 -3938
- brainstate/random/_rand_funs_test.py +640 -640
- brainstate/random/_rand_seed.py +675 -675
- brainstate/random/_rand_seed_test.py +48 -48
- brainstate/random/_rand_state.py +1617 -1617
- brainstate/random/_rand_state_test.py +551 -551
- brainstate/transform/__init__.py +59 -59
- brainstate/transform/_ad_checkpoint.py +176 -176
- brainstate/transform/_ad_checkpoint_test.py +49 -49
- brainstate/transform/_autograd.py +1025 -1025
- brainstate/transform/_autograd_test.py +1289 -1289
- brainstate/transform/_conditions.py +316 -316
- brainstate/transform/_conditions_test.py +220 -220
- brainstate/transform/_error_if.py +94 -94
- brainstate/transform/_error_if_test.py +52 -52
- brainstate/transform/_eval_shape.py +145 -145
- brainstate/transform/_eval_shape_test.py +38 -38
- brainstate/transform/_jit.py +399 -399
- brainstate/transform/_jit_test.py +143 -143
- brainstate/transform/_loop_collect_return.py +675 -675
- brainstate/transform/_loop_collect_return_test.py +58 -58
- brainstate/transform/_loop_no_collection.py +283 -283
- brainstate/transform/_loop_no_collection_test.py +50 -50
- brainstate/transform/_make_jaxpr.py +2016 -2016
- brainstate/transform/_make_jaxpr_test.py +1510 -1510
- brainstate/transform/_mapping.py +529 -529
- brainstate/transform/_mapping_test.py +194 -194
- brainstate/transform/_progress_bar.py +255 -255
- brainstate/transform/_random.py +171 -171
- brainstate/transform/_unvmap.py +256 -256
- brainstate/transform/_util.py +286 -286
- brainstate/typing.py +837 -837
- brainstate/typing_test.py +780 -780
- brainstate/util/__init__.py +27 -27
- brainstate/util/_others.py +1024 -1024
- brainstate/util/_others_test.py +962 -962
- brainstate/util/_pretty_pytree.py +1301 -1301
- brainstate/util/_pretty_pytree_test.py +675 -675
- brainstate/util/_pretty_repr.py +462 -462
- brainstate/util/_pretty_repr_test.py +696 -696
- brainstate/util/filter.py +945 -945
- brainstate/util/filter_test.py +911 -911
- brainstate/util/struct.py +910 -910
- brainstate/util/struct_test.py +602 -602
- {brainstate-0.2.0.dist-info → brainstate-0.2.1.dist-info}/METADATA +108 -108
- brainstate-0.2.1.dist-info/RECORD +111 -0
- {brainstate-0.2.0.dist-info → brainstate-0.2.1.dist-info}/licenses/LICENSE +202 -202
- brainstate-0.2.0.dist-info/RECORD +0 -111
- {brainstate-0.2.0.dist-info → brainstate-0.2.1.dist-info}/WHEEL +0 -0
- {brainstate-0.2.0.dist-info → brainstate-0.2.1.dist-info}/top_level.txt +0 -0
brainstate/nn/_linear.py
CHANGED
@@ -1,744 +1,744 @@
The removed (0.2.0) and added (0.2.1) hunks of this file are line-for-line identical as rendered here: all 744 lines are deleted and re-added without any visible content change. The recovered source, common to both versions, follows.

# Copyright 2024 BrainX Ecosystem Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# -*- coding: utf-8 -*-

from typing import Callable, Union, Optional

import brainunit as u
import jax.numpy as jnp

from brainstate._state import ParamState
from brainstate.typing import ArrayLike, Size
from . import init as init
from ._module import Module
from ._normalizations import weight_standardization

__all__ = [
    'Linear',
    'ScaledWSLinear',
    'SignedWLinear',
    'SparseLinear',
    'AllToAll',
    'OneToOne',
    'LoRA',
]


class Linear(Module):
    """
    Linear transformation layer.

    Applies a linear transformation to the incoming data: :math:`y = xW + b`

    Parameters
    ----------
    in_size : int or tuple of int
        The input feature size.
    out_size : int or tuple of int
        The output feature size.
    w_init : Callable or ArrayLike, optional
        Weight initializer. Default is ``KaimingNormal()``.
    b_init : Callable, ArrayLike, or None, optional
        Bias initializer. If ``None``, no bias is added. Default is ``ZeroInit()``.
    w_mask : ArrayLike, Callable, or None, optional
        Optional mask for the weights. If provided, weights will be element-wise
        multiplied by this mask.
    name : str, optional
        Name of the module.
    param_type : type, optional
        Type of parameter state. Default is ``ParamState``.

    Attributes
    ----------
    in_size : tuple
        Input feature size.
    out_size : tuple
        Output feature size.
    w_mask : ArrayLike or None
        Weight mask if provided.
    weight : ParamState
        Parameter state containing 'weight' and optionally 'bias'.

    Examples
    --------
    .. code-block:: python

        >>> import brainstate as bst
        >>> import jax.numpy as jnp
        >>>
        >>> # Create a linear layer
        >>> layer = bst.nn.Linear((10,), (5,))
        >>> x = jnp.ones((32, 10))
        >>> y = layer(x)
        >>> y.shape
        (32, 5)
        >>>
        >>> # Linear layer without bias
        >>> layer = bst.nn.Linear((10,), (5,), b_init=None)
        >>> y = layer(x)
        >>> y.shape
        (32, 5)
    """
    __module__ = 'brainstate.nn'

    def __init__(
        self,
        in_size: Size,
        out_size: Size,
        w_init: Union[Callable, ArrayLike] = init.KaimingNormal(),
        b_init: Optional[Union[Callable, ArrayLike]] = init.ZeroInit(),
        w_mask: Optional[Union[ArrayLike, Callable]] = None,
        name: Optional[str] = None,
        param_type: type = ParamState,
    ):
        super().__init__(name=name)

        # input and output shape
        self.in_size = in_size
        self.out_size = out_size
        assert self.in_size[:-1] == self.out_size[:-1], ('The first n-1 dimensions of "in_size" '
                                                         'and "out_size" must be the same.')

        # w_mask
        self.w_mask = init.param(w_mask, self.in_size + self.out_size)

        # weights
        params = dict(weight=init.param(w_init, (self.in_size[-1], self.out_size[-1]), allow_none=False))
        if b_init is not None:
            params['bias'] = init.param(b_init, self.out_size[-1], allow_none=False)
        self.weight = param_type(params)

    def update(self, x):
        params = self.weight.value
        weight = params['weight']
        if self.w_mask is not None:
            weight = weight * self.w_mask
        y = u.linalg.dot(x, weight)
        if 'bias' in params:
            y = y + params['bias']
        return y


class SignedWLinear(Module):
    """
    Linear layer with signed absolute weights.

    This layer uses absolute values of weights multiplied by a sign matrix,
    ensuring all effective weights have controlled signs.

    Parameters
    ----------
    in_size : int or tuple of int
        The input feature size.
    out_size : int or tuple of int
        The output feature size.
    w_init : Callable or ArrayLike, optional
        Weight initializer. Default is ``KaimingNormal()``.
    w_sign : ArrayLike or None, optional
        Sign matrix for the weights. If ``None``, all weights are positive
        (absolute values used). If provided, should have the same shape as
        the weight matrix.
    name : str, optional
        Name of the module.
    param_type : type, optional
        Type of parameter state. Default is ``ParamState``.

    Attributes
    ----------
    in_size : tuple
        Input feature size.
    out_size : tuple
        Output feature size.
    w_sign : ArrayLike or None
        Sign matrix for weights.
    weight : ParamState
        Parameter state containing the weight values.

    Examples
    --------
    .. code-block:: python

        >>> import brainstate as bst
        >>> import jax.numpy as jnp
        >>>
        >>> # Create a signed weight linear layer with all positive weights
        >>> layer = bst.nn.SignedWLinear((10,), (5,))
        >>> x = jnp.ones((32, 10))
        >>> y = layer(x)
        >>> y.shape
        (32, 5)
        >>>
        >>> # With custom sign matrix (e.g., inhibitory connections)
        >>> w_sign = jnp.ones((10, 5)) * -1.0  # all negative
        >>> layer = bst.nn.SignedWLinear((10,), (5,), w_sign=w_sign)
        >>> y = layer(x)
        >>> y.shape
        (32, 5)
    """
    __module__ = 'brainstate.nn'

    def __init__(
        self,
        in_size: Size,
        out_size: Size,
        w_init: Union[Callable, ArrayLike] = init.KaimingNormal(),
        w_sign: Optional[ArrayLike] = None,
        name: Optional[str] = None,
        param_type: type = ParamState,
    ):
        super().__init__(name=name)

        # input and output shape
        self.in_size = in_size
        self.out_size = out_size
        assert self.in_size[:-1] == self.out_size[:-1], ('The first n-1 dimensions of "in_size" '
                                                         'and "out_size" must be the same.')

        # w_mask
        self.w_sign = w_sign

        # weights
        weight = init.param(w_init, self.in_size + self.out_size, allow_none=False)
        self.weight = param_type(weight)

    def update(self, x):
        w = self.weight.value
        if self.w_sign is None:
            return u.math.matmul(x, u.math.abs(w))
        else:
            return u.math.matmul(x, u.math.abs(w) * self.w_sign)


class ScaledWSLinear(Module):
    """
    Linear layer with weight standardization.

    Applies weight standardization [1]_ to normalize weights before the linear
    transformation, which can improve training stability and performance.

    Parameters
    ----------
    in_size : int or tuple of int
        The input feature size.
    out_size : int or tuple of int
        The output feature size.
    w_init : Callable, optional
        Weight initializer. Default is ``KaimingNormal()``.
    b_init : Callable, optional
        Bias initializer. Default is ``ZeroInit()``.
    w_mask : ArrayLike, Callable, or None, optional
        Optional mask for the weights.
    ws_gain : bool, optional
        Whether to use a learnable gain parameter for weight standardization.
        Default is ``True``.
    eps : float, optional
        Small constant for numerical stability in standardization.
        Default is ``1e-4``.
    name : str, optional
        Name of the module.
    param_type : type, optional
        Type of parameter state. Default is ``ParamState``.

    Attributes
    ----------
    in_size : tuple
        Input feature size.
    out_size : tuple
        Output feature size.
    w_mask : ArrayLike or None
        Weight mask if provided.
    eps : float
        Epsilon for numerical stability.
    weight : ParamState
        Parameter state containing 'weight', optionally 'bias' and 'gain'.

    References
    ----------
    .. [1] Qiao, S., Wang, H., Liu, C., Shen, W., & Yuille, A. (2019).
           Weight standardization. arXiv preprint arXiv:1903.10520.

    Examples
    --------
    .. code-block:: python

        >>> import brainstate as bst
        >>> import jax.numpy as jnp
        >>>
        >>> # Create a weight-standardized linear layer
        >>> layer = bst.nn.ScaledWSLinear((10,), (5,))
        >>> x = jnp.ones((32, 10))
        >>> y = layer(x)
        >>> y.shape
        (32, 5)
        >>>
        >>> # Without learnable gain
        >>> layer = bst.nn.ScaledWSLinear((10,), (5,), ws_gain=False)
        >>> y = layer(x)
        >>> y.shape
        (32, 5)
    """
    __module__ = 'brainstate.nn'

    def __init__(
        self,
        in_size: Size,
        out_size: Size,
        w_init: Callable = init.KaimingNormal(),
        b_init: Callable = init.ZeroInit(),
        w_mask: Optional[Union[ArrayLike, Callable]] = None,
        ws_gain: bool = True,
        eps: float = 1e-4,
        name: str = None,
        param_type: type = ParamState,
    ):
        super().__init__(name=name)

        # input and output shape
        self.in_size = in_size
        self.out_size = out_size
        assert self.in_size[:-1] == self.out_size[:-1], ('The first n-1 dimensions of "in_size" '
                                                         'and "out_size" must be the same.')

        # w_mask
        self.w_mask = init.param(w_mask, (self.in_size[0], 1))

        # parameters
        self.eps = eps

        # weights
        params = dict(weight=init.param(w_init, self.in_size + self.out_size, allow_none=False))
        if b_init is not None:
            params['bias'] = init.param(b_init, self.out_size, allow_none=False)
        # gain
        if ws_gain:
            s = params['weight'].shape
            params['gain'] = jnp.ones((1,) * (len(s) - 1) + (s[-1],), dtype=params['weight'].dtype)
        self.weight = param_type(params)

    def update(self, x):
        params = self.weight.value
        w = params['weight']
        w = weight_standardization(w, self.eps, params.get('gain', None))
        if self.w_mask is not None:
            w = w * self.w_mask
        y = u.linalg.dot(x, w)
        if 'bias' in params:
            y = y + params['bias']
        return y


class SparseLinear(Module):
    """
    Linear layer with sparse weight matrix.

    Supports sparse matrices from ``brainunit.sparse`` including CSR, CSC,
    and COO formats. Only the non-zero entries are stored and updated.

    Parameters
    ----------
    spar_mat : brainunit.sparse.SparseMatrix
        The sparse weight matrix defining the connectivity structure.
    b_init : Callable, ArrayLike, or None, optional
        Bias initializer. If ``None``, no bias is added.
    in_size : int or tuple of int, optional
        The input size. If not provided, inferred from ``spar_mat``.
    name : str, optional
        Name of the module.
    param_type : type, optional
        Type of parameter state. Default is ``ParamState``.

    Attributes
    ----------
    in_size : tuple
        Input feature size.
    out_size : int
        Output feature size.
    spar_mat : brainunit.sparse.SparseMatrix
        The sparse matrix structure.
    weight : ParamState
        Parameter state containing the sparse 'weight' data and optionally 'bias'.

    Examples
    --------
    .. code-block:: python

        >>> import brainstate as bst
        >>> import brainunit as u
        >>> import jax.numpy as jnp
        >>>
        >>> # Create a sparse linear layer with CSR matrix
        >>> indices = jnp.array([[0, 1], [1, 2], [2, 0]])
        >>> values = jnp.array([1.0, 2.0, 3.0])
        >>> spar_mat = u.sparse.CSR((values, indices[:, 1], indices[:, 0]),
        ...                         shape=(3, 3))
        >>> layer = bst.nn.SparseLinear(spar_mat, in_size=(3,))
        >>> x = jnp.ones((5, 3))
        >>> y = layer(x)
        >>> y.shape
        (5, 3)
    """
    __module__ = 'brainstate.nn'

    def __init__(
        self,
        spar_mat: u.sparse.SparseMatrix,
        b_init: Optional[Union[Callable, ArrayLike]] = None,
        in_size: Size = None,
        name: Optional[str] = None,
        param_type: type = ParamState,
    ):
        super().__init__(name=name)

        # input and output shape
        if in_size is not None:
            self.in_size = in_size
        self.out_size = spar_mat.shape[-1]
        if in_size is not None:
            assert self.in_size[:-1] == self.out_size[:-1], (
                'The first n-1 dimensions of "in_size" '
                'and "out_size" must be the same.'
            )

        # weights
        assert isinstance(spar_mat, u.sparse.SparseMatrix), '"weight" must be a SparseMatrix.'
        self.spar_mat = spar_mat
        params = dict(weight=spar_mat.data)
        if b_init is not None:
            params['bias'] = init.param(b_init, self.out_size[-1], allow_none=False)
        self.weight = param_type(params)

    def update(self, x):
        data = self.weight.value['weight']
        y = x @ self.spar_mat.with_data(data)
        if 'bias' in self.weight.value:
            y = y + self.weight.value['bias']
        return y


class AllToAll(Module):
    """
    All-to-all connection layer.

    Performs matrix multiplication with optional exclusion of self-connections,
    commonly used in recurrent neural networks and graph neural networks.

    Parameters
    ----------
    in_size : int or tuple of int
        The number of neurons in the pre-synaptic group.
    out_size : int or tuple of int
        The number of neurons in the post-synaptic group.
    w_init : Callable or ArrayLike, optional
        Weight initializer. Default is ``KaimingNormal()``.
    b_init : Callable, ArrayLike, or None, optional
        Bias initializer. If ``None``, no bias is added.
    include_self : bool, optional
        Whether to include self-connections (diagonal elements).
        Default is ``True``.
    name : str, optional
        Name of the module.
    param_type : type, optional
        Type of parameter state. Default is ``ParamState``.

    Attributes
    ----------
    in_size : tuple
        Input size.
    out_size : tuple
        Output size.
    include_self : bool
        Whether self-connections are included.
    weight : ParamState
        Parameter state containing 'weight' and optionally 'bias'.

    Examples
    --------
    .. code-block:: python

        >>> import brainstate as bst
        >>> import jax.numpy as jnp
        >>>
        >>> # All-to-all with self-connections
        >>> layer = bst.nn.AllToAll((10,), (10,), include_self=True)
        >>> x = jnp.ones((32, 10))
        >>> y = layer(x)
        >>> y.shape
        (32, 10)
        >>>
        >>> # All-to-all without self-connections (recurrent layer)
        >>> layer = bst.nn.AllToAll((10,), (10,), include_self=False)
        >>> y = layer(x)
        >>> y.shape
        (32, 10)
    """
    __module__ = 'brainstate.nn'

    def __init__(
        self,
        in_size: Size,
        out_size: Size,
        w_init: Union[Callable, ArrayLike] = init.KaimingNormal(),
        b_init: Optional[Union[Callable, ArrayLike]] = None,
        include_self: bool = True,
        name: Optional[str] = None,
        param_type: type = ParamState,
    ):
        super().__init__(name=name)

        # input and output shape
        self.in_size = in_size
        self.out_size = out_size
        assert self.in_size[:-1] == self.out_size[:-1], ('The first n-1 dimensions of "in_size" '
                                                         'and "out_size" must be the same.')

        # others
        self.include_self = include_self

        # weights
        weight = init.param(w_init, (self.in_size[-1], self.out_size[-1]), allow_none=False)
        params = dict(weight=weight)
        if b_init is not None:
            params['bias'] = init.param(b_init, self.out_size[-1], allow_none=False)
        self.weight = param_type(params)

    def update(self, pre_val):
        params = self.weight.value
        pre_val, pre_unit = u.get_mantissa(pre_val), u.get_unit(pre_val)
        w_val, w_unit = u.get_mantissa(params['weight']), u.get_unit(params['weight'])

        if u.math.ndim(w_val) == 0:  # weight is a scalar
            if pre_val.ndim == 1:
                post_val = u.math.sum(pre_val)
            else:
                post_val = u.math.sum(pre_val, keepdims=True, axis=-1)
            if not self.include_self:
                if self.in_size == self.out_size:
                    post_val = post_val - pre_val
                elif self.in_size[-1] > self.out_size[-1]:
                    val = pre_val[..., :self.out_size[-1]]
                    post_val = post_val - val
                else:
                    size = list(self.out_size)
                    size[-1] = self.out_size[-1] - self.in_size[-1]
                    val = u.math.concatenate([pre_val, u.math.zeros(size, dtype=pre_val.dtype)])
                    post_val = post_val - val
            post_val = w_val * post_val

        else:  # weight is a matrix
            assert u.math.ndim(w_val) == 2, '"weight" must be a 2D matrix.'
            if not self.include_self:
                post_val = pre_val @ u.math.fill_diagonal(w_val, 0.)
            else:
                post_val = pre_val @ w_val

        post_val = u.maybe_decimal(u.Quantity(post_val, unit=w_unit * pre_unit))
        if 'bias' in params:
            post_val = post_val + params['bias']
        return post_val


class OneToOne(Module):
    """
    One-to-one connection layer.

    Applies element-wise multiplication with a weight vector, implementing
    diagonal connectivity where each input unit connects only to its
    corresponding output unit.

    Parameters
    ----------
    in_size : int or tuple of int
        The number of neurons. Input and output sizes are the same.
    w_init : Callable or ArrayLike, optional
        Weight initializer. Default is ``Normal()``.
    b_init : Callable, ArrayLike, or None, optional
        Bias initializer. If ``None``, no bias is added.
    name : str, optional
        Name of the module.
    param_type : type, optional
        Type of parameter state. Default is ``ParamState``.

    Attributes
    ----------
    in_size : tuple
        Input size.
    out_size : tuple
        Output size (same as input size).
    weight : ParamState
        Parameter state containing 'weight' and optionally 'bias'.

    Examples
    --------
    .. code-block:: python

        >>> import brainstate as bst
        >>> import jax.numpy as jnp
        >>>
        >>> # One-to-one connection
        >>> layer = bst.nn.OneToOne((10,))
        >>> x = jnp.ones((32, 10))
        >>> y = layer(x)
        >>> y.shape
        (32, 10)
        >>>
        >>> # With bias
        >>> layer = bst.nn.OneToOne((10,), b_init=bst.init.Constant(0.1))
        >>> y = layer(x)
        >>> y.shape
        (32, 10)
    """
    __module__ = 'brainstate.nn'

    def __init__(
        self,
        in_size: Size,
        w_init: Union[Callable, ArrayLike] = init.Normal(),
        b_init: Optional[Union[Callable, ArrayLike]] = None,
        name: Optional[str] = None,
        param_type: type = ParamState,
    ):
        super().__init__(name=name)

        # input and output shape
        self.in_size = in_size
        self.out_size = in_size

        # weights
        param = dict(weight=init.param(w_init, self.in_size, allow_none=False))
        if b_init is not None:
            param['bias'] = init.param(b_init, self.out_size, allow_none=False)
        self.weight = param_type(param)

    def update(self, pre_val):
        post_val = pre_val * self.weight.value['weight']
        if 'bias' in self.weight.value:
            post_val = post_val + self.weight.value['bias']
        return post_val


class LoRA(Module):
    """
    Low-Rank Adaptation (LoRA) layer.

    Implements parameter-efficient fine-tuning using low-rank decomposition [1]_.
    Can be used standalone or as a wrapper around an existing module.

    Parameters
    ----------
    in_features : int
        The number of input features.
    lora_rank : int
        The rank of the low-rank decomposition. Lower rank means fewer parameters.
    out_features : int
        The number of output features.
    base_module : Module, optional
        A base module to wrap. If provided, the LoRA output will be added to
        the base module's output. Default is ``None``.
    kernel_init : Callable or ArrayLike, optional
        Initializer for the LoRA weight matrices. Default is ``LecunNormal()``.
    param_type : type, optional
        Type of parameter state. Default is ``ParamState``.

    Attributes
    ----------
    in_size : int
        Input feature size.
    out_size : int
        Output feature size.
    in_features : int
        Number of input features.
    out_features : int
        Number of output features.
    base_module : Module or None
        The wrapped base module if provided.
    weight : ParamState
        Parameter state containing 'lora_a' and 'lora_b' matrices.

    References
    ----------
    .. [1] Hu, E. J., Shen, Y., Wallis, P., Allen-Zhu, Z., Li, Y., Wang, S.,
           Wang, L., & Chen, W. (2021). LoRA: Low-Rank Adaptation of Large
           Language Models. arXiv preprint arXiv:2106.09685.

    Examples
    --------
    .. code-block:: python

        >>> import brainstate as bst
        >>> import jax.numpy as jnp
        >>>
        >>> # Standalone LoRA layer
        >>> layer = bst.nn.LoRA(in_features=10, lora_rank=2, out_features=5)
        >>> x = jnp.ones((32, 10))
        >>> y = layer(x)
        >>> y.shape
        (32, 5)
        >>>
        >>> # Wrap around existing linear layer
        >>> base = bst.nn.Linear((10,), (5,))
        >>> lora_layer = bst.nn.LoRA(in_features=10, lora_rank=2,
        ...                          out_features=5, base_module=base)
        >>> y = lora_layer(x)
        >>> y.shape
        (32, 5)
        >>>
        >>> # Check parameter count - LoRA has fewer parameters
        >>> # Base layer: 10 * 5 = 50 parameters
        >>> # LoRA: 10 * 2 + 2 * 5 = 30 parameters
    """
    __module__ = 'brainstate.nn'

    def __init__(
        self,
        in_features: int,
        lora_rank: int,
        out_features: int,
        *,
        base_module: Optional[Module] = None,
        kernel_init: Union[Callable, ArrayLike] = init.LecunNormal(),
        param_type: type = ParamState,
        in_size: Size = None,
    ):
        super().__init__()

        # input and output shape
        self.in_size = in_features
        self.out_size = out_features
        self.in_features = in_features
        self.out_features = out_features

        # others
        self.base_module = base_module

        # weights
        param = dict(
            lora_a=kernel_init((in_features, lora_rank)),
            lora_b=kernel_init((lora_rank, out_features))
        )
        self.weight = param_type(param)

        # in_size
        if in_size is not None:
            self.in_size = in_size
            self.out_size = tuple(self.in_size[:-1]) + (out_features,)

    def __call__(self, x: ArrayLike):
        out = x @ self.weight.value['lora_a'] @ self.weight.value['lora_b']
        if self.base_module is not None:
            if not callable(self.base_module):
                raise ValueError('`self.base_module` must be callable.')
            out += self.base_module(x)
        return out
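
The docstrings above already carry doctest-style snippets for each layer. As a quick end-to-end sketch of how the documented API composes across layers (a hedged example that simply mirrors the `Linear` and `LoRA` docstring usage, assuming `brainstate` is installed and behaves as those docstrings describe), the following runs the same shapes through both:

import brainstate as bst
import jax.numpy as jnp

x = jnp.ones((32, 10))

# Plain linear layer from the module above: y = x W + b.
linear = bst.nn.Linear((10,), (5,))
print(linear(x).shape)  # expected: (32, 5)

# LoRA adapter wrapping the same layer; its low-rank update
# (x @ lora_a @ lora_b) is added to the base module's output.
lora = bst.nn.LoRA(in_features=10, lora_rank=2, out_features=5, base_module=linear)
print(lora(x).shape)  # expected: (32, 5)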