brainstate 0.0.2.post20241009__py2.py3-none-any.whl → 0.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brainstate/__init__.py +31 -11
- brainstate/_state.py +760 -316
- brainstate/_state_test.py +41 -12
- brainstate/_utils.py +31 -4
- brainstate/augment/__init__.py +40 -0
- brainstate/augment/_autograd.py +608 -0
- brainstate/augment/_autograd_test.py +1193 -0
- brainstate/augment/_eval_shape.py +102 -0
- brainstate/augment/_eval_shape_test.py +40 -0
- brainstate/augment/_mapping.py +525 -0
- brainstate/augment/_mapping_test.py +210 -0
- brainstate/augment/_random.py +99 -0
- brainstate/{transform → compile}/__init__.py +25 -13
- brainstate/compile/_ad_checkpoint.py +204 -0
- brainstate/compile/_ad_checkpoint_test.py +51 -0
- brainstate/compile/_conditions.py +259 -0
- brainstate/compile/_conditions_test.py +221 -0
- brainstate/compile/_error_if.py +94 -0
- brainstate/compile/_error_if_test.py +54 -0
- brainstate/compile/_jit.py +314 -0
- brainstate/compile/_jit_test.py +143 -0
- brainstate/compile/_loop_collect_return.py +516 -0
- brainstate/compile/_loop_collect_return_test.py +59 -0
- brainstate/compile/_loop_no_collection.py +185 -0
- brainstate/compile/_loop_no_collection_test.py +51 -0
- brainstate/compile/_make_jaxpr.py +756 -0
- brainstate/compile/_make_jaxpr_test.py +134 -0
- brainstate/compile/_progress_bar.py +111 -0
- brainstate/compile/_unvmap.py +159 -0
- brainstate/compile/_util.py +147 -0
- brainstate/environ.py +408 -381
- brainstate/environ_test.py +34 -32
- brainstate/{nn/event → event}/__init__.py +6 -6
- brainstate/event/_csr.py +308 -0
- brainstate/event/_csr_test.py +118 -0
- brainstate/event/_fixed_probability.py +271 -0
- brainstate/event/_fixed_probability_test.py +128 -0
- brainstate/event/_linear.py +219 -0
- brainstate/event/_linear_test.py +112 -0
- brainstate/{nn/event → event}/_misc.py +7 -7
- brainstate/functional/_activations.py +521 -511
- brainstate/functional/_activations_test.py +300 -300
- brainstate/functional/_normalization.py +43 -43
- brainstate/functional/_others.py +15 -15
- brainstate/functional/_spikes.py +49 -49
- brainstate/graph/__init__.py +33 -0
- brainstate/graph/_graph_context.py +443 -0
- brainstate/graph/_graph_context_test.py +65 -0
- brainstate/graph/_graph_convert.py +246 -0
- brainstate/graph/_graph_node.py +300 -0
- brainstate/graph/_graph_node_test.py +75 -0
- brainstate/graph/_graph_operation.py +1746 -0
- brainstate/graph/_graph_operation_test.py +724 -0
- brainstate/init/_base.py +28 -10
- brainstate/init/_generic.py +175 -172
- brainstate/init/_random_inits.py +470 -415
- brainstate/init/_random_inits_test.py +150 -0
- brainstate/init/_regular_inits.py +66 -69
- brainstate/init/_regular_inits_test.py +51 -0
- brainstate/mixin.py +236 -244
- brainstate/mixin_test.py +44 -46
- brainstate/nn/__init__.py +26 -51
- brainstate/nn/_collective_ops.py +199 -0
- brainstate/nn/_dyn_impl/__init__.py +46 -0
- brainstate/nn/_dyn_impl/_dynamics_neuron.py +290 -0
- brainstate/nn/_dyn_impl/_dynamics_neuron_test.py +162 -0
- brainstate/nn/_dyn_impl/_dynamics_synapse.py +320 -0
- brainstate/nn/_dyn_impl/_dynamics_synapse_test.py +132 -0
- brainstate/nn/_dyn_impl/_inputs.py +154 -0
- brainstate/nn/{_projection/__init__.py → _dyn_impl/_projection_alignpost.py} +6 -13
- brainstate/nn/_dyn_impl/_rate_rnns.py +400 -0
- brainstate/nn/_dyn_impl/_rate_rnns_test.py +64 -0
- brainstate/nn/_dyn_impl/_readout.py +128 -0
- brainstate/nn/_dyn_impl/_readout_test.py +54 -0
- brainstate/nn/_dynamics/__init__.py +37 -0
- brainstate/nn/_dynamics/_dynamics_base.py +631 -0
- brainstate/nn/_dynamics/_dynamics_base_test.py +79 -0
- brainstate/nn/_dynamics/_projection_base.py +346 -0
- brainstate/nn/_dynamics/_state_delay.py +453 -0
- brainstate/nn/_dynamics/_synouts.py +161 -0
- brainstate/nn/_dynamics/_synouts_test.py +58 -0
- brainstate/nn/_elementwise/__init__.py +22 -0
- brainstate/nn/_elementwise/_dropout.py +418 -0
- brainstate/nn/_elementwise/_dropout_test.py +100 -0
- brainstate/nn/_elementwise/_elementwise.py +1122 -0
- brainstate/nn/_elementwise/_elementwise_test.py +171 -0
- brainstate/nn/_exp_euler.py +97 -0
- brainstate/nn/_exp_euler_test.py +36 -0
- brainstate/nn/_interaction/__init__.py +32 -0
- brainstate/nn/_interaction/_connections.py +726 -0
- brainstate/nn/_interaction/_connections_test.py +254 -0
- brainstate/nn/_interaction/_embedding.py +59 -0
- brainstate/nn/_interaction/_normalizations.py +388 -0
- brainstate/nn/_interaction/_normalizations_test.py +75 -0
- brainstate/nn/_interaction/_poolings.py +1179 -0
- brainstate/nn/_interaction/_poolings_test.py +219 -0
- brainstate/nn/_module.py +328 -0
- brainstate/nn/_module_test.py +211 -0
- brainstate/nn/metrics.py +309 -309
- brainstate/optim/__init__.py +14 -2
- brainstate/optim/_base.py +66 -0
- brainstate/optim/_lr_scheduler.py +363 -400
- brainstate/optim/_lr_scheduler_test.py +25 -24
- brainstate/optim/_optax_optimizer.py +103 -176
- brainstate/optim/_optax_optimizer_test.py +41 -1
- brainstate/optim/_sgd_optimizer.py +950 -1025
- brainstate/random/_rand_funs.py +3269 -3268
- brainstate/random/_rand_funs_test.py +568 -0
- brainstate/random/_rand_seed.py +149 -117
- brainstate/random/_rand_seed_test.py +50 -0
- brainstate/random/_rand_state.py +1360 -1318
- brainstate/random/_random_for_unit.py +13 -13
- brainstate/surrogate.py +1262 -1243
- brainstate/{nn/_projection/_utils.py → transform.py} +1 -2
- brainstate/typing.py +157 -130
- brainstate/util/__init__.py +52 -0
- brainstate/util/_caller.py +100 -0
- brainstate/util/_dict.py +734 -0
- brainstate/util/_dict_test.py +160 -0
- brainstate/util/_error.py +28 -0
- brainstate/util/_filter.py +178 -0
- brainstate/util/_others.py +497 -0
- brainstate/util/_pretty_repr.py +208 -0
- brainstate/util/_scaling.py +260 -0
- brainstate/util/_struct.py +524 -0
- brainstate/util/_tracers.py +75 -0
- brainstate/{_visualization.py → util/_visualization.py} +16 -16
- {brainstate-0.0.2.post20241009.dist-info → brainstate-0.1.0.dist-info}/METADATA +11 -11
- brainstate-0.1.0.dist-info/RECORD +135 -0
- brainstate/_module.py +0 -1637
- brainstate/_module_test.py +0 -207
- brainstate/nn/_base.py +0 -251
- brainstate/nn/_connections.py +0 -686
- brainstate/nn/_dynamics.py +0 -426
- brainstate/nn/_elementwise.py +0 -1438
- brainstate/nn/_embedding.py +0 -66
- brainstate/nn/_misc.py +0 -133
- brainstate/nn/_normalizations.py +0 -389
- brainstate/nn/_others.py +0 -101
- brainstate/nn/_poolings.py +0 -1229
- brainstate/nn/_poolings_test.py +0 -231
- brainstate/nn/_projection/_align_post.py +0 -546
- brainstate/nn/_projection/_align_pre.py +0 -599
- brainstate/nn/_projection/_delta.py +0 -241
- brainstate/nn/_projection/_vanilla.py +0 -101
- brainstate/nn/_rate_rnns.py +0 -410
- brainstate/nn/_readout.py +0 -136
- brainstate/nn/_synouts.py +0 -166
- brainstate/nn/event/csr.py +0 -312
- brainstate/nn/event/csr_test.py +0 -118
- brainstate/nn/event/fixed_probability.py +0 -276
- brainstate/nn/event/fixed_probability_test.py +0 -127
- brainstate/nn/event/linear.py +0 -220
- brainstate/nn/event/linear_test.py +0 -111
- brainstate/random/random_test.py +0 -593
- brainstate/transform/_autograd.py +0 -585
- brainstate/transform/_autograd_test.py +0 -1181
- brainstate/transform/_conditions.py +0 -334
- brainstate/transform/_conditions_test.py +0 -220
- brainstate/transform/_error_if.py +0 -94
- brainstate/transform/_error_if_test.py +0 -55
- brainstate/transform/_jit.py +0 -265
- brainstate/transform/_jit_test.py +0 -118
- brainstate/transform/_loop_collect_return.py +0 -502
- brainstate/transform/_loop_no_collection.py +0 -170
- brainstate/transform/_make_jaxpr.py +0 -739
- brainstate/transform/_make_jaxpr_test.py +0 -131
- brainstate/transform/_mapping.py +0 -109
- brainstate/transform/_progress_bar.py +0 -111
- brainstate/transform/_unvmap.py +0 -143
- brainstate/util.py +0 -746
- brainstate-0.0.2.post20241009.dist-info/RECORD +0 -87
- {brainstate-0.0.2.post20241009.dist-info → brainstate-0.1.0.dist-info}/LICENSE +0 -0
- {brainstate-0.0.2.post20241009.dist-info → brainstate-0.1.0.dist-info}/WHEEL +0 -0
- {brainstate-0.0.2.post20241009.dist-info → brainstate-0.1.0.dist-info}/top_level.txt +0 -0
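Several of the renames in the list above move public modules: brainstate/transform becomes brainstate/compile, brainstate/nn/event is promoted to brainstate/event, the single brainstate/util.py becomes the brainstate/util package, and new augment and graph subpackages appear. The sketch below shows what downstream imports may look like after the reorganization; the subpackage paths are taken from the rename list, but whether each name is re-exported unchanged in 0.1.0 is an assumption to verify against the installed wheel.

# Hypothetical migration sketch (not taken from the package's own docs).
from brainstate import compile   # was: from brainstate import transform
from brainstate import event     # was: from brainstate.nn import event
from brainstate import augment   # new subpackage in 0.1.0
from brainstate import graph     # new subpackage in 0.1.0
from brainstate import util      # util.py is now the util/ package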
brainstate/optim/_lr_scheduler.py
@@ -14,6 +14,7 @@
 # ==============================================================================
 
 # -*- coding: utf-8 -*-
+from __future__ import annotations
 
 from typing import Sequence, Union
 
@@ -21,22 +22,22 @@ import jax
 import jax.numpy as jnp
 import numpy as np
 
-from
-from
-from
+from brainstate import environ
+from brainstate._state import State, LongTermState
+from brainstate.graph import Node
 
 __all__ = [
-(11 removed lines, old 29-39: content not rendered in the source view)
+  'LearningRateScheduler',
+  'ConstantLR',
+  'StepLR',
+  'MultiStepLR',
+  'CosineAnnealingLR',
+  'CosineAnnealingWarmRestarts',
+  'ExponentialLR',
+  'ExponentialDecayLR',
+  'InverseTimeDecayLR',
+  'PolynomialDecayLR',
+  'PiecewiseConstantLR',
 ]
 
 
@@ -45,442 +46,404 @@ __all__ = [
 
 
 def make_schedule(scalar_or_schedule):
-
-
-
-
-  else:
-    raise TypeError(type(scalar_or_schedule))
-
-
-class LearningRateScheduler(Module):
-  """
-  The learning rate scheduler.
-
-  Attributes
-  ----------
-  lr: float, State
-    The learning rate.
-  last_epoch: int
-    The index of last epoch.
-
-  """
-
-  def __init__(self, lr: Union[float, State], last_epoch: int = -1):
-    super().__init__()
-    if isinstance(lr, State):
-      lr.value = jnp.asarray(lr.value, dtype=environ.dftype())
+  if isinstance(scalar_or_schedule, LearningRateScheduler):
+    return scalar_or_schedule
+  elif isinstance(scalar_or_schedule, (int, float, State)):
+    return ConstantLR(scalar_or_schedule)
   else:
-
-    self._lr = lr
-    assert last_epoch >= -1, 'last_epoch should be greater than -1.'
-    self.last_epoch = LongTermState(jnp.asarray(last_epoch, dtype=environ.ditype()))
-
-  @property
-  def lr(self):
-    return self._lr.value if isinstance(self._lr, State) else self._lr
-
-  @lr.setter
-  def lr(self, value):
-    if isinstance(value, State):
-      value = value.value
-    assert jnp.ndim(value) == 0, 'The learning rate should be a scalar.'
-    if isinstance(self._lr, State):
-      self._lr.value = value
-    else:
-      self._lr = value
+    raise TypeError(type(scalar_or_schedule))
 
-  def step_epoch(self):
-    """
-    Update the epoch count.
-    """
-    self.last_epoch.value += 1
 
-
-    """
-    Update the call count.
+class LearningRateScheduler(Node):
   """
-
+  The learning rate scheduler.
 
-
-
+  Parameters
+  ----------
+  lr: float, State
+    The learning rate.
+  last_epoch: int
+    The index of last epoch.
 
-
-    return ''
+  """
 
-
-
+  def __init__(self, lr: Union[float, State], last_epoch: int = -1):
+    super().__init__()
+    if isinstance(lr, State):
+      lr.value = jnp.asarray(lr.value, dtype=environ.dftype())
+    else:
+      lr = jnp.asarray(lr, dtype=environ.dftype())
+    self._lr = lr
+    assert last_epoch >= -1, 'last_epoch should be greater than -1.'
+    self.last_epoch = LongTermState(jnp.asarray(last_epoch, dtype=environ.ditype()))
+
+  @property
+  def lr(self):
+    return self._lr.value if isinstance(self._lr, State) else self._lr
+
+  @lr.setter
+  def lr(self, value):
+    if isinstance(value, State):
+      value = value.value
+    assert jnp.ndim(value) == 0, 'The learning rate should be a scalar.'
+    if isinstance(self._lr, State):
+      self._lr.value = value
+    else:
+      self._lr = value
+
+  def step_epoch(self):
+    """
+    Update the epoch count.
+    """
+    self.last_epoch.value += 1
+
+  def step_call(self):
+    """
+    Update the call count.
+    """
+    pass
+
+  def __call__(self, i=None):
+    raise NotImplementedError
 
 
 class ConstantLR(LearningRateScheduler):
-
-
-
+  """
+  Constant learning rate scheduler.
+  """
 
-
-
+  def __call__(self, i=None):
+    return self.lr
 
 
 class CallBasedLRScheduler(LearningRateScheduler):
-
-
+  """
+  The learning rate scheduler based on the call count.
+
+  Parameters
+  ----------
+  lr: float
+    The learning rate.
+  last_epoch: int
+    The index of last epoch.
+  last_call: int
+    The index of last call.
 
-
-  ----------
-  lr: float
-    The learning rate.
-  last_epoch: int
-    The index of last epoch.
-  last_call: int
-    The index of last call.
+  """
 
-
+  def __init__(self, lr: Union[float, State], last_epoch: int = -1, last_call: int = -1):
+    super().__init__(lr=lr, last_epoch=last_epoch)
 
-
-
+    assert last_call >= -1, 'last_call should be greater than -1.'
+    self.last_call = LongTermState(jnp.asarray(last_call, dtype=environ.ditype()))
 
-
-
+  def step_call(self):
+    """
+    Update the call count.
+    """
+    self.last_call.value += 1
 
-
-
-
+
+class StepLR(LearningRateScheduler):
+  """Decays the learning rate of each parameter group by gamma every
+  `step_size` epochs.
+
+  Parameters
+  ----------
+  lr: float
+    Initial learning rate.
+  step_size: int
+    Period of learning rate decay.
+  gamma: float
+    Multiplicative factor of learning rate decay.
+    Default: 0.1.
+  last_epoch: int
+    The index of last epoch. Default: -1.
   """
-    self.last_call.value += 1
 
-
-
-
-
+  def __init__(
+      self,
+      lr: float,
+      step_size: int,
+      gamma: float = 0.1,
+      last_epoch: int = -1
+  ):
+    super().__init__(lr=lr, last_epoch=last_epoch)
 
+    assert step_size >= 1, 'step_size should be greater than or equal to 1.'
+    assert 1. >= gamma >= 0, 'gamma should be in the range [0, 1].'
+    self.step_size = step_size
+    self.gamma = gamma
 
-
-
-
-
-  Parameters
-  ----------
-  lr: float
-    Initial learning rate.
-  step_size: int
-    Period of learning rate decay.
-  gamma: float
-    Multiplicative factor of learning rate decay.
-    Default: 0.1.
-  last_epoch: int
-    The index of last epoch. Default: -1.
-  """
-
-  def __init__(
-      self,
-      lr: float,
-      step_size: int,
-      gamma: float = 0.1,
-      last_epoch: int = -1
-  ):
-    super().__init__(lr=lr, last_epoch=last_epoch)
-
-    assert step_size >= 1, 'step_size should be greater than or equal to 1.'
-    assert 1. >= gamma >= 0, 'gamma should be in the range [0, 1].'
-    self.step_size = step_size
-    self.gamma = gamma
-
-  def __call__(self, i=None):
-    i = (self.last_epoch.value + 1) if i is None else i
-    return self.lr * self.gamma ** (jnp.floor_divide(i, self.step_size))
-
-  def extra_repr(self):
-    return f', gamma={self.gamma}, step_size={self.step_size}'
+  def __call__(self, i=None):
+    i = (self.last_epoch.value + 1) if i is None else i
+    return self.lr * self.gamma ** (jnp.floor_divide(i, self.step_size))
 
 
 class MultiStepLR(LearningRateScheduler):
-(41 removed lines, old 197-237: content not rendered in the source view)
-  def extra_repr(self):
-    return f', milestones={self.milestones}, gamma={self.gamma}'
+  """Decays the learning rate of each parameter group by gamma once the
+  number of epoch reaches one of the milestones. Notice that such decay can
+  happen simultaneously with other changes to the learning rate from outside
+  this scheduler. When last_epoch=-1, sets initial lr as lr.
+
+  Parameters
+  ----------
+  lr: float
+    Initial learning rate.
+  milestones: sequence of int
+    List of epoch indices. Must be increasing.
+  gamma: float
+    Multiplicative factor of learning rate decay.
+    Default: 0.1.
+  last_epoch: int
+    The index of last epoch. Default: -1.
+  """
+
+  def __init__(
+      self,
+      lr: float,
+      milestones: Sequence[int],
+      gamma: float = 0.1,
+      last_epoch: int = -1
+  ):
+    super().__init__(lr=lr, last_epoch=last_epoch)
+
+    assert len(milestones) > 0, 'milestones should be a non-empty sequence.'
+    assert all([milestones[i] < milestones[i + 1] for i in range(len(milestones) - 1)]), (
+      'milestones should be a sequence of increasing integers.'
+    )
+    assert 1. >= gamma >= 0, 'gamma should be in the range [0, 1].'
+    self.milestones = jnp.asarray((-1,) + tuple(milestones) + (np.iinfo(np.int32).max,), dtype=environ.ditype())
+    self.gamma = gamma
+
+  def __call__(self, i=None):
+    i = (self.last_epoch.value + 1) if i is None else i
+    conditions = jnp.logical_and((i >= self.milestones[:-1]), (i < self.milestones[1:]))
+    p = jnp.argmax(conditions)
+    return self.lr * self.gamma ** p
 
 
 class CosineAnnealingLR(LearningRateScheduler):
-(42 removed lines, old 243-284: content not rendered in the source view)
-  def __init__(
-      self,
-      lr: float,
-      T_max: int,
-      eta_min: float = 0.,
-      last_epoch: int = -1,
-  ):
-    super().__init__(lr=lr, last_epoch=last_epoch)
-
-    assert T_max >= 1, 'T_max should be greater than or equal to 1.'
-    self._init_epoch = last_epoch
-    self.T_max = T_max
-    self.eta_min = eta_min
-
-  def __call__(self, i=None):
-    i = (self.last_epoch.value + 1) if i is None else i
-    return self.eta_min + (self.lr - self.eta_min) * (1 + jnp.cos(jnp.pi * i / self.T_max)) / 2
-
-  def extra_repr(self):
-    return f', T_max={self.T_max}, eta_min={self.eta_min}'
+  r"""Set the learning rate of each parameter group using a cosine annealing
+  schedule, where :math:`\eta_{max}` is set to the initial lr and
+  :math:`T_{cur}` is the number of epochs since the last restart in SGDR:
+
+  .. math::
+    \begin{aligned}
+      \eta_t & = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1
+      + \cos\left(\frac{T_{cur}}{T_{max}}\pi\right)\right),
+      & T_{cur} \neq (2k+1)T_{max}; \\
+      \eta_{t+1} & = \eta_{t} + \frac{1}{2}(\eta_{max} - \eta_{min})
+      \left(1 - \cos\left(\frac{1}{T_{max}}\pi\right)\right),
+      & T_{cur} = (2k+1)T_{max}.
+    \end{aligned}
+
+  When last_epoch=-1, sets initial lr as lr. Notice that because the schedule
+  is defined recursively, the learning rate can be simultaneously modified
+  outside this scheduler by other operators. If the learning rate is set
+  solely by this scheduler, the learning rate at each step becomes:
+
+  .. math::
+    \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1 +
+    \cos\left(\frac{T_{cur}}{T_{max}}\pi\right)\right)
+
+  It has been proposed in
+  `SGDR: Stochastic Gradient Descent with Warm Restarts`_. Note that this only
+  implements the cosine annealing part of SGDR, and not the restarts.
+
+  Parameters
+  ----------
+  lr: float
+    Initial learning rate.
+  T_max: int
+    Maximum number of iterations.
+  eta_min: float
+    Minimum learning rate. Default: 0.
+  last_epoch: int
+    The index of last epoch. Default: -1.
+
+  .. _SGDR\: Stochastic Gradient Descent with Warm Restarts:
+    https://arxiv.org/abs/1608.03983
+  """
 
+  def __init__(
+      self,
+      lr: float,
+      T_max: int,
+      eta_min: float = 0.,
+      last_epoch: int = -1,
+  ):
+    super().__init__(lr=lr, last_epoch=last_epoch)
 
-
-
-
-
-  of epochs between two warm restarts in SGDR:
-
-  .. math::
-    \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1 +
-    \cos\left(\frac{T_{cur}}{T_{i}}\pi\right)\right)
-
-  When :math:`T_{cur}=T_{i}`, set :math:`\eta_t = \eta_{min}`.
-  When :math:`T_{cur}=0` after restart, set :math:`\eta_t=\eta_{max}`.
-
-  It has been proposed in
-  `SGDR: Stochastic Gradient Descent with Warm Restarts`_.
-
-  Parameters
-  ----------
-  lr: float
-    Initial learning rate.
-  num_call_per_epoch: int
-    The number the scheduler to call in each epoch.
-    This usually means the number of batch in each epoch training.
-  T_0: int
-    Number of iterations for the first restart.
-  T_mult: int
-    A factor increases :math:`T_{i}` after a restart. Default: 1.
-  eta_min: float
-    Minimum learning rate. Default: 0.
-  last_call: int
-    The index of last call. Default: -1.
-
-  .. _SGDR\: Stochastic Gradient Descent with Warm Restarts:
-    https://arxiv.org/abs/1608.03983
-  """
-
-  def __init__(
-      self,
-      lr: float,
-      num_call_per_epoch: int,
-      T_0: int,
-      T_mult: int = 1,
-      eta_min: float = 0.,
-      last_epoch: int = -1,
-      last_call: int = -1
-  ):
-    super().__init__(lr=lr, last_call=last_call, last_epoch=last_epoch)
-    if T_0 <= 0 or not isinstance(T_0, int):
-      raise ValueError("Expected positive integer T_0, but got {}".format(T_0))
-    if T_mult < 1 or not isinstance(T_mult, int):
-      raise ValueError("Expected integer T_mult >= 1, but got {}".format(T_mult))
-
-    self.T_mult = T_mult
-    self.eta_min = eta_min
-    self.T_0 = T_0
-    self.num_call_per_epoch = num_call_per_epoch
-
-  def _cond1(self, epoch):
-    if self.T_mult == 1:
-      T_cur = epoch % self.T_0
-      T_i = self.T_0
-    else:
-      n = jnp.floor(jnp.log(epoch / self.T_0 * (self.T_mult - 1) + 1) / jnp.log(self.T_mult))
-      T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / (self.T_mult - 1)
-      T_i = self.T_0 * self.T_mult ** n
-    return T_cur, T_i
+    assert T_max >= 1, 'T_max should be greater than or equal to 1.'
+    self._init_epoch = last_epoch
+    self.T_max = T_max
+    self.eta_min = eta_min
 
-
-
+  def __call__(self, i=None):
+    i = (self.last_epoch.value + 1) if i is None else i
+    return self.eta_min + (self.lr - self.eta_min) * (1 + jnp.cos(jnp.pi * i / self.T_max)) / 2
 
-  def __call__(self, i=None):
-    epoch = self.current_epoch(i)
-    T_cur, T_i = jax.lax.cond(epoch >= self.T_0, self._cond1, self._cond2, epoch)
-    return self.eta_min + (self.lr - self.eta_min) * (1 + jnp.cos(jnp.pi * T_cur / T_i)) / 2
 
-
-
-
+class CosineAnnealingWarmRestarts(CallBasedLRScheduler):
+  """Set the learning rate of each parameter group using a cosine annealing
+  schedule, where :math:`\eta_{max}` is set to the initial lr, :math:`T_{cur}`
+  is the number of epochs since the last restart and :math:`T_{i}` is the number
+  of epochs between two warm restarts in SGDR:
+
+  .. math::
+    \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1 +
+    \cos\left(\frac{T_{cur}}{T_{i}}\pi\right)\right)
+
+  When :math:`T_{cur}=T_{i}`, set :math:`\eta_t = \eta_{min}`.
+  When :math:`T_{cur}=0` after restart, set :math:`\eta_t=\eta_{max}`.
+
+  It has been proposed in
+  `SGDR: Stochastic Gradient Descent with Warm Restarts`_.
+
+  Parameters
+  ----------
+  lr: float
+    Initial learning rate.
+  num_call_per_epoch: int
+    The number the scheduler to call in each epoch.
+    This usually means the number of batch in each epoch training.
+  T_0: int
+    Number of iterations for the first restart.
+  T_mult: int
+    A factor increases :math:`T_{i}` after a restart. Default: 1.
+  eta_min: float
+    Minimum learning rate. Default: 0.
+  last_call: int
+    The index of last call. Default: -1.
+
+  .. _SGDR\: Stochastic Gradient Descent with Warm Restarts:
+    https://arxiv.org/abs/1608.03983
+  """
 
-
-
+  def __init__(
+      self,
+      lr: float,
+      num_call_per_epoch: int,
+      T_0: int,
+      T_mult: int = 1,
+      eta_min: float = 0.,
+      last_epoch: int = -1,
+      last_call: int = -1
+  ):
+    super().__init__(lr=lr, last_call=last_call, last_epoch=last_epoch)
+    if T_0 <= 0 or not isinstance(T_0, int):
+      raise ValueError("Expected positive integer T_0, but got {}".format(T_0))
+    if T_mult < 1 or not isinstance(T_mult, int):
+      raise ValueError("Expected integer T_mult >= 1, but got {}".format(T_mult))
+
+    self.T_mult = T_mult
+    self.eta_min = eta_min
+    self.T_0 = T_0
+    self.num_call_per_epoch = num_call_per_epoch
+
+  def _cond1(self, epoch):
+    if self.T_mult == 1:
+      T_cur = epoch % self.T_0
+      T_i = self.T_0
+    else:
+      n = jnp.floor(jnp.log(epoch / self.T_0 * (self.T_mult - 1) + 1) / jnp.log(self.T_mult))
+      T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / (self.T_mult - 1)
+      T_i = self.T_0 * self.T_mult ** n
+    return T_cur, T_i
+
+  def _cond2(self, epoch):
+    return epoch, self.T_0
+
+  def __call__(self, i=None):
+    epoch = self.current_epoch(i)
+    T_cur, T_i = jax.lax.cond(epoch >= self.T_0, self._cond1, self._cond2, epoch)
+    return self.eta_min + (self.lr - self.eta_min) * (1 + jnp.cos(jnp.pi * T_cur / T_i)) / 2
+
+  def current_epoch(self, i=None):
+    i = (self.last_call.value + 1) if i is None else i
+    return jnp.floor(i / self.num_call_per_epoch)
 
 
 class ExponentialLR(LearningRateScheduler):
-(13 removed lines, old 391-403: content not rendered in the source view)
-  def __init__(self,
-               lr: float,
-               gamma: float,
-               last_epoch: int = -1):
-    super(ExponentialLR, self).__init__(lr=lr, last_epoch=last_epoch)
-    assert 1. >= gamma >= 0, 'gamma should be in the range [0, 1].'
-    self.gamma = gamma
-
-  def __call__(self, i: int = None):
-    i = (self.last_epoch.value + 1) if i is None else i
-    return self.lr * self.gamma ** i
-
-  def extra_repr(self):
-    return f', gamma={self.gamma}'
+  """Decays the learning rate of each parameter group by gamma every epoch.
+  When last_epoch=-1, sets initial lr as lr.
+
+  Parameters
+  ----------
+  lr: float
+    Initial learning rate.
+  gamma: float
+    Multiplicative factor of learning rate decay.
+  last_epoch: int
+    The index of last epoch. Default: -1.
+  """
 
+  def __init__(self,
+               lr: float,
+               gamma: float,
+               last_epoch: int = -1):
+    super(ExponentialLR, self).__init__(lr=lr, last_epoch=last_epoch)
+    assert 1. >= gamma >= 0, 'gamma should be in the range [0, 1].'
+    self.gamma = gamma
+
+  def __call__(self, i: int = None):
+    i = (self.last_epoch.value + 1) if i is None else i
+    return self.lr * self.gamma ** i
 
-class ExponentialDecayLR(CallBasedLRScheduler):
-  def __init__(self, lr, decay_steps, decay_rate, last_epoch: int = -1, last_call: int = -1):
-    super().__init__(lr=lr, last_epoch=last_epoch, last_call=last_call)
-    self.decay_steps = decay_steps
-    self.decay_rate = decay_rate
 
-
-
-
+class ExponentialDecayLR(CallBasedLRScheduler):
+  def __init__(self, lr, decay_steps, decay_rate, last_epoch: int = -1, last_call: int = -1):
+    super().__init__(lr=lr, last_epoch=last_epoch, last_call=last_call)
+    self.decay_steps = decay_steps
+    self.decay_rate = decay_rate
 
-
-
+  def __call__(self, i=None):
+    i = (self.last_call.value + 1) if i is None else i
+    return self.lr * self.decay_rate ** (i / self.decay_steps)
 
 
 class InverseTimeDecayLR(ExponentialDecayLR):
-(5 removed lines, old 435-439: content not rendered in the source view)
-  def __call__(self, i=None):
-    i = (self.last_call.value + 1) if i is None else i
-    if self.staircase:
-      return self.lr / (1 + self.decay_rate * jnp.floor(i / self.decay_steps))
-    else:
-      return self.lr / (1 + self.decay_rate * i / self.decay_steps)
+  def __init__(self, lr, decay_steps, decay_rate, staircase=False,
+               last_epoch: int = -1, last_call: int = -1):
+    super().__init__(lr, decay_steps, decay_rate, last_epoch=last_epoch, last_call=last_call)
+    self.staircase = staircase
 
-
-
+  def __call__(self, i=None):
+    i = (self.last_call.value + 1) if i is None else i
+    if self.staircase:
+      return self.lr / (1 + self.decay_rate * jnp.floor(i / self.decay_steps))
+    else:
+      return self.lr / (1 + self.decay_rate * i / self.decay_steps)
 
 
 class PolynomialDecayLR(CallBasedLRScheduler):
-(6 removed lines, old 452-457: content not rendered in the source view)
-  def __call__(self, i=None):
-    i = (self.last_call.value + 1) if i is None else i
-    i = jnp.minimum(i, self.decay_steps)
-    step_mult = (1 - i / self.decay_steps) ** self.power
-    return step_mult * (self.lr - self.final_lr) + self.final_lr
+  def __init__(self, lr, decay_steps, final_lr, power=1.0, last_epoch: int = -1, last_call: int = -1):
+    super(PolynomialDecayLR, self).__init__(lr, last_epoch=last_epoch, last_call=last_call)
+    self.decay_steps = decay_steps
+    self.final_lr = final_lr
+    self.power = power
 
-
-
+  def __call__(self, i=None):
+    i = (self.last_call.value + 1) if i is None else i
+    i = jnp.minimum(i, self.decay_steps)
+    step_mult = (1 - i / self.decay_steps) ** self.power
+    return step_mult * (self.lr - self.final_lr) + self.final_lr
 
 
 class PiecewiseConstantLR(CallBasedLRScheduler):
-(16 removed lines, old 469-484: content not rendered in the source view)
-  def extra_repr(self):
-    return f', boundaries={self.boundaries}, values={self.values}'
+  def __init__(self, boundaries, values, last_epoch: int = -1, last_call: int = -1):
+    super().__init__(0., last_epoch=last_epoch, last_call=last_call)
+
+    boundaries = jnp.array(boundaries)
+    values = jnp.array(values)
+    if not boundaries.ndim == values.ndim == 1:
+      raise ValueError("boundaries and values must be sequences")
+    if not boundaries.shape[0] == values.shape[0] - 1:
+      raise ValueError("boundaries length must be one shorter than values length")
+    self.boundaries = boundaries
+    self.values = values
+
+  def __call__(self, i=None):
+    i = (self.last_call.value + 1) if i is None else i
+    return self.values[jnp.sum(i > self.boundaries)]
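For orientation, the following sketch exercises the scheduler API added above. The StepLR constructor, __call__(i=None), and step_epoch() come from the new lines of this diff; the brainstate.optim import path and the training-loop wiring are assumptions for illustration only.

# Usage sketch, assuming brainstate.optim re-exports the schedulers defined in _lr_scheduler.py.
import brainstate as bst

scheduler = bst.optim.StepLR(lr=0.1, step_size=10, gamma=0.5)  # lr * gamma ** floor(i / step_size)

for epoch in range(30):
    lr = scheduler()        # with no index, evaluates at last_epoch.value + 1
    # ... run one training epoch using `lr` ...
    scheduler.step_epoch()  # advance the LongTermState epoch counter

lr_at_25 = scheduler(25)    # query an explicit epoch index: 0.1 * 0.5 ** 2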