brainstate 0.1.8__py2.py3-none-any.whl → 0.1.9__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. brainstate/__init__.py +58 -51
  2. brainstate/_compatible_import.py +148 -148
  3. brainstate/_state.py +1605 -1663
  4. brainstate/_state_test.py +52 -52
  5. brainstate/_utils.py +47 -47
  6. brainstate/augment/__init__.py +30 -30
  7. brainstate/augment/_autograd.py +778 -778
  8. brainstate/augment/_autograd_test.py +1289 -1289
  9. brainstate/augment/_eval_shape.py +99 -99
  10. brainstate/augment/_eval_shape_test.py +38 -38
  11. brainstate/augment/_mapping.py +1060 -1060
  12. brainstate/augment/_mapping_test.py +597 -597
  13. brainstate/augment/_random.py +151 -151
  14. brainstate/compile/__init__.py +38 -38
  15. brainstate/compile/_ad_checkpoint.py +204 -204
  16. brainstate/compile/_ad_checkpoint_test.py +49 -49
  17. brainstate/compile/_conditions.py +256 -256
  18. brainstate/compile/_conditions_test.py +220 -220
  19. brainstate/compile/_error_if.py +92 -92
  20. brainstate/compile/_error_if_test.py +52 -52
  21. brainstate/compile/_jit.py +346 -346
  22. brainstate/compile/_jit_test.py +143 -143
  23. brainstate/compile/_loop_collect_return.py +536 -536
  24. brainstate/compile/_loop_collect_return_test.py +58 -58
  25. brainstate/compile/_loop_no_collection.py +184 -184
  26. brainstate/compile/_loop_no_collection_test.py +50 -50
  27. brainstate/compile/_make_jaxpr.py +888 -888
  28. brainstate/compile/_make_jaxpr_test.py +156 -156
  29. brainstate/compile/_progress_bar.py +202 -202
  30. brainstate/compile/_unvmap.py +159 -159
  31. brainstate/compile/_util.py +147 -147
  32. brainstate/environ.py +563 -563
  33. brainstate/environ_test.py +62 -62
  34. brainstate/functional/__init__.py +27 -26
  35. brainstate/graph/__init__.py +29 -29
  36. brainstate/graph/_graph_node.py +244 -244
  37. brainstate/graph/_graph_node_test.py +73 -73
  38. brainstate/graph/_graph_operation.py +1738 -1738
  39. brainstate/graph/_graph_operation_test.py +563 -563
  40. brainstate/init/__init__.py +26 -26
  41. brainstate/init/_base.py +52 -52
  42. brainstate/init/_generic.py +244 -244
  43. brainstate/init/_random_inits.py +553 -553
  44. brainstate/init/_random_inits_test.py +149 -149
  45. brainstate/init/_regular_inits.py +105 -105
  46. brainstate/init/_regular_inits_test.py +50 -50
  47. brainstate/mixin.py +365 -363
  48. brainstate/mixin_test.py +77 -73
  49. brainstate/nn/__init__.py +135 -131
  50. brainstate/{functional → nn}/_activations.py +808 -813
  51. brainstate/{functional → nn}/_activations_test.py +331 -331
  52. brainstate/nn/_collective_ops.py +514 -514
  53. brainstate/nn/_collective_ops_test.py +43 -43
  54. brainstate/nn/_common.py +178 -178
  55. brainstate/nn/_conv.py +501 -501
  56. brainstate/nn/_conv_test.py +238 -238
  57. brainstate/nn/_delay.py +509 -502
  58. brainstate/nn/_delay_test.py +238 -184
  59. brainstate/nn/_dropout.py +426 -426
  60. brainstate/nn/_dropout_test.py +100 -100
  61. brainstate/nn/_dynamics.py +1343 -1343
  62. brainstate/nn/_dynamics_test.py +78 -78
  63. brainstate/nn/_elementwise.py +1119 -1119
  64. brainstate/nn/_elementwise_test.py +169 -169
  65. brainstate/nn/_embedding.py +58 -58
  66. brainstate/nn/_exp_euler.py +92 -92
  67. brainstate/nn/_exp_euler_test.py +35 -35
  68. brainstate/nn/_fixedprob.py +239 -239
  69. brainstate/nn/_fixedprob_test.py +114 -114
  70. brainstate/nn/_inputs.py +608 -608
  71. brainstate/nn/_linear.py +424 -424
  72. brainstate/nn/_linear_mv.py +83 -83
  73. brainstate/nn/_linear_mv_test.py +120 -120
  74. brainstate/nn/_linear_test.py +107 -107
  75. brainstate/nn/_ltp.py +28 -28
  76. brainstate/nn/_module.py +377 -377
  77. brainstate/nn/_module_test.py +40 -40
  78. brainstate/nn/_neuron.py +705 -705
  79. brainstate/nn/_neuron_test.py +161 -161
  80. brainstate/nn/_normalizations.py +975 -918
  81. brainstate/nn/_normalizations_test.py +73 -73
  82. brainstate/{functional → nn}/_others.py +46 -46
  83. brainstate/nn/_poolings.py +1177 -1177
  84. brainstate/nn/_poolings_test.py +217 -217
  85. brainstate/nn/_projection.py +486 -486
  86. brainstate/nn/_rate_rnns.py +554 -554
  87. brainstate/nn/_rate_rnns_test.py +63 -63
  88. brainstate/nn/_readout.py +209 -209
  89. brainstate/nn/_readout_test.py +53 -53
  90. brainstate/nn/_stp.py +236 -236
  91. brainstate/nn/_synapse.py +505 -505
  92. brainstate/nn/_synapse_test.py +131 -131
  93. brainstate/nn/_synaptic_projection.py +423 -423
  94. brainstate/nn/_synouts.py +162 -162
  95. brainstate/nn/_synouts_test.py +57 -57
  96. brainstate/nn/_utils.py +89 -89
  97. brainstate/nn/metrics.py +388 -388
  98. brainstate/optim/__init__.py +38 -38
  99. brainstate/optim/_base.py +64 -64
  100. brainstate/optim/_lr_scheduler.py +448 -448
  101. brainstate/optim/_lr_scheduler_test.py +50 -50
  102. brainstate/optim/_optax_optimizer.py +152 -152
  103. brainstate/optim/_optax_optimizer_test.py +53 -53
  104. brainstate/optim/_sgd_optimizer.py +1104 -1104
  105. brainstate/random/__init__.py +24 -24
  106. brainstate/random/_rand_funs.py +3616 -3616
  107. brainstate/random/_rand_funs_test.py +567 -567
  108. brainstate/random/_rand_seed.py +210 -210
  109. brainstate/random/_rand_seed_test.py +48 -48
  110. brainstate/random/_rand_state.py +1409 -1409
  111. brainstate/random/_random_for_unit.py +52 -52
  112. brainstate/surrogate.py +1957 -1957
  113. brainstate/transform.py +23 -23
  114. brainstate/typing.py +304 -304
  115. brainstate/util/__init__.py +50 -50
  116. brainstate/util/caller.py +98 -98
  117. brainstate/util/error.py +55 -55
  118. brainstate/util/filter.py +469 -469
  119. brainstate/util/others.py +540 -540
  120. brainstate/util/pretty_pytree.py +945 -945
  121. brainstate/util/pretty_pytree_test.py +159 -159
  122. brainstate/util/pretty_repr.py +328 -328
  123. brainstate/util/pretty_table.py +2954 -2954
  124. brainstate/util/scaling.py +258 -258
  125. brainstate/util/struct.py +523 -523
  126. {brainstate-0.1.8.dist-info → brainstate-0.1.9.dist-info}/METADATA +91 -99
  127. brainstate-0.1.9.dist-info/RECORD +130 -0
  128. {brainstate-0.1.8.dist-info → brainstate-0.1.9.dist-info}/WHEEL +1 -1
  129. {brainstate-0.1.8.dist-info → brainstate-0.1.9.dist-info/licenses}/LICENSE +202 -202
  130. brainstate/functional/_normalization.py +0 -81
  131. brainstate/functional/_spikes.py +0 -204
  132. brainstate-0.1.8.dist-info/RECORD +0 -132
  133. {brainstate-0.1.8.dist-info → brainstate-0.1.9.dist-info}/top_level.txt +0 -0
@@ -1,204 +1,204 @@
1
- # Copyright 2024 BDP Ecosystem Limited. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
-
16
- import functools
17
- from typing import Callable, Tuple, Union
18
-
19
- import jax
20
-
21
- from brainstate.typing import Missing
22
- from ._make_jaxpr import StatefulFunction, _ensure_index_tuple
23
- from ._util import write_back_state_values
24
-
25
- __all__ = [
26
- 'checkpoint',
27
- 'remat'
28
- ]
29
-
30
-
31
- def checkpoint(
32
- fun: Callable = Missing(),
33
- *,
34
- prevent_cse: bool = True,
35
- policy: Callable[..., bool] | None = None,
36
- static_argnums: int | Tuple[int, ...] = (),
37
- ) -> Union[Callable, Callable[[Callable], Callable]]:
38
- """Make ``fun`` recompute internal linearization points when differentiated.
39
-
40
- The :func:`jax.checkpoint` decorator, aliased to :func:`jax.remat`, provides a
41
- way to trade off computation time and memory cost in the context of automatic
42
- differentiation, especially with reverse-mode autodiff like :func:`jax.grad`
43
- and :func:`jax.vjp` but also with :func:`jax.linearize`.
44
-
45
- When differentiating a function in reverse-mode, by default all the
46
- linearization points (e.g. inputs to elementwise nonlinear primitive
47
- operations) are stored when evaluating the forward pass so that they can be
48
- reused on the backward pass. This evaluation strategy can lead to a high
49
- memory cost, or even to poor performance on hardware accelerators where memory
50
- access is much more expensive than FLOPs.
51
-
52
- An alternative evaluation strategy is for some of the linearization points to
53
- be recomputed (i.e. rematerialized) rather than stored. This approach can
54
- reduce memory usage at the cost of increased computation.
55
-
56
- This function decorator produces a new version of ``fun`` which follows
57
- the rematerialization strategy rather than the default store-everything
58
- strategy. That is, it returns a new version of ``fun`` which, when
59
- differentiated, doesn't store any of its intermediate linearization points.
60
- Instead, these linearization points are recomputed from the function's saved
61
- inputs.
62
-
63
- See the examples below.
64
-
65
- Args:
66
- fun: Function for which the autodiff evaluation strategy is to be changed
67
- from the default of storing all intermediate linearization points to
68
- recomputing them. Its arguments and return value should be arrays,
69
- scalars, or (nested) standard Python containers (tuple/list/dict) thereof.
70
- prevent_cse: Optional, boolean keyword-only argument indicating whether to
71
- prevent common subexpression elimination (CSE) optimizations in the HLO
72
- generated from differentiation. This CSE prevention has costs because it
73
- can foil other optimizations, and because it can incur high overheads on
74
- some backends, especially GPU. The default is True because otherwise,
75
- under a :func:`~jax.jit` or :func:`~jax.pmap`, CSE can defeat the purpose
76
- of this decorator.
77
- But in some settings, like when used inside a :func:`~jax.lax.scan`, this
78
- CSE prevention mechanism is unnecessary, in which case ``prevent_cse`` can
79
- be set to False.
80
- static_argnums: Optional, int or sequence of ints, a keyword-only argument
81
- indicating which argument values on which to specialize for tracing and
82
- caching purposes. Specifying arguments as static can avoid
83
- ConcretizationTypeErrors when tracing, but at the cost of more retracing
84
- overheads. See the example below.
85
- policy: Optional, callable keyword-only argument. It should be one of the
86
- attributes of ``jax.checkpoint_policies``. The callable takes as input a
87
- type-level specification of a first-order primitive application and
88
- returns a boolean indicating whether the corresponding output value(s) can
89
- be saved as residuals (or instead must be recomputed in the (co)tangent
90
- computation if needed).
91
-
92
- Returns:
93
- A function (callable) with the same input/output behavior as ``fun`` but
94
- which, when differentiated using e.g. :func:`jax.grad`, :func:`jax.vjp`, or
95
- :func:`jax.linearize`, recomputes rather than stores intermediate
96
- linearization points, thus potentially saving memory at the cost of extra
97
- computation.
98
-
99
- Here is a simple example:
100
-
101
- >>> import jax
102
- >>> import jax.numpy as jnp
103
-
104
- >>> @jax.checkpoint
105
- ... def g(x):
106
- ... y = jnp.sin(x)
107
- ... z = jnp.sin(y)
108
- ... return z
109
- ...
110
- >>> jax.value_and_grad(g)(2.0)
111
- (Array(0.78907233, dtype=float32, weak_type=True), Array(-0.2556391, dtype=float32, weak_type=True))
112
-
113
- Here, the same value is produced whether or not the :func:`jax.checkpoint`
114
- decorator is present. When the decorator is not present, the values
115
- ``jnp.cos(2.0)`` and ``jnp.cos(jnp.sin(2.0))`` are computed on the forward
116
- pass and are stored for use in the backward pass, because they are needed
117
- on the backward pass and depend only on the primal inputs. When using
118
- :func:`jax.checkpoint`, the forward pass will compute only the primal outputs
119
- and only the primal inputs (``2.0``) will be stored for the backward pass.
120
- At that time, the value ``jnp.sin(2.0)`` is recomputed, along with the values
121
- ``jnp.cos(2.0)`` and ``jnp.cos(jnp.sin(2.0))``.
122
-
123
- While :func:`jax.checkpoint` controls what values are stored from the
124
- forward-pass to be used on the backward pass, the total amount of memory
125
- required to evaluate a function or its VJP depends on many additional internal
126
- details of that function. Those details include which numerical primitives are
127
- used, how they're composed, where jit and control flow primitives like scan
128
- are used, and other factors.
129
-
130
- The :func:`jax.checkpoint` decorator can be applied recursively to express
131
- sophisticated autodiff rematerialization strategies. For example:
132
-
133
- >>> def recursive_checkpoint(funs):
134
- ... if len(funs) == 1:
135
- ... return funs[0]
136
- ... elif len(funs) == 2:
137
- ... f1, f2 = funs
138
- ... return lambda x: f1(f2(x))
139
- ... else:
140
- ... f1 = recursive_checkpoint(funs[:len(funs)//2])
141
- ... f2 = recursive_checkpoint(funs[len(funs)//2:])
142
- ... return lambda x: f1(jax.checkpoint(f2)(x))
143
- ...
144
-
145
- If ``fun`` involves Python control flow that depends on argument values,
146
- it may be necessary to use the ``static_argnums`` parameter. For example,
147
- consider a boolean flag argument::
148
-
149
- from functools import partial
150
-
151
- @partial(jax.checkpoint, static_argnums=(1,))
152
- def foo(x, is_training):
153
- if is_training:
154
- ...
155
- else:
156
- ...
157
-
158
- Here, the use of ``static_argnums`` allows the ``if`` statement's condition
159
- to depends on the value of ``is_training``. The cost to using
160
- ``static_argnums`` is that it introduces re-tracing overheads across calls:
161
- in the example, ``foo`` is re-traced every time it is called with a new value
162
- of ``is_training``. In some situations, ``jax.ensure_compile_time_eval``
163
- is needed as well::
164
-
165
- @partial(jax.checkpoint, static_argnums=(1,))
166
- def foo(x, y):
167
- with jax.ensure_compile_time_eval():
168
- y_pos = y > 0
169
- if y_pos:
170
- ...
171
- else:
172
- ...
173
-
174
- As an alternative to using ``static_argnums`` (and
175
- ``jax.ensure_compile_time_eval``), it may be easier to compute some values
176
- outside the :func:`jax.checkpoint`-decorated function and then close over them.
177
- """
178
- if isinstance(fun, Missing):
179
- return lambda f: checkpoint(f, prevent_cse=prevent_cse, policy=policy, static_argnums=static_argnums)
180
-
181
- static_argnums = _ensure_index_tuple(tuple() if static_argnums is None else static_argnums)
182
- fun = StatefulFunction(fun, static_argnums=static_argnums, name='checkpoint')
183
- checkpointed_fun = jax.checkpoint(
184
- fun.jaxpr_call,
185
- prevent_cse=prevent_cse,
186
- policy=policy,
187
- static_argnums=tuple(i + 1 for i in static_argnums)
188
- )
189
-
190
- @functools.wraps(fun.fun)
191
- def remat_fun(*args, **params):
192
- # compile the function and get the state trace
193
- state_trace = fun.compile_function_and_get_state_trace(*args, **params, return_only_write=True)
194
- read_state_vals = state_trace.get_read_state_values()
195
- # call the checkpointed function
196
- write_state_vals, outs = checkpointed_fun(state_trace.get_state_values(), *args, **params)
197
- # write the state values back to the states
198
- write_back_state_values(state_trace, read_state_vals, write_state_vals)
199
- return outs
200
-
201
- return remat_fun
202
-
203
-
204
- remat = checkpoint
1
+ # Copyright 2024 BDP Ecosystem Limited. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ import functools
17
+ from typing import Callable, Tuple, Union
18
+
19
+ import jax
20
+
21
+ from brainstate.typing import Missing
22
+ from ._make_jaxpr import StatefulFunction, _ensure_index_tuple
23
+ from ._util import write_back_state_values
24
+
25
+ __all__ = [
26
+ 'checkpoint',
27
+ 'remat'
28
+ ]
29
+
30
+
31
+ def checkpoint(
32
+ fun: Callable = Missing(),
33
+ *,
34
+ prevent_cse: bool = True,
35
+ policy: Callable[..., bool] | None = None,
36
+ static_argnums: int | Tuple[int, ...] = (),
37
+ ) -> Union[Callable, Callable[[Callable], Callable]]:
38
+ """Make ``fun`` recompute internal linearization points when differentiated.
39
+
40
+ The :func:`jax.checkpoint` decorator, aliased to :func:`jax.remat`, provides a
41
+ way to trade off computation time and memory cost in the context of automatic
42
+ differentiation, especially with reverse-mode autodiff like :func:`jax.grad`
43
+ and :func:`jax.vjp` but also with :func:`jax.linearize`.
44
+
45
+ When differentiating a function in reverse-mode, by default all the
46
+ linearization points (e.g. inputs to elementwise nonlinear primitive
47
+ operations) are stored when evaluating the forward pass so that they can be
48
+ reused on the backward pass. This evaluation strategy can lead to a high
49
+ memory cost, or even to poor performance on hardware accelerators where memory
50
+ access is much more expensive than FLOPs.
51
+
52
+ An alternative evaluation strategy is for some of the linearization points to
53
+ be recomputed (i.e. rematerialized) rather than stored. This approach can
54
+ reduce memory usage at the cost of increased computation.
55
+
56
+ This function decorator produces a new version of ``fun`` which follows
57
+ the rematerialization strategy rather than the default store-everything
58
+ strategy. That is, it returns a new version of ``fun`` which, when
59
+ differentiated, doesn't store any of its intermediate linearization points.
60
+ Instead, these linearization points are recomputed from the function's saved
61
+ inputs.
62
+
63
+ See the examples below.
64
+
65
+ Args:
66
+ fun: Function for which the autodiff evaluation strategy is to be changed
67
+ from the default of storing all intermediate linearization points to
68
+ recomputing them. Its arguments and return value should be arrays,
69
+ scalars, or (nested) standard Python containers (tuple/list/dict) thereof.
70
+ prevent_cse: Optional, boolean keyword-only argument indicating whether to
71
+ prevent common subexpression elimination (CSE) optimizations in the HLO
72
+ generated from differentiation. This CSE prevention has costs because it
73
+ can foil other optimizations, and because it can incur high overheads on
74
+ some backends, especially GPU. The default is True because otherwise,
75
+ under a :func:`~jax.jit` or :func:`~jax.pmap`, CSE can defeat the purpose
76
+ of this decorator.
77
+ But in some settings, like when used inside a :func:`~jax.lax.scan`, this
78
+ CSE prevention mechanism is unnecessary, in which case ``prevent_cse`` can
79
+ be set to False.
80
+ static_argnums: Optional, int or sequence of ints, a keyword-only argument
81
+ indicating which argument values on which to specialize for tracing and
82
+ caching purposes. Specifying arguments as static can avoid
83
+ ConcretizationTypeErrors when tracing, but at the cost of more retracing
84
+ overheads. See the example below.
85
+ policy: Optional, callable keyword-only argument. It should be one of the
86
+ attributes of ``jax.checkpoint_policies``. The callable takes as input a
87
+ type-level specification of a first-order primitive application and
88
+ returns a boolean indicating whether the corresponding output value(s) can
89
+ be saved as residuals (or instead must be recomputed in the (co)tangent
90
+ computation if needed).
91
+
92
+ Returns:
93
+ A function (callable) with the same input/output behavior as ``fun`` but
94
+ which, when differentiated using e.g. :func:`jax.grad`, :func:`jax.vjp`, or
95
+ :func:`jax.linearize`, recomputes rather than stores intermediate
96
+ linearization points, thus potentially saving memory at the cost of extra
97
+ computation.
98
+
99
+ Here is a simple example:
100
+
101
+ >>> import jax
102
+ >>> import jax.numpy as jnp
103
+
104
+ >>> @jax.checkpoint
105
+ ... def g(x):
106
+ ... y = jnp.sin(x)
107
+ ... z = jnp.sin(y)
108
+ ... return z
109
+ ...
110
+ >>> jax.value_and_grad(g)(2.0)
111
+ (Array(0.78907233, dtype=float32, weak_type=True), Array(-0.2556391, dtype=float32, weak_type=True))
112
+
113
+ Here, the same value is produced whether or not the :func:`jax.checkpoint`
114
+ decorator is present. When the decorator is not present, the values
115
+ ``jnp.cos(2.0)`` and ``jnp.cos(jnp.sin(2.0))`` are computed on the forward
116
+ pass and are stored for use in the backward pass, because they are needed
117
+ on the backward pass and depend only on the primal inputs. When using
118
+ :func:`jax.checkpoint`, the forward pass will compute only the primal outputs
119
+ and only the primal inputs (``2.0``) will be stored for the backward pass.
120
+ At that time, the value ``jnp.sin(2.0)`` is recomputed, along with the values
121
+ ``jnp.cos(2.0)`` and ``jnp.cos(jnp.sin(2.0))``.
122
+
123
+ While :func:`jax.checkpoint` controls what values are stored from the
124
+ forward-pass to be used on the backward pass, the total amount of memory
125
+ required to evaluate a function or its VJP depends on many additional internal
126
+ details of that function. Those details include which numerical primitives are
127
+ used, how they're composed, where jit and control flow primitives like scan
128
+ are used, and other factors.
129
+
130
+ The :func:`jax.checkpoint` decorator can be applied recursively to express
131
+ sophisticated autodiff rematerialization strategies. For example:
132
+
133
+ >>> def recursive_checkpoint(funs):
134
+ ... if len(funs) == 1:
135
+ ... return funs[0]
136
+ ... elif len(funs) == 2:
137
+ ... f1, f2 = funs
138
+ ... return lambda x: f1(f2(x))
139
+ ... else:
140
+ ... f1 = recursive_checkpoint(funs[:len(funs)//2])
141
+ ... f2 = recursive_checkpoint(funs[len(funs)//2:])
142
+ ... return lambda x: f1(jax.checkpoint(f2)(x))
143
+ ...
144
+
145
+ If ``fun`` involves Python control flow that depends on argument values,
146
+ it may be necessary to use the ``static_argnums`` parameter. For example,
147
+ consider a boolean flag argument::
148
+
149
+ from functools import partial
150
+
151
+ @partial(jax.checkpoint, static_argnums=(1,))
152
+ def foo(x, is_training):
153
+ if is_training:
154
+ ...
155
+ else:
156
+ ...
157
+
158
+ Here, the use of ``static_argnums`` allows the ``if`` statement's condition
159
+ to depends on the value of ``is_training``. The cost to using
160
+ ``static_argnums`` is that it introduces re-tracing overheads across calls:
161
+ in the example, ``foo`` is re-traced every time it is called with a new value
162
+ of ``is_training``. In some situations, ``jax.ensure_compile_time_eval``
163
+ is needed as well::
164
+
165
+ @partial(jax.checkpoint, static_argnums=(1,))
166
+ def foo(x, y):
167
+ with jax.ensure_compile_time_eval():
168
+ y_pos = y > 0
169
+ if y_pos:
170
+ ...
171
+ else:
172
+ ...
173
+
174
+ As an alternative to using ``static_argnums`` (and
175
+ ``jax.ensure_compile_time_eval``), it may be easier to compute some values
176
+ outside the :func:`jax.checkpoint`-decorated function and then close over them.
177
+ """
178
+ if isinstance(fun, Missing):
179
+ return lambda f: checkpoint(f, prevent_cse=prevent_cse, policy=policy, static_argnums=static_argnums)
180
+
181
+ static_argnums = _ensure_index_tuple(tuple() if static_argnums is None else static_argnums)
182
+ fun = StatefulFunction(fun, static_argnums=static_argnums, name='checkpoint')
183
+ checkpointed_fun = jax.checkpoint(
184
+ fun.jaxpr_call,
185
+ prevent_cse=prevent_cse,
186
+ policy=policy,
187
+ static_argnums=tuple(i + 1 for i in static_argnums)
188
+ )
189
+
190
+ @functools.wraps(fun.fun)
191
+ def remat_fun(*args, **params):
192
+ # compile the function and get the state trace
193
+ state_trace = fun.compile_function_and_get_state_trace(*args, **params, return_only_write=True)
194
+ read_state_vals = state_trace.get_read_state_values()
195
+ # call the checkpointed function
196
+ write_state_vals, outs = checkpointed_fun(state_trace.get_state_values(), *args, **params)
197
+ # write the state values back to the states
198
+ write_back_state_values(state_trace, read_state_vals, write_state_vals)
199
+ return outs
200
+
201
+ return remat_fun
202
+
203
+
204
+ remat = checkpoint
@@ -1,49 +1,49 @@
1
- # Copyright 2024 BDP Ecosystem Limited. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
-
16
- import jax
17
- import jax.numpy as jnp
18
- from absl.testing import absltest
19
-
20
- import brainstate
21
-
22
-
23
- class TestRemat(absltest.TestCase):
24
- def test_basic_remat(self):
25
- module = brainstate.compile.remat(brainstate.nn.Linear(2, 3))
26
- y = module(jnp.ones((1, 2)))
27
- assert y.shape == (1, 3)
28
-
29
- def test_remat_with_scan(self):
30
- class ScanLinear(brainstate.nn.Module):
31
- def __init__(self):
32
- super().__init__()
33
- self.linear = brainstate.nn.Linear(3, 3)
34
-
35
- def __call__(self, x: jax.Array):
36
- @brainstate.compile.remat
37
- def fun(x: jax.Array, _):
38
- x = self.linear(x)
39
- return x, None
40
-
41
- return brainstate.compile.scan(fun, x, None, length=10)[0]
42
-
43
- m = ScanLinear()
44
-
45
- assert m.linear.weight.value['weight'].shape == (3, 3)
46
- assert m.linear.weight.value['bias'].shape == (3,)
47
-
48
- y = m(jnp.ones((10, 3)))
49
- assert y.shape == (10, 3)
1
+ # Copyright 2024 BDP Ecosystem Limited. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ import jax
17
+ import jax.numpy as jnp
18
+ from absl.testing import absltest
19
+
20
+ import brainstate
21
+
22
+
23
+ class TestRemat(absltest.TestCase):
24
+ def test_basic_remat(self):
25
+ module = brainstate.compile.remat(brainstate.nn.Linear(2, 3))
26
+ y = module(jnp.ones((1, 2)))
27
+ assert y.shape == (1, 3)
28
+
29
+ def test_remat_with_scan(self):
30
+ class ScanLinear(brainstate.nn.Module):
31
+ def __init__(self):
32
+ super().__init__()
33
+ self.linear = brainstate.nn.Linear(3, 3)
34
+
35
+ def __call__(self, x: jax.Array):
36
+ @brainstate.compile.remat
37
+ def fun(x: jax.Array, _):
38
+ x = self.linear(x)
39
+ return x, None
40
+
41
+ return brainstate.compile.scan(fun, x, None, length=10)[0]
42
+
43
+ m = ScanLinear()
44
+
45
+ assert m.linear.weight.value['weight'].shape == (3, 3)
46
+ assert m.linear.weight.value['bias'].shape == (3,)
47
+
48
+ y = m(jnp.ones((10, 3)))
49
+ assert y.shape == (10, 3)