brainstate-0.1.10-py2.py3-none-any.whl → brainstate-0.2.0-py2.py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry; it is provided for informational purposes only.
Files changed (163)
  1. brainstate/__init__.py +130 -19
  2. brainstate/_compatible_import.py +201 -9
  3. brainstate/_compatible_import_test.py +681 -0
  4. brainstate/_deprecation.py +210 -0
  5. brainstate/_deprecation_test.py +2319 -0
  6. brainstate/{util/error.py → _error.py} +10 -20
  7. brainstate/_state.py +94 -47
  8. brainstate/_state_test.py +1 -1
  9. brainstate/_utils.py +1 -1
  10. brainstate/environ.py +1279 -347
  11. brainstate/environ_test.py +1187 -26
  12. brainstate/graph/__init__.py +6 -13
  13. brainstate/graph/_node.py +240 -0
  14. brainstate/graph/_node_test.py +589 -0
  15. brainstate/graph/{_graph_operation.py → _operation.py} +632 -746
  16. brainstate/graph/_operation_test.py +1147 -0
  17. brainstate/mixin.py +1209 -141
  18. brainstate/mixin_test.py +991 -51
  19. brainstate/nn/__init__.py +74 -72
  20. brainstate/nn/_activations.py +587 -295
  21. brainstate/nn/_activations_test.py +109 -86
  22. brainstate/nn/_collective_ops.py +393 -274
  23. brainstate/nn/_collective_ops_test.py +746 -15
  24. brainstate/nn/_common.py +114 -66
  25. brainstate/nn/_common_test.py +154 -0
  26. brainstate/nn/_conv.py +1652 -143
  27. brainstate/nn/_conv_test.py +838 -227
  28. brainstate/nn/_delay.py +15 -28
  29. brainstate/nn/_delay_test.py +25 -20
  30. brainstate/nn/_dropout.py +359 -167
  31. brainstate/nn/_dropout_test.py +429 -52
  32. brainstate/nn/_dynamics.py +14 -90
  33. brainstate/nn/_dynamics_test.py +1 -12
  34. brainstate/nn/_elementwise.py +492 -313
  35. brainstate/nn/_elementwise_test.py +806 -145
  36. brainstate/nn/_embedding.py +369 -19
  37. brainstate/nn/_embedding_test.py +156 -0
  38. brainstate/nn/{_fixedprob.py → _event_fixedprob.py} +10 -16
  39. brainstate/nn/{_fixedprob_test.py → _event_fixedprob_test.py} +6 -5
  40. brainstate/nn/{_linear_mv.py → _event_linear.py} +2 -2
  41. brainstate/nn/{_linear_mv_test.py → _event_linear_test.py} +6 -5
  42. brainstate/nn/_exp_euler.py +200 -38
  43. brainstate/nn/_exp_euler_test.py +350 -8
  44. brainstate/nn/_linear.py +391 -71
  45. brainstate/nn/_linear_test.py +427 -59
  46. brainstate/nn/_metrics.py +1070 -0
  47. brainstate/nn/_metrics_test.py +611 -0
  48. brainstate/nn/_module.py +10 -3
  49. brainstate/nn/_module_test.py +1 -1
  50. brainstate/nn/_normalizations.py +688 -329
  51. brainstate/nn/_normalizations_test.py +663 -37
  52. brainstate/nn/_paddings.py +1020 -0
  53. brainstate/nn/_paddings_test.py +723 -0
  54. brainstate/nn/_poolings.py +1404 -342
  55. brainstate/nn/_poolings_test.py +828 -92
  56. brainstate/nn/{_rate_rnns.py → _rnns.py} +446 -54
  57. brainstate/nn/_rnns_test.py +593 -0
  58. brainstate/nn/_utils.py +132 -5
  59. brainstate/nn/_utils_test.py +402 -0
  60. brainstate/{init/_random_inits.py → nn/init.py} +301 -45
  61. brainstate/{init/_random_inits_test.py → nn/init_test.py} +51 -20
  62. brainstate/random/__init__.py +247 -1
  63. brainstate/random/_rand_funs.py +668 -346
  64. brainstate/random/_rand_funs_test.py +74 -1
  65. brainstate/random/_rand_seed.py +541 -76
  66. brainstate/random/_rand_seed_test.py +1 -1
  67. brainstate/random/_rand_state.py +601 -393
  68. brainstate/random/_rand_state_test.py +551 -0
  69. brainstate/transform/__init__.py +59 -0
  70. brainstate/transform/_ad_checkpoint.py +176 -0
  71. brainstate/{compile → transform}/_ad_checkpoint_test.py +1 -1
  72. brainstate/{augment → transform}/_autograd.py +360 -113
  73. brainstate/{augment → transform}/_autograd_test.py +2 -2
  74. brainstate/transform/_conditions.py +316 -0
  75. brainstate/{compile → transform}/_conditions_test.py +11 -11
  76. brainstate/{compile → transform}/_error_if.py +22 -20
  77. brainstate/{compile → transform}/_error_if_test.py +1 -1
  78. brainstate/transform/_eval_shape.py +145 -0
  79. brainstate/{augment → transform}/_eval_shape_test.py +1 -1
  80. brainstate/{compile → transform}/_jit.py +99 -46
  81. brainstate/{compile → transform}/_jit_test.py +3 -3
  82. brainstate/{compile → transform}/_loop_collect_return.py +219 -80
  83. brainstate/{compile → transform}/_loop_collect_return_test.py +1 -1
  84. brainstate/{compile → transform}/_loop_no_collection.py +133 -34
  85. brainstate/{compile → transform}/_loop_no_collection_test.py +2 -2
  86. brainstate/transform/_make_jaxpr.py +2016 -0
  87. brainstate/transform/_make_jaxpr_test.py +1510 -0
  88. brainstate/transform/_mapping.py +529 -0
  89. brainstate/transform/_mapping_test.py +194 -0
  90. brainstate/{compile → transform}/_progress_bar.py +78 -25
  91. brainstate/{augment → transform}/_random.py +65 -45
  92. brainstate/{compile → transform}/_unvmap.py +102 -5
  93. brainstate/transform/_util.py +286 -0
  94. brainstate/typing.py +594 -61
  95. brainstate/typing_test.py +780 -0
  96. brainstate/util/__init__.py +9 -32
  97. brainstate/util/_others.py +1025 -0
  98. brainstate/util/_others_test.py +962 -0
  99. brainstate/util/_pretty_pytree.py +1301 -0
  100. brainstate/util/_pretty_pytree_test.py +675 -0
  101. brainstate/util/{pretty_repr.py → _pretty_repr.py} +161 -27
  102. brainstate/util/_pretty_repr_test.py +696 -0
  103. brainstate/util/filter.py +557 -81
  104. brainstate/util/filter_test.py +912 -0
  105. brainstate/util/struct.py +769 -382
  106. brainstate/util/struct_test.py +602 -0
  107. {brainstate-0.1.10.dist-info → brainstate-0.2.0.dist-info}/METADATA +34 -17
  108. brainstate-0.2.0.dist-info/RECORD +111 -0
  109. brainstate/augment/__init__.py +0 -30
  110. brainstate/augment/_eval_shape.py +0 -99
  111. brainstate/augment/_mapping.py +0 -1060
  112. brainstate/augment/_mapping_test.py +0 -597
  113. brainstate/compile/__init__.py +0 -38
  114. brainstate/compile/_ad_checkpoint.py +0 -204
  115. brainstate/compile/_conditions.py +0 -256
  116. brainstate/compile/_make_jaxpr.py +0 -888
  117. brainstate/compile/_make_jaxpr_test.py +0 -156
  118. brainstate/compile/_util.py +0 -147
  119. brainstate/functional/__init__.py +0 -27
  120. brainstate/graph/_graph_node.py +0 -244
  121. brainstate/graph/_graph_node_test.py +0 -73
  122. brainstate/graph/_graph_operation_test.py +0 -563
  123. brainstate/init/__init__.py +0 -26
  124. brainstate/init/_base.py +0 -52
  125. brainstate/init/_generic.py +0 -244
  126. brainstate/init/_regular_inits.py +0 -105
  127. brainstate/init/_regular_inits_test.py +0 -50
  128. brainstate/nn/_inputs.py +0 -608
  129. brainstate/nn/_ltp.py +0 -28
  130. brainstate/nn/_neuron.py +0 -705
  131. brainstate/nn/_neuron_test.py +0 -161
  132. brainstate/nn/_others.py +0 -46
  133. brainstate/nn/_projection.py +0 -486
  134. brainstate/nn/_rate_rnns_test.py +0 -63
  135. brainstate/nn/_readout.py +0 -209
  136. brainstate/nn/_readout_test.py +0 -53
  137. brainstate/nn/_stp.py +0 -236
  138. brainstate/nn/_synapse.py +0 -505
  139. brainstate/nn/_synapse_test.py +0 -131
  140. brainstate/nn/_synaptic_projection.py +0 -423
  141. brainstate/nn/_synouts.py +0 -162
  142. brainstate/nn/_synouts_test.py +0 -57
  143. brainstate/nn/metrics.py +0 -388
  144. brainstate/optim/__init__.py +0 -38
  145. brainstate/optim/_base.py +0 -64
  146. brainstate/optim/_lr_scheduler.py +0 -448
  147. brainstate/optim/_lr_scheduler_test.py +0 -50
  148. brainstate/optim/_optax_optimizer.py +0 -152
  149. brainstate/optim/_optax_optimizer_test.py +0 -53
  150. brainstate/optim/_sgd_optimizer.py +0 -1104
  151. brainstate/random/_random_for_unit.py +0 -52
  152. brainstate/surrogate.py +0 -1957
  153. brainstate/transform.py +0 -23
  154. brainstate/util/caller.py +0 -98
  155. brainstate/util/others.py +0 -540
  156. brainstate/util/pretty_pytree.py +0 -945
  157. brainstate/util/pretty_pytree_test.py +0 -159
  158. brainstate/util/pretty_table.py +0 -2954
  159. brainstate/util/scaling.py +0 -258
  160. brainstate-0.1.10.dist-info/RECORD +0 -130
  161. {brainstate-0.1.10.dist-info → brainstate-0.2.0.dist-info}/WHEEL +0 -0
  162. {brainstate-0.1.10.dist-info → brainstate-0.2.0.dist-info}/licenses/LICENSE +0 -0
  163. {brainstate-0.1.10.dist-info → brainstate-0.2.0.dist-info}/top_level.txt +0 -0
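Most of the 0.2.0 reorganization can be read straight off the renames above: the `augment` and `compile` packages merge into a single `transform` package, `init` moves under `nn`, the event-driven layers gain an `_event_` prefix, and top-level modules such as `surrogate`, `functional`, and `optim` are removed. Below is a minimal compatibility sketch for downstream code; it assumes a public `jit` wrapper follows its module's move from brainstate/compile/_jit.py to brainstate/transform/_jit.py, which this diff does not confirm (the package-level re-exports are not shown):

# Hypothetical import shim for code that must run on both 0.1.x and 0.2.x.
# The `jit` name is an assumption based on the file moves listed above.
try:
    from brainstate.transform import jit  # 0.2.x: compile/augment merged into transform
except ImportError:
    from brainstate.compile import jit    # 0.1.x layout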
brainstate/nn/_utils.py CHANGED
@@ -1,4 +1,4 @@
-# Copyright 2025 BDP Ecosystem Limited. All Rights Reserved.
+# Copyright 2025 BrainX Ecosystem Limited. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,14 +15,18 @@
 
 # -*- coding: utf-8 -*-
 
-from typing import Union, Tuple
-
 from brainstate._state import ParamState
-from brainstate.util import PrettyTable
 from ._module import Module
+from functools import partial
+
+import jax
+import jax.numpy as jnp
+
+from brainstate.typing import PyTree
 
 __all__ = [
     "count_parameters",
+    "clip_grad_norm",
 ]
 
 
@@ -52,7 +56,7 @@ def count_parameters(
     module: Module,
     precision: int = 2,
     return_table: bool = False,
-) -> Union[Tuple[PrettyTable, int], int]:
+):
     """
     Count and display the number of trainable parameters in a neural network model.
 
@@ -76,6 +80,7 @@ def count_parameters(
     followed by the total number of trainable parameters.
     """
     assert isinstance(module, Module), "Input must be a neural network module"  # noqa: E501
+    from prettytable import PrettyTable  # noqa: E501
     table = PrettyTable(["Modules", "Parameters"])
     total_params = 0
     for name, parameter in module.states(ParamState).items():
@@ -87,3 +92,125 @@ def count_parameters(
     if return_table:
         return table, total_params
     return total_params
+
+
+def clip_grad_norm(
+    grad: PyTree,
+    max_norm: float | jax.Array,
+    norm_type: int | float | str | None = 2.0,
+    return_norm: bool = False,
+) -> PyTree | tuple[PyTree, jax.Array]:
+    """
+    Clip gradient norm of a PyTree of parameters.
+
+    The norm is computed over all gradients together, as if they were
+    concatenated into a single vector. Gradients are scaled if their
+    norm exceeds the specified maximum.
+
+    Parameters
+    ----------
+    grad : PyTree
+        A PyTree structure (nested dict, list, tuple, etc.) containing
+        JAX arrays representing gradients to be normalized.
+    max_norm : float or jax.Array
+        Maximum allowed norm of the gradients. If the computed norm
+        exceeds this value, gradients will be scaled down proportionally.
+    norm_type : int, float, str, or None, optional
+        Type of the p-norm to compute. Default is 2.0 (L2 norm).
+        Can be:
+
+        - float: p-norm for any p >= 1
+        - 'inf' or jnp.inf: infinity norm (maximum absolute value)
+        - '-inf' or -jnp.inf: negative infinity norm (minimum absolute value)
+        - int: integer p-norm
+        - None: defaults to 2.0 (Euclidean norm)
+    return_norm : bool, optional
+        If True, returns a tuple (clipped_grad, total_norm).
+        If False, returns only clipped_grad. Default is False.
+
+    Returns
+    -------
+    clipped_grad : PyTree
+        The input gradient structure with norms clipped to max_norm.
+    total_norm : jax.Array, optional
+        The computed norm of the gradients before clipping.
+        Only returned if return_norm=True.
+
+    Notes
+    -----
+    The gradient clipping is performed as:
+
+    .. math::
+        g_{\\text{clipped}} = g \\cdot \\min\\left(1, \\frac{\\text{max\\_norm}}{\\|g\\|_p}\\right)
+
+    where :math:`\\|g\\|_p` is the p-norm of the concatenated gradient vector.
+
+    Examples
+    --------
+    .. code-block:: python
+
+        >>> import jax.numpy as jnp
+        >>> import brainstate
+
+        >>> # Simple gradient clipping without returning norm
+        >>> grads = {'w': jnp.array([3.0, 4.0]), 'b': jnp.array([12.0])}
+        >>> clipped_grads = brainstate.nn.clip_grad_norm(grads, max_norm=5.0)
+        >>> print(f"Clipped w: {clipped_grads['w']}")
+        Clipped w: [1.1538461 1.5384616]
+
+        >>> # Gradient clipping with norm returned
+        >>> grads = {'w': jnp.array([3.0, 4.0]), 'b': jnp.array([12.0])}
+        >>> clipped_grads, norm = brainstate.nn.clip_grad_norm(grads, max_norm=5.0, return_norm=True)
+        >>> print(f"Original norm: {norm:.2f}")
+        Original norm: 13.00
+
+        >>> # Using different norm types
+        >>> grads = {'layer1': jnp.array([[-2.0, 3.0], [1.0, -4.0]])}
+        >>>
+        >>> # L2 norm (default)
+        >>> clipped_l2, norm_l2 = brainstate.nn.clip_grad_norm(grads, max_norm=3.0, norm_type=2, return_norm=True)
+        >>> print(f"L2 norm: {norm_l2:.2f}")
+        L2 norm: 5.48
+        >>>
+        >>> # L1 norm
+        >>> clipped_l1, norm_l1 = brainstate.nn.clip_grad_norm(grads, max_norm=5.0, norm_type=1, return_norm=True)
+        >>> print(f"L1 norm: {norm_l1:.2f}")
+        L1 norm: 10.00
+        >>>
+        >>> # Infinity norm
+        >>> clipped_inf, norm_inf = brainstate.nn.clip_grad_norm(grads, max_norm=2.0, norm_type='inf', return_norm=True)
+        >>> print(f"Inf norm: {norm_inf:.2f}")
+        Inf norm: 4.00
+    """
+    if norm_type is None:
+        norm_type = 2.0
+
+    # Convert string 'inf' to jnp.inf for compatibility
+    if norm_type == 'inf':
+        norm_type = jnp.inf
+    elif norm_type == '-inf':
+        norm_type = -jnp.inf
+
+    # Get all gradient leaves
+    grad_leaves = jax.tree.leaves(grad)
+
+    # Handle empty PyTree
+    if not grad_leaves:
+        if return_norm:
+            return grad, jnp.array(0.0)
+        return grad
+
+    # Compute norm over flattened gradient values
+    norm_fn = partial(jnp.linalg.norm, ord=norm_type)
+    flat_grads = jnp.concatenate([g.ravel() for g in grad_leaves])
+    total_norm = norm_fn(flat_grads)
+
+    # Compute scaling factor
+    clip_factor = jnp.minimum(1.0, max_norm / (total_norm + 1e-6))
+
+    # Apply clipping
+    clipped_grad = jax.tree.map(lambda g: g * clip_factor, grad)
+
+    if return_norm:
+        return clipped_grad, total_norm
+    return clipped_grad
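For context on the addition above: `clip_grad_norm` composes directly with `jax.grad` in an ordinary training step. A minimal usage sketch follows; `loss_fn`, `params`, and the SGD update are illustrative inventions, while the `clip_grad_norm` call follows the signature added in this diff:

import jax
import jax.numpy as jnp
import brainstate

def loss_fn(params, x, y):
    # Hypothetical linear-regression loss used only for this sketch
    pred = x @ params['w'] + params['b']
    return jnp.mean((pred - y) ** 2)

params = {'w': jnp.ones((3, 1)), 'b': jnp.zeros((1,))}
x, y = jnp.ones((8, 3)), jnp.zeros((8, 1))

grads = jax.grad(loss_fn)(params, x, y)
# Clip the global L2 norm to 1.0; also retrieve the pre-clipping norm
grads, total_norm = brainstate.nn.clip_grad_norm(grads, max_norm=1.0, return_norm=True)

# Plain SGD step with the clipped gradients
params = jax.tree.map(lambda p, g: p - 0.1 * g, params, grads)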
brainstate/nn/_utils_test.py ADDED
@@ -0,0 +1,402 @@
+# Copyright 2024 BrainX Ecosystem Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import unittest
+from absl.testing import parameterized
+import jax
+import jax.numpy as jnp
+import numpy as np
+
+import brainstate
+
+
+class TestClipGradNorm(parameterized.TestCase):
+    """Comprehensive tests for clip_grad_norm function."""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        # Enable 64-bit precision for more accurate testing
+        jax.config.update("jax_enable_x64", True)
+
+    def test_simple_dict_clipping(self):
+        """Test basic gradient clipping with dictionary structure."""
+        grads = {
+            'w': jnp.array([3.0, 4.0]),
+            'b': jnp.array([12.0])
+        }
+
+        # Test with return_norm=True
+        clipped_grads, norm = brainstate.nn.clip_grad_norm(grads, max_norm=5.0, return_norm=True)
+
+        # Expected L2 norm: sqrt(3^2 + 4^2 + 12^2) = sqrt(9 + 16 + 144) = sqrt(169) = 13
+        self.assertAlmostEqual(norm, 13.0, places=5)
+
+        # Check clipped values: should be scaled by 5/13
+        scale = 5.0 / 13.0
+        np.testing.assert_array_almost_equal(
+            clipped_grads['w'],
+            jnp.array([3.0, 4.0]) * scale,
+            decimal=5
+        )
+        np.testing.assert_array_almost_equal(
+            clipped_grads['b'],
+            jnp.array([12.0]) * scale,
+            decimal=5
+        )
+
+    def test_return_norm_parameter(self):
+        """Test the return_norm parameter behavior."""
+        grads = {
+            'w': jnp.array([3.0, 4.0]),
+            'b': jnp.array([12.0])
+        }
+
+        # Test with return_norm=False (default)
+        clipped_grads_only = brainstate.nn.clip_grad_norm(grads, max_norm=5.0, return_norm=False)
+        self.assertIsInstance(clipped_grads_only, dict)
+        self.assertIn('w', clipped_grads_only)
+        self.assertIn('b', clipped_grads_only)
+
+        # Test with return_norm=True
+        result = brainstate.nn.clip_grad_norm(grads, max_norm=5.0, return_norm=True)
+        self.assertIsInstance(result, tuple)
+        self.assertEqual(len(result), 2)
+        clipped_grads, norm = result
+
+        # Values should be the same regardless of return_norm
+        np.testing.assert_array_almost_equal(
+            clipped_grads_only['w'],
+            clipped_grads['w'],
+            decimal=7
+        )
+        np.testing.assert_array_almost_equal(
+            clipped_grads_only['b'],
+            clipped_grads['b'],
+            decimal=7
+        )
+
+    def test_nested_structure_clipping(self):
+        """Test gradient clipping with nested PyTree structures."""
+        grads = {
+            'layer1': {
+                'weight': jnp.array([[1.0, 2.0], [3.0, 4.0]]),
+                'bias': jnp.array([5.0, 6.0])
+            },
+            'layer2': {
+                'weight': jnp.array([[7.0, 8.0]]),
+                'bias': jnp.array([9.0])
+            }
+        }
+
+        # Calculate expected norm
+        flat = jnp.arange(1.0, 10.0)
+        expected_norm = jnp.linalg.norm(flat)
+
+        max_norm = 10.0
+        clipped_grads, norm = brainstate.nn.clip_grad_norm(grads, max_norm=max_norm, return_norm=True)
+
+        self.assertAlmostEqual(norm, expected_norm, places=5)
+
+        # Since norm > max_norm, gradients should be scaled
+        scale = max_norm / expected_norm
+        np.testing.assert_array_almost_equal(
+            clipped_grads['layer1']['weight'],
+            grads['layer1']['weight'] * scale,
+            decimal=5
+        )
+
+    def test_no_clipping_when_under_max(self):
+        """Test that gradients are unchanged when norm is below max_norm."""
+        grads = {
+            'w': jnp.array([1.0, 2.0]),
+            'b': jnp.array([2.0])
+        }
+
+        # L2 norm = sqrt(1 + 4 + 4) = 3
+        max_norm = 5.0
+        clipped_grads, norm = brainstate.nn.clip_grad_norm(grads, max_norm=max_norm, return_norm=True)
+
+        self.assertAlmostEqual(norm, 3.0, places=5)
+
+        # Gradients should be unchanged
+        np.testing.assert_array_almost_equal(
+            clipped_grads['w'], grads['w'], decimal=7
+        )
+        np.testing.assert_array_almost_equal(
+            clipped_grads['b'], grads['b'], decimal=7
+        )
+
+    @parameterized.parameters(
+        (1, 'L1'),  # L1 norm
+        (2, 'L2'),  # L2 norm (default)
+        (2.0, 'L2'),  # L2 norm with float
+        (3, 'L3'),  # L3 norm
+        ('inf', 'Linf'),  # Infinity norm
+        (jnp.inf, 'Linf'),  # Infinity norm with jnp.inf
+    )
+    def test_different_norm_types(self, norm_type, norm_name):
+        """Test gradient clipping with different norm types."""
+        grads = {
+            'param': jnp.array([[-2.0, 3.0], [1.0, -4.0]])
+        }
+
+        max_norm = 3.0
+        clipped_grads, computed_norm = brainstate.nn.clip_grad_norm(
+            grads, max_norm=max_norm, norm_type=norm_type, return_norm=True
+        )
+
+        # Compute expected norm
+        flat_grads = grads['param'].ravel()
+        if norm_type == 'inf' or norm_type == jnp.inf:
+            expected_norm = jnp.max(jnp.abs(flat_grads))
+        else:
+            expected_norm = jnp.linalg.norm(flat_grads, ord=norm_type)
+
+        self.assertAlmostEqual(computed_norm, expected_norm, places=5)
+
+        # Check scaling
+        if expected_norm > max_norm:
+            scale = max_norm / expected_norm
+            np.testing.assert_array_almost_equal(
+                clipped_grads['param'],
+                grads['param'] * scale,
+                decimal=5
+            )
+        else:
+            np.testing.assert_array_almost_equal(
+                clipped_grads['param'],
+                grads['param'],
+                decimal=5
+            )
+
+    def test_zero_gradients(self):
+        """Test handling of zero gradients."""
+        grads = {
+            'w': jnp.zeros((3, 4)),
+            'b': jnp.zeros(4)
+        }
+
+        clipped_grads, norm = brainstate.nn.clip_grad_norm(grads, max_norm=1.0, return_norm=True)
+
+        self.assertAlmostEqual(norm, 0.0, places=7)
+        np.testing.assert_array_equal(clipped_grads['w'], grads['w'])
+        np.testing.assert_array_equal(clipped_grads['b'], grads['b'])
+
+    def test_single_tensor_input(self):
+        """Test with a single tensor instead of a PyTree."""
+        grad = jnp.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
+
+        max_norm = 5.0
+        clipped_grad, norm = brainstate.nn.clip_grad_norm(grad, max_norm=max_norm, return_norm=True)
+
+        expected_norm = jnp.linalg.norm(grad.ravel())
+        self.assertAlmostEqual(norm, expected_norm, places=5)
+
+        scale = max_norm / expected_norm
+        np.testing.assert_array_almost_equal(
+            clipped_grad,
+            grad * scale,
+            decimal=5
+        )
+
+    def test_list_structure(self):
+        """Test gradient clipping with list structure."""
+        grads = [
+            jnp.array([1.0, 2.0]),
+            jnp.array([[3.0, 4.0], [5.0, 6.0]]),
+            jnp.array([7.0])
+        ]
+
+        max_norm = 10.0
+        clipped_grads, norm = brainstate.nn.clip_grad_norm(grads, max_norm=max_norm, return_norm=True)
+
+        # Check structure is preserved
+        self.assertIsInstance(clipped_grads, list)
+        self.assertEqual(len(clipped_grads), 3)
+
+        # Check norm computation
+        flat = jnp.arange(1.0, 8.0)
+        expected_norm = jnp.linalg.norm(flat)
+        self.assertAlmostEqual(norm, expected_norm, places=5)
+
+    def test_tuple_structure(self):
+        """Test gradient clipping with tuple structure."""
+        grads = (
+            jnp.array([3.0, 4.0]),
+            jnp.array([5.0])
+        )
+
+        max_norm = 5.0
+        clipped_grads, norm = brainstate.nn.clip_grad_norm(grads, max_norm=max_norm, return_norm=True)
+
+        # Check structure is preserved
+        self.assertIsInstance(clipped_grads, tuple)
+        self.assertEqual(len(clipped_grads), 2)
+
+        # Check norm: sqrt(9 + 16 + 25) = sqrt(50) ≈ 7.07
+        expected_norm = jnp.sqrt(50.0)
+        self.assertAlmostEqual(norm, expected_norm, places=5)
+
+    def test_max_norm_as_array(self):
+        """Test using JAX array for max_norm parameter."""
+        grads = {'w': jnp.array([6.0, 8.0])}
+        max_norm = jnp.array(5.0)
+
+        clipped_grads, norm = brainstate.nn.clip_grad_norm(grads, max_norm=max_norm, return_norm=True)
+
+        # norm = sqrt(36 + 64) = 10
+        self.assertAlmostEqual(norm, 10.0, places=5)
+
+        # Should be scaled by 5/10 = 0.5
+        np.testing.assert_array_almost_equal(
+            clipped_grads['w'],
+            jnp.array([3.0, 4.0]),
+            decimal=5
+        )
+
+    def test_none_norm_type(self):
+        """Test that None norm_type defaults to L2 norm."""
+        grads = {'param': jnp.array([3.0, 4.0])}
+
+        # Test with explicit None
+        clipped1, norm1 = brainstate.nn.clip_grad_norm(grads, max_norm=10.0, norm_type=None, return_norm=True)
+
+        # Test with default (should be same as L2)
+        clipped2, norm2 = brainstate.nn.clip_grad_norm(grads, max_norm=10.0, norm_type=2.0, return_norm=True)
+
+        self.assertAlmostEqual(norm1, norm2, places=7)
+        np.testing.assert_array_almost_equal(
+            clipped1['param'], clipped2['param'], decimal=7
+        )
+
+    def test_very_large_gradients(self):
+        """Test clipping very large gradients."""
+        grads = {
+            'huge': jnp.array([1e10, 1e10, 1e10])
+        }
+
+        max_norm = 1.0
+        clipped_grads, norm = brainstate.nn.clip_grad_norm(grads, max_norm=max_norm, return_norm=True)
+
+        # Check that clipped norm is approximately max_norm
+        clipped_norm = jnp.linalg.norm(clipped_grads['huge'])
+        self.assertAlmostEqual(clipped_norm, max_norm, places=5)
+
+    def test_very_small_gradients(self):
+        """Test handling very small gradients (numerical stability)."""
+        grads = {
+            'tiny': jnp.array([1e-10, 1e-10, 1e-10])
+        }
+
+        max_norm = 1.0
+        clipped_grads, norm = brainstate.nn.clip_grad_norm(grads, max_norm=max_norm, return_norm=True)
+
+        # Should not be clipped
+        np.testing.assert_array_almost_equal(
+            clipped_grads['tiny'], grads['tiny'], decimal=15
+        )
+
+    def test_mixed_shapes(self):
+        """Test with mixed tensor shapes in PyTree."""
+        grads = {
+            'scalar': jnp.array(2.0),
+            'vector': jnp.array([3.0, 4.0]),
+            'matrix': jnp.array([[1.0, 2.0], [3.0, 4.0]]),
+            'tensor3d': jnp.ones((2, 3, 4))
+        }
+
+        max_norm = 10.0
+        clipped_grads, norm = brainstate.nn.clip_grad_norm(grads, max_norm=max_norm, return_norm=True)
+
+        # Check all shapes are preserved
+        self.assertEqual(clipped_grads['scalar'].shape, ())
+        self.assertEqual(clipped_grads['vector'].shape, (2,))
+        self.assertEqual(clipped_grads['matrix'].shape, (2, 2))
+        self.assertEqual(clipped_grads['tensor3d'].shape, (2, 3, 4))
+
+    def test_gradient_clipping_invariants(self):
+        """Test mathematical invariants of gradient clipping."""
+        grads = {
+            'w1': jnp.array([[1.0, 2.0], [3.0, 4.0]]),
+            'w2': jnp.array([5.0, 6.0])
+        }
+
+        max_norm = 5.0
+        clipped_grads, original_norm = brainstate.nn.clip_grad_norm(grads, max_norm=max_norm, return_norm=True)
+
+        # Compute norm of clipped gradients
+        clipped_flat = jnp.concatenate([g.ravel() for g in jax.tree.leaves(clipped_grads)])
+        clipped_norm = jnp.linalg.norm(clipped_flat)
+
+        # Clipped norm should be min(original_norm, max_norm)
+        expected_clipped_norm = jnp.minimum(original_norm, max_norm)
+        self.assertAlmostEqual(clipped_norm, expected_clipped_norm, places=5)
+
+    @parameterized.parameters(
+        (0.5,),
+        (1.0,),
+        (2.0,),
+        (5.0,),
+        (10.0,),
+    )
+    def test_different_max_norms(self, max_norm):
+        """Test gradient clipping with various max_norm values."""
+        grads = {'param': jnp.array([6.0, 8.0])}  # norm = 10
+
+        clipped_grads, norm = brainstate.nn.clip_grad_norm(grads, max_norm=max_norm, return_norm=True)
+
+        self.assertAlmostEqual(norm, 10.0, places=5)
+
+        # Check clipped norm
+        clipped_norm = jnp.linalg.norm(clipped_grads['param'])
+        if max_norm < 10.0:
+            self.assertAlmostEqual(clipped_norm, max_norm, places=5)
+        else:
+            self.assertAlmostEqual(clipped_norm, 10.0, places=5)
+
+    def test_empty_pytree(self):
+        """Test handling of empty PyTree."""
+        grads = {}
+
+        # Test with return_norm=True
+        clipped_grads, norm = brainstate.nn.clip_grad_norm(grads, max_norm=1.0, return_norm=True)
+        self.assertEqual(clipped_grads, {})
+        self.assertAlmostEqual(norm, 0.0, places=7)
+
+        # Test with return_norm=False
+        clipped_grads_only = brainstate.nn.clip_grad_norm(grads, max_norm=1.0, return_norm=False)
+        self.assertEqual(clipped_grads_only, {})
+
+    def test_pytree_with_none_leaves(self):
+        """Test PyTree containing None values (should be filtered out)."""
+        grads = {
+            'w': jnp.array([3.0, 4.0]),
+            'b': None,  # This should be filtered by jax.tree.leaves
+            'c': jnp.array([5.0])
+        }
+
+        # This test depends on how the function handles None values
+        # JAX typically filters them out
+        try:
+            clipped_grads, norm = brainstate.nn.clip_grad_norm(grads, max_norm=5.0, return_norm=True)
+            # If it works, check that None is preserved in structure
+            self.assertIn('b', clipped_grads)
+        except:
+            # Expected if None values cause issues
+            pass
+
+
+if __name__ == '__main__':
+    unittest.main()