broccoli-ml 0.18.0__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
broccoli/tensor.py CHANGED
@@ -14,30 +14,36 @@ class SigmaReparamTensor(nn.Module):
14
14
 
15
15
  super().__init__()
16
16
 
17
- self.tensor = nn.Parameter(init_tensor, requires_grad=True)
17
+ self.sigma_reparam_tensor = nn.Parameter(init_tensor, requires_grad=True)
18
18
 
19
19
  with torch.no_grad():
20
- _, sigma, v_transpose = torch.linalg.svd(self.tensor, full_matrices=False)
20
+ _, sigma, v_transpose = torch.linalg.svd(
21
+ self.sigma_reparam_tensor, full_matrices=False
22
+ )
21
23
 
22
24
  self.register_buffer("approx_spectral_norm", sigma[:1])
23
25
  self.register_buffer("right_singular", v_transpose[0])
24
- self.scale = nn.Parameter(
26
+ self.sigma_reparam_scale = nn.Parameter(
25
27
  self.approx_spectral_norm.clone().detach(), requires_grad=True
26
28
  )
27
29
 
28
30
  def power_iteration(self):
29
31
  with torch.no_grad():
30
- approx_right_singular_transpose = self.tensor.mv(self.right_singular)
32
+ approx_right_singular_transpose = self.sigma_reparam_tensor.mv(
33
+ self.right_singular
34
+ )
31
35
  approx_right_singular_transpose = F.normalize(
32
36
  approx_right_singular_transpose, dim=0
33
37
  )
34
- updated_right_singular = self.tensor.T.mv(approx_right_singular_transpose)
38
+ updated_right_singular = self.sigma_reparam_tensor.T.mv(
39
+ approx_right_singular_transpose
40
+ )
35
41
  updated_right_singular = F.normalize(updated_right_singular, dim=0)
36
42
  self.right_singular.data.copy_(updated_right_singular)
37
43
  rayleigh_quotient = torch.einsum(
38
44
  "m,mn,n->",
39
45
  approx_right_singular_transpose,
40
- self.tensor,
46
+ self.sigma_reparam_tensor,
41
47
  updated_right_singular,
42
48
  )
43
49
  self.approx_spectral_norm.data.copy_(rayleigh_quotient)
@@ -45,4 +51,6 @@ class SigmaReparamTensor(nn.Module):
45
51
  def forward(self):
46
52
  if self.training:
47
53
  self.power_iteration()
48
- return self.scale * (self.tensor / self.approx_spectral_norm)
54
+ return self.sigma_reparam_scale * (
55
+ self.sigma_reparam_tensor / self.approx_spectral_norm
56
+ )
broccoli/transformer.py CHANGED
@@ -236,6 +236,7 @@ class FeedforwardBlock(nn.Module):
236
236
  activation_kwargs=None,
237
237
  dropout=0.0,
238
238
  linear_module=nn.Linear,
239
+ regularise_values=True,
239
240
  ):
240
241
  super().__init__()
241
242
 
@@ -252,13 +253,22 @@ class FeedforwardBlock(nn.Module):
252
253
  else ratio * output_features
253
254
  )
254
255
 
256
+ if regularise_values:
257
+ self.memory_type = SpectralNormLinear
258
+ self.bias_memories = False
259
+ else:
260
+ self.memory_type = nn.Linear
261
+ self.bias_memories = True
262
+
255
263
  self.process = nn.Sequential(
256
264
  *[
257
265
  nn.LayerNorm(input_features),
258
266
  linear_module(input_features, self.max_features),
259
267
  self.activation,
260
268
  nn.LayerNorm(ratio * output_features),
261
- linear_module(ratio * output_features, output_features),
269
+ self.memory_type(
270
+ ratio * output_features, output_features, bias=self.bias_memories
271
+ ),
262
272
  self.dropout,
263
273
  ]
264
274
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: broccoli-ml
3
- Version: 0.18.0
3
+ Version: 0.19.0
4
4
  Summary: Some useful Pytorch models, circa 2025
5
5
  License: MIT
6
6
  Author: Nicholas Bailey
@@ -7,11 +7,11 @@ broccoli/cnn.py,sha256=jeRyKIAMWu1E3iyI14MGgSZuZivPMh12iqkqW9ilNjo,17785
7
7
  broccoli/eigenpatches.py,sha256=J6n2usN1oQuHEHYiBNyYpn_a9eQcHjOBiIlvSei520Y,2413
8
8
  broccoli/linear.py,sha256=g8YrxNl6g_WcHrWVmbaBHJU5hv6daFS0r4TxAoPJ9UE,3012
9
9
  broccoli/rope.py,sha256=hw7kBPNR9GQXj4GxyIAffsGKPfcTPOFh8Bc7oEHtaZY,12108
10
- broccoli/tensor.py,sha256=txJiMLXTgWKTDd7Dh2In-ajiQLRUuuPezO0bKnEU3e8,1719
11
- broccoli/transformer.py,sha256=ZJpZIR5d4W_N1uO2AtRVVYzRJSjq1T6lj2mPT6raLg4,15923
10
+ broccoli/tensor.py,sha256=MUvXtwD2f1sPTBym4FB0x_ZfsJUBNLgULUlN8btV8GI,1943
11
+ broccoli/transformer.py,sha256=2hoRg9N0g2HTfrUU6cTXl4msHO0orWiHS9xJTUMHej4,16234
12
12
  broccoli/utils.py,sha256=htq_hOsdhUhL0nJi9WkKiEYOjEoWqFpK5X49PtgTf-0,299
13
13
  broccoli/vit.py,sha256=--eEAxBzCCVGCIvaZWHsonbVXGe04wDEz0Q3V4YZXoI,16599
14
- broccoli_ml-0.18.0.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
15
- broccoli_ml-0.18.0.dist-info/METADATA,sha256=1CcIDoXMMzX88L6sqjEgtrkXJV4HSyQhRymaGBTfQkY,1257
16
- broccoli_ml-0.18.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
17
- broccoli_ml-0.18.0.dist-info/RECORD,,
14
+ broccoli_ml-0.19.0.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
15
+ broccoli_ml-0.19.0.dist-info/METADATA,sha256=aSUin_G9VrKRhl8gBqGirK1yWJyMo6krnWzA0KWF1tQ,1257
16
+ broccoli_ml-0.19.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
17
+ broccoli_ml-0.19.0.dist-info/RECORD,,