broccoli-ml 0.18.1__tar.gz → 0.20.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: broccoli-ml
-Version: 0.18.1
+Version: 0.20.0
 Summary: Some useful Pytorch models, circa 2025
 License: MIT
 Author: Nicholas Bailey
@@ -14,30 +14,36 @@ class SigmaReparamTensor(nn.Module):
 
         super().__init__()
 
-        self.tensor = nn.Parameter(init_tensor, requires_grad=True)
+        self.sigma_reparam_tensor = nn.Parameter(init_tensor, requires_grad=True)
 
         with torch.no_grad():
-            _, sigma, v_transpose = torch.linalg.svd(self.tensor, full_matrices=False)
+            _, sigma, v_transpose = torch.linalg.svd(
+                self.sigma_reparam_tensor, full_matrices=False
+            )
 
         self.register_buffer("approx_spectral_norm", sigma[:1])
         self.register_buffer("right_singular", v_transpose[0])
-        self.scale = nn.Parameter(
+        self.sigma_reparam_scale = nn.Parameter(
             self.approx_spectral_norm.clone().detach(), requires_grad=True
         )
 
     def power_iteration(self):
         with torch.no_grad():
-            approx_right_singular_transpose = self.tensor.mv(self.right_singular)
+            approx_right_singular_transpose = self.sigma_reparam_tensor.mv(
+                self.right_singular
+            )
             approx_right_singular_transpose = F.normalize(
                 approx_right_singular_transpose, dim=0
             )
-            updated_right_singular = self.tensor.T.mv(approx_right_singular_transpose)
+            updated_right_singular = self.sigma_reparam_tensor.T.mv(
+                approx_right_singular_transpose
+            )
             updated_right_singular = F.normalize(updated_right_singular, dim=0)
             self.right_singular.data.copy_(updated_right_singular)
             rayleigh_quotient = torch.einsum(
                 "m,mn,n->",
                 approx_right_singular_transpose,
-                self.tensor,
+                self.sigma_reparam_tensor,
                 updated_right_singular,
             )
             self.approx_spectral_norm.data.copy_(rayleigh_quotient)
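
Context for the rename above: power_iteration maintains a running estimate of the weight's largest singular value by refreshing the stored right singular vector and saving the Rayleigh quotient uᵀWv into approx_spectral_norm. A standalone sketch (plain tensors, not the package's module) showing that this update tracks torch.linalg.svd:

```python
# Standalone sketch: the power-iteration update used by SigmaReparamTensor,
# applied to a plain tensor to show that the Rayleigh quotient u^T W v
# tracks the largest singular value reported by torch.linalg.svd.
import torch
import torch.nn.functional as F

torch.manual_seed(0)
weight = torch.randn(64, 32)

# Initialise the right singular vector from an exact SVD, as __init__ does.
_, sigma, v_transpose = torch.linalg.svd(weight, full_matrices=False)
right_singular = v_transpose[0]

for _ in range(5):
    u = F.normalize(weight.mv(right_singular), dim=0)     # approx. top left singular vector
    right_singular = F.normalize(weight.T.mv(u), dim=0)   # approx. top right singular vector
    approx_spectral_norm = torch.einsum("m,mn,n->", u, weight, right_singular)

print(approx_spectral_norm.item(), sigma[0].item())  # the two values agree closely
```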
@@ -45,4 +51,6 @@ class SigmaReparamTensor(nn.Module):
     def forward(self):
         if self.training:
             self.power_iteration()
-        return self.scale * (self.tensor / self.approx_spectral_norm)
+        return self.sigma_reparam_scale * (
+            self.sigma_reparam_tensor / self.approx_spectral_norm
+        )
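
One practical consequence of renaming self.tensor and self.scale: nn.Parameter attributes are serialised under their attribute names, so state_dicts saved with 0.18.x will no longer match 0.20.0 modules. A hedged remapping sketch (the leaf names come from this diff; the assumption that no other submodule uses parameters literally named "tensor" or "scale" is mine):

```python
# Hedged sketch: remap 0.18.x checkpoint keys to the 0.20.0 attribute names
# before load_state_dict. Assumes the leaves "tensor" and "scale" occur only
# inside SigmaReparamTensor submodules; the checkpoint path is hypothetical.
import torch

RENAMES = {"tensor": "sigma_reparam_tensor", "scale": "sigma_reparam_scale"}

def remap_sigma_reparam_keys(state_dict):
    remapped = {}
    for key, value in state_dict.items():
        prefix, _, leaf = key.rpartition(".")
        leaf = RENAMES.get(leaf, leaf)
        remapped[f"{prefix}.{leaf}" if prefix else leaf] = value
    return remapped

# Usage (hypothetical file name):
# model.load_state_dict(remap_sigma_reparam_keys(torch.load("checkpoint_0_18.pt")))
```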
@@ -255,10 +255,8 @@ class FeedforwardBlock(nn.Module):
 
         if regularise_values:
             self.memory_type = SpectralNormLinear
-            self.bias_memories = False
         else:
             self.memory_type = nn.Linear
-            self.bias_memories = True
 
         self.process = nn.Sequential(
             *[
@@ -266,9 +264,7 @@ class FeedforwardBlock(nn.Module):
                 linear_module(input_features, self.max_features),
                 self.activation,
                 nn.LayerNorm(ratio * output_features),
-                self.memory_type(
-                    ratio * output_features, output_features, bias=self.bias_memories
-                ),
+                self.memory_type(ratio * output_features, output_features),
                 self.dropout,
             ]
         )
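
With the bias_memories flag gone, the memory layer now uses each class's own default: nn.Linear defaults to bias=True, so the unregularised branch is unchanged, while the SpectralNormLinear branch now depends on that class's default (not visible in this diff) rather than being forced to bias=False. A quick check of the nn.Linear side:

```python
# nn.Linear keeps a bias by default, so dropping the explicit bias= argument
# leaves the else-branch behaviour unchanged; SpectralNormLinear's default is
# defined elsewhere in the package and is not visible in this diff.
import torch.nn as nn

layer = nn.Linear(4 * 256, 256)  # illustrative ratio * output_features -> output_features
print(layer.bias is not None)    # True
```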
@@ -30,7 +30,7 @@ class GetCLSToken(nn.Module):
 
 
 class SequencePool(nn.Module):
-    def __init__(self, d_model, linear_module):
+    def __init__(self, d_model, linear_module=nn.Linear):
         super().__init__()
         self.attention = nn.Sequential(
             *[
@@ -54,13 +54,6 @@ class ClassificationHead(nn.Module):
         super().__init__()
         self.d_model = d_model
         self.summarize = GetCLSToken()
-        self.process = nn.Sequential(
-            *[
-                linear_module(d_model, 1),
-                Rearrange("batch seq 1 -> batch seq"),
-                nn.Softmax(dim=-1),
-            ]
-        )
         self.projection = nn.Linear(d_model, n_classes)
         if batch_norm:
             self.batch_norm = nn.BatchNorm1d(n_classes, affine=False)
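
The deleted self.process stack (linear score, rearrange, softmax) is the attention-pooling computation that SequencePool already provides, so the base ClassificationHead is left with CLS-token extraction plus the projection. For reference, the removed layers implement softmax-weighted pooling over the sequence, roughly as in this standalone sketch (the final weighted sum is an assumption; SequencePool's forward is not shown in the diff):

```python
# Standalone sketch of softmax attention pooling over a sequence, matching the
# removed Linear -> Rearrange -> Softmax stack; the final weighted sum over the
# sequence is assumed rather than taken from SequencePool's (unshown) forward.
import torch
import torch.nn as nn
from einops import rearrange

batch, seq, d_model = 2, 10, 64
x = torch.randn(batch, seq, d_model)

scores = nn.Linear(d_model, 1)(x)                                        # (batch, seq, 1)
weights = rearrange(scores, "batch seq 1 -> batch seq").softmax(dim=-1)  # (batch, seq)
pooled = torch.einsum("bs,bsd->bd", weights, x)                          # (batch, d_model)
print(pooled.shape)  # torch.Size([2, 64])
```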
@@ -87,7 +80,7 @@ class SequencePoolClassificationHead(ClassificationHead):
     """
 
     def __init__(self, d_model, linear_module, out_dim, batch_norm=True):
-        super().__init__(d_model, linear_module, out_dim, batch_norm=True)
+        super().__init__(d_model, linear_module, out_dim, batch_norm=batch_norm)
         self.summarize = SequencePool(d_model, linear_module)
 
 
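The super().__init__ call previously hard-coded batch_norm=True, so a caller passing batch_norm=False still got the BatchNorm1d layer; forwarding batch_norm=batch_norm makes the flag effective. A generic illustration of the pattern (toy classes, not the package's own):

```python
# Generic illustration of the fixed bug: hard-coding a keyword in the
# super().__init__ call silently ignores the caller's argument.
class Base:
    def __init__(self, batch_norm=True):
        self.uses_batch_norm = batch_norm

class Broken(Base):
    def __init__(self, batch_norm=True):
        super().__init__(batch_norm=True)        # always True; caller's flag ignored

class Fixed(Base):
    def __init__(self, batch_norm=True):
        super().__init__(batch_norm=batch_norm)  # flag forwarded, as in 0.20.0

print(Broken(batch_norm=False).uses_batch_norm)  # True  (old behaviour)
print(Fixed(batch_norm=False).uses_batch_norm)   # False (new behaviour)
```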
@@ -1,6 +1,6 @@
 [project]
 name = "broccoli-ml"
-version = "0.18.1"
+version = "0.20.0"
 description = "Some useful Pytorch models, circa 2025"
 authors = [
     {name = "Nicholas Bailey"}
File without changes
File without changes