broccoli-ml 0.24.0__py3-none-any.whl → 0.24.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
broccoli/activation.py CHANGED
@@ -8,16 +8,18 @@ class ReLU(nn.Module):
8
8
  A ReLU activation function with optional clamp and leakiness.
9
9
  """
10
10
 
11
- def __init__(self, clamp=True, leaky=True, leaky_slope=0.01, clamp_max=6.0) -> None:
11
+ def __init__(
12
+ self, clamp=True, leaky=True, negative_slope=0.01, clamp_max=6.0
13
+ ) -> None:
12
14
  super().__init__()
13
15
  self.clamp = clamp
14
16
  self.leaky = leaky
15
- self.leaky_slope = leaky_slope
17
+ self.negative_slope = negative_slope
16
18
  self.clamp_max = clamp_max
17
19
 
18
20
  def forward(self, x):
19
21
  if self.leaky:
20
- relu = F.leaky_relu(x, leaky_slope=self.leaky_slope)
22
+ relu = F.leaky_relu(x, negative_slope=self.negative_slope)
21
23
  else:
22
24
  relu = F.relu(x)
23
25
  if self.clamp:
@@ -69,17 +71,17 @@ class SquaredReLU(nn.Module):
69
71
  """
70
72
 
71
73
  def __init__(
72
- self, clamp=True, leaky=True, leaky_slope: float = 0.01, clamp_max=6
74
+ self, clamp=True, leaky=True, negative_slope: float = 0.01, clamp_max=6
73
75
  ) -> None:
74
76
  super().__init__()
75
77
  self.clamp = clamp
76
78
  self.leaky = leaky
77
- self.leaky_slope = leaky_slope
79
+ self.negative_slope = negative_slope
78
80
  self.clamp_max = clamp_max
79
81
 
80
82
  def forward(self, x):
81
83
  if self.leaky:
82
- relu = F.leaky_relu(x, leaky_slope=self.leaky_slope)
84
+ relu = F.leaky_relu(x, negative_slope=self.negative_slope)
83
85
  else:
84
86
  relu = F.relu(x)
85
87
  relu_squared = relu**2
@@ -102,12 +104,12 @@ class XGLU(nn.Module):
102
104
  return self.activation(gate) * value
103
105
 
104
106
 
105
- def SquaredReGLU(clamp=True, leaky=True, leaky_slope=0.01, clamp_max=6.0) -> XGLU:
107
+ def SquaredReGLU(clamp=True, leaky=True, negative_slope=0.01, clamp_max=6.0) -> XGLU:
106
108
  """
107
109
  Factory function that creates a GLU with a SquaredReLU activation.
108
110
  """
109
111
  activation_module = SquaredReLU(
110
- clamp=clamp, leaky=leaky, leaky_slope=leaky_slope, clamp_max=clamp_max
112
+ clamp=clamp, leaky=leaky, negative_slope=negative_slope, clamp_max=clamp_max
111
113
  )
112
114
  return XGLU(activation_module)
113
115
 
broccoli/tensor.py CHANGED
@@ -76,21 +76,27 @@ class AnchoredReparamTensor(nn.Module):
76
76
  assert init_tensor.ndim == 2, "Input tensor must be a 2D matrix."
77
77
  super().__init__()
78
78
 
79
- self.weight = nn.Parameter(init_tensor.clone(), requires_grad=True)
79
+ # Use the gradboard convention of calling something nondecay_* if we should
80
+ # exclude it from weight decay
81
+ self.nondecay_weight = nn.Parameter(init_tensor.clone(), requires_grad=True)
80
82
 
81
83
  # At initialization, compute the dominant right-singular vector (v_0)
82
84
  # and store it in a non-trainable buffer.
83
85
  with torch.no_grad():
84
- _, _, v_transpose = torch.linalg.svd(self.weight, full_matrices=False)
86
+ _, _, v_transpose = torch.linalg.svd(
87
+ self.nondecay_weight, full_matrices=False
88
+ )
85
89
  # v_transpose[0] is the first row of V^T, which is the first right-singular vector.
86
90
  self.register_buffer("anchor_vector", v_transpose[0])
87
91
 
88
- initial_norm = torch.linalg.vector_norm(self.weight.mv(self.anchor_vector))
92
+ initial_norm = torch.linalg.vector_norm(
93
+ self.nondecay_weight.mv(self.anchor_vector)
94
+ )
89
95
  self.scale = nn.Parameter(initial_norm.clone().detach(), requires_grad=True)
90
96
 
91
97
  def forward(self) -> torch.Tensor:
92
98
  # Calculate the L2 norm of the matrix-vector product W @ v_0
93
- norm = torch.linalg.vector_norm(self.weight.mv(self.anchor_vector))
99
+ norm = torch.linalg.vector_norm(self.nondecay_weight.mv(self.anchor_vector))
94
100
 
95
101
  # Return the reparameterized tensor.
96
- return self.scale * (self.weight / (norm + 1e-6))
102
+ return self.scale * (self.nondecay_weight / (norm + 1e-6))
broccoli/transformer.py CHANGED
@@ -236,7 +236,7 @@ class FeedforwardBlock(nn.Module):
236
236
  activation_kwargs=None,
237
237
  dropout=0.0,
238
238
  linear_module=nn.Linear,
239
- reparam=False,
239
+ raw_input=False,
240
240
  ):
241
241
  super().__init__()
242
242
 
@@ -253,8 +253,9 @@ class FeedforwardBlock(nn.Module):
253
253
  else ratio * output_features
254
254
  )
255
255
 
256
- if reparam:
256
+ if raw_input:
257
257
  self.memory_type = AnchoredLinear
258
+
258
259
  else:
259
260
  self.memory_type = linear_module
260
261
 
@@ -263,7 +264,7 @@ class FeedforwardBlock(nn.Module):
263
264
  nn.LayerNorm(input_features),
264
265
  linear_module(input_features, self.max_features),
265
266
  self.activation,
266
- # nn.LayerNorm(ratio * output_features),
267
+ nn.LayerNorm(ratio * output_features) if raw_input else nn.Identity(),
267
268
  self.memory_type(ratio * output_features, output_features),
268
269
  self.dropout,
269
270
  ]
broccoli/vit.py CHANGED
@@ -302,7 +302,7 @@ class ViTEncoder(nn.Module):
302
302
  activation_kwargs=transformer_activation_kwargs,
303
303
  dropout=transformer_mlp_dropout,
304
304
  linear_module=linear_module,
305
- reparam=not cnn,
305
+ raw_input=not cnn,
306
306
  )
307
307
  else:
308
308
  self.initial_ff = nn.Identity()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: broccoli-ml
3
- Version: 0.24.0
3
+ Version: 0.24.2
4
4
  Summary: Some useful Pytorch models, circa 2025
5
5
  License: MIT
6
6
  Author: Nicholas Bailey
@@ -1,5 +1,5 @@
1
1
  broccoli/__init__.py,sha256=tmyspsVxqPZHRQCY_NRwpW4SMNBbtE8E_8z7l-SAzSo,127
2
- broccoli/activation.py,sha256=aBEEiWnvfyr8xKE47YWrncJqPEu9ymhd8uYd0s-_CeM,3385
2
+ broccoli/activation.py,sha256=-Jf30C6iGqWCorC9HEGn2oduWwjeaCAxGLUUYIy1zX8,3438
3
3
  broccoli/assets/2025_resnet_imagenet_1k_pretrained_state_dict.pkl,sha256=RZpPupWxFaVfgZrK-gBgfW1hj78oMEGhVWTbjRB3qMo,46835797
4
4
  broccoli/assets/cifar100_eigenvectors_size_2.pt,sha256=DjXDOXMeuMpIqNuGhX9z-OWYVqZwIMScSXZApRr9JjU,2501
5
5
  broccoli/assets/cifar100_eigenvectors_size_3.pt,sha256=gL6k0xtXYiYP6ZSvEiMBdJ7kIkT0AngTpDJHFQqwgxA,7173
@@ -7,11 +7,11 @@ broccoli/cnn.py,sha256=jeRyKIAMWu1E3iyI14MGgSZuZivPMh12iqkqW9ilNjo,17785
7
7
  broccoli/eigenpatches.py,sha256=J6n2usN1oQuHEHYiBNyYpn_a9eQcHjOBiIlvSei520Y,2413
8
8
  broccoli/linear.py,sha256=4bxVDsO8E1d5-RZ23u160ZntazrT7Vt4AYTdAdCQU-w,3300
9
9
  broccoli/rope.py,sha256=hw7kBPNR9GQXj4GxyIAffsGKPfcTPOFh8Bc7oEHtaZY,12108
10
- broccoli/tensor.py,sha256=_YJP9tSFRkoKrR7cfnROSpWqfMyJLjgPmtFxEWRwgz8,3606
11
- broccoli/transformer.py,sha256=L1bVQZLUbtFtOy30yPVkjnqyELGhQoHJ_lFP_WPfYUA,16073
10
+ broccoli/tensor.py,sha256=BVRS9-IWYtKXbXlerULQqitaUT8TPNgi7QTT5G_SmY4,3826
11
+ broccoli/transformer.py,sha256=67y7FDfWAI-cIu8w5gRuG0MX24z1zflB218b1NKvk9A,16108
12
12
  broccoli/utils.py,sha256=htq_hOsdhUhL0nJi9WkKiEYOjEoWqFpK5X49PtgTf-0,299
13
- broccoli/vit.py,sha256=qGCx4cnpAkPpVHFrz6bFHdnPJXPaCxtTxKlI9YQJZWg,15649
14
- broccoli_ml-0.24.0.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
15
- broccoli_ml-0.24.0.dist-info/METADATA,sha256=XwjOu6kQ0tKTUN0uc8H60YL4UvTPEyr2RYrfd4YGiyc,1257
16
- broccoli_ml-0.24.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
17
- broccoli_ml-0.24.0.dist-info/RECORD,,
13
+ broccoli/vit.py,sha256=pCjoJo6d1EhWmzI45sdP8t0olu6FlCGwMkGXcJJXXlo,15651
14
+ broccoli_ml-0.24.2.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
15
+ broccoli_ml-0.24.2.dist-info/METADATA,sha256=591Hk6OMRrWRzjyPS6To4t2uxOFa468XaPB9MlLSzZQ,1257
16
+ broccoli_ml-0.24.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
17
+ broccoli_ml-0.24.2.dist-info/RECORD,,