broccoli-ml 0.11.0__tar.gz → 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/PKG-INFO +1 -1
- broccoli_ml-0.12.0/broccoli/linear.py +88 -0
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/broccoli/transformer.py +20 -9
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/broccoli/vit.py +3 -0
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/pyproject.toml +1 -1
- broccoli_ml-0.11.0/broccoli/linear.py +0 -41
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/LICENSE +0 -0
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/README.md +0 -0
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/broccoli/__init__.py +0 -0
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/broccoli/activation.py +0 -0
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/broccoli/assets/2025_resnet_imagenet_1k_pretrained_state_dict.pkl +0 -0
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/broccoli/assets/cifar100_eigenvectors_size_2.pt +0 -0
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/broccoli/assets/cifar100_eigenvectors_size_3.pt +0 -0
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/broccoli/cnn.py +0 -0
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/broccoli/eigenpatches.py +0 -0
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/broccoli/rope.py +0 -0
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/broccoli/tensor.py +0 -0
- {broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/broccoli/utils.py +0 -0
broccoli_ml-0.12.0/broccoli/linear.py

@@ -0,0 +1,88 @@
+# UNDER CONSTRUCTION
+
+import math
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from .tensor import SigmaReparamTensor
+
+
+class SpectralNormLinear(nn.Module):
+    """
+    ...
+    """
+
+    def __init__(self, in_features: int, out_features: int, bias: bool = True):
+        super().__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.use_bias = bias
+
+        self.weights = None
+
+        self.weight_init = nn.Parameter(torch.empty(out_features, in_features))
+
+        # Define the bias vector as a learnable parameter if required.
+        if self.use_bias:
+            self.bias = nn.Parameter(torch.empty(out_features))
+        else:
+            # If no bias, register it as None.
+            # This is important so that PyTorch doesn't complain when saving/loading the model.
+            self.register_parameter("bias", None)
+
+        self.reset_parameters()
+
+    def reset_parameters(self) -> None:
+        nn.init.kaiming_uniform_(self.weight_init, a=math.sqrt(5))
+        if self.use_bias:
+            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight_init)
+            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
+            nn.init.uniform_(self.bias, -bound, bound)
+        self.weights = SigmaReparamTensor(self.weight_init)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return F.linear(x, self.weights(), self.bias)
+
+    def __repr__(self) -> str:
+        # Optional: A nice representation for printing the module.
+        return (
+            f"SpectralNormLinear(in_features={self.in_features}, "
+            f"out_features={self.out_features}, bias={self.use_bias})"
+        )
+
+
+class RandomLinear(nn.Linear):
+    """ """
+
+    def __init__(
+        self,
+        in_features: int,
+        out_features: int,
+        bias: bool = False,  # <---- TODO: explain this
+        beta=0.1,
+        forward_looks_random=True,
+    ):
+        super().__init__(in_features, out_features, bias=False)
+        self.beta = beta
+        self.forward_looks_random = forward_looks_random
+
+    def forward(self, inputs: torch.Tensor):
+        if not self.training:
+            return F.linear(inputs, self.weight)
+        else:
+            # Initialise self.random_weights
+            random_weights = torch.empty_like(self.weight)
+            nn.init.trunc_normal_(random_weights)
+            random_weights *= self.beta
+
+            if self.forward_looks_random:
+                # Forward using a reparameterisation trick
+                a = F.linear(inputs.detach(), self.weight, self.bias)
+                b = F.linear(inputs, random_weights, bias=None)
+            else:
+                # Forward as (W_actual * input + W_random * input) + bias
+                a = F.linear(inputs, self.weight, self.bias)
+                b = F.linear(inputs, random_weights, bias=None)
+
+            return a + b
{broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/broccoli/transformer.py

@@ -10,6 +10,7 @@ import torch.nn.functional as F
 from einops import rearrange
 
 from .rope import RotaryEmbedding, apply_rotary_emb
+from .linear import SpectralNormLinear
 
 
 class MHAttention(nn.Module):
@@ -235,6 +236,7 @@ class FeedforwardLayer(nn.Module):
         activation_kwargs=None,
         dropout=0.0,
         linear_module=nn.Linear,
+        norm_memory=False,
     ):
         super().__init__()
 
@@ -245,19 +247,28 @@ class FeedforwardLayer(nn.Module):
 
         self.dropout = nn.Dropout(dropout)
 
+        self.max_features = (
+            2 * ratio * output_features
+            if activation.__name__.endswith("GLU")
+            else ratio * output_features
+        )
+
+        if norm_memory:
+            self.memory_type = SpectralNormLinear
+            self.bias_memories = False
+        else:
+            self.memory_type = linear_module
+            self.bias_memories = True
+
         self.process = nn.Sequential(
             *[
                 nn.LayerNorm(input_features),
-                linear_module(
-                    input_features,
-                    (
-                        2 * ratio * output_features
-                        if activation.__name__.endswith("GLU")
-                        else ratio * output_features
-                    ),
-                ),
+                linear_module(input_features, self.max_features),
                 self.activation,
-
+                nn.LayerNorm(self.max_features),
+                self.memory_type(
+                    ratio * output_features, output_features, bias=self.bias_memories
+                ),
                 self.dropout,
             ]
         )
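This hunk hoists the hidden width into self.max_features (doubled when the activation class name ends in "GLU", because a gated-linear-unit activation splits its input into value and gate halves), adds a LayerNorm after the activation, and routes the output projection through self.memory_type: a bias-free SpectralNormLinear when norm_memory=True, otherwise the configured linear_module with a bias. Below is a minimal, self-contained sketch of the resulting stack; d_in, d_out and ratio are stand-in names for input_features, output_features and ratio, nn.GELU stands in for the configured activation, and nn.Linear stands in for both linear_module and SpectralNormLinear.

```python
import torch
from torch import nn

d_in, d_out, ratio = 256, 256, 2
activation = nn.GELU()  # a GLU-style activation would double max_features below
max_features = (
    2 * ratio * d_out
    if type(activation).__name__.endswith("GLU")
    else ratio * d_out
)

process = nn.Sequential(
    nn.LayerNorm(d_in),
    nn.Linear(d_in, max_features),                # expansion
    activation,
    nn.LayerNorm(max_features),                   # new in 0.12.0: normalise before the memory projection
    nn.Linear(ratio * d_out, d_out, bias=False),  # stand-in for the bias-free SpectralNormLinear "memories"
    nn.Dropout(0.1),
)

x = torch.randn(4, 16, d_in)
print(process(x).shape)  # torch.Size([4, 16, 256])
```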
{broccoli_ml-0.11.0 → broccoli_ml-0.12.0}/broccoli/vit.py

@@ -82,6 +82,7 @@ class ViTEncoder(nn.Module):
         pooling_kernel_stride=2,
         pooling_padding=1,
         intermediate_feedforward_layer=True,
+        norm_intermediate_ff_memory=True,
         transformer_position_embedding="relative",  # absolute or relative
         transformer_embedding_size=256,
         transformer_layers=7,
@@ -263,6 +264,7 @@ class ViTEncoder(nn.Module):
                 activation_kwargs=transformer_activation_kwargs,
                 dropout=transformer_mlp_dropout,
                 linear_module=linear_module,
+                norm_memory=norm_intermediate_ff_memory,
             )
         elif pooling_out_channels < transformer_embedding_size:
             self.intermediate_feedforward_layer = nn.Identity()
@@ -326,6 +328,7 @@ class CCT(nn.Module):
         pooling_kernel_stride=2,
         pooling_padding=1,
         intermediate_feedforward_layer=True,
+        norm_intermediate_ff_memory=True,
         transformer_position_embedding="relative",  # absolute or relative
         transformer_embedding_size=256,
         transformer_layers=7,
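ViTEncoder and CCT both gain a norm_intermediate_ff_memory flag (default True), and ViTEncoder forwards it to the intermediate FeedforwardLayer as norm_memory. A usage sketch, assuming CCT is importable from broccoli.vit and that the constructor arguments not shown here have workable defaults (the diff only shows the signature fragments above):

```python
# Usage sketch only: keyword names come from the diff above; other arguments
# are assumed to default sensibly, which this diff does not confirm.
from broccoli.vit import CCT

model = CCT(
    intermediate_feedforward_layer=True,
    norm_intermediate_ff_memory=True,  # new in 0.12.0; False keeps linear_module for the memory projection
    transformer_embedding_size=256,
    transformer_layers=7,
)
print(model)
```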
broccoli_ml-0.11.0/broccoli/linear.py

@@ -1,41 +0,0 @@
-# UNDER CONSTRUCTION
-
-import torch
-from torch import nn
-from torch.nn import functional as F
-
-
-class RandomLinear(nn.Linear):
-    """ """
-
-    def __init__(
-        self,
-        in_features: int,
-        out_features: int,
-        bias: bool = False,  # <---- TODO: explain this
-        beta=0.1,
-        forward_looks_random=True,
-    ):
-        super().__init__(in_features, out_features, bias=False)
-        self.beta = beta
-        self.forward_looks_random = forward_looks_random
-
-    def forward(self, inputs: torch.Tensor):
-        if not self.training:
-            return F.linear(inputs, self.weight)
-        else:
-            # Initialise self.random_weights
-            random_weights = torch.empty_like(self.weight)
-            nn.init.trunc_normal_(random_weights)
-            random_weights *= self.beta
-
-            if self.forward_looks_random:
-                # Forward using a reparameterisation trick
-                a = F.linear(inputs.detach(), self.weight, self.bias)
-                b = F.linear(inputs, random_weights, bias=None)
-            else:
-                # Forward as (W_actual * input + W_random * input) + bias
-                a = F.linear(inputs, self.weight, self.bias)
-                b = F.linear(inputs, random_weights, bias=None)
-
-            return a + b
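The old broccoli/linear.py is removed; RandomLinear moves into the new module unchanged, alongside SpectralNormLinear. Its forward_looks_random branch uses a detach-based trick: the trained weight sees detached inputs while a freshly drawn random matrix (scaled by beta, redrawn on every call) sees the live inputs, so the gradient reaching earlier layers flows only through the random projection while the weight still receives its ordinary gradient. At evaluation time the layer is a plain bias-free linear map. A standalone demonstration of that gradient routing (local names only, not the broccoli class):

```python
import torch
from torch import nn
from torch.nn import functional as F

torch.manual_seed(0)
x = torch.randn(4, 8, requires_grad=True)
weight = nn.Parameter(torch.randn(16, 8))
random_weights = torch.empty_like(weight)  # not a Parameter: no grad is tracked for it
nn.init.trunc_normal_(random_weights)
random_weights *= 0.1  # beta

a = F.linear(x.detach(), weight)  # trains `weight`, blocks the gradient to x
b = F.linear(x, random_weights)   # routes the gradient to x through the random matrix
(a + b).sum().backward()

# Gradient w.r.t. the input comes only from the random projection...
print(torch.allclose(x.grad, random_weights.sum(dim=0).expand_as(x)))        # True
# ...while the weight still gets its usual gradient, built from the detached input.
print(torch.allclose(weight.grad, x.detach().sum(dim=0).expand_as(weight)))  # True
```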