broccoli-ml 0.26.0__tar.gz → 0.28.0__tar.gz
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/PKG-INFO +1 -1
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/broccoli/transformer.py +12 -4
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/broccoli/vit.py +1 -0
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/pyproject.toml +1 -1
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/LICENSE +0 -0
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/README.md +0 -0
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/broccoli/__init__.py +0 -0
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/broccoli/activation.py +0 -0
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/broccoli/assets/2025_resnet_imagenet_1k_pretrained_state_dict.pkl +0 -0
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/broccoli/assets/cifar100_eigenvectors_size_2.pt +0 -0
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/broccoli/assets/cifar100_eigenvectors_size_3.pt +0 -0
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/broccoli/cnn.py +0 -0
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/broccoli/eigenpatches.py +0 -0
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/broccoli/linear.py +0 -0
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/broccoli/rope.py +0 -0
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/broccoli/tensor.py +0 -0
- {broccoli_ml-0.26.0 → broccoli_ml-0.28.0}/broccoli/utils.py +0 -0
@@ -238,6 +238,7 @@ class FeedforwardBlock(nn.Module):
|
|
238
238
|
linear_module=nn.Linear,
|
239
239
|
pre_norm=True,
|
240
240
|
normformer=False,
|
241
|
+
raw_input=False,
|
241
242
|
):
|
242
243
|
super().__init__()
|
243
244
|
|
@@ -246,6 +247,11 @@ class FeedforwardBlock(nn.Module):
|
|
246
247
|
else:
|
247
248
|
self.activation = activation()
|
248
249
|
|
250
|
+
if raw_input:
|
251
|
+
self.memory_type = AnchoredLinear
|
252
|
+
else:
|
253
|
+
self.memory_type = nn.Linear
|
254
|
+
|
249
255
|
self.dropout = nn.Dropout(dropout)
|
250
256
|
|
251
257
|
self.max_features = (
|
@@ -260,7 +266,7 @@ class FeedforwardBlock(nn.Module):
|
|
260
266
|
linear_module(input_features, self.max_features),
|
261
267
|
self.activation,
|
262
268
|
nn.LayerNorm(ratio * output_features) if normformer else nn.Identity(),
|
263
|
-
|
269
|
+
self.memory_type(ratio * output_features, output_features),
|
264
270
|
self.dropout,
|
265
271
|
]
|
266
272
|
)
|
@@ -371,9 +377,11 @@ class TransformerBlock(nn.Module):
|
|
371
377
|
process_x = process_x + self.attn(process_x, process_x, process_x)
|
372
378
|
norm_process_x = self.layer_norm_1(process_x)
|
373
379
|
process_x = process_x + self.ff(process_x)
|
374
|
-
|
375
|
-
|
376
|
-
|
380
|
+
|
381
|
+
# Always post norm as eventually we reach the classification head!
|
382
|
+
x = self.layer_norm_2(
|
383
|
+
torch.cat([identity_x, process_x])[unshuffle_indices, :, :].contiguous()
|
384
|
+
)
|
377
385
|
|
378
386
|
return x
|
379
387
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|