broccoli-ml 0.31.0__tar.gz → 0.31.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/PKG-INFO +1 -1
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/broccoli/transformer.py +4 -5
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/pyproject.toml +1 -1
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/LICENSE +0 -0
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/README.md +0 -0
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/broccoli/__init__.py +0 -0
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/broccoli/activation.py +0 -0
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/broccoli/assets/2025_resnet_imagenet_1k_pretrained_state_dict.pkl +0 -0
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/broccoli/assets/cifar100_eigenvectors_size_2.pt +0 -0
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/broccoli/assets/cifar100_eigenvectors_size_3.pt +0 -0
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/broccoli/cnn.py +0 -0
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/broccoli/eigenpatches.py +0 -0
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/broccoli/linear.py +0 -0
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/broccoli/rope.py +0 -0
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/broccoli/tensor.py +0 -0
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/broccoli/utils.py +0 -0
- {broccoli_ml-0.31.0 → broccoli_ml-0.31.2}/broccoli/vit.py +0 -0
@@ -248,7 +248,7 @@ class FeedforwardBlock(nn.Module):
|
|
248
248
|
self.activation = activation()
|
249
249
|
|
250
250
|
if raw_input:
|
251
|
-
self.memory_type =
|
251
|
+
self.memory_type = AnchoredLinear
|
252
252
|
self.memory_bias = False
|
253
253
|
else:
|
254
254
|
self.memory_type = nn.Linear
|
@@ -379,13 +379,12 @@ class TransformerBlock(nn.Module):
|
|
379
379
|
process_x = process_x + self.ff(process_x)
|
380
380
|
else: # post-norm
|
381
381
|
process_x = process_x + self.attn(process_x, process_x, process_x)
|
382
|
-
|
382
|
+
process_x = self.layer_norm_1(process_x)
|
383
383
|
process_x = process_x + self.ff(process_x)
|
384
|
+
process_x = self.layer_norm_2(process_x)
|
384
385
|
|
385
386
|
# Always post norm as eventually we reach the classification head!
|
386
|
-
x = (
|
387
|
-
torch.cat([identity_x, process_x])[unshuffle_indices, :, :].contiguous()
|
388
|
-
)
|
387
|
+
x = torch.cat([identity_x, process_x])[unshuffle_indices, :, :].contiguous()
|
389
388
|
|
390
389
|
return x
|
391
390
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|