broccoli-ml 13.0.6__tar.gz → 14.0.0__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {broccoli_ml-13.0.6 → broccoli_ml-14.0.0}/PKG-INFO +1 -1
- {broccoli_ml-13.0.6 → broccoli_ml-14.0.0}/broccoli/transformer.py +7 -6
- {broccoli_ml-13.0.6 → broccoli_ml-14.0.0}/pyproject.toml +1 -1
- {broccoli_ml-13.0.6 → broccoli_ml-14.0.0}/LICENSE +0 -0
- {broccoli_ml-13.0.6 → broccoli_ml-14.0.0}/README.md +0 -0
- {broccoli_ml-13.0.6 → broccoli_ml-14.0.0}/broccoli/__init__.py +0 -0
- {broccoli_ml-13.0.6 → broccoli_ml-14.0.0}/broccoli/activation.py +0 -0
- {broccoli_ml-13.0.6 → broccoli_ml-14.0.0}/broccoli/cnn.py +0 -0
- {broccoli_ml-13.0.6 → broccoli_ml-14.0.0}/broccoli/linear.py +0 -0
- {broccoli_ml-13.0.6 → broccoli_ml-14.0.0}/broccoli/rope.py +0 -0
- {broccoli_ml-13.0.6 → broccoli_ml-14.0.0}/broccoli/tensor.py +0 -0
- {broccoli_ml-13.0.6 → broccoli_ml-14.0.0}/broccoli/utils.py +0 -0
- {broccoli_ml-13.0.6 → broccoli_ml-14.0.0}/broccoli/vit.py +0 -0
|
@@ -621,6 +621,7 @@ class EncoderBlock(nn.Module):
|
|
|
621
621
|
|
|
622
622
|
if self.post_norm:
|
|
623
623
|
x = self.post_attention_norm(x)
|
|
624
|
+
process_x = x
|
|
624
625
|
elif self.pre_norm:
|
|
625
626
|
process_x = self.pre_mlp_norm(x)
|
|
626
627
|
else:
|
|
@@ -638,15 +639,15 @@ class EncoderBlock(nn.Module):
|
|
|
638
639
|
def attention_logits(self, x):
|
|
639
640
|
"""
|
|
640
641
|
Give back the attention scores used in this layer.
|
|
642
|
+
Needs to match what the model actually sees during forward()
|
|
643
|
+
by applying the correct normalisations.
|
|
641
644
|
"""
|
|
642
|
-
# Fix: Use the correct attribute name 'pre_attention_norm'
|
|
643
645
|
if self.pre_norm:
|
|
644
|
-
# We must normalize the input before measuring attention logits
|
|
645
|
-
# to match what the model actually sees during forward()
|
|
646
646
|
x = self.pre_attention_norm(x)
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
647
|
+
elif self.post_norm:
|
|
648
|
+
x = self.input_norm(x)
|
|
649
|
+
|
|
650
|
+
return self.attn.attention_logits(x, x, x)
|
|
650
651
|
|
|
651
652
|
def reset_parameters(self):
|
|
652
653
|
if self.pre_norm:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|