broccoli-ml 15.5.0__tar.gz → 15.6.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: broccoli-ml
3
- Version: 15.5.0
3
+ Version: 15.6.0
4
4
  Summary: Some useful Pytorch models, circa 2025
5
5
  License: MIT
6
6
  Author: Nicholas Bailey
@@ -351,9 +351,9 @@ class MHAttention(nn.Module):
351
351
  self.q_proj.reset_parameters()
352
352
  self.k_proj.reset_parameters()
353
353
  self.v_proj.reset_parameters()
354
- scale_parameters(self.v_proj, self.beta)
354
+ scale_parameters(self.v_proj, math.sqrt(6) * self.beta)
355
355
  self.out_proj.reset_parameters()
356
- scale_parameters(self.out_proj, self.beta)
356
+ scale_parameters(self.out_proj, math.sqrt(6) * self.beta)
357
357
 
358
358
  if self.talking_heads:
359
359
  # Initialize close to identity
@@ -470,8 +470,8 @@ class FeedforwardBlock(nn.Module):
470
470
  if hasattr(module, "reset_parameters"):
471
471
  module.reset_parameters()
472
472
 
473
- scale_parameters(self.linear_in, self.beta)
474
- scale_parameters(self.linear_out, self.beta)
473
+ scale_parameters(self.linear_in, math.sqrt(6) * self.beta)
474
+ scale_parameters(self.linear_out, math.sqrt(6) * self.beta)
475
475
 
476
476
 
477
477
  class EncoderBlock(nn.Module):
@@ -521,7 +521,7 @@ class ViT(nn.Module):
521
521
  }[transformer_activation]
522
522
 
523
523
  self.alpha = (2 * transformer_layers) ** 0.25
524
- self.beta = (8 * transformer_layers) ** 0.25
524
+ self.beta = (8 * transformer_layers) ** -0.25
525
525
 
526
526
  self.encoder = ViTEncoder(
527
527
  input_size=input_size,
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "broccoli-ml"
3
- version = "15.5.0"
3
+ version = "15.6.0"
4
4
  description = "Some useful Pytorch models, circa 2025"
5
5
  authors = [
6
6
  {name = "Nicholas Bailey"}
File without changes
File without changes