broccoli-ml 15.5.0__py3-none-any.whl → 15.6.0__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
broccoli/transformer.py CHANGED
@@ -351,9 +351,9 @@ class MHAttention(nn.Module):
351
351
  self.q_proj.reset_parameters()
352
352
  self.k_proj.reset_parameters()
353
353
  self.v_proj.reset_parameters()
354
- scale_parameters(self.v_proj, self.beta)
354
+ scale_parameters(self.v_proj, math.sqrt(6) * self.beta)
355
355
  self.out_proj.reset_parameters()
356
- scale_parameters(self.out_proj, self.beta)
356
+ scale_parameters(self.out_proj, math.sqrt(6) * self.beta)
357
357
 
358
358
  if self.talking_heads:
359
359
  # Initialize close to identity
@@ -470,8 +470,8 @@ class FeedforwardBlock(nn.Module):
470
470
  if hasattr(module, "reset_parameters"):
471
471
  module.reset_parameters()
472
472
 
473
- scale_parameters(self.linear_in, self.beta)
474
- scale_parameters(self.linear_out, self.beta)
473
+ scale_parameters(self.linear_in, math.sqrt(6) * self.beta)
474
+ scale_parameters(self.linear_out, math.sqrt(6) * self.beta)
475
475
 
476
476
 
477
477
  class EncoderBlock(nn.Module):
broccoli/vit.py CHANGED
@@ -521,7 +521,7 @@ class ViT(nn.Module):
521
521
  }[transformer_activation]
522
522
 
523
523
  self.alpha = (2 * transformer_layers) ** 0.25
524
- self.beta = (8 * transformer_layers) ** 0.25
524
+ self.beta = (8 * transformer_layers) ** -0.25
525
525
 
526
526
  self.encoder = ViTEncoder(
527
527
  input_size=input_size,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: broccoli-ml
3
- Version: 15.5.0
3
+ Version: 15.6.0
4
4
  Summary: Some useful Pytorch models, circa 2025
5
5
  License: MIT
6
6
  Author: Nicholas Bailey
@@ -4,10 +4,10 @@ broccoli/cnn.py,sha256=WjoPDSpe3ttwxCBNfCVRdaCHvbeZ7G-a5_i8fUsK_d8,4889
4
4
  broccoli/linear.py,sha256=W-3aNpBjd_0xRyzbCKkmg4H1qmslQOIQhB-WDDay2nM,13125
5
5
  broccoli/rope.py,sha256=GRqApBNmYCFaDak0WL1xE_BC5CTTYKQU_PBdeTcQcjc,12557
6
6
  broccoli/tensor.py,sha256=um8mrxkYbvNDo-QvHlmJm8Aw6qcngOlUZPoAk_PMReA,4480
7
- broccoli/transformer.py,sha256=xhMKGWgQqSMhCpN-cqM6Fv_MfyKU9-Gq1t9nGpUAmzE,27574
7
+ broccoli/transformer.py,sha256=8Drvfvrb1u5zKG84Mcjj39sDz0K7w4lEMFXRexKAPM8,27634
8
8
  broccoli/utils.py,sha256=oOWzn6dJ5nC_9r4zq0emmfmaYACJXJNFS48AOpW2jqc,358
9
- broccoli/vit.py,sha256=v3U_UVIZd2t3Nt60K6KGJcI5ci9t9S8h2ENwklnHg8M,22735
10
- broccoli_ml-15.5.0.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
11
- broccoli_ml-15.5.0.dist-info/METADATA,sha256=ANXSYDDts212i3b0rySkKT71_2ZSpcmpHSloNayfNns,1369
12
- broccoli_ml-15.5.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
13
- broccoli_ml-15.5.0.dist-info/RECORD,,
9
+ broccoli/vit.py,sha256=EsrqlUF8kJ6_lnC5-kKvUsx94B9cE2bXUmsgR7XtIhw,22736
10
+ broccoli_ml-15.6.0.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
11
+ broccoli_ml-15.6.0.dist-info/METADATA,sha256=zcahKBja-QnU2BzydrdTeD3Pp0n1yWqh_hOPCQsFbXo,1369
12
+ broccoli_ml-15.6.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
13
+ broccoli_ml-15.6.0.dist-info/RECORD,,