broccoli-ml 15.1.0__py3-none-any.whl → 15.3.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
broccoli/transformer.py CHANGED
@@ -354,9 +354,17 @@ class MHAttention(nn.Module):
354
354
  self.q_proj.reset_parameters()
355
355
  self.k_proj.reset_parameters()
356
356
  self.v_proj.reset_parameters()
357
- scale_parameters(self.v_proj, self.beta) # per Microsoft DeepNet
357
+ scale_parameters(
358
+ self.v_proj,
359
+ math.sqrt(6)
360
+ * self.beta, # sqrt(6) to compensate for PyTorch tiny default init
361
+ )
358
362
  self.out_proj.reset_parameters()
359
- scale_parameters(self.out_proj, self.beta) # per Microsoft DeepNet
363
+ scale_parameters(
364
+ self.out_proj,
365
+ math.sqrt(6)
366
+ * self.beta, # sqrt(6) to compensate for PyTorch tiny default init
367
+ )
360
368
 
361
369
  if self.talking_heads:
362
370
  # Initialize close to identity
@@ -473,8 +481,16 @@ class FeedforwardBlock(nn.Module):
473
481
  if hasattr(module, "reset_parameters"):
474
482
  module.reset_parameters()
475
483
 
476
- scale_parameters(self.linear_in, self.beta) # per Microsoft DeepNet
477
- scale_parameters(self.linear_out, self.beta)
484
+ scale_parameters(
485
+ self.linear_in,
486
+ math.sqrt(6)
487
+ * self.beta, # sqrt(6) to compensate for PyTorch tiny default init
488
+ )
489
+ scale_parameters(
490
+ self.linear_out,
491
+ math.sqrt(6)
492
+ * self.beta, # sqrt(6) to compensate for PyTorch tiny default init
493
+ )
478
494
 
479
495
 
480
496
  class EncoderBlock(nn.Module):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: broccoli-ml
3
- Version: 15.1.0
3
+ Version: 15.3.0
4
4
  Summary: Some useful Pytorch models, circa 2025
5
5
  License: MIT
6
6
  Author: Nicholas Bailey
@@ -4,10 +4,10 @@ broccoli/cnn.py,sha256=WjoPDSpe3ttwxCBNfCVRdaCHvbeZ7G-a5_i8fUsK_d8,4889
4
4
  broccoli/linear.py,sha256=W-3aNpBjd_0xRyzbCKkmg4H1qmslQOIQhB-WDDay2nM,13125
5
5
  broccoli/rope.py,sha256=GRqApBNmYCFaDak0WL1xE_BC5CTTYKQU_PBdeTcQcjc,12557
6
6
  broccoli/tensor.py,sha256=um8mrxkYbvNDo-QvHlmJm8Aw6qcngOlUZPoAk_PMReA,4480
7
- broccoli/transformer.py,sha256=Uuz_jCMZRg6GY2DmW3-Tn47gV9a-xkGVN3xQ5BYFM5w,27784
7
+ broccoli/transformer.py,sha256=XCr3fkDrYvNEA3m5rdVDHU5qbf1IYJQS1Cjgmn5RX-o,28177
8
8
  broccoli/utils.py,sha256=oOWzn6dJ5nC_9r4zq0emmfmaYACJXJNFS48AOpW2jqc,358
9
9
  broccoli/vit.py,sha256=9z1MJce38dppX35OGJvmnNIOnt-euMVV4n160W_aQYU,22799
10
- broccoli_ml-15.1.0.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
11
- broccoli_ml-15.1.0.dist-info/METADATA,sha256=lQjyxZ5c98_f4DlTJ_0b0dIbbsa2FfWTx56EG3SXGws,1369
12
- broccoli_ml-15.1.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
13
- broccoli_ml-15.1.0.dist-info/RECORD,,
10
+ broccoli_ml-15.3.0.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
11
+ broccoli_ml-15.3.0.dist-info/METADATA,sha256=StOW7vZd1c4xNddKcA8XJ-zbXgUnAsrQlg9G97wT-2w,1369
12
+ broccoli_ml-15.3.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
13
+ broccoli_ml-15.3.0.dist-info/RECORD,,