broccoli-ml: broccoli_ml-15.5.0-py3-none-any.whl → broccoli_ml-15.6.0-py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- broccoli/transformer.py +4 -4
- broccoli/vit.py +1 -1
- {broccoli_ml-15.5.0.dist-info → broccoli_ml-15.6.0.dist-info}/METADATA +1 -1
- {broccoli_ml-15.5.0.dist-info → broccoli_ml-15.6.0.dist-info}/RECORD +6 -6
- {broccoli_ml-15.5.0.dist-info → broccoli_ml-15.6.0.dist-info}/LICENSE +0 -0
- {broccoli_ml-15.5.0.dist-info → broccoli_ml-15.6.0.dist-info}/WHEEL +0 -0
broccoli/transformer.py
CHANGED
|
@@ -351,9 +351,9 @@ class MHAttention(nn.Module):
|
|
|
351
351
|
self.q_proj.reset_parameters()
|
|
352
352
|
self.k_proj.reset_parameters()
|
|
353
353
|
self.v_proj.reset_parameters()
|
|
354
|
-
scale_parameters(self.v_proj, self.beta)
|
|
354
|
+
scale_parameters(self.v_proj, math.sqrt(6) * self.beta)
|
|
355
355
|
self.out_proj.reset_parameters()
|
|
356
|
-
scale_parameters(self.out_proj, self.beta)
|
|
356
|
+
scale_parameters(self.out_proj, math.sqrt(6) * self.beta)
|
|
357
357
|
|
|
358
358
|
if self.talking_heads:
|
|
359
359
|
# Initialize close to identity
|
|
@@ -470,8 +470,8 @@ class FeedforwardBlock(nn.Module):
|
|
|
470
470
|
if hasattr(module, "reset_parameters"):
|
|
471
471
|
module.reset_parameters()
|
|
472
472
|
|
|
473
|
-
scale_parameters(self.linear_in, self.beta)
|
|
474
|
-
scale_parameters(self.linear_out, self.beta)
|
|
473
|
+
scale_parameters(self.linear_in, math.sqrt(6) * self.beta)
|
|
474
|
+
scale_parameters(self.linear_out, math.sqrt(6) * self.beta)
|
|
475
475
|
|
|
476
476
|
|
|
477
477
|
class EncoderBlock(nn.Module):
|
broccoli/vit.py
CHANGED
|
@@ -521,7 +521,7 @@ class ViT(nn.Module):
|
|
|
521
521
|
}[transformer_activation]
|
|
522
522
|
|
|
523
523
|
self.alpha = (2 * transformer_layers) ** 0.25
|
|
524
|
-
self.beta = (8 * transformer_layers) ** 0.25
|
|
524
|
+
self.beta = (8 * transformer_layers) ** -0.25
|
|
525
525
|
|
|
526
526
|
self.encoder = ViTEncoder(
|
|
527
527
|
input_size=input_size,
|
|
{broccoli_ml-15.5.0.dist-info → broccoli_ml-15.6.0.dist-info}/RECORD
CHANGED
|
@@ -4,10 +4,10 @@ broccoli/cnn.py,sha256=WjoPDSpe3ttwxCBNfCVRdaCHvbeZ7G-a5_i8fUsK_d8,4889
|
|
|
4
4
|
broccoli/linear.py,sha256=W-3aNpBjd_0xRyzbCKkmg4H1qmslQOIQhB-WDDay2nM,13125
|
|
5
5
|
broccoli/rope.py,sha256=GRqApBNmYCFaDak0WL1xE_BC5CTTYKQU_PBdeTcQcjc,12557
|
|
6
6
|
broccoli/tensor.py,sha256=um8mrxkYbvNDo-QvHlmJm8Aw6qcngOlUZPoAk_PMReA,4480
|
|
7
|
-
broccoli/transformer.py,sha256=
|
|
7
|
+
broccoli/transformer.py,sha256=8Drvfvrb1u5zKG84Mcjj39sDz0K7w4lEMFXRexKAPM8,27634
|
|
8
8
|
broccoli/utils.py,sha256=oOWzn6dJ5nC_9r4zq0emmfmaYACJXJNFS48AOpW2jqc,358
|
|
9
|
-
broccoli/vit.py,sha256=
|
|
10
|
-
broccoli_ml-15.
|
|
11
|
-
broccoli_ml-15.
|
|
12
|
-
broccoli_ml-15.
|
|
13
|
-
broccoli_ml-15.
|
|
9
|
+
broccoli/vit.py,sha256=EsrqlUF8kJ6_lnC5-kKvUsx94B9cE2bXUmsgR7XtIhw,22736
|
|
10
|
+
broccoli_ml-15.6.0.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
|
|
11
|
+
broccoli_ml-15.6.0.dist-info/METADATA,sha256=zcahKBja-QnU2BzydrdTeD3Pp0n1yWqh_hOPCQsFbXo,1369
|
|
12
|
+
broccoli_ml-15.6.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
13
|
+
broccoli_ml-15.6.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|