broccoli-ml 14.0.1__py3-none-any.whl → 15.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- broccoli/transformer.py +0 -2
- broccoli/vit.py +9 -8
- {broccoli_ml-14.0.1.dist-info → broccoli_ml-15.0.0.dist-info}/METADATA +1 -1
- {broccoli_ml-14.0.1.dist-info → broccoli_ml-15.0.0.dist-info}/RECORD +6 -6
- {broccoli_ml-14.0.1.dist-info → broccoli_ml-15.0.0.dist-info}/LICENSE +0 -0
- {broccoli_ml-14.0.1.dist-info → broccoli_ml-15.0.0.dist-info}/WHEEL +0 -0
broccoli/transformer.py
CHANGED
broccoli/vit.py
CHANGED
|
@@ -403,8 +403,9 @@ class ViTEncoder(nn.Module):
|
|
|
403
403
|
checkpoint=transformer_checkpoint_ff,
|
|
404
404
|
beta=self.beta,
|
|
405
405
|
)
|
|
406
|
+
self.layer_norm = nn.LayerNorm(transformer_embedding_size)
|
|
406
407
|
else:
|
|
407
|
-
self.initial_ff =
|
|
408
|
+
self.initial_ff = None
|
|
408
409
|
|
|
409
410
|
self.preprocess = nn.Sequential(
|
|
410
411
|
*[
|
|
@@ -424,7 +425,8 @@ class ViTEncoder(nn.Module):
|
|
|
424
425
|
|
|
425
426
|
def forward(self, x):
|
|
426
427
|
x = self.preprocess(x)
|
|
427
|
-
|
|
428
|
+
if self.initial_ff is not None:
|
|
429
|
+
x = self.layer_norm(x + self.initial_ff(x))
|
|
428
430
|
return self.transformer(x)
|
|
429
431
|
|
|
430
432
|
def attention_logits(self, x):
|
|
@@ -498,8 +500,6 @@ class ViT(nn.Module):
|
|
|
498
500
|
batch_norm_logits=True,
|
|
499
501
|
logit_projection_layer=nn.Linear,
|
|
500
502
|
linear_module=nn.Linear,
|
|
501
|
-
alpha=1.0,
|
|
502
|
-
beta=1.0,
|
|
503
503
|
):
|
|
504
504
|
|
|
505
505
|
super().__init__()
|
|
@@ -520,8 +520,9 @@ class ViT(nn.Module):
|
|
|
520
520
|
"SwiGLU": SwiGLU,
|
|
521
521
|
}[transformer_activation]
|
|
522
522
|
|
|
523
|
-
|
|
524
|
-
self.
|
|
523
|
+
# Set alpha and beta according to Microsoft's DeepNorm
|
|
524
|
+
self.alpha = (2 * transformer_layers) ** 0.25
|
|
525
|
+
self.beta = (8 * transformer_layers) ** 0.25
|
|
525
526
|
|
|
526
527
|
self.encoder = ViTEncoder(
|
|
527
528
|
input_size=input_size,
|
|
@@ -571,8 +572,8 @@ class ViT(nn.Module):
|
|
|
571
572
|
transformer_stochastic_depth=transformer_stochastic_depth,
|
|
572
573
|
transformer_checkpoint_ff=transformer_checkpoint_ff,
|
|
573
574
|
linear_module=linear_module,
|
|
574
|
-
alpha=alpha,
|
|
575
|
-
beta=beta,
|
|
575
|
+
alpha=self.alpha,
|
|
576
|
+
beta=self.beta,
|
|
576
577
|
)
|
|
577
578
|
|
|
578
579
|
self.pool = head(
|
|
@@ -4,10 +4,10 @@ broccoli/cnn.py,sha256=WjoPDSpe3ttwxCBNfCVRdaCHvbeZ7G-a5_i8fUsK_d8,4889
|
|
|
4
4
|
broccoli/linear.py,sha256=W-3aNpBjd_0xRyzbCKkmg4H1qmslQOIQhB-WDDay2nM,13125
|
|
5
5
|
broccoli/rope.py,sha256=GRqApBNmYCFaDak0WL1xE_BC5CTTYKQU_PBdeTcQcjc,12557
|
|
6
6
|
broccoli/tensor.py,sha256=um8mrxkYbvNDo-QvHlmJm8Aw6qcngOlUZPoAk_PMReA,4480
|
|
7
|
-
broccoli/transformer.py,sha256=
|
|
7
|
+
broccoli/transformer.py,sha256=jvlSfMXFlz3oB3tXX-LK_HTLFbXTTzFl9NdwWyTQAxY,27844
|
|
8
8
|
broccoli/utils.py,sha256=oOWzn6dJ5nC_9r4zq0emmfmaYACJXJNFS48AOpW2jqc,358
|
|
9
|
-
broccoli/vit.py,sha256=
|
|
10
|
-
broccoli_ml-
|
|
11
|
-
broccoli_ml-
|
|
12
|
-
broccoli_ml-
|
|
13
|
-
broccoli_ml-
|
|
9
|
+
broccoli/vit.py,sha256=RZZNaEN-DbCENyCRyYpmVzxS-bl0wSaRfYICuAt4mv4,22802
|
|
10
|
+
broccoli_ml-15.0.0.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
|
|
11
|
+
broccoli_ml-15.0.0.dist-info/METADATA,sha256=527mYp49whcP9FHdLJTz20dUsgxAFc_cCvdn0jj_oPg,1369
|
|
12
|
+
broccoli_ml-15.0.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
13
|
+
broccoli_ml-15.0.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|