broccoli-ml 0.4.2__tar.gz → 0.4.3__tar.gz
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/PKG-INFO +1 -1
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/broccoli/vit.py +17 -9
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/pyproject.toml +1 -1
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/LICENSE +0 -0
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/README.md +0 -0
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/broccoli/__init__.py +0 -0
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/broccoli/activation.py +0 -0
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/broccoli/assets/2025_resnet_imagenet_1k_pretrained_state_dict.pkl +0 -0
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/broccoli/assets/cifar100_eigenvectors_size_2.pt +0 -0
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/broccoli/assets/cifar100_eigenvectors_size_3.pt +0 -0
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/broccoli/cnn.py +0 -0
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/broccoli/eigenpatches.py +0 -0
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/broccoli/linear.py +0 -0
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/broccoli/rope.py +0 -0
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/broccoli/tensor.py +0 -0
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/broccoli/transformer.py +0 -0
- {broccoli_ml-0.4.2 → broccoli_ml-0.4.3}/broccoli/utils.py +0 -0
@@ -27,7 +27,7 @@ class SequencePool(nn.Module):
|
|
27
27
|
as a generalisation of average pooling.
|
28
28
|
"""
|
29
29
|
|
30
|
-
def __init__(self, d_model, linear_module, out_dim):
|
30
|
+
def __init__(self, d_model, linear_module, out_dim, batch_norm=True):
|
31
31
|
super().__init__()
|
32
32
|
self.d_model = d_model
|
33
33
|
self.attention = nn.Sequential(
|
@@ -38,7 +38,11 @@ class SequencePool(nn.Module):
|
|
38
38
|
]
|
39
39
|
)
|
40
40
|
self.projection = nn.Linear(d_model, out_dim)
|
41
|
-
self.
|
41
|
+
self.batch_norm = batch_norm
|
42
|
+
if batch_norm:
|
43
|
+
self.norm = nn.BatchNorm1d(out_dim, affine=False)
|
44
|
+
else:
|
45
|
+
self.norm = None
|
42
46
|
|
43
47
|
def forward(self, x):
|
44
48
|
weights = self.attention(x)
|
@@ -46,7 +50,7 @@ class SequencePool(nn.Module):
|
|
46
50
|
weights, x, "batch seq, batch seq d_model -> batch d_model"
|
47
51
|
)
|
48
52
|
projection = self.projection(weighted_embedding)
|
49
|
-
return self.norm(projection)
|
53
|
+
return self.norm(projection) if self.batch_norm else projection
|
50
54
|
|
51
55
|
|
52
56
|
class DCTEncoder(nn.Module):
|
@@ -88,7 +92,7 @@ class DCTEncoder(nn.Module):
|
|
88
92
|
msa_dropout=0.1,
|
89
93
|
stochastic_depth=0.1,
|
90
94
|
linear_module=nn.Linear,
|
91
|
-
|
95
|
+
initial_batch_norm=True,
|
92
96
|
):
|
93
97
|
super().__init__()
|
94
98
|
|
@@ -191,7 +195,7 @@ class DCTEncoder(nn.Module):
|
|
191
195
|
nn.Dropout(cnn_dropout),
|
192
196
|
(
|
193
197
|
batchnormxd(cnn_activation_out_channels)
|
194
|
-
if
|
198
|
+
if initial_batch_norm
|
195
199
|
else nn.Identity()
|
196
200
|
),
|
197
201
|
]
|
@@ -279,7 +283,7 @@ class DCTEncoder(nn.Module):
|
|
279
283
|
|
280
284
|
self.encoder = nn.Sequential(
|
281
285
|
*[
|
282
|
-
batchnormxd(cnn_in_channels) if
|
286
|
+
batchnormxd(cnn_in_channels) if initial_batch_norm else nn.Identity(),
|
283
287
|
self.cnn,
|
284
288
|
self.activate_and_dropout,
|
285
289
|
self.pool,
|
@@ -328,8 +332,9 @@ class DCT(nn.Module):
|
|
328
332
|
mlp_dropout=0.0,
|
329
333
|
msa_dropout=0.1,
|
330
334
|
stochastic_depth=0.1,
|
335
|
+
batch_norm_outputs=True,
|
331
336
|
linear_module=nn.Linear,
|
332
|
-
|
337
|
+
initial_batch_norm=True,
|
333
338
|
image_classes=100,
|
334
339
|
):
|
335
340
|
|
@@ -379,10 +384,13 @@ class DCT(nn.Module):
|
|
379
384
|
msa_dropout=msa_dropout,
|
380
385
|
stochastic_depth=stochastic_depth,
|
381
386
|
linear_module=linear_module,
|
382
|
-
|
387
|
+
initial_batch_norm=initial_batch_norm,
|
383
388
|
)
|
384
389
|
self.pool = SequencePool(
|
385
|
-
transformer_embedding_size,
|
390
|
+
transformer_embedding_size,
|
391
|
+
linear_module,
|
392
|
+
image_classes,
|
393
|
+
batch_norm=batch_norm_outputs,
|
386
394
|
)
|
387
395
|
|
388
396
|
@property
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|