broccoli-ml 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- broccoli/vit.py +18 -13
- {broccoli_ml-0.2.0.dist-info → broccoli_ml-0.4.0.dist-info}/METADATA +1 -1
- {broccoli_ml-0.2.0.dist-info → broccoli_ml-0.4.0.dist-info}/RECORD +5 -5
- {broccoli_ml-0.2.0.dist-info → broccoli_ml-0.4.0.dist-info}/LICENSE +0 -0
- {broccoli_ml-0.2.0.dist-info → broccoli_ml-0.4.0.dist-info}/WHEEL +0 -0
broccoli/vit.py
CHANGED
@@ -49,7 +49,7 @@ class SequencePool(nn.Module):
         return self.norm(projection)
 
 
-class CCTEncoder(nn.Module):
+class DCTEncoder(nn.Module):
     """
     Based on the Compact Convolutional Transformer (CCT) of [Hasani et al. (2021)
     *''Escaping the Big Data Paradigm with Compact Transformers''*](
@@ -63,25 +63,26 @@ class CCTEncoder(nn.Module):
         self,
         input_size=(32, 32),
         cnn_in_channels=3,
+        minimum_cnn_out_channels=16,
         cnn_kernel_size=3,
         cnn_kernel_stride=1,
         cnn_kernel_padding="same",
         cnn_kernel_dilation=1,
         cnn_kernel_groups=1,
-        cnn_activation: nn.Module =
+        cnn_activation: nn.Module = ReLU,
         cnn_activation_kwargs: Optional[dict] = None,
         cnn_dropout=0.0,
-        pooling_type="
+        pooling_type="concat",  # maxpool or concat
         pooling_kernel_size=3,
         pooling_kernel_stride=2,
         pooling_kernel_padding=1,
-        transformer_position_embedding="
+        transformer_position_embedding="relative",  # absolute or relative
         transformer_embedding_size=256,
         transformer_layers=7,
         transformer_heads=4,
         transformer_mlp_ratio=2,
         transformer_bos_tokens=4,
-        transformer_activation: nn.Module =
+        transformer_activation: nn.Module = SquaredReLU,
         transformer_activation_kwargs: Optional[dict] = None,
         mlp_dropout=0.0,
         msa_dropout=0.1,
@@ -155,8 +156,9 @@ class CCTEncoder(nn.Module):
         if pooling_type in ["maxpool", None]:
             cnn_out_channels = transformer_embedding_size
         elif pooling_type == "concat":
-            cnn_out_channels =
-                transformer_embedding_size / pooling_kernel_voxels
+            cnn_out_channels = min(
+                math.floor(transformer_embedding_size / pooling_kernel_voxels),
+                minimum_cnn_out_channels,
             )
         else:
             raise NotImplementedError("Pooling type must be maxpool, concat or None")
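For the "concat" pooling branch, the new code floors the per-voxel share of the embedding size and combines it with the new `minimum_cnn_out_channels` argument via `min()`. A minimal sketch of that arithmetic, assuming `pooling_kernel_voxels` is the element count of a 3 × 3 pooling kernel (an illustrative assumption, not stated in the diff):

```python
import math

# Illustrative values: the 3x3 kernel (9 voxels) is an assumption;
# the other numbers mirror the defaults shown in the diff above.
transformer_embedding_size = 256
pooling_kernel_voxels = 3 * 3
minimum_cnn_out_channels = 16  # new keyword argument in 0.4.0

# 0.2.0 (removed lines): plain float division
old_channels = transformer_embedding_size / pooling_kernel_voxels  # 28.44...

# 0.4.0 (added lines): floored, then combined with min()
new_channels = min(
    math.floor(transformer_embedding_size / pooling_kernel_voxels),  # 28
    minimum_cnn_out_channels,  # 16
)
print(old_channels, new_channels)  # 28.444... 16
```

Note that, as written, `min()` caps the channel count at `minimum_cnn_out_channels` rather than enforcing it as a lower bound; the sketch simply reproduces the behaviour shown in the diff.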
@@ -289,8 +291,9 @@ class CCTEncoder(nn.Module):
         return self.encoder(x)
 
 
-class CCT(nn.Module):
+class DCT(nn.Module):
     """
+    Denoising convolutional transformer
     Based on the Compact Convolutional Transformer (CCT) of [Hasani et al. (2021)
     *''Escaping the Big Data Paradigm with Compact Transformers''*](
     https://arxiv.org/abs/2104.05704). It's a convolutional neural network
@@ -301,25 +304,26 @@ class CCT(nn.Module):
         self,
         input_size=(32, 32),
         cnn_in_channels=3,
+        minimum_cnn_out_channels=16,
         cnn_kernel_size=3,
         cnn_kernel_stride=1,
         cnn_kernel_padding="same",
         cnn_kernel_dilation=1,
         cnn_kernel_groups=1,
-        cnn_activation: nn.Module =
+        cnn_activation: nn.Module = ReLU,
         cnn_activation_kwargs: Optional[dict] = None,
         cnn_dropout=0.0,
-        pooling_type="
+        pooling_type="concat",  # maxpool or concat
        pooling_kernel_size=3,
         pooling_kernel_stride=2,
         pooling_kernel_padding=1,
-        transformer_position_embedding="
+        transformer_position_embedding="relative",  # absolute or relative
         transformer_embedding_size=256,
         transformer_layers=7,
         transformer_heads=4,
         transformer_mlp_ratio=2,
         transformer_bos_tokens=4,
-        transformer_activation: nn.Module =
+        transformer_activation: nn.Module = SquaredReLU,
         transformer_activation_kwargs: Optional[dict] = None,
         mlp_dropout=0.0,
         msa_dropout=0.1,
@@ -347,9 +351,10 @@ class CCT(nn.Module):
             "SwiGLU": SwiGLU,
         }[transformer_activation]
 
-        self.encoder = CCTEncoder(
+        self.encoder = DCTEncoder(
             input_size=input_size,
             cnn_in_channels=cnn_in_channels,
+            minimum_cnn_out_channels=minimum_cnn_out_channels,
             cnn_kernel_size=cnn_kernel_size,
             cnn_kernel_stride=cnn_kernel_stride,
             cnn_kernel_padding=cnn_kernel_padding,
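Taken together, the vit.py changes rename `CCTEncoder`/`CCT` to `DCTEncoder`/`DCT` and thread the new `minimum_cnn_out_channels` argument through to the encoder. A minimal usage sketch, under the assumptions that `DCT` is importable from `broccoli.vit` and accepts an NCHW image batch matching `input_size` (neither is confirmed by the diff alone):

```python
import torch
from broccoli.vit import DCT  # assumed import path; DCT replaces CCT in 0.4.0

# Hypothetical instantiation using the defaults visible in the diff above.
model = DCT(
    input_size=(32, 32),
    cnn_in_channels=3,
    minimum_cnn_out_channels=16,   # new in 0.4.0
    pooling_type="concat",         # "maxpool" or "concat" per the inline comment
    transformer_embedding_size=256,
)

x = torch.randn(8, 3, 32, 32)  # assumed NCHW batch matching input_size
features = model(x)
```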
{broccoli_ml-0.2.0.dist-info → broccoli_ml-0.4.0.dist-info}/RECORD
CHANGED
@@ -10,8 +10,8 @@ broccoli/rope.py,sha256=hw7kBPNR9GQXj4GxyIAffsGKPfcTPOFh8Bc7oEHtaZY,12108
 broccoli/tensor.py,sha256=E2JK5mQwJf75e23-JGcDoT7QxQf89DJReUo2et1LhRY,1716
 broccoli/transformer.py,sha256=23R58t3TLZMb9ulhCtQ3gXu0mPlfyPvLM8TaGOpaz58,16310
 broccoli/utils.py,sha256=htq_hOsdhUhL0nJi9WkKiEYOjEoWqFpK5X49PtgTf-0,299
-broccoli/vit.py,sha256=
-broccoli_ml-0.
-broccoli_ml-0.
-broccoli_ml-0.
-broccoli_ml-0.
+broccoli/vit.py,sha256=wPovWrrK-s7rPcAqvvGUWXu2v_77-GMIgmgb6G_y2x8,13869
+broccoli_ml-0.4.0.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
+broccoli_ml-0.4.0.dist-info/METADATA,sha256=88B7KqQ9zmxkzelSdhVirPbla09qQbpQrwilC6xTCng,1256
+broccoli_ml-0.4.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+broccoli_ml-0.4.0.dist-info/RECORD,,
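The RECORD lines updated above follow the standard wheel format `path,sha256=<urlsafe-base64 digest without padding>,<size in bytes>`. A small sketch showing how such an entry can be recomputed for a file on disk (generic wheel tooling behaviour, not code from this package):

```python
import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    """Build a wheel RECORD line: path,sha256=<digest>,<size>."""
    data = Path(path).read_bytes()
    digest = hashlib.sha256(data).digest()
    # RECORD uses URL-safe base64 with the trailing '=' padding stripped.
    encoded = base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")
    return f"{path},sha256={encoded},{len(data)}"

# Run against the extracted 0.4.0 wheel, this should reproduce the
# broccoli/vit.py line shown in the hunk above.
print(record_entry("broccoli/vit.py"))
```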
{broccoli_ml-0.2.0.dist-info → broccoli_ml-0.4.0.dist-info}/LICENSE
File without changes
{broccoli_ml-0.2.0.dist-info → broccoli_ml-0.4.0.dist-info}/WHEEL
File without changes