broccoli-ml 0.1.38__py3-none-any.whl → 0.1.40__py3-none-any.whl

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
broccoli/vit.py CHANGED
@@ -67,6 +67,7 @@ class CCTEncoder(nn.Module):
67
67
  conv_pooling_kernel_size=3,
68
68
  conv_pooling_kernel_stride=2,
69
69
  conv_pooling_kernel_padding=1,
70
+ conv_dropout=0.0,
70
71
  transformer_position_embedding="absolute", # absolute or relative
71
72
  transformer_embedding_size=256,
72
73
  transformer_layers=7,
@@ -155,15 +156,20 @@ class CCTEncoder(nn.Module):
155
156
  )
156
157
 
157
158
  elif conv_pooling_type == "concat":
158
- concatpool_activation_output_channels = (
159
- conv_pooling_kernel_size**2 * conv_out_channels
160
- )
161
- if cnn_activation.__name__.endswith("GLU"):
162
- concatpool_activation_output_channels /= 2
163
159
 
164
- concatpool_padding = (
165
- transformer_embedding_size - concatpool_activation_output_channels
166
- )
160
+ if transformer_activation_kwargs is not None:
161
+ self.concatpool_activation = transformer_activation(
162
+ **transformer_activation_kwargs
163
+ )
164
+ else:
165
+ self.concatpool_activation = transformer_activation()
166
+
167
+ concatpool_out_channels = conv_pooling_kernel_size**2 * conv_out_channels
168
+
169
+ if cnn_activation.__name__.endswith("GLU"):
170
+ cnn_activation_output_channels = concatpool_out_channels / 2
171
+ else:
172
+ cnn_activation_output_channels = concatpool_out_channels
167
173
 
168
174
  self.pool = nn.Sequential(
169
175
  *[
@@ -176,8 +182,24 @@ class CCTEncoder(nn.Module):
176
182
  "N C H W -> N H W C"
177
183
  ),
178
184
  self.cnn_activation,
179
- Rearrange("N H W C -> N (H W) C"),
180
- PadTensor((0, concatpool_padding)),
185
+ nn.Dropout(conv_dropout),
186
+ Rearrange( # rearrange in case we're using XGLU activation
187
+ "N H W C -> N C H W"
188
+ ),
189
+ nn.BatchNorm2d(cnn_activation_output_channels),
190
+ Rearrange( # rearrange in case we're using XGLU activation
191
+ "N C H W -> N (H W) C"
192
+ ),
193
+ nn.Linear(
194
+ cnn_activation_output_channels,
195
+ (
196
+ 2 * transformer_embedding_size * transformer_mlp_ratio
197
+ if transformer_activation.__name__.endswith("GLU")
198
+ else transformer_embedding_size * transformer_mlp_ratio
199
+ ),
200
+ ),
201
+ self.concatpool_activation,
202
+ nn.Linear(transformer_embedding_size * transformer_mlp_ratio),
181
203
  ]
182
204
  )
183
205
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: broccoli-ml
3
- Version: 0.1.38
3
+ Version: 0.1.40
4
4
  Summary: Some useful Pytorch models, circa 2025
5
5
  License: MIT
6
6
  Author: Nicholas Bailey
@@ -10,8 +10,8 @@ broccoli/rope.py,sha256=hw7kBPNR9GQXj4GxyIAffsGKPfcTPOFh8Bc7oEHtaZY,12108
10
10
  broccoli/tensor.py,sha256=E2JK5mQwJf75e23-JGcDoT7QxQf89DJReUo2et1LhRY,1716
11
11
  broccoli/transformer.py,sha256=gFBIEowGFPSgQhM1RwsRtQlw_WzVJPY-LJyf1MLtPek,16277
12
12
  broccoli/utils.py,sha256=htq_hOsdhUhL0nJi9WkKiEYOjEoWqFpK5X49PtgTf-0,299
13
- broccoli/vit.py,sha256=CSv13ILKw12o1fNznFvgbfw1TR-gDW30h74yjW6HmLc,11692
14
- broccoli_ml-0.1.38.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
15
- broccoli_ml-0.1.38.dist-info/METADATA,sha256=JJpmHolP3y4Yz8oZ5eRSZswLlCBsqxu6Y5VfimaD5c8,1257
16
- broccoli_ml-0.1.38.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
17
- broccoli_ml-0.1.38.dist-info/RECORD,,
13
+ broccoli/vit.py,sha256=rB1hfEwqRDfSFnFgXzZtzIeqFLnDfiyWe6hDJ7OcH8Q,12777
14
+ broccoli_ml-0.1.40.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
15
+ broccoli_ml-0.1.40.dist-info/METADATA,sha256=eTdLGu8jKvslYTs2_1qQd-GdV5vSOLJUuYDFHD3CEk8,1257
16
+ broccoli_ml-0.1.40.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
17
+ broccoli_ml-0.1.40.dist-info/RECORD,,