broccoli-ml 0.1.37__py3-none-any.whl → 0.1.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
broccoli/vit.py CHANGED
@@ -7,6 +7,17 @@ from .activation import ReLU, SquaredReLU, GELU, SwiGLU
7
7
  from einops import einsum
8
8
  from einops.layers.torch import Rearrange
9
9
  import torch.nn as nn
10
+ import torch.nn.functional as F
11
+
12
+
13
class PadTensor(nn.Module):
    """Module wrapper around ``torch.nn.functional.pad``.

    The positional and keyword arguments given at construction time are
    stored unchanged and forwarded to ``F.pad`` after the input tensor on
    every call, so padding can be placed inside an ``nn.Sequential``.

    Args:
        *args: Positional arguments passed to ``F.pad`` after the input.
        **kwargs: Keyword arguments passed to ``F.pad``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        self.args = args
        self.kwargs = kwargs

    def forward(self, x):
        """Return ``F.pad(x, *self.args, **self.kwargs)``."""
        return F.pad(x, *self.args, **self.kwargs)

    def extra_repr(self):
        """Show the stored padding arguments in the module's ``repr``."""
        # Without this, printing a model shows a bare ``PadTensor()`` and
        # hides the padding configuration.
        parts = [repr(arg) for arg in self.args]
        parts.extend(f"{key}={value!r}" for key, value in self.kwargs.items())
        return ", ".join(parts)
10
21
 
11
22
 
12
23
  class SequencePool(nn.Module):
@@ -56,6 +67,7 @@ class CCTEncoder(nn.Module):
56
67
  conv_pooling_kernel_size=3,
57
68
  conv_pooling_kernel_stride=2,
58
69
  conv_pooling_kernel_padding=1,
70
+ conv_dropout=0.0,
59
71
  transformer_position_embedding="absolute", # absolute or relative
60
72
  transformer_embedding_size=256,
61
73
  transformer_layers=7,
@@ -110,7 +122,7 @@ class CCTEncoder(nn.Module):
110
122
  conv_out_channels = transformer_embedding_size
111
123
  elif conv_pooling_type == "concat":
112
124
  conv_out_channels = int(
113
- round(transformer_embedding_size / (conv_pooling_kernel_size**2))
125
+ math.floor(transformer_embedding_size / (conv_pooling_kernel_size**2))
114
126
  )
115
127
 
116
128
  # This if block rhymes:
@@ -144,11 +156,16 @@ class CCTEncoder(nn.Module):
144
156
  )
145
157
 
146
158
  elif conv_pooling_type == "concat":
147
- concatpool_activation_output_size = (
148
- conv_pooling_kernel_size**2 * conv_out_channels
159
+ self.concatpool_activation = transformer_activation(
160
+ **transformer_activation_kwargs
149
161
  )
162
+
163
+ concatpool_out_channels = conv_pooling_kernel_size**2 * conv_out_channels
164
+
150
165
  if cnn_activation.__name__.endswith("GLU"):
151
- concatpool_activation_output_size /= 2
166
+ cnn_activation_output_channels = concatpool_out_channels / 2
167
+ else:
168
+ cnn_activation_output_channels = concatpool_out_channels
152
169
 
153
170
  self.pool = nn.Sequential(
154
171
  *[
@@ -161,11 +178,24 @@ class CCTEncoder(nn.Module):
161
178
  "N C H W -> N H W C"
162
179
  ),
163
180
  self.cnn_activation,
164
- Rearrange("N H W C -> N (H W) C"),
181
+ nn.Dropout(conv_dropout),
182
+ Rearrange( # rearrange in case we're using XGLU activation
183
+ "N H W C -> N C H W"
184
+ ),
185
+ nn.BatchNorm2d(cnn_activation_output_channels),
186
+ Rearrange( # rearrange in case we're using XGLU activation
187
+ "N C H W -> N (H W) C"
188
+ ),
165
189
  nn.Linear(
166
- concatpool_activation_output_size, transformer_embedding_size
190
+ cnn_activation_output_channels,
191
+ (
192
+ 2 * transformer_embedding_size * transformer_mlp_ratio
193
+ if transformer_activation.__name__.endswith("GLU")
194
+ else transformer_embedding_size * transformer_mlp_ratio
195
+ ),
167
196
  ),
168
- self.cnn_activation,
197
+ self.concatpool_activation,
198
+ nn.Linear(transformer_embedding_size * transformer_mlp_ratio),
169
199
  ]
170
200
  )
171
201
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: broccoli-ml
3
- Version: 0.1.37
3
+ Version: 0.1.39
4
4
  Summary: Some useful Pytorch models, circa 2025
5
5
  License: MIT
6
6
  Author: Nicholas Bailey
@@ -10,8 +10,8 @@ broccoli/rope.py,sha256=hw7kBPNR9GQXj4GxyIAffsGKPfcTPOFh8Bc7oEHtaZY,12108
10
10
  broccoli/tensor.py,sha256=E2JK5mQwJf75e23-JGcDoT7QxQf89DJReUo2et1LhRY,1716
11
11
  broccoli/transformer.py,sha256=gFBIEowGFPSgQhM1RwsRtQlw_WzVJPY-LJyf1MLtPek,16277
12
12
  broccoli/utils.py,sha256=htq_hOsdhUhL0nJi9WkKiEYOjEoWqFpK5X49PtgTf-0,299
13
- broccoli/vit.py,sha256=Pp0fU2h5_tgdyJEKmN_ltXNDC3WWXoEiWUPzCuFy-gY,11409
14
- broccoli_ml-0.1.37.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
15
- broccoli_ml-0.1.37.dist-info/METADATA,sha256=RyelojtfF_7Y72XsZSzWUsDIf5Gb5KbTcg6go74Wres,1257
16
- broccoli_ml-0.1.37.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
17
- broccoli_ml-0.1.37.dist-info/RECORD,,
13
+ broccoli/vit.py,sha256=hN9m24HkgxFMQPEFmlv865ejHs7JujMRQfzoplJKu78,12618
14
+ broccoli_ml-0.1.39.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
15
+ broccoli_ml-0.1.39.dist-info/METADATA,sha256=MAYq4HTN1PVZIbWYaqnoU7EnY6-vVFlbcAdASzuoetE,1257
16
+ broccoli_ml-0.1.39.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
17
+ broccoli_ml-0.1.39.dist-info/RECORD,,