broccoli-ml 0.13.0__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
broccoli/linear.py CHANGED
@@ -10,7 +10,8 @@ from .tensor import SigmaReparamTensor
10
10
 
11
11
  class SpectralNormLinear(nn.Module):
12
12
  """
13
- ...
13
+ Inspired by Apple's Spectral Normed Linear Layers
14
+ (https://github.com/apple/ml-sigma-reparam)
14
15
  """
15
16
 
16
17
  def __init__(self, in_features: int, out_features: int, bias: bool = True):
broccoli/transformer.py CHANGED
@@ -222,7 +222,7 @@ class MHAttention(nn.Module):
222
222
  return self.out_proj(output_without_heads)
223
223
 
224
224
 
225
- class FeedforwardLayer(nn.Module):
225
+ class FeedforwardBlock(nn.Module):
226
226
  """
227
227
  ...
228
228
  """
@@ -324,7 +324,7 @@ class TransformerBlock(nn.Module):
324
324
  )
325
325
 
326
326
  # Submodules for the feedforward process
327
- self.ff = FeedforwardLayer(
327
+ self.ff = FeedforwardBlock(
328
328
  d_model,
329
329
  mlp_ratio,
330
330
  d_model,
broccoli/vit.py CHANGED
@@ -1,9 +1,10 @@
1
1
  import math
2
2
  from typing import Optional
3
3
 
4
- from .transformer import TransformerEncoder, FeedforwardLayer
4
+ from .transformer import TransformerEncoder, FeedforwardBlock
5
5
  from .cnn import SpaceToDepth, calculate_output_spatial_size, spatial_tuple
6
6
  from .activation import ReLU, SquaredReLU, GELU, SwiGLU
7
+ from .linear import SpectralNormLinear
7
8
  from einops import einsum
8
9
  from einops.layers.torch import Rearrange
9
10
  import torch.nn as nn
@@ -103,6 +104,7 @@ class ViTEncoder(nn.Module):
103
104
  def __init__(
104
105
  self,
105
106
  input_size=(32, 32),
107
+ initial_batch_norm=True,
106
108
  cnn=True,
107
109
  cnn_in_channels=3,
108
110
  cnn_out_channels=16,
@@ -132,7 +134,6 @@ class ViTEncoder(nn.Module):
132
134
  transformer_msa_dropout=0.1,
133
135
  transformer_stochastic_depth=0.1,
134
136
  linear_module=nn.Linear,
135
- initial_batch_norm=True,
136
137
  ):
137
138
  super().__init__()
138
139
 
@@ -294,7 +295,7 @@ class ViTEncoder(nn.Module):
294
295
 
295
296
  if intermediate_feedforward_layer:
296
297
  self.pooling_channels_padding = nn.Identity()
297
- self.intermediate_feedforward_layer = FeedforwardLayer(
298
+ self.intermediate_feedforward_layer = FeedforwardBlock(
298
299
  pooling_out_channels,
299
300
  transformer_mlp_ratio,
300
301
  transformer_embedding_size,
@@ -349,6 +350,8 @@ class ViT(nn.Module):
349
350
  def __init__(
350
351
  self,
351
352
  input_size=(32, 32),
353
+ image_classes=100,
354
+ initial_batch_norm=True,
352
355
  cnn=True,
353
356
  cnn_in_channels=3,
354
357
  cnn_out_channels=16,
@@ -378,9 +381,7 @@ class ViT(nn.Module):
378
381
  transformer_msa_dropout=0.1,
379
382
  transformer_stochastic_depth=0.1,
380
383
  batch_norm_outputs=True,
381
- initial_batch_norm=True,
382
- linear_module=nn.Linear,
383
- image_classes=100,
384
+ linear_module=SpectralNormLinear,
384
385
  head=SequencePoolClassificationHead,
385
386
  ):
386
387
 
@@ -404,6 +405,7 @@ class ViT(nn.Module):
404
405
 
405
406
  self.encoder = ViTEncoder(
406
407
  input_size=input_size,
408
+ initial_batch_norm=initial_batch_norm,
407
409
  cnn=cnn,
408
410
  cnn_in_channels=cnn_in_channels,
409
411
  cnn_out_channels=cnn_out_channels,
@@ -433,7 +435,6 @@ class ViT(nn.Module):
433
435
  transformer_msa_dropout=transformer_msa_dropout,
434
436
  transformer_stochastic_depth=transformer_stochastic_depth,
435
437
  linear_module=linear_module,
436
- initial_batch_norm=initial_batch_norm,
437
438
  )
438
439
 
439
440
  self.pool = head(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: broccoli-ml
3
- Version: 0.13.0
3
+ Version: 0.13.1
4
4
  Summary: Some useful Pytorch models, circa 2025
5
5
  License: MIT
6
6
  Author: Nicholas Bailey
@@ -5,13 +5,13 @@ broccoli/assets/cifar100_eigenvectors_size_2.pt,sha256=DjXDOXMeuMpIqNuGhX9z-OWYV
5
5
  broccoli/assets/cifar100_eigenvectors_size_3.pt,sha256=gL6k0xtXYiYP6ZSvEiMBdJ7kIkT0AngTpDJHFQqwgxA,7173
6
6
  broccoli/cnn.py,sha256=jeRyKIAMWu1E3iyI14MGgSZuZivPMh12iqkqW9ilNjo,17785
7
7
  broccoli/eigenpatches.py,sha256=J6n2usN1oQuHEHYiBNyYpn_a9eQcHjOBiIlvSei520Y,2413
8
- broccoli/linear.py,sha256=9ZwqC6kkgkr0uPoEjdi_Uq1QFHb4wCXzuU1r2pDreXM,2910
8
+ broccoli/linear.py,sha256=jiGvLguxzkkmX14kRavaeg7IwN8jYJ06wn-NJ6Ivpzo,3008
9
9
  broccoli/rope.py,sha256=hw7kBPNR9GQXj4GxyIAffsGKPfcTPOFh8Bc7oEHtaZY,12108
10
10
  broccoli/tensor.py,sha256=E2JK5mQwJf75e23-JGcDoT7QxQf89DJReUo2et1LhRY,1716
11
- broccoli/transformer.py,sha256=E2GUsruxd5o3Eqrgra14IH0G0u_Wx4dIHhrUHrRqpLc,15929
11
+ broccoli/transformer.py,sha256=GzkHlzCe4k2-ALMbKpQ0wdsOEKTap6gjOK-FiA7KP3k,15929
12
12
  broccoli/utils.py,sha256=htq_hOsdhUhL0nJi9WkKiEYOjEoWqFpK5X49PtgTf-0,299
13
- broccoli/vit.py,sha256=QNxdjB6xpW9G4hO7YknE5Uv7kXQYGbbdzEjqexQ5piY,16350
14
- broccoli_ml-0.13.0.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
15
- broccoli_ml-0.13.0.dist-info/METADATA,sha256=Q99pX48E_VzsVBXvulm4TFhh3opWwfAVYl5-8pVr5HM,1257
16
- broccoli_ml-0.13.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
17
- broccoli_ml-0.13.0.dist-info/RECORD,,
13
+ broccoli/vit.py,sha256=eEnb4hUwJUVymO3tD8V9JD-9i39ZkeNOYEDa9gwSL60,16398
14
+ broccoli_ml-0.13.1.dist-info/LICENSE,sha256=0BAzJE5BqQ7Iixp_AFdB2W1uO-HCRX-Qfun8PHt6yVM,1073
15
+ broccoli_ml-0.13.1.dist-info/METADATA,sha256=WWNGeC9F48atFNGqfw1Kv0i9QCGaiOIcHpIgAeMAAAw,1257
16
+ broccoli_ml-0.13.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
17
+ broccoli_ml-0.13.1.dist-info/RECORD,,