broccoli-ml 0.38.0__tar.gz → 0.40.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: broccoli-ml
-Version: 0.38.0
+Version: 0.40.0
 Summary: Some useful Pytorch models, circa 2025
 License: MIT
 Author: Nicholas Bailey
@@ -34,7 +34,8 @@ class SpectralNormLinear(nn.Module):
 
     def reset_parameters(self) -> None:
         weights = torch.empty(self.out_features, self.in_features)
-        nn.init.kaiming_uniform_(weights, a=math.sqrt(5))
+        stdv = 1.0 / math.sqrt(self.in_features)
+        nn.init.uniform_(weights, a=-stdv, b=stdv)
         if self.use_bias:
             fan_in, _ = nn.init._calculate_fan_in_and_fan_out(weights)
             bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
@@ -77,7 +78,8 @@ class AnchoredLinear(nn.Module):
 
     def reset_parameters(self) -> None:
         weights = torch.empty(self.out_features, self.in_features)
-        nn.init.kaiming_uniform_(weights, a=math.sqrt(5))
+        stdv = 1.0 / math.sqrt(self.in_features)
+        nn.init.uniform_(weights, a=-stdv, b=stdv)
         if self.use_bias:
             fan_in, _ = nn.init._calculate_fan_in_and_fan_out(weights)
             bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
@@ -120,7 +122,8 @@ class WeightNormedLinear(nn.Module):
 
     def reset_parameters(self) -> None:
         weights = torch.empty(self.out_features, self.in_features)
-        nn.init.kaiming_uniform_(weights, a=math.sqrt(5))
+        stdv = 1.0 / math.sqrt(self.in_features)
+        nn.init.uniform_(weights, a=-stdv, b=stdv)
         if self.use_bias:
             fan_in, _ = nn.init._calculate_fan_in_and_fan_out(weights)
             bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
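
The three `reset_parameters` hunks above make the same change: the `nn.init.kaiming_uniform_(weights, a=math.sqrt(5))` call (PyTorch's historical `nn.Linear` default) is replaced by an explicit uniform draw. The two are the same distribution: Kaiming uniform with `a = sqrt(5)` uses the bound `gain * sqrt(3 / fan_in)` with `gain = sqrt(2 / (1 + a**2)) = sqrt(1/3)`, which simplifies to `1 / sqrt(fan_in)`, exactly the `stdv` written out in 0.40.0. A minimal standalone sketch (editorial, not package code) checking the equivalence:

import math

import torch
import torch.nn as nn

in_features, out_features = 64, 32

# 0.38.0 initialisation: nn.Linear's historical default.
old = torch.empty(out_features, in_features)
nn.init.kaiming_uniform_(old, a=math.sqrt(5))

# 0.40.0 initialisation: the same bound, written out explicitly.
new = torch.empty(out_features, in_features)
stdv = 1.0 / math.sqrt(in_features)
nn.init.uniform_(new, a=-stdv, b=stdv)

# Both draws are U(-1/sqrt(in_features), +1/sqrt(in_features)).
assert old.abs().max().item() <= stdv + 1e-6
assert new.abs().max().item() <= stdv + 1e-6
print(f"shared bound: {stdv:.4f}")  # 0.1250 for in_features = 64

In other words, the change reads as a legibility refactor rather than a behavioural one: the bound is now visible at a glance instead of hidden behind the `a=math.sqrt(5)` idiom.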
@@ -120,6 +120,7 @@ class ViTEncoder(nn.Module):
         transformer_initial_ff_residual_path=True,
         transformer_initial_ff_linear_module_up=None,
         transformer_initial_ff_linear_module_down=None,
+        transformer_initial_ff_mlp_dropout=None,
         transformer_pre_norm=True,
         transformer_normformer=False,
         transformer_post_norm=False,
@@ -307,7 +308,12 @@ class ViTEncoder(nn.Module):
             transformer_embedding_size,
             activation=transformer_activation,
             activation_kwargs=transformer_activation_kwargs,
-            dropout=transformer_mlp_dropout,
+            dropout=(
+                # First truthy assigned value
+                transformer_initial_ff_mlp_dropout
+                if transformer_initial_ff_mlp_dropout is not None
+                else transformer_mlp_dropout
+            ),
             linear_module_up=(
                 # First truthy assigned value
                 transformer_initial_ff_linear_module_up
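
The `dropout=(...)` hunk gives the encoder's initial feed-forward block its own dropout rate, falling back to the shared `transformer_mlp_dropout` when the new argument is left as `None`. Note that, despite the in-code comment ("First truthy assigned value"), the test is `is not None`, so an explicit `0.0` is honoured as an override rather than discarded as falsy. A standalone sketch of the pattern (illustrative names, not package code):

def resolve_dropout(override, shared):
    """Prefer the per-block override when given, else the shared rate."""
    # `is not None` rather than truthiness: an explicit 0.0 must win.
    return override if override is not None else shared

assert resolve_dropout(None, 0.1) == 0.1  # no override: shared rate applies
assert resolve_dropout(0.5, 0.1) == 0.5   # override takes precedence
assert resolve_dropout(0.0, 0.1) == 0.0   # 0.0 would lose under `if override:`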
@@ -349,11 +355,7 @@ class ViTEncoder(nn.Module):
 
 class ViT(nn.Module):
     """
-    Denoising convolutional transformer
-    Based on the Compact Convolutional Transformer (CCT) of [Hasani et al. (2021)
-    *''Escaping the Big Data Paradigm with Compact Transformers''*](
-    https://arxiv.org/abs/2104.05704). It's a convolutional neural network
-    leading into a transformer encoder, followed by a sequence pooling layer.
+    ...
     """
 
     def __init__(
@@ -380,6 +382,7 @@ class ViT(nn.Module):
         transformer_initial_ff_residual_path=True,
         transformer_initial_ff_linear_module_up=None,
         transformer_initial_ff_linear_module_down=None,
+        transformer_initial_ff_mlp_dropout=None,
         transformer_pre_norm=True,
         transformer_normformer=False,
         transformer_post_norm=False,
@@ -442,6 +445,7 @@ class ViT(nn.Module):
             transformer_initial_ff_residual_path=transformer_initial_ff_residual_path,
             transformer_initial_ff_linear_module_up=transformer_initial_ff_linear_module_up,
             transformer_initial_ff_linear_module_down=transformer_initial_ff_linear_module_down,
+            transformer_initial_ff_mlp_dropout=transformer_initial_ff_mlp_dropout,
             transformer_pre_norm=transformer_pre_norm,
             transformer_normformer=transformer_normformer,
             transformer_post_norm=transformer_post_norm,
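
The `ViT` hunks above plumb the same argument through the wrapper class: it is added to `ViT.__init__` with a default of `None`, so call sites written against 0.38.0 keep their behaviour, and it is forwarded verbatim to `ViTEncoder`. A self-contained reduction of that plumbing (illustrative stand-ins, not package code):

class Encoder:
    """Stand-in for ViTEncoder: resolves the effective dropout rate."""

    def __init__(self, mlp_dropout=0.1, initial_ff_mlp_dropout=None):
        self.initial_ff_dropout = (
            initial_ff_mlp_dropout
            if initial_ff_mlp_dropout is not None
            else mlp_dropout
        )

class Model:
    """Stand-in for ViT: grows the keyword and forwards it unchanged."""

    def __init__(self, mlp_dropout=0.1, initial_ff_mlp_dropout=None):
        self.encoder = Encoder(mlp_dropout, initial_ff_mlp_dropout)

assert Model().encoder.initial_ff_dropout == 0.1  # 0.38.0 behaviour preserved
assert Model(initial_ff_mlp_dropout=0.0).encoder.initial_ff_dropout == 0.0  # new override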
@@ -1,6 +1,6 @@
 [project]
 name = "broccoli-ml"
-version = "0.38.0"
+version = "0.40.0"
 description = "Some useful Pytorch models, circa 2025"
 authors = [
     {name = "Nicholas Bailey"}