PyPI - broccoli-ml - Versions diffs - 9.3.0__tar.gz → 9.4.1__tar.gz - Mend

broccoli-ml 9.3.0.tar.gz → 9.4.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

{broccoli_ml-9.3.0 → broccoli_ml-9.4.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: broccoli-ml
-Version: 9.3.0
+Version: 9.4.1
 Summary: Some useful Pytorch models, circa 2025
 License: MIT
 Author: Nicholas Bailey

{broccoli_ml-9.3.0 → broccoli_ml-9.4.1}/broccoli/activation.py RENAMED Viewed

@@ -46,10 +46,7 @@ class GELU(nn.Module):
 class Swish(nn.Module):
     """
-    Implementation of (beta) SwiGLU, as introduced in "GLU Variants Improve Transformer"
-        (https://arxiv.org/abs/2002.05202v1) and used to great effect in LLaMa 2.0.
-    Halves the incoming parameter count, which should be scaled up before input.
+    Implementation of (beta) Swish
     """
     def __init__(self) -> None:

{broccoli_ml-9.3.0 → broccoli_ml-9.4.1}/broccoli/linear.py RENAMED Viewed

@@ -202,18 +202,18 @@ class RecyclingLinear(nn.Module):
             idx_tensor = indices
         if idx_tensor.size(0):
-            random_weights = self._random_weights(
-                indices.size(0), self.linear.weight.size(1)
-            )
+            centred_value_weights = self._mean_value_weights()
             if self.xglu:
                 gate_indices = indices
                 value_indices = indices + (self.linear.out_features // 2)
-                self._update_weights(value_indices, 0, random_weights, self.optimisers)
                 centred_gate_weights = self._mean_gate_weights()
                 centred_gate_weights = centred_gate_weights.expand(indices.size(0), -1)
                 self._update_weights(
                     gate_indices, 0, centred_gate_weights, self.optimisers  # dim
                 )
+            self._update_weights(
+                value_indices, 0, centred_value_weights, self.optimisers
+            )
         else:
             return
@@ -226,10 +226,11 @@ class RecyclingLinear(nn.Module):
             idx_tensor = indices
         if idx_tensor.size(0):
-            random_weights = self._random_weights(
-                self.linear.weight.size(0), indices.size(0)
+            zeros = torch.zeros(
+                (self.linear.weight.size(0), indices.size(0)),
+                device=self.linear.weight.device,
             )
-            self._update_weights(indices, 1, random_weights, self.optimisers)  # dim
+            self._update_weights(indices, 1, zeros, self.optimisers)  # dim
         else:
             return
@@ -276,6 +277,14 @@ class RecyclingLinear(nn.Module):
         random_weights *= 2.0 * stdv  # Range [-stdv, +stdv]
         return random_weights
+    def _mean_value_weights(self):
+        """
+        Only used when self.xglu
+        """
+        weights = self.linear.weight.data
+        rows = weights.size(0)
+        return self.linear.weight[int(rows / 2) :].data.mean(dim=0, keepdim=True)
     def _mean_gate_weights(self):
         """
         Only used when self.xglu

{broccoli_ml-9.3.0 → broccoli_ml-9.4.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "broccoli-ml"
-version = "9.3.0"
+version = "9.4.1"
 description = "Some useful Pytorch models, circa 2025"
 authors = [
     {name = "Nicholas Bailey"}