hippoformer 0.0.4__tar.gz → 0.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hippoformer-0.0.4 → hippoformer-0.0.6}/PKG-INFO +2 -1
- {hippoformer-0.0.4 → hippoformer-0.0.6}/hippoformer/hippoformer.py +79 -5
- {hippoformer-0.0.4 → hippoformer-0.0.6}/pyproject.toml +2 -1
- {hippoformer-0.0.4 → hippoformer-0.0.6}/tests/test_hippoformer.py +1 -1
- {hippoformer-0.0.4 → hippoformer-0.0.6}/.github/workflows/python-publish.yml +0 -0
- {hippoformer-0.0.4 → hippoformer-0.0.6}/.github/workflows/test.yml +0 -0
- {hippoformer-0.0.4 → hippoformer-0.0.6}/.gitignore +0 -0
- {hippoformer-0.0.4 → hippoformer-0.0.6}/LICENSE +0 -0
- {hippoformer-0.0.4 → hippoformer-0.0.6}/README.md +0 -0
- {hippoformer-0.0.4 → hippoformer-0.0.6}/hippoformer/__init__.py +0 -0
- {hippoformer-0.0.4 → hippoformer-0.0.6}/hippoformer-fig6.png +0 -0

{hippoformer-0.0.4 → hippoformer-0.0.6}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hippoformer
-Version: 0.0.4
+Version: 0.0.6
 Summary: hippoformer
 Project-URL: Homepage, https://pypi.org/project/hippoformer/
 Project-URL: Repository, https://github.com/lucidrains/hippoformer

@@ -36,6 +36,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9
 Requires-Dist: assoc-scan
 Requires-Dist: einops>=0.8.1
+Requires-Dist: einx>=0.3.0
 Requires-Dist: torch>=2.4
 Requires-Dist: x-mlps-pytorch
 Provides-Extra: examples

{hippoformer-0.0.4 → hippoformer-0.0.6}/hippoformer/hippoformer.py

@@ -7,7 +7,8 @@ from torch.nn import Module
 from torch.jit import ScriptModule, script_method
 from torch.func import vmap, grad, functional_call

-from
+from einx import multiply
+from einops import repeat, rearrange, pack, unpack
 from einops.layers.torch import Rearrange

 from x_mlps_pytorch import create_mlp
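
The new `einx>=0.3.0` dependency is pulled in for `multiply`, which broadcasts one tensor against another according to an axis pattern; it is used later in the memory update loop. A minimal sketch of that pattern, with made-up shapes:

```python
import torch
from einx import multiply

grads = torch.randn(2, 5, 7, 3)  # (batch, time, *param_shape)
lr = torch.rand(2, 5)            # one learned scalar per (batch, time)

# 'b t ..., b t' broadcasts lr over all trailing parameter dims
scaled = multiply('b t ..., b t', grads, lr)
assert scaled.shape == grads.shape
```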
@@ -22,6 +23,16 @@ def exists(v):
 def default(v, d):
     return v if exists(v) else d

+def pack_with_inverse(t, pattern):
+    packed, packed_shape = pack([t], pattern)
+
+    def inverse(out, inv_pattern = None):
+        inv_pattern = default(inv_pattern, pattern)
+        unpacked, = unpack(out, packed_shape, inv_pattern)
+        return unpacked
+
+    return packed, inverse
+
 def l2norm(t):
     return F.normalize(t, dim = -1)

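`pack_with_inverse` wraps einops' `pack`/`unpack` so a tensor can be flattened for a shape-agnostic operation and restored afterwards. A self-contained sketch of its behavior (the helper mirrors the diff above; the tensor shapes are illustrative):

```python
import torch
from einops import pack, unpack

def pack_with_inverse(t, pattern):
    # flatten per the pattern, returning a closure that undoes the pack
    packed, packed_shape = pack([t], pattern)

    def inverse(out, inv_pattern = None):
        unpacked, = unpack(out, packed_shape, inv_pattern if inv_pattern is not None else pattern)
        return unpacked

    return packed, inverse

t = torch.randn(2, 5, 7, 3)

packed, inverse = pack_with_inverse(t, 'b t *')  # trailing dims folded into one axis
assert packed.shape == (2, 5, 21)

restored = inverse(packed)                       # original shape recovered
assert restored.shape == (2, 5, 7, 3)
```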
@@ -121,7 +132,8 @@ class mmTEM(Module):
         loss_weight_inference = 1.,
         loss_weight_consistency = 1.,
         loss_weight_relational = 1.,
-        integration_ratio_learned = True
+        integration_ratio_learned = True,
+        assoc_scan_kwargs: dict = dict()
     ):
         super().__init__()

@@ -150,6 +162,9 @@ class mmTEM(Module):
         self.to_keys = nn.Linear(dim_joint_rep, dim, bias = False)
         self.to_values = nn.Linear(dim_joint_rep, dim, bias = False)

+        self.to_learned_optim_hparams = nn.Linear(dim_joint_rep, 3, bias = False) # for learning rate, forget gate, and momentum
+        self.assoc_scan = AssocScan(**assoc_scan_kwargs)
+
         self.meta_memory_mlp = create_mlp(
             dim = dim * 2,
             depth = meta_mlp_depth,
@@ -158,6 +173,14 @@ class mmTEM(Module):
             activation = nn.ReLU()
         )

+        def forward_with_mse_loss(params, keys, values):
+            pred = functional_call(self.meta_memory_mlp, params, keys)
+            return F.mse_loss(pred, values)
+
+        grad_fn = grad(forward_with_mse_loss)
+
+        self.per_sample_grad_fn = vmap(vmap(grad_fn, in_dims = (None, 0, 0)), in_dims = (None, 0, 0))
+
         # mlp decoder (from meta mlp output to joint)

         self.memory_output_decoder = create_mlp(
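The double `vmap` over `grad` with `functional_call` is the standard `torch.func` recipe for per-sample gradients, here mapped over both batch and time while the parameters stay shared (`in_dims = (None, 0, 0)`). A standalone sketch of the same pattern on a toy module (names and shapes are illustrative, not from the package):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.func import vmap, grad, functional_call

net = nn.Linear(4, 4)
params = dict(net.named_parameters())

def loss_fn(params, key, value):
    pred = functional_call(net, params, key)
    return F.mse_loss(pred, value)

# inner vmap maps over time, outer over batch; params are not mapped
per_sample_grad = vmap(vmap(grad(loss_fn), in_dims = (None, 0, 0)), in_dims = (None, 0, 0))

keys = torch.randn(2, 5, 4)    # (batch, time, dim)
values = torch.randn(2, 5, 4)

grads = per_sample_grad(params, keys, values)
assert grads['weight'].shape == (2, 5, 4, 4)  # (batch, time) prefix on each param
```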
@@ -206,8 +229,11 @@ class mmTEM(Module):
     def forward(
         self,
         sensory,
-        actions
+        actions,
+        return_losses = False
     ):
+        batch = actions.shape[0]
+
         structural_codes = self.path_integrator(actions)

         encoded_sensory = self.sensory_encoder(sensory)
@@ -244,18 +270,63 @@ class mmTEM(Module):

         pred_variance = self.structure_variance_pred_mlp(cat((corrected_structural_code, decoded_gen_structure, sensory_sse), dim = -1))

-        inf_structural_code = decoded_gen_structure + (corrected_structural_code - decoded_gen_structure) * self.integration_ratio * pred_variance
+        inf_structural_code = decoded_gen_structure + (corrected_structural_code - decoded_gen_structure) * self.integration_ratio.sigmoid() * pred_variance

         consistency_loss = F.mse_loss(decoded_gen_structure, inf_structural_code)

         # 4. final inference loss

-
+        final_structural_code, inf_encoded_sensory = self.retrieve(inf_structural_code, zeros_like(encoded_sensory))

         decoded_inf_sensory = self.sensory_decoder(inf_encoded_sensory)

         inference_pred_loss = F.mse_loss(sensory, decoded_inf_sensory)

+        # 5. store the final structural code from step 4 + encoded sensory
+
+        joint_code_to_store = cat((final_structural_code, encoded_sensory), dim = -1)
+
+        keys = self.to_keys(joint_code_to_store)
+        values = self.to_values(joint_code_to_store)
+
+        lr, forget, beta = self.to_learned_optim_hparams(joint_code_to_store).unbind(dim = -1)
+
+        params = dict(self.meta_memory_mlp.named_parameters())
+        grads = self.per_sample_grad_fn(params, keys, values)
+
+        # update the meta mlp parameters
+
+        init_momentums = {k: zeros_like(v) for k, v in params.items()}
+        next_params = dict()
+
+        for (
+            (key, param),
+            (_, grad),
+            (_, init_momentum)
+        ) in zip(
+            params.items(),
+            grads.items(),
+            init_momentums.items()
+        ):
+
+            grad, inverse_pack = pack_with_inverse(grad, 'b t *')
+
+            grad = multiply('b t ..., b t', grad, lr)
+
+            expanded_beta = repeat(beta, 'b t -> b t w', w = grad.shape[-1])
+
+            init_momentum = repeat(init_momentum, '... -> b ...', b = batch)
+
+            update = self.assoc_scan(grad, expanded_beta.sigmoid(), init_momentum)
+
+            expanded_forget = repeat(forget, 'b t -> b t w', w = grad.shape[-1])
+
+            acc_update = self.assoc_scan(update, expanded_forget.sigmoid())
+
+            acc_update = inverse_pack(acc_update)
+
+            next_params[key] = param - acc_update[:, -1]
+
         # losses

         total_loss = (
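The pair of associative scans above implements a gated momentum recurrence over the time axis: lr-scaled gradients are accumulated into a momentum under a learned gate, the resulting updates are accumulated under a learned forget gate, and only the final step is applied to the parameters. A plain-loop reading of what the scans compute, assuming each scan realizes `out_t = gate_t * out_{t-1} + x_t` (this is an interpretation for clarity, not code from the package):

```python
import torch

def gated_scan(x, gates, prev = None):
    # reference (sequential) version of an associative scan:
    # out_t = gates_t * out_{t-1} + x_t
    out = []
    for t in range(x.shape[1]):
        step = x[:, t] if prev is None else gates[:, t] * prev + x[:, t]
        out.append(step)
        prev = step
    return torch.stack(out, dim = 1)

b, t, w = 2, 5, 21
grad = torch.randn(b, t, w)      # flattened per-step gradients
lr = torch.rand(b, t, 1)
beta = torch.randn(b, t, 1)
forget = torch.randn(b, t, 1)

momentum = gated_scan(grad * lr, beta.sigmoid())      # momentum over lr-scaled grads
acc_update = gated_scan(momentum, forget.sigmoid())   # forget-gated accumulation
final_update = acc_update[:, -1]                      # param <- param - final_update
```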
@@ -272,4 +343,7 @@ class mmTEM(Module):
             inference_pred_loss
         )

+        if not return_losses:
+            return total_loss
+
         return total_loss, losses
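With the new `return_losses` flag, the forward pass returns only the scalar objective unless the per-term breakdown is requested. A minimal usage sketch (model construction and tensor shapes are elided, since the constructor arguments are outside this diff):

```python
def train_step(model, sensory, actions):
    # default: a single scalar loss, ready for backprop
    loss = model(sensory, actions)
    loss.backward()
    return loss

def inspect_losses(model, sensory, actions):
    # request the individual loss terms alongside the total
    total_loss, losses = model(sensory, actions, return_losses = True)
    return total_loss, losses
```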

{hippoformer-0.0.4 → hippoformer-0.0.6}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "hippoformer"
-version = "0.0.4"
+version = "0.0.6"
 description = "hippoformer"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }

@@ -25,6 +25,7 @@ classifiers=[

 dependencies = [
     "assoc-scan",
+    "einx>=0.3.0",
     "einops>=0.8.1",
     "torch>=2.4",
     "x-mlps-pytorch",