hippoformer 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hippoformer/hippoformer.py +83 -5
- {hippoformer-0.0.5.dist-info → hippoformer-0.0.7.dist-info}/METADATA +2 -1
- hippoformer-0.0.7.dist-info/RECORD +6 -0
- hippoformer-0.0.5.dist-info/RECORD +0 -6
- {hippoformer-0.0.5.dist-info → hippoformer-0.0.7.dist-info}/WHEEL +0 -0
- {hippoformer-0.0.5.dist-info → hippoformer-0.0.7.dist-info}/licenses/LICENSE +0 -0
hippoformer/hippoformer.py
CHANGED
@@ -7,7 +7,8 @@ from torch.nn import Module
 from torch.jit import ScriptModule, script_method
 from torch.func import vmap, grad, functional_call

-from
+from einx import multiply
+from einops import repeat, rearrange, pack, unpack
 from einops.layers.torch import Rearrange

 from x_mlps_pytorch import create_mlp
@@ -22,6 +23,16 @@ def exists(v):
 def default(v, d):
     return v if exists(v) else d

+def pack_with_inverse(t, pattern):
+    packed, packed_shape = pack([t], pattern)
+
+    def inverse(out, inv_pattern = None):
+        inv_pattern = default(inv_pattern, pattern)
+        unpacked, = unpack(out, packed_shape, inv_pattern)
+        return unpacked
+
+    return packed, inverse
+
 def l2norm(t):
     return F.normalize(t, dim = -1)

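For context, `pack_with_inverse` just closes over the einops `pack` metadata so a caller can flatten all trailing dimensions and later restore them. A minimal standalone sketch of the round trip it wraps (shapes are illustrative, not from the repo):

```python
import torch
from einops import pack, unpack

t = torch.randn(2, 5, 4, 4)                 # (batch, time, *param_shape)

packed, packed_shape = pack([t], 'b t *')   # flatten everything after (b, t) -> (2, 5, 16)
assert packed.shape == (2, 5, 16)

restored, = unpack(packed, packed_shape, 'b t *')  # invert using the saved shapes
assert restored.shape == t.shape
```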
@@ -121,7 +132,8 @@ class mmTEM(Module):
         loss_weight_inference = 1.,
         loss_weight_consistency = 1.,
         loss_weight_relational = 1.,
-        integration_ratio_learned = True
+        integration_ratio_learned = True,
+        assoc_scan_kwargs: dict = dict()
     ):
         super().__init__()

@@ -150,6 +162,9 @@ class mmTEM(Module):
         self.to_keys = nn.Linear(dim_joint_rep, dim, bias = False)
         self.to_values = nn.Linear(dim_joint_rep, dim, bias = False)

+        self.to_learned_optim_hparams = nn.Linear(dim_joint_rep, 3, bias = False) # for learning rate, forget gate, and momentum
+        self.assoc_scan = AssocScan(**assoc_scan_kwargs)
+
         self.meta_memory_mlp = create_mlp(
             dim = dim * 2,
             depth = meta_mlp_depth,
@@ -164,7 +179,7 @@ class mmTEM(Module):

         grad_fn = grad(forward_with_mse_loss)

-        self.per_sample_grad_fn = vmap(vmap(grad_fn, in_dims = (None, 0, 0)), in_dims = (
+        self.per_sample_grad_fn = vmap(vmap(grad_fn, in_dims = (None, 0, 0)), in_dims = (0, 0, 0))

         # mlp decoder (from meta mlp output to joint)

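The fixed nested `vmap` now maps the weights over the batch dimension as well (outer `in_dims = (0, 0, 0)`), while the inner `vmap` shares those weights across time. A self-contained sketch with a toy loss standing in for the repo's `forward_with_mse_loss` (names and shapes here are illustrative):

```python
import torch
from torch.func import vmap, grad

def mse_loss(w, key, value):
    # toy stand-in for forward_with_mse_loss: scalar loss for one (key, value) pair
    return ((key @ w - value) ** 2).mean()

grad_fn = grad(mse_loss)  # gradient w.r.t. the first argument, w

# inner vmap: iterate time steps, sharing w   -> in_dims = (None, 0, 0)
# outer vmap: iterate batch, with per-batch w -> in_dims = (0, 0, 0)
per_sample_grad_fn = vmap(vmap(grad_fn, in_dims = (None, 0, 0)), in_dims = (0, 0, 0))

b, t, d = 2, 5, 8
w = torch.randn(b, d, d)          # one fast-weight copy per batch element
keys = torch.randn(b, t, d)
values = torch.randn(b, t, d)

grads = per_sample_grad_fn(w, keys, values)
assert grads.shape == (b, t, d, d)  # one gradient per (batch, time) position
```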
@@ -198,6 +213,19 @@ class mmTEM(Module):

         self.integration_ratio = nn.Parameter(tensor(0.), requires_grad = integration_ratio_learned)

+    def init_params_and_momentum(
+        self,
+        batch_size
+    ):
+
+        params_dict = dict(self.meta_memory_mlp.named_parameters())
+
+        params = {name: repeat(param, '... -> b ...', b = batch_size) for name, param in params_dict.items()}
+
+        momentums = {name: zeros_like(param) for name, param in params.items()}
+
+        return params, momentums
+
     def retrieve(
         self,
         structural_codes,
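`init_params_and_momentum` gives every batch element its own fast-weight copy of the meta memory MLP, with momentum buffers starting at zero. Per parameter, the operation reduces to the following (illustrative shapes):

```python
import torch
from einops import repeat

param = torch.randn(64, 32)                      # one shared meta-MLP weight
batched = repeat(param, '... -> b ...', b = 4)   # (4, 64, 32): a copy per batch element
momentum = torch.zeros_like(batched)             # zero-initialized momentum buffer
```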
@@ -215,8 +243,12 @@ class mmTEM(Module):
         self,
         sensory,
         actions,
-        return_losses = False
+        memory_mlp_params = None,
+        return_losses = False,
+        return_memory_mlp_params = False
     ):
+        batch = actions.shape[0]
+
         structural_codes = self.path_integrator(actions)

         encoded_sensory = self.sensory_encoder(sensory)
@@ -272,7 +304,50 @@ class mmTEM(Module):
         keys = self.to_keys(joint_code_to_store)
         values = self.to_values(joint_code_to_store)

-
+        lr, forget, beta = self.to_learned_optim_hparams(joint_code_to_store).unbind(dim = -1)
+
+        if exists(memory_mlp_params):
+            params, momentums = memory_mlp_params
+        else:
+            params, momentums = self.init_params_and_momentum(batch)
+
+        # store by getting gradients of mse loss of keys and values
+
+        grads = self.per_sample_grad_fn(params, keys, values)
+
+        # update the meta mlp parameters and momentums
+
+        next_params = dict()
+        next_momentum = dict()
+
+        for (
+            (key, param),
+            (_, grad),
+            (_, momentum)
+        ) in zip(
+            params.items(),
+            grads.items(),
+            momentums.items()
+        ):
+
+            grad, inverse_pack = pack_with_inverse(grad, 'b t *')
+
+            grad = multiply('b t ..., b t', grad, lr)
+
+            expanded_beta = repeat(beta, 'b t -> b t w', w = grad.shape[-1])
+
+            update = self.assoc_scan(grad, expanded_beta.sigmoid(), momentum)
+
+            expanded_forget = repeat(forget, 'b t -> b t w', w = grad.shape[-1])
+
+            acc_update = self.assoc_scan(update, expanded_forget.sigmoid())
+
+            acc_update = inverse_pack(acc_update)
+
+            # set the next params and momentum, which can be passed back in
+
+            next_params[key] = param - acc_update[:, -1]
+            next_momentum[key] = update[:, -1]

         # losses

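The storage path is a learned-optimizer step: per-token learning rates scale the gradients, one scan accumulates them into momentum (gated by `beta`), and a second scan applies a forget gate while summing the updates. A plain-loop reference for what each scan computes, assuming `AssocScan` realizes the linear recurrence `state = gate * state + input` in parallel (a sketch, not the library's implementation):

```python
import torch

def naive_assoc_scan(inputs, gates, prev = None):
    # inputs, gates: (batch, time, width); prev: optional (batch, width) initial state
    state = prev if prev is not None else torch.zeros_like(inputs[:, 0])
    out = []
    for x, g in zip(inputs.unbind(dim = 1), gates.unbind(dim = 1)):
        state = g * state + x
        out.append(state)
    return torch.stack(out, dim = 1)

b, t, w = 2, 5, 16
grad = torch.randn(b, t, w)            # lr-scaled, flattened per-sample gradients
beta = torch.rand(b, t, w)             # momentum gate, already in (0, 1)
forget = torch.rand(b, t, w)           # forget gate, already in (0, 1)

update = naive_assoc_scan(grad, beta)          # momentum-smoothed gradients
acc_update = naive_assoc_scan(update, forget)  # decayed running sum of updates
# the diff then applies param - acc_update[:, -1] and keeps update[:, -1] as momentum
```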
@@ -290,6 +365,9 @@ class mmTEM(Module):
             inference_pred_loss
         )

+        if return_memory_mlp_params:
+            return next_params, next_momentum
+
         if not return_losses:
             return total_loss

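With `return_memory_mlp_params = True`, the forward pass hands back `(next_params, next_momentum)`, which is exactly the pair `memory_mlp_params` expects, so the fast-weight memory can be threaded across trajectory segments. A hypothetical driver loop (assumes an `mmTEM` instance and an iterable of `(sensory, actions)` segments):

```python
def rollout_with_memory(model, segments):
    # carry the fast-weight memory (params, momentums) across segments
    memory = None
    for sensory, actions in segments:
        memory = model(
            sensory,
            actions,
            memory_mlp_params = memory,
            return_memory_mlp_params = True
        )
    return memory
```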
{hippoformer-0.0.5.dist-info → hippoformer-0.0.7.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hippoformer
-Version: 0.0.5
+Version: 0.0.7
 Summary: hippoformer
 Project-URL: Homepage, https://pypi.org/project/hippoformer/
 Project-URL: Repository, https://github.com/lucidrains/hippoformer
@@ -36,6 +36,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9
 Requires-Dist: assoc-scan
 Requires-Dist: einops>=0.8.1
+Requires-Dist: einx>=0.3.0
 Requires-Dist: torch>=2.4
 Requires-Dist: x-mlps-pytorch
 Provides-Extra: examples
hippoformer-0.0.7.dist-info/RECORD
ADDED
@@ -0,0 +1,6 @@
+hippoformer/__init__.py,sha256=A7N8GsRAZH4yP-L5hb7IVDnNjnhfjNyolg5MZ6vnGyE,71
+hippoformer/hippoformer.py,sha256=yYoJ5XO0YVAyp3LcRxpunU-0HA97mpCBeQFyi-NSkF0,11549
+hippoformer-0.0.7.dist-info/METADATA,sha256=Xg6NZ6VAQGmuiOo8mMwIAM39Gf6TpVOpyn7o4PMq7JE,2800
+hippoformer-0.0.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hippoformer-0.0.7.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+hippoformer-0.0.7.dist-info/RECORD,,
hippoformer-0.0.5.dist-info/RECORD
DELETED
@@ -1,6 +0,0 @@
-hippoformer/__init__.py,sha256=A7N8GsRAZH4yP-L5hb7IVDnNjnhfjNyolg5MZ6vnGyE,71
-hippoformer/hippoformer.py,sha256=PP2KmTygOP6MyYuhmr_8iEBbywIaTW4TpoIycYRugMo,9142
-hippoformer-0.0.5.dist-info/METADATA,sha256=83iG4F_6ibQy6XSCWht-aF2ZVYmiEq-KSF4XR9YaBtY,2773
-hippoformer-0.0.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hippoformer-0.0.5.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-hippoformer-0.0.5.dist-info/RECORD,,
{hippoformer-0.0.5.dist-info → hippoformer-0.0.7.dist-info}/WHEEL
File without changes

{hippoformer-0.0.5.dist-info → hippoformer-0.0.7.dist-info}/licenses/LICENSE
File without changes