PyPI - hippoformer - Versions diffs - 0.0.6__py3-none-any.whl → 0.0.7__py3-none-any.whl - Mend

hippoformer 0.0.6__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

hippoformer/hippoformer.py CHANGED Viewed

@@ -179,7 +179,7 @@ class mmTEM(Module):
         grad_fn = grad(forward_with_mse_loss)
-        self.per_sample_grad_fn = vmap(vmap(grad_fn, in_dims = (None, 0, 0)), in_dims = (None, 0, 0))
+        self.per_sample_grad_fn = vmap(vmap(grad_fn, in_dims = (None, 0, 0)), in_dims = (0, 0, 0))
         # mlp decoder (from meta mlp output to joint)
@@ -213,6 +213,19 @@ class mmTEM(Module):
         self.integration_ratio = nn.Parameter(tensor(0.), requires_grad = integration_ratio_learned)
+    def init_params_and_momentum(
+        self,
+        batch_size
+    ):
+        params_dict = dict(self.meta_memory_mlp.named_parameters())
+        params = {name: repeat(param, '... -> b ...', b = batch_size) for name, param in params_dict.items()}
+        momentums = {name: zeros_like(param) for name, param in params.items()}
+        return params, momentums
     def retrieve(
         self,
         structural_codes,
@@ -230,7 +243,9 @@ class mmTEM(Module):
         self,
         sensory,
         actions,
-        return_losses = False
+        memory_mlp_params = None,
+        return_losses = False,
+        return_memory_mlp_params = False
     ):
         batch = actions.shape[0]
@@ -291,22 +306,28 @@ class mmTEM(Module):
         lr, forget, beta = self.to_learned_optim_hparams(joint_code_to_store).unbind(dim = -1)
-        params = dict(self.meta_memory_mlp.named_parameters())
+        if exists(memory_mlp_params):
+            params, momentums = memory_mlp_params
+        else:
+            params, momentums = self.init_params_and_momentum(batch)
+        # store by getting gradients of mse loss of keys and values
         grads = self.per_sample_grad_fn(params, keys, values)
-        # update the meta mlp parameters
+        # update the meta mlp parameters and momentums
-        init_momentums = {k: zeros_like(v) for k, v in params.items()}
         next_params = dict()
+        next_momentum = dict()
         for (
             (key, param),
             (_, grad),
-            (_, init_momentum)
+            (_, momentum)
         ) in zip(
             params.items(),
             grads.items(),
-            init_momentums.items()
+            momentums.items()
         ):
             grad, inverse_pack = pack_with_inverse(grad, 'b t *')
@@ -315,9 +336,7 @@ class mmTEM(Module):
             expanded_beta = repeat(beta, 'b t -> b t w', w = grad.shape[-1])
-            init_momentum = repeat(init_momentum, '... -> b ...', b = batch)
-            update = self.assoc_scan(grad, expanded_beta.sigmoid(), init_momentum)
+            update = self.assoc_scan(grad, expanded_beta.sigmoid(), momentum)
             expanded_forget = repeat(forget, 'b t -> b t w', w = grad.shape[-1])
@@ -325,7 +344,10 @@ class mmTEM(Module):
             acc_update = inverse_pack(acc_update)
+            # set the next params and momentum, which can be passed back in
             next_params[key] = param - acc_update[:, -1]
+            next_momentum[key] = update[:, -1]
         # losses
@@ -343,6 +365,9 @@ class mmTEM(Module):
             inference_pred_loss
         )
+        if return_memory_mlp_params:
+            return next_params, next_momentum
         if not return_losses:
             return total_loss

{hippoformer-0.0.6.dist-info → hippoformer-0.0.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hippoformer
-Version: 0.0.6
+Version: 0.0.7
 Summary: hippoformer
 Project-URL: Homepage, https://pypi.org/project/hippoformer/
 Project-URL: Repository, https://github.com/lucidrains/hippoformer

hippoformer-0.0.7.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,6 @@
+hippoformer/__init__.py,sha256=A7N8GsRAZH4yP-L5hb7IVDnNjnhfjNyolg5MZ6vnGyE,71
+hippoformer/hippoformer.py,sha256=yYoJ5XO0YVAyp3LcRxpunU-0HA97mpCBeQFyi-NSkF0,11549
+hippoformer-0.0.7.dist-info/METADATA,sha256=Xg6NZ6VAQGmuiOo8mMwIAM39Gf6TpVOpyn7o4PMq7JE,2800
+hippoformer-0.0.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hippoformer-0.0.7.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+hippoformer-0.0.7.dist-info/RECORD,,

hippoformer-0.0.6.dist-info/RECORD DELETED Viewed

@@ -1,6 +0,0 @@
-hippoformer/__init__.py,sha256=A7N8GsRAZH4yP-L5hb7IVDnNjnhfjNyolg5MZ6vnGyE,71
-hippoformer/hippoformer.py,sha256=b6EXU2VXh_ZD7brxpCVNuU-m7cE-zXRR-sOmqfofPCg,10839
-hippoformer-0.0.6.dist-info/METADATA,sha256=ufTBdu8ZGggxwfgzphYV56jjaGdI5sLCE_iZF5Bku6s,2800
-hippoformer-0.0.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hippoformer-0.0.6.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-hippoformer-0.0.6.dist-info/RECORD,,

{hippoformer-0.0.6.dist-info → hippoformer-0.0.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{hippoformer-0.0.6.dist-info → hippoformer-0.0.7.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hippoformer 0.0.6__py3-none-any.whl → 0.0.7__py3-none-any.whl

hippoformer 0.0.6__py3-none-any.whl → 0.0.7__py3-none-any.whl