metacontroller-pytorch 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacontroller/metacontroller.py +60 -20
- {metacontroller_pytorch-0.0.9.dist-info → metacontroller_pytorch-0.0.10.dist-info}/METADATA +1 -1
- metacontroller_pytorch-0.0.10.dist-info/RECORD +6 -0
- metacontroller_pytorch-0.0.9.dist-info/RECORD +0 -6
- {metacontroller_pytorch-0.0.9.dist-info → metacontroller_pytorch-0.0.10.dist-info}/WHEEL +0 -0
- {metacontroller_pytorch-0.0.9.dist-info → metacontroller_pytorch-0.0.10.dist-info}/licenses/LICENSE +0 -0
metacontroller/metacontroller.py
CHANGED
@@ -52,6 +52,13 @@ def straight_through(src, tgt):

 # meta controller

+MetaControllerOutput = namedtuple('MetaControllerOutput', (
+    'prev_hiddens',
+    'action_dist',
+    'actions',
+    'kl_loss'
+))
+
 class MetaController(Module):
     def __init__(
         self,
@@ -107,9 +114,9 @@ class MetaController(Module):
         return [
             *self.bidirectional_temporal_compressor.parameters(),
             *self.emitter.parameters(),
-            *self.emitter_to_action_mean_log_var.parameters()
+            *self.emitter_to_action_mean_log_var.parameters(),
             *self.decoder.parameters(),
-            *self.switch_gating
+            *self.switch_gating.parameters()
         ]

     def internal_rl_parameters(self):
@@ -121,10 +128,19 @@ class MetaController(Module):
     def forward(
         self,
         residual_stream,
+        cache: MetaControllerOutput | None = None,
         discovery_phase = False,
         hard_switch = False
     ):

+        # destruct prev cache
+
+        prev_action_proposer_hidden, prev_switching_unit_gru_hidden, prev_switch_gated_hiddens = cache.prev_hiddens if exists(cache) else ((None,) * 3)
+
+        # getting proposed action for the two phases
+
+        next_action_proposer_hidden = None
+
         if discovery_phase:
             temporal_compressed, _ = self.bidirectional_temporal_compressor(residual_stream)
             temporal_compressed = reduce(temporal_compressed, '... (two d) -> ... d', 'mean', two = 2)
@@ -133,7 +149,8 @@ class MetaController(Module):
             readout = self.emitter_to_action_mean_log_var

         else: # else internal rl phase
-
+
+            proposed_action_hidden, next_action_proposer_hidden = self.action_proposer(residual_stream, prev_action_proposer_hidden)
             readout = self.action_proposer_mean_log_var

         # sample from the gaussian as the action from the meta controller
@@ -146,35 +163,37 @@ class MetaController(Module):

         batch, _, dim = sampled_action.shape

-        switching_unit_gru_out,
+        switching_unit_gru_out, next_switching_unit_gru_hidden = self.switching_unit(residual_stream, prev_switching_unit_gru_hidden)

         switch_beta = self.to_switching_unit_beta(switching_unit_gru_out).sigmoid()

         # need to encourage normal distribution

-
+        kl_loss = self.zero

         if discovery_phase:
             mean, log_var = action_dist.unbind(dim = -1)

-
+            kl_loss = (0.5 * (
                 log_var.exp()
                 + mean.square()
                 - log_var
                 - 1.
             ))

-
-
+            kl_loss = kl_loss * switch_beta
+            kl_loss = kl_loss.sum(dim = -1).mean()

         # maybe hard switch, then use associative scan

         if hard_switch:
-
-            switch_beta = straight_through(switch_beta,
+            hard_switch_beta = (switch_beta > 0.5).float()
+            switch_beta = straight_through(switch_beta, hard_switch_beta)

         forget = 1. - switch_beta
-        gated_action = self.switch_gating(switch_beta, sampled_action * forget)
+        gated_action = self.switch_gating(switch_beta, sampled_action * forget, prev = prev_switch_gated_hiddens)
+
+        next_switch_gated_action = gated_action[:, -1]

         # decoder

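The discovery-phase lines added in the hunk above compute the closed-form KL divergence between the proposed Gaussian action distribution N(mean, exp(log_var)) and a standard normal, per dimension, before gating it with switch_beta. A quick standalone sanity check of that identity against torch.distributions (plain PyTorch, not part of this package) could look like this:

import torch
from torch.distributions import Normal, kl_divergence

mean = torch.randn(4, 8)
log_var = torch.randn(4, 8)

# closed form used in the diff: 0.5 * (exp(log_var) + mean^2 - log_var - 1)
kl_manual = 0.5 * (log_var.exp() + mean.square() - log_var - 1.)

# reference: KL(N(mean, sigma) || N(0, 1)) with sigma = exp(0.5 * log_var)
kl_reference = kl_divergence(Normal(mean, (0.5 * log_var).exp()), Normal(0., 1.))

assert torch.allclose(kl_manual, kl_reference, atol = 1e-6)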
@@ -189,10 +208,23 @@ class MetaController(Module):

         modified_residual_stream = residual_stream + control_signal

-
+        # returning
+
+        next_hiddens = (
+            next_action_proposer_hidden,
+            next_switching_unit_gru_hidden,
+            next_switch_gated_action
+        )
+
+        return modified_residual_stream, MetaControllerOutput(next_hiddens, action_dist, sampled_action, kl_loss)

 # main transformer, which is subsumed into the environment after behavioral cloning

+TransformerOutput = namedtuple('TransformerOutput', (
+    'residual_stream_latent',
+    'prev_hiddens'
+))
+
 class Transformer(Module):
     def __init__(
         self,
@@ -251,10 +283,12 @@ class Transformer(Module):
         self,
         ids,
         meta_controller: Module | None = None,
+        cache: TransformerOutput | None = None,
         discovery_phase = False,
-        return_latents = False,
         no_grad_transformer = None,
-        no_grad_meta_controller = None
+        no_grad_meta_controller = None,
+        return_latents = False,
+        return_cache = False
     ):
         meta_controller = default(meta_controller, self.meta_controller)

@@ -268,28 +302,32 @@ class Transformer(Module):
         transformer_context = torch.no_grad if no_grad_transformer else nullcontext
         meta_controller_context = torch.no_grad if no_grad_meta_controller else nullcontext

+        # handle cache
+
+        lower_transformer_hiddens, meta_hiddens, upper_transformer_hiddens = cache.prev_hiddens if exists(cache) else ((None,) * 3)
+
         # transformer lower body

         with transformer_context():

             embed = self.embed(ids)

-            residual_stream = self.lower_body(embed)
+            residual_stream, next_lower_hiddens = self.lower_body(embed, cache = lower_transformer_hiddens, return_hiddens = True)

         # meta controller acts on residual stream here

         with meta_controller_context():

             if exists(meta_controller):
-                modified_residual_stream,
+                modified_residual_stream, next_meta_hiddens = meta_controller(residual_stream, cache = meta_hiddens, discovery_phase = discovery_phase)
             else:
-                modified_residual_stream,
+                modified_residual_stream, next_meta_hiddens = residual_stream, None

         # modified residual stream sent back to transformer upper body

         with transformer_context():

-            attended = self.upper_body(modified_residual_stream)
+            attended, next_upper_hiddens = self.upper_body(modified_residual_stream, cache = upper_transformer_hiddens, return_hiddens = True)

         # head readout

@@ -297,7 +335,9 @@ class Transformer(Module):

         # returning

-
+        return_one = not (return_latents or return_cache)
+
+        if return_one:
             return dist_params

-        return dist_params,
+        return dist_params, TransformerOutput(residual_stream, (next_lower_hiddens, next_meta_hiddens, next_upper_hiddens))
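Taken together, the changes to metacontroller.py thread recurrent state through both modules: MetaController.forward now accepts a cache (a previous MetaControllerOutput) and returns the next hidden states in prev_hiddens, and Transformer.forward does the same with a TransformerOutput cache plus a return_cache flag. A minimal sketch of how the cached calls might be chained step by step follows; the import path is assumed from the package layout, and the constructor arguments and token shapes are hypothetical, chosen only for illustration:

import torch
from metacontroller import Transformer  # assumed import path

# hypothetical constructor arguments, purely for illustration
transformer = Transformer(num_tokens = 256, dim = 512, depth = 8)

ids = torch.randint(0, 256, (1, 128))

# first call primes the cache, later calls pass it back in
dist_params, cache = transformer(ids, return_cache = True)

next_ids = torch.randint(0, 256, (1, 1))
dist_params, cache = transformer(next_ids, cache = cache, return_cache = True)

# cache.prev_hiddens holds the (lower transformer, meta controller, upper transformer) hidden states

Since the cache is a plain namedtuple of hidden states, it can presumably be detached or dropped between segments without touching the modules themselves.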
metacontroller_pytorch-0.0.10.dist-info/RECORD
ADDED
@@ -0,0 +1,6 @@
+metacontroller/__init__.py,sha256=lj7IOGpN--qMxJWbB-4SGqoPXG7Hd4mgtToTRSyTZ58,57
+metacontroller/metacontroller.py,sha256=H-bZi70445-4JlhUFL8x_fgePY7bTxkDO4CCdItKao4,10642
+metacontroller_pytorch-0.0.10.dist-info/METADATA,sha256=AFk9SUK6TGSG1APtt51yiASCEWIOTIvzAhtJJnS-Dsc,3714
+metacontroller_pytorch-0.0.10.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+metacontroller_pytorch-0.0.10.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+metacontroller_pytorch-0.0.10.dist-info/RECORD,,
metacontroller_pytorch-0.0.9.dist-info/RECORD
DELETED
@@ -1,6 +0,0 @@
-metacontroller/__init__.py,sha256=lj7IOGpN--qMxJWbB-4SGqoPXG7Hd4mgtToTRSyTZ58,57
-metacontroller/metacontroller.py,sha256=V2Nb7ByGj310CalTzho-grwNsoHMp55oN5spkedJihc,9189
-metacontroller_pytorch-0.0.9.dist-info/METADATA,sha256=BA4AHlFW8DsD_NPXNv8N8rmRPISZNTkcjvGautB7xJA,3713
-metacontroller_pytorch-0.0.9.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-metacontroller_pytorch-0.0.9.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-metacontroller_pytorch-0.0.9.dist-info/RECORD,,
{metacontroller_pytorch-0.0.9.dist-info → metacontroller_pytorch-0.0.10.dist-info}/WHEEL
RENAMED
File without changes
{metacontroller_pytorch-0.0.9.dist-info → metacontroller_pytorch-0.0.10.dist-info}/licenses/LICENSE
RENAMED
File without changes