metacontroller-pytorch 0.0.20__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacontroller/metacontroller.py +58 -10
- {metacontroller_pytorch-0.0.20.dist-info → metacontroller_pytorch-0.0.21.dist-info}/METADATA +12 -1
- metacontroller_pytorch-0.0.21.dist-info/RECORD +6 -0
- metacontroller_pytorch-0.0.20.dist-info/RECORD +0 -6
- {metacontroller_pytorch-0.0.20.dist-info → metacontroller_pytorch-0.0.21.dist-info}/WHEEL +0 -0
- {metacontroller_pytorch-0.0.20.dist-info → metacontroller_pytorch-0.0.21.dist-info}/licenses/LICENSE +0 -0
metacontroller/metacontroller.py
CHANGED
@@ -46,6 +46,17 @@ def default(*args):
             return arg
     return None
 
+def is_empty(t):
+    return t.numel() == 0
+
+def pad_at_dim(t, pad: tuple[int, int], dim = -1, value = 0.):
+    if pad == (0, 0):
+        return t
+
+    dims_from_right = (- dim - 1) if dim < 0 else (t.ndim - dim - 1)
+    zeros = ((0, 0) * dims_from_right)
+    return F.pad(t, (*zeros, *pad), value = value)
+
 # tensor helpers
 
 def straight_through(src, tgt):
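The two new helpers are small: `is_empty` checks for zero elements, and `pad_at_dim` builds the flat pad tuple that `F.pad` expects (pairs ordered from the last dimension inward) so a tensor can be padded along an arbitrary dimension. A minimal sketch of `pad_at_dim` as defined above, with a hypothetical shape used purely for illustration:

```python
import torch
import torch.nn.functional as F

def pad_at_dim(t, pad: tuple[int, int], dim = -1, value = 0.):
    # no-op when no padding is requested
    if pad == (0, 0):
        return t

    # one (0, 0) pair per dimension to the right of `dim`, as F.pad pads from the last dim inward
    dims_from_right = (- dim - 1) if dim < 0 else (t.ndim - dim - 1)
    zeros = ((0, 0) * dims_from_right)
    return F.pad(t, (*zeros, *pad), value = value)

x = torch.randn(2, 3, 4)              # (batch, seq, dim)
y = pad_at_dim(x, (1, 0), dim = 1)    # prepend one zero step along the sequence dim
assert y.shape == (2, 4, 4)
assert torch.equal(y[:, 0], torch.zeros(2, 4))
```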
@@ -101,7 +112,9 @@ class MetaController(Module):
 
         self.switch_per_latent_dim = switch_per_latent_dim
 
-        self.switching_unit = GRU(dim_meta, dim_meta)
+
+        self.dim_latent = dim_latent
+        self.switching_unit = GRU(dim_meta + dim_latent, dim_meta)
         self.to_switching_unit_beta = nn.Linear(dim_meta, dim_latent if switch_per_latent_dim else 1, bias = False)
 
         self.switch_gating = AssocScan(**assoc_scan_kwargs)
@@ -147,10 +160,11 @@ class MetaController(Module):
         hard_switch = False,
         temperature = 1.
     ):
+        device = residual_stream.device
 
         # destruct prev cache
 
-        prev_action_proposer_hidden, prev_switching_unit_gru_hidden, prev_switch_gated_hiddens = cache.prev_hiddens if exists(cache) else ((None,) *
+        prev_action_proposer_hidden, prev_switching_unit_gru_hidden, prev_switch_gated_hiddens, prev_sampled_latent_action = cache.prev_hiddens if exists(cache) else ((None,) * 4)
 
         # getting proposed action for the two phases
 
@@ -175,13 +189,34 @@ class MetaController(Module):
 
         action_dist = readout(proposed_action_hidden)
 
-
+        sampled_latent_action = readout.sample(action_dist, temperature = temperature)
 
         # switching unit timer
 
-        batch,
+        batch, seq_len, dim = sampled_latent_action.shape
+
+        # initialize prev sampled latent action to be zeros if not available (for first timestep and for discovery phase)
+
+        if not exists(prev_sampled_latent_action):
+            prev_sampled_latent_action = torch.zeros(batch, 1, self.dim_latent, device = device)
+
+        if discovery_phase:
+            z_prev = cat((prev_sampled_latent_action, sampled_latent_action[:, :-1]), dim = 1)
+
+        else:
+            # else during inference, use the previous sampled latent action
 
-
+            assert seq_len == 1, f'inference RL phase must be done one token at a time'
+            z_prev = prev_sampled_latent_action
+
+        # switch input is previous latent action and the embedding
+
+        switch_input = torch.cat((meta_embed, z_prev), dim=-1)
+
+        switching_unit_gru_out, next_switching_unit_gru_hidden = self.switching_unit(
+            switch_input,
+            prev_switching_unit_gru_hidden
+        )
 
         switch_beta = self.to_switching_unit_beta(switching_unit_gru_out).sigmoid()
 
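This is the substance of the release: the switching unit GRU now also conditions on the latent action sampled at the previous step. During discovery the previous action is obtained by shifting the sampled sequence right by one step (zeros for the first step); during stepwise inference it comes from the cache. A rough sketch of the tensor manipulation, using hypothetical sizes:

```python
import torch

batch, seq_len, dim_latent, dim_meta = 2, 5, 8, 16   # hypothetical sizes

meta_embed = torch.randn(batch, seq_len, dim_meta)
sampled_latent_action = torch.randn(batch, seq_len, dim_latent)

# first timestep has no previous action, so start from zeros
prev_sampled_latent_action = torch.zeros(batch, 1, dim_latent)

# discovery phase: pair timestep t with the latent action sampled at t - 1
z_prev = torch.cat((prev_sampled_latent_action, sampled_latent_action[:, :-1]), dim = 1)

# the switching unit GRU now consumes the embedding and the previous latent action together,
# hence GRU(dim_meta + dim_latent, dim_meta) in __init__
switch_input = torch.cat((meta_embed, z_prev), dim = -1)
assert switch_input.shape == (batch, seq_len, dim_meta + dim_latent)
```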
@@ -213,7 +248,7 @@ class MetaController(Module):
         switch_beta = straight_through(switch_beta, hard_switch_beta)
 
         forget = 1. - switch_beta
-        gated_action = self.switch_gating(switch_beta,
+        gated_action = self.switch_gating(switch_beta, sampled_latent_action * forget, prev = prev_switch_gated_hiddens)
 
         next_switch_gated_action = gated_action[:, -1]
 
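`switch_gating` is an associative scan over the betas. Assuming it implements the usual linear recurrence h_t = beta_t * h_{t-1} + x_t, the update above interpolates between holding the previously gated latent action (beta near 1) and switching to the newly sampled one (beta near 0). A sequential reference of that assumed recurrence, not the library call itself:

```python
import torch

def gated_scan_reference(gates, inputs, prev = None):
    # assumed recurrence: h_t = gates_t * h_{t-1} + inputs_t
    # (the AssocScan used by `switch_gating` would compute the same result in parallel)
    batch, seq_len, dim = inputs.shape
    h = prev if prev is not None else inputs.new_zeros(batch, dim)
    outs = []
    for t in range(seq_len):
        h = gates[:, t] * h + inputs[:, t]
        outs.append(h)
    return torch.stack(outs, dim = 1)

switch_beta = torch.rand(2, 5, 1)             # beta near 1 keeps the old latent action
sampled_latent_action = torch.randn(2, 5, 8)

forget = 1. - switch_beta
gated_action = gated_scan_reference(switch_beta, sampled_latent_action * forget)
next_switch_gated_action = gated_action[:, -1]   # carried in the cache for the next step
```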
@@ -233,10 +268,11 @@ class MetaController(Module):
         next_hiddens = (
             next_action_proposer_hidden,
             next_switching_unit_gru_hidden,
-            next_switch_gated_action
+            next_switch_gated_action,
+            sampled_latent_action[:, -1:]
         )
 
-        return control_signal, MetaControllerOutput(next_hiddens, action_dist,
+        return control_signal, MetaControllerOutput(next_hiddens, action_dist, sampled_latent_action, kl_loss, switch_loss)
 
 # main transformer, which is subsumed into the environment after behavioral cloning
 
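The cache tuple grows to four entries: the last sampled latent action (`sampled_latent_action[:, -1:]`) is now threaded through so the next decoding step can feed it back as `z_prev`. A hypothetical mirror of the new layout, with field names assumed for illustration only:

```python
import torch
from collections import namedtuple

# hypothetical mirror of the four cached hidden states
PrevHiddens = namedtuple('PrevHiddens', [
    'action_proposer_hidden',
    'switching_unit_gru_hidden',
    'switch_gated_hiddens',
    'sampled_latent_action',   # new in 0.0.21, shape (batch, 1, dim_latent)
])

batch, seq_len, dim_latent = 2, 5, 8
sampled_latent_action = torch.randn(batch, seq_len, dim_latent)

# only the final step's latent action is carried forward to the next call
next_hiddens = PrevHiddens(None, None, None, sampled_latent_action[:, -1:])
assert next_hiddens.sampled_latent_action.shape == (batch, 1, dim_latent)
```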
@@ -297,7 +333,7 @@ class Transformer(Module):
     def forward(
         self,
         state,
-        action_ids,
+        action_ids: Tensor | None = None,
         meta_controller: Module | None = None,
         cache: TransformerOutput | None = None,
         discovery_phase = False,
@@ -306,6 +342,8 @@ class Transformer(Module):
         return_latents = False,
         return_cache = False,
     ):
+        device = state.device
+
         meta_controller = default(meta_controller, self.meta_controller)
 
         meta_controlling = exists(meta_controller)
@@ -325,6 +363,7 @@ class Transformer(Module):
         # handle maybe behavioral cloning
 
         if behavioral_cloning or (meta_controlling and discovery_phase):
+            assert not is_empty(action_ids), f'`action_ids` cannot be empty when doing discovery or behavioral cloning'
 
             state, target_state = state[:, :-1], state[:, 1:]
             action_ids, target_action_ids = action_ids[:, :-1], action_ids[:, 1:]
@@ -334,7 +373,16 @@ class Transformer(Module):
         with lower_transformer_context():
 
             state_embed = self.state_embed(state)
-            action_embed = self.action_embed(action_ids)
+
+            # handle no past action for first timestep
+
+            if exists(action_ids):
+                action_embed = self.action_embed(action_ids)
+            else:
+                action_embed = state_embed[:, 0:0] # empty action embed
+
+            if action_embed.shape[-2] == (state_embed.shape[-2] - 1):
+                action_embed = pad_at_dim(action_embed, (1, 0), dim = 1)
 
             embed = state_embed + action_embed
 
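`action_ids` is now optional in `Transformer.forward`, and the action embedding is aligned to the state sequence: with no actions at all an empty embedding is used, and when there is one fewer action than states, a zero step is prepended so each state is summed with the action taken just before it. A small sketch of that alignment with hypothetical embedders and sizes:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

# hypothetical embedders and sizes, for illustration only
dim, num_actions = 16, 6
state_embedder = nn.Linear(4, dim)
action_embedder = nn.Embedding(num_actions, dim)

state = torch.randn(1, 7, 4)
action_ids = torch.randint(0, num_actions, (1, 6))   # one fewer past action than states

state_embed = state_embedder(state)
action_embed = action_embedder(action_ids) if action_ids is not None else state_embed[:, 0:0]

# prepend a zero step so state t is summed with the action taken at t - 1
# (equivalent to pad_at_dim(action_embed, (1, 0), dim = 1) from the helpers above)
if action_embed.shape[-2] == (state_embed.shape[-2] - 1):
    action_embed = F.pad(action_embed, (0, 0, 1, 0))

embed = state_embed + action_embed
assert embed.shape == (1, 7, dim)
```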
{metacontroller_pytorch-0.0.20.dist-info → metacontroller_pytorch-0.0.21.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metacontroller-pytorch
-Version: 0.0.20
+Version: 0.0.21
 Summary: Transformer Metacontroller
 Project-URL: Homepage, https://pypi.org/project/metacontroller/
 Project-URL: Repository, https://github.com/lucidrains/metacontroller
@@ -39,6 +39,7 @@ Requires-Dist: discrete-continuous-embed-readout>=0.1.12
 Requires-Dist: einops>=0.8.1
 Requires-Dist: einx>=0.3.0
 Requires-Dist: loguru
+Requires-Dist: memmap-replay-buffer>=0.0.1
 Requires-Dist: torch>=2.5
 Requires-Dist: x-evolution>=0.1.23
 Requires-Dist: x-mlps-pytorch
@@ -54,6 +55,16 @@ Description-Content-Type: text/markdown
 
 Implementation of the MetaController proposed in [Emergent temporal abstractions in autoregressive models enable hierarchical reinforcement learning](https://arxiv.org/abs/2512.20605)
 
+## Install
+
+```shell
+$ pip install metacontroller-pytorch
+```
+
+## Appreciation
+
+- [Pranoy](https://github.com/pranoyr) for submitting a pull request for fixing the previous latent action not being included in the inputs to the switching unit
+
 ## Citations
 
 ```bibtex
metacontroller_pytorch-0.0.21.dist-info/RECORD
ADDED
@@ -0,0 +1,6 @@
+metacontroller/__init__.py,sha256=lj7IOGpN--qMxJWbB-4SGqoPXG7Hd4mgtToTRSyTZ58,57
+metacontroller/metacontroller.py,sha256=EP2N1Qtw4WTNthQrMz6bBT9rxTtMFikdOyYtcwSPdHM,14167
+metacontroller_pytorch-0.0.21.dist-info/METADATA,sha256=scUJVoSZ6Tl3RYNiNjK_wIeWVrpVLbQhya-XkCqdieQ,4320
+metacontroller_pytorch-0.0.21.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+metacontroller_pytorch-0.0.21.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+metacontroller_pytorch-0.0.21.dist-info/RECORD,,
metacontroller_pytorch-0.0.20.dist-info/RECORD
DELETED
@@ -1,6 +0,0 @@
-metacontroller/__init__.py,sha256=lj7IOGpN--qMxJWbB-4SGqoPXG7Hd4mgtToTRSyTZ58,57
-metacontroller/metacontroller.py,sha256=3QZrId9z8I6MMQ3GhEQ6Xb5LFRTFJq4EAU4JCvRmm-4,12368
-metacontroller_pytorch-0.0.20.dist-info/METADATA,sha256=5t4rDJiJzbx7m9BNsTTgO5JOnavaX-3jv31HTGuLP6A,4034
-metacontroller_pytorch-0.0.20.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-metacontroller_pytorch-0.0.20.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-metacontroller_pytorch-0.0.20.dist-info/RECORD,,
{metacontroller_pytorch-0.0.20.dist-info → metacontroller_pytorch-0.0.21.dist-info}/WHEEL
RENAMED
File without changes
{metacontroller_pytorch-0.0.20.dist-info → metacontroller_pytorch-0.0.21.dist-info}/licenses/LICENSE
RENAMED
File without changes