metacontroller-pytorch 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- metacontroller/metacontroller.py +69 -18
- {metacontroller_pytorch-0.0.19.dist-info → metacontroller_pytorch-0.0.21.dist-info}/METADATA +24 -1
- metacontroller_pytorch-0.0.21.dist-info/RECORD +6 -0
- metacontroller_pytorch-0.0.19.dist-info/RECORD +0 -6
- {metacontroller_pytorch-0.0.19.dist-info → metacontroller_pytorch-0.0.21.dist-info}/WHEEL +0 -0
- {metacontroller_pytorch-0.0.19.dist-info → metacontroller_pytorch-0.0.21.dist-info}/licenses/LICENSE +0 -0
metacontroller/metacontroller.py
CHANGED
@@ -18,7 +18,7 @@ from einops.layers.torch import Rearrange

 # external modules

-from x_transformers import Decoder
+from x_transformers import Encoder, Decoder
 from x_mlps_pytorch import Feedforwards
 from x_evolution import EvoStrategy

@@ -46,6 +46,17 @@ def default(*args):
             return arg
     return None

+def is_empty(t):
+    return t.numel() == 0
+
+def pad_at_dim(t, pad: tuple[int, int], dim = -1, value = 0.):
+    if pad == (0, 0):
+        return t
+
+    dims_from_right = (- dim - 1) if dim < 0 else (t.ndim - dim - 1)
+    zeros = ((0, 0) * dims_from_right)
+    return F.pad(t, (*zeros, *pad), value = value)
+
 # tensor helpers

 def straight_through(src, tgt):
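For reference, the new `pad_at_dim` helper converts a `(left, right)` pad on an arbitrary dimension into the last-dim-first pair layout that `torch.nn.functional.pad` expects. A standalone check of the behavior the transformer relies on later in this diff (prepending one zero timestep), with the helper reproduced from the hunk above and toy shapes assumed:

```python
import torch
import torch.nn.functional as F

def pad_at_dim(t, pad, dim = -1, value = 0.):
    # reproduced from the hunk above
    if pad == (0, 0):
        return t

    dims_from_right = (- dim - 1) if dim < 0 else (t.ndim - dim - 1)
    zeros = ((0, 0) * dims_from_right)
    return F.pad(t, (*zeros, *pad), value = value)

x = torch.randn(2, 3, 8)            # (batch, time, dim) - toy shapes
y = pad_at_dim(x, (1, 0), dim = 1)  # prepend one zero step on the time axis

assert y.shape == (2, 4, 8)
assert (y[:, 0] == 0).all()
```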
@@ -72,7 +83,11 @@ class MetaController(Module):
         decoder_expansion_factor = 2.,
         decoder_depth = 1,
         hypernetwork_low_rank = 16,
-        assoc_scan_kwargs: dict = dict()
+        assoc_scan_kwargs: dict = dict(),
+        bidirectional_temporal_encoder_kwargs: dict = dict(
+            attn_dim_head = 32,
+            heads = 8
+        )
     ):
         super().__init__()
         dim_meta = default(dim_meta_controller, dim_model)
@@ -81,9 +96,9 @@ class MetaController(Module):

         self.model_to_meta = Linear(dim_model, dim_meta)

-        # there are two phases, the first (discovery ssl phase) uses acausal with some ssm i don't really believe in - let's just use
+        # there are two phases, the first (discovery ssl phase) uses acausal with some ssm i don't really believe in - let's just use bidirectional attention as placeholder

-        self.
+        self.bidirectional_temporal_encoder = Encoder(dim = dim_meta, depth = 1, **bidirectional_temporal_encoder_kwargs)

         self.emitter = GRU(dim_meta * 2, dim_meta * 2)
         self.emitter_to_action_mean_log_var = Readout(dim_meta * 2, num_continuous = dim_latent)
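The discovery-phase encoder swapped in here is a one-layer bidirectional attention stack from `x-transformers`, so every position of the meta embedding can see the whole trajectory. A minimal sketch of the shape contract, with illustrative sizes (`dim_meta = 64`, batch and length arbitrary):

```python
import torch
from x_transformers import Encoder

dim_meta = 64  # illustrative size

encoder = Encoder(dim = dim_meta, depth = 1, attn_dim_head = 32, heads = 8)

meta_embed = torch.randn(2, 16, dim_meta)  # (batch, time, dim_meta)
encoded = encoder(meta_embed)              # same shape, each position attends both directions

assert encoded.shape == meta_embed.shape
```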
@@ -97,7 +112,9 @@ class MetaController(Module):

         self.switch_per_latent_dim = switch_per_latent_dim

-
+
+        self.dim_latent = dim_latent
+        self.switching_unit = GRU(dim_meta + dim_latent, dim_meta)
         self.to_switching_unit_beta = nn.Linear(dim_meta, dim_latent if switch_per_latent_dim else 1, bias = False)

         self.switch_gating = AssocScan(**assoc_scan_kwargs)
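Per the Appreciation note further down, the fix in this release is that the switching unit now receives the previous latent action concatenated onto the meta embedding, hence the `dim_meta + dim_latent` input width. A minimal sketch of that wiring, assuming a batch-first `torch.nn.GRU` stands in for the package's `GRU` wrapper, with illustrative sizes:

```python
import torch
from torch import nn

dim_meta, dim_latent = 64, 16  # illustrative sizes

# stand-in for the package's GRU wrapper (assumed batch-first)
switching_unit = nn.GRU(dim_meta + dim_latent, dim_meta, batch_first = True)

meta_embed = torch.randn(2, 5, dim_meta)
z_prev = torch.randn(2, 5, dim_latent)  # previous latent action per timestep

switch_input = torch.cat((meta_embed, z_prev), dim = -1)
out, hidden = switching_unit(switch_input)

assert out.shape == (2, 5, dim_meta)
```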
@@ -122,7 +139,7 @@ class MetaController(Module):
     def discovery_parameters(self):
         return [
             *self.model_to_meta.parameters(),
-            *self.
+            *self.bidirectional_temporal_encoder.parameters(),
             *self.emitter.parameters(),
             *self.emitter_to_action_mean_log_var.parameters(),
             *self.decoder.parameters(),
@@ -143,10 +160,11 @@
         hard_switch = False,
         temperature = 1.
     ):
+        device = residual_stream.device

         # destruct prev cache

-        prev_action_proposer_hidden, prev_switching_unit_gru_hidden, prev_switch_gated_hiddens = cache.prev_hiddens if exists(cache) else ((None,) * 3)
+        prev_action_proposer_hidden, prev_switching_unit_gru_hidden, prev_switch_gated_hiddens, prev_sampled_latent_action = cache.prev_hiddens if exists(cache) else ((None,) * 4)

         # getting proposed action for the two phases

@@ -157,10 +175,9 @@
         if discovery_phase:
             logger.warning('meta controller cache being passed back in for discovery phase, which does not make sense given bidirectional encoder')

-
-            temporal_compressed = reduce(temporal_compressed, '... (two d) -> ... d', 'mean', two = 2)
+            encoded_temporal = self.bidirectional_temporal_encoder(meta_embed)

-            proposed_action_hidden, _ = self.emitter(cat((
+            proposed_action_hidden, _ = self.emitter(cat((encoded_temporal, meta_embed), dim = -1))
             readout = self.emitter_to_action_mean_log_var

         else: # else internal rl phase
@@ -172,13 +189,34 @@

         action_dist = readout(proposed_action_hidden)

-
+        sampled_latent_action = readout.sample(action_dist, temperature = temperature)

         # switching unit timer

-        batch,
+        batch, seq_len, dim = sampled_latent_action.shape
+
+        # initialize prev sampled latent action to be zeros if not available (for first timestep and for discovery phase)
+
+        if not exists(prev_sampled_latent_action):
+            prev_sampled_latent_action = torch.zeros(batch, 1, self.dim_latent, device = device)
+
+        if discovery_phase:
+            z_prev = cat((prev_sampled_latent_action, sampled_latent_action[:, :-1]), dim = 1)
+
+        else:
+            # else during inference, use the previous sampled latent action

-
+            assert seq_len == 1, f'inference RL phase must be done one token at a time'
+            z_prev = prev_sampled_latent_action
+
+        # switch input is previous latent action and the embedding
+
+        switch_input = torch.cat((meta_embed, z_prev), dim=-1)
+
+        switching_unit_gru_out, next_switching_unit_gru_hidden = self.switching_unit(
+            switch_input,
+            prev_switching_unit_gru_hidden
+        )

         switch_beta = self.to_switching_unit_beta(switching_unit_gru_out).sigmoid()

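The two branches above differ only in where the previous latent action comes from: during discovery the whole sampled sequence is shifted right one step (zeros for the first position), while at inference the single cached action from the last step is used. A toy illustration of the discovery-phase shift, with assumed shapes:

```python
import torch

batch, seq_len, dim_latent = 2, 5, 4  # toy shapes

sampled = torch.randn(batch, seq_len, dim_latent)
z0 = torch.zeros(batch, 1, dim_latent)  # no action precedes the first timestep

# right-shift so position t sees the action sampled at t - 1
z_prev = torch.cat((z0, sampled[:, :-1]), dim = 1)

assert z_prev.shape == sampled.shape
assert (z_prev[:, 0] == 0).all()
assert torch.equal(z_prev[:, 1:], sampled[:, :-1])
```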
@@ -210,7 +248,7 @@
         switch_beta = straight_through(switch_beta, hard_switch_beta)

         forget = 1. - switch_beta
-        gated_action = self.switch_gating(switch_beta,
+        gated_action = self.switch_gating(switch_beta, sampled_latent_action * forget, prev = prev_switch_gated_hiddens)

         next_switch_gated_action = gated_action[:, -1]

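Read together with `forget = 1. - switch_beta`, the new call suggests the scan realizes h_t = β_t · h_{t-1} + (1 - β_t) · a_t: the gated latent action is held while the switch stays closed (β near 1) and overwritten when it fires. A naive reference loop under that assumed `AssocScan(gates, inputs)` convention, with toy shapes:

```python
import torch

def gated_scan_reference(beta, inputs, prev = None):
    # loop version of h_t = beta_t * h_{t-1} + inputs_t (assumed AssocScan convention)
    batch, seq_len, dim = inputs.shape
    h = prev if prev is not None else torch.zeros(batch, dim)

    outs = []
    for t in range(seq_len):
        h = beta[:, t] * h + inputs[:, t]
        outs.append(h)

    return torch.stack(outs, dim = 1)

beta = torch.rand(2, 5, 1)     # switch_beta in (0, 1), one gate per timestep
action = torch.randn(2, 5, 4)  # sampled latent actions

gated = gated_scan_reference(beta, action * (1. - beta))

assert gated.shape == (2, 5, 4)
```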
@@ -230,10 +268,11 @@
         next_hiddens = (
             next_action_proposer_hidden,
             next_switching_unit_gru_hidden,
-            next_switch_gated_action
+            next_switch_gated_action,
+            sampled_latent_action[:, -1:]
         )

-        return control_signal, MetaControllerOutput(next_hiddens, action_dist,
+        return control_signal, MetaControllerOutput(next_hiddens, action_dist, sampled_latent_action, kl_loss, switch_loss)

 # main transformer, which is subsumed into the environment after behavioral cloning

@@ -294,7 +333,7 @@ class Transformer(Module):
     def forward(
         self,
         state,
-        action_ids,
+        action_ids: Tensor | None = None,
         meta_controller: Module | None = None,
         cache: TransformerOutput | None = None,
         discovery_phase = False,
@@ -303,6 +342,8 @@
         return_latents = False,
         return_cache = False,
     ):
+        device = state.device
+
         meta_controller = default(meta_controller, self.meta_controller)

         meta_controlling = exists(meta_controller)
@@ -322,6 +363,7 @@
         # handle maybe behavioral cloning

         if behavioral_cloning or (meta_controlling and discovery_phase):
+            assert not is_empty(action_ids), f'`action_ids` cannot be empty when doing discovery or behavioral cloning'

             state, target_state = state[:, :-1], state[:, 1:]
             action_ids, target_action_ids = action_ids[:, :-1], action_ids[:, 1:]
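The shift in the hunk above is the standard next-token setup: inputs are all but the last step, targets are the same sequence offset by one. A toy illustration:

```python
import torch

state = torch.arange(6).view(1, 6)         # toy ids standing in for a state sequence

inp, target = state[:, :-1], state[:, 1:]  # predict each next state from the current one

assert inp.tolist() == [[0, 1, 2, 3, 4]]
assert target.tolist() == [[1, 2, 3, 4, 5]]
```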
@@ -331,7 +373,16 @@
         with lower_transformer_context():

             state_embed = self.state_embed(state)
-
+
+            # handle no past action for first timestep
+
+            if exists(action_ids):
+                action_embed = self.action_embed(action_ids)
+            else:
+                action_embed = state_embed[:, 0:0] # empty action embed
+
+            if action_embed.shape[-2] == (state_embed.shape[-2] - 1):
+                action_embed = pad_at_dim(action_embed, (1, 0), dim = 1)

             embed = state_embed + action_embed

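The padding path added above covers the first timestep, where there is one fewer past action than states. A toy check of the alignment, using `F.pad` directly in place of `pad_at_dim` (assumed shapes):

```python
import torch
import torch.nn.functional as F

state_embed  = torch.randn(2, 6, 64)   # 6 states...
action_embed = torch.randn(2, 5, 64)   # ...but only 5 past actions

if action_embed.shape[-2] == (state_embed.shape[-2] - 1):
    # prepend a zero action embedding for the first state, as pad_at_dim(action_embed, (1, 0), dim = 1) does
    action_embed = F.pad(action_embed, (0, 0, 1, 0))

embed = state_embed + action_embed
assert embed.shape == (2, 6, 64)
```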
{metacontroller_pytorch-0.0.19.dist-info → metacontroller_pytorch-0.0.21.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metacontroller-pytorch
-Version: 0.0.19
+Version: 0.0.21
 Summary: Transformer Metacontroller
 Project-URL: Homepage, https://pypi.org/project/metacontroller/
 Project-URL: Repository, https://github.com/lucidrains/metacontroller
@@ -39,6 +39,7 @@ Requires-Dist: discrete-continuous-embed-readout>=0.1.12
 Requires-Dist: einops>=0.8.1
 Requires-Dist: einx>=0.3.0
 Requires-Dist: loguru
+Requires-Dist: memmap-replay-buffer>=0.0.1
 Requires-Dist: torch>=2.5
 Requires-Dist: x-evolution>=0.1.23
 Requires-Dist: x-mlps-pytorch
@@ -54,6 +55,16 @@ Description-Content-Type: text/markdown

 Implementation of the MetaController proposed in [Emergent temporal abstractions in autoregressive models enable hierarchical reinforcement learning](https://arxiv.org/abs/2512.20605)

+## Install
+
+```shell
+$ pip install metacontroller-pytorch
+```
+
+## Appreciation
+
+- [Pranoy](https://github.com/pranoyr) for submitting a pull request for fixing the previous latent action not being included in the inputs to the switching unit
+
 ## Citations

 ```bibtex
@@ -78,3 +89,15 @@ Implementation of the MetaController proposed in [Emergent temporal abstractions
     url = {https://api.semanticscholar.org/CorpusID:279464702}
 }
 ```
+
+```bibtex
+@misc{fleuret2025freetransformer,
+    title = {The Free Transformer},
+    author = {François Fleuret},
+    year = {2025},
+    eprint = {2510.17558},
+    archivePrefix = {arXiv},
+    primaryClass = {cs.LG},
+    url = {https://arxiv.org/abs/2510.17558},
+}
+```
metacontroller_pytorch-0.0.21.dist-info/RECORD
ADDED
@@ -0,0 +1,6 @@
+metacontroller/__init__.py,sha256=lj7IOGpN--qMxJWbB-4SGqoPXG7Hd4mgtToTRSyTZ58,57
+metacontroller/metacontroller.py,sha256=EP2N1Qtw4WTNthQrMz6bBT9rxTtMFikdOyYtcwSPdHM,14167
+metacontroller_pytorch-0.0.21.dist-info/METADATA,sha256=scUJVoSZ6Tl3RYNiNjK_wIeWVrpVLbQhya-XkCqdieQ,4320
+metacontroller_pytorch-0.0.21.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+metacontroller_pytorch-0.0.21.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+metacontroller_pytorch-0.0.21.dist-info/RECORD,,
metacontroller_pytorch-0.0.19.dist-info/RECORD
DELETED
@@ -1,6 +0,0 @@
-metacontroller/__init__.py,sha256=lj7IOGpN--qMxJWbB-4SGqoPXG7Hd4mgtToTRSyTZ58,57
-metacontroller/metacontroller.py,sha256=GTErzikqVd8XDY8pmDnY8t4uIjbGCUd1GZBJX13peo8,12339
-metacontroller_pytorch-0.0.19.dist-info/METADATA,sha256=lX3L7J3CKoSyxvJniLdSJsCu0UMEbJTxQLEw6zzT7dY,3741
-metacontroller_pytorch-0.0.19.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-metacontroller_pytorch-0.0.19.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-metacontroller_pytorch-0.0.19.dist-info/RECORD,,
{metacontroller_pytorch-0.0.19.dist-info → metacontroller_pytorch-0.0.21.dist-info}/WHEEL
RENAMED
File without changes
{metacontroller_pytorch-0.0.19.dist-info → metacontroller_pytorch-0.0.21.dist-info}/licenses/LICENSE
RENAMED
File without changes