metacontroller-pytorch 0.0.20__tar.gz → 0.0.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of metacontroller-pytorch might be problematic.

@@ -1,3 +1,6 @@
+ replay-data/
+ recordings/
+
  # Byte-compiled / optimized / DLL files
  __pycache__/
  *.py[codz]
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: metacontroller-pytorch
- Version: 0.0.20
+ Version: 0.0.22
  Summary: Transformer Metacontroller
  Project-URL: Homepage, https://pypi.org/project/metacontroller/
  Project-URL: Repository, https://github.com/lucidrains/metacontroller
@@ -39,6 +39,7 @@ Requires-Dist: discrete-continuous-embed-readout>=0.1.12
  Requires-Dist: einops>=0.8.1
  Requires-Dist: einx>=0.3.0
  Requires-Dist: loguru
+ Requires-Dist: memmap-replay-buffer>=0.0.1
  Requires-Dist: torch>=2.5
  Requires-Dist: x-evolution>=0.1.23
  Requires-Dist: x-mlps-pytorch
@@ -54,6 +55,16 @@ Description-Content-Type: text/markdown
 
  Implementation of the MetaController proposed in [Emergent temporal abstractions in autoregressive models enable hierarchical reinforcement learning](https://arxiv.org/abs/2512.20605)
 
+ ## Install
+
+ ```shell
+ $ pip install metacontroller-pytorch
+ ```
+
+ ## Appreciation
+
+ - [Pranoy](https://github.com/pranoyr) for submitting a pull request fixing the previous latent action not being included in the inputs to the switching unit
+
  ## Citations
 
  ```bibtex
@@ -4,6 +4,16 @@
 
  Implementation of the MetaController proposed in [Emergent temporal abstractions in autoregressive models enable hierarchical reinforcement learning](https://arxiv.org/abs/2512.20605)
 
+ ## Install
+
+ ```shell
+ $ pip install metacontroller-pytorch
+ ```
+
+ ## Appreciation
+
+ - [Pranoy](https://github.com/pranoyr) for submitting a pull request fixing the previous latent action not being included in the inputs to the switching unit
+
  ## Citations
 
  ```bibtex
@@ -46,6 +46,14 @@ def default(*args):
              return arg
      return None
 
+ def pad_at_dim(t, pad: tuple[int, int], dim = -1, value = 0.):
+     if pad == (0, 0):
+         return t
+
+     dims_from_right = (- dim - 1) if dim < 0 else (t.ndim - dim - 1)
+     zeros = ((0, 0) * dims_from_right)
+     return F.pad(t, (*zeros, *pad), value = value)
+
  # tensor helpers
 
  def straight_through(src, tgt):
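The new `pad_at_dim` helper builds on `F.pad`, which consumes `(left, right)` padding pairs starting from the *last* dimension; the helper prepends as many `(0, 0)` pairs as needed so the requested pair lands on the target dimension. A minimal sketch of the behavior, with illustrative shapes:

```python
import torch
import torch.nn.functional as F

def pad_at_dim(t, pad, dim = -1, value = 0.):
    # same logic as the helper added in the hunk above
    if pad == (0, 0):
        return t

    dims_from_right = (- dim - 1) if dim < 0 else (t.ndim - dim - 1)
    zeros = ((0, 0) * dims_from_right)
    return F.pad(t, (*zeros, *pad), value = value)

x = torch.randn(2, 3, 4)

assert pad_at_dim(x, (1, 0), dim = 1).shape == (2, 4, 4)   # one zero step prepended on dim 1
assert pad_at_dim(x, (0, 2), dim = -1).shape == (2, 3, 6)  # two zero steps appended on the last dim
```

This is the same call the `Transformer` makes further down, `pad_at_dim(action_embed, (1, 0), dim = 1)`, to prepend a zero action embedding at the first timestep.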
@@ -101,7 +109,9 @@ class MetaController(Module):
 
          self.switch_per_latent_dim = switch_per_latent_dim
 
-         self.switching_unit = GRU(dim_meta, dim_meta)
+
+         self.dim_latent = dim_latent
+         self.switching_unit = GRU(dim_meta + dim_latent, dim_meta)
          self.to_switching_unit_beta = nn.Linear(dim_meta, dim_latent if switch_per_latent_dim else 1, bias = False)
 
          self.switch_gating = AssocScan(**assoc_scan_kwargs)
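This widening of the switching unit's input, from `dim_meta` to `dim_meta + dim_latent`, is the fix credited in the README's new Appreciation entry: the GRU now also sees the previous latent action. A dimensional sanity check, using `torch.nn.GRU` as a stand-in for the `GRU` used here, with hypothetical sizes:

```python
import torch
from torch.nn import GRU

dim_meta, dim_latent = 16, 8  # hypothetical sizes

# input width grows to dim_meta + dim_latent; hidden/output width stays dim_meta
switching_unit = GRU(dim_meta + dim_latent, dim_meta, batch_first = True)

meta_embed = torch.randn(2, 10, dim_meta)
z_prev = torch.randn(2, 10, dim_latent)  # previous latent action per timestep

out, hidden = switching_unit(torch.cat((meta_embed, z_prev), dim = -1))

assert out.shape == (2, 10, dim_meta)
```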
@@ -147,10 +157,11 @@ class MetaController(Module):
          hard_switch = False,
          temperature = 1.
      ):
+         device = residual_stream.device
 
          # destruct prev cache
 
-         prev_action_proposer_hidden, prev_switching_unit_gru_hidden, prev_switch_gated_hiddens = cache.prev_hiddens if exists(cache) else ((None,) * 3)
+         prev_action_proposer_hidden, prev_switching_unit_gru_hidden, prev_switch_gated_hiddens, prev_sampled_latent_action = cache.prev_hiddens if exists(cache) else ((None,) * 4)
 
          # getting proposed action for the two phases
 
@@ -175,13 +186,34 @@
 
          action_dist = readout(proposed_action_hidden)
 
-         sampled_action = readout.sample(action_dist, temperature = temperature)
+         sampled_latent_action = readout.sample(action_dist, temperature = temperature)
 
          # switching unit timer
 
-         batch, _, dim = sampled_action.shape
+         batch, seq_len, dim = sampled_latent_action.shape
+
+         # initialize prev sampled latent action to be zeros if not available (for first timestep and for discovery phase)
+
+         if not exists(prev_sampled_latent_action):
+             prev_sampled_latent_action = torch.zeros(batch, 1, self.dim_latent, device = device)
+
+         if discovery_phase:
+             z_prev = cat((prev_sampled_latent_action, sampled_latent_action[:, :-1]), dim = 1)
+
+         else:
+             # else during inference, use the previous sampled latent action
+
+             assert seq_len == 1, f'inference RL phase must be done one token at a time'
+             z_prev = prev_sampled_latent_action
 
-         switching_unit_gru_out, next_switching_unit_gru_hidden = self.switching_unit(meta_embed, prev_switching_unit_gru_hidden)
+         # switch input is previous latent action and the embedding
+
+         switch_input = torch.cat((meta_embed, z_prev), dim=-1)
+
+         switching_unit_gru_out, next_switching_unit_gru_hidden = self.switching_unit(
+             switch_input,
+             prev_switching_unit_gru_hidden
+         )
 
          switch_beta = self.to_switching_unit_beta(switching_unit_gru_out).sigmoid()
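During the discovery phase, the previous latent action for every timestep is produced in parallel by shifting the sampled sequence right one step and filling the first slot with zeros; at inference, the single cached sample is used instead. A shape-only sketch of the shift, with illustrative sizes:

```python
import torch
from torch import cat

batch, seq_len, dim_latent = 2, 5, 8  # illustrative sizes

sampled_latent_action = torch.randn(batch, seq_len, dim_latent)

# zeros stand in for the missing action before the first timestep
prev_sampled_latent_action = torch.zeros(batch, 1, dim_latent)

# shift right one step: timestep t sees the latent action sampled at t - 1
z_prev = cat((prev_sampled_latent_action, sampled_latent_action[:, :-1]), dim = 1)

assert z_prev.shape == sampled_latent_action.shape
assert torch.equal(z_prev[:, 1:], sampled_latent_action[:, :-1])
```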
@@ -213,7 +245,7 @@ class MetaController(Module):
          switch_beta = straight_through(switch_beta, hard_switch_beta)
 
          forget = 1. - switch_beta
-         gated_action = self.switch_gating(switch_beta, sampled_action * forget, prev = prev_switch_gated_hiddens)
+         gated_action = self.switch_gating(switch_beta, sampled_latent_action * forget, prev = prev_switch_gated_hiddens)
 
          next_switch_gated_action = gated_action[:, -1]
 
@@ -233,10 +265,11 @@
          next_hiddens = (
              next_action_proposer_hidden,
              next_switching_unit_gru_hidden,
-             next_switch_gated_action
+             next_switch_gated_action,
+             sampled_latent_action[:, -1:]
          )
 
-         return control_signal, MetaControllerOutput(next_hiddens, action_dist, sampled_action, kl_loss, switch_loss)
+         return control_signal, MetaControllerOutput(next_hiddens, action_dist, sampled_latent_action, kl_loss, switch_loss)
 
  # main transformer, which is subsumed into the environment after behavioral cloning
 
@@ -297,7 +330,7 @@ class Transformer(Module):
      def forward(
          self,
          state,
-         action_ids,
+         actions: Tensor | None = None,
          meta_controller: Module | None = None,
          cache: TransformerOutput | None = None,
          discovery_phase = False,
@@ -306,6 +339,8 @@ class Transformer(Module):
          return_latents = False,
          return_cache = False,
      ):
+         device = state.device
+
          meta_controller = default(meta_controller, self.meta_controller)
 
          meta_controlling = exists(meta_controller)
@@ -325,16 +360,26 @@ class Transformer(Module):
          # handle maybe behavioral cloning
 
          if behavioral_cloning or (meta_controlling and discovery_phase):
+             assert exists(actions), f'`actions` cannot be empty when doing discovery or behavioral cloning'
 
              state, target_state = state[:, :-1], state[:, 1:]
-             action_ids, target_action_ids = action_ids[:, :-1], action_ids[:, 1:]
+             actions, target_actions = actions[:, :-1], actions[:, 1:]
 
          # transformer lower body
 
          with lower_transformer_context():
 
              state_embed = self.state_embed(state)
-             action_embed = self.action_embed(action_ids)
+
+             # handle no past action for first timestep
+
+             if exists(actions):
+                 action_embed = self.action_embed(actions)
+             else:
+                 action_embed = state_embed[:, 0:0] # empty action embed
+
+             if action_embed.shape[-2] == (state_embed.shape[-2] - 1):
+                 action_embed = pad_at_dim(action_embed, (1, 0), dim = 1)
 
              embed = state_embed + action_embed
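With `actions` now optional, the first inference step can run from a state alone: the action embedding starts out empty, and whenever the action sequence trails the state sequence by one timestep it is front-padded with a zero embedding before the two are summed. A sketch of that alignment, with illustrative shapes and the same `pad_at_dim` logic as above:

```python
import torch
import torch.nn.functional as F

def pad_at_dim(t, pad, dim = -1, value = 0.):
    if pad == (0, 0):
        return t
    dims_from_right = (- dim - 1) if dim < 0 else (t.ndim - dim - 1)
    return F.pad(t, (*((0, 0) * dims_from_right), *pad), value = value)

dim = 16  # illustrative embedding width

state_embed = torch.randn(1, 6, dim)   # embeddings for six states
action_embed = torch.randn(1, 5, dim)  # only five past actions exist

# actions trail states by one, so prepend a zero action embedding
if action_embed.shape[-2] == (state_embed.shape[-2] - 1):
    action_embed = pad_at_dim(action_embed, (1, 0), dim = 1)

embed = state_embed + action_embed  # both now (1, 6, dim)
```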
@@ -367,13 +412,13 @@ class Transformer(Module):
              state_dist_params = self.state_readout(attended)
              state_clone_loss = self.state_readout.calculate_loss(state_dist_params, target_state)
 
-             action_clone_loss = self.action_readout.calculate_loss(dist_params, target_action_ids)
+             action_clone_loss = self.action_readout.calculate_loss(dist_params, target_actions)
 
              return state_clone_loss, action_clone_loss
 
          elif meta_controlling and discovery_phase:
 
-             action_recon_loss = self.action_readout.calculate_loss(dist_params, target_action_ids)
+             action_recon_loss = self.action_readout.calculate_loss(dist_params, target_actions)
 
              return action_recon_loss, next_meta_hiddens.kl_loss, next_meta_hiddens.switch_loss
 
@@ -1,6 +1,6 @@
  [project]
  name = "metacontroller-pytorch"
- version = "0.0.20"
+ version = "0.0.22"
  description = "Transformer Metacontroller"
  authors = [
      { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -29,6 +29,7 @@ dependencies = [
      "einx>=0.3.0",
      "einops>=0.8.1",
      "loguru",
+     "memmap-replay-buffer>=0.0.1",
      "torch>=2.5",
      "x-evolution>=0.1.23",
      "x-mlps-pytorch",
@@ -4,6 +4,8 @@ param = pytest.mark.parametrize
  import torch
  from metacontroller.metacontroller import Transformer, MetaController
 
+ from einops import rearrange
+
  @param('action_discrete', (False, True))
  @param('switch_per_latent_dim', (False, True))
  def test_metacontroller(
@@ -49,16 +51,18 @@ def test_metacontroller(
      (action_recon_loss, kl_loss, switch_loss) = model(state, actions, meta_controller = meta_controller, discovery_phase = True)
      (action_recon_loss + kl_loss * 0.1 + switch_loss * 0.2).backward()
 
-     # internal rl
+     # internal rl - done iteratively
 
-     logits, cache = model(state, actions, meta_controller = meta_controller, return_cache = True)
+     cache = None
+     past_action_id = None
 
-     assert logits.shape == (1, 1024, *assert_shape)
+     for one_state in state.unbind(dim = 1):
+         one_state = rearrange(one_state, 'b d -> b 1 d')
 
-     logits, cache = model(state, actions, meta_controller = meta_controller, return_cache = True, cache = cache)
-     logits, cache = model(state, actions, meta_controller = meta_controller, return_cache = True, cache = cache)
+         logits, cache = model(one_state, past_action_id, meta_controller = meta_controller, return_cache = True)
 
-     assert logits.shape == (1, 1, *assert_shape)
+         assert logits.shape == (1, 1, *assert_shape)
+         past_action_id = model.action_readout.sample(logits)
 
      # evolutionary strategies over grpo
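The rewritten test exercises genuine step-by-step inference: each state is fed as a single timestep, and the sampled action is fed back as the past action of the next call. The pattern in isolation, with stub stand-ins (everything here is hypothetical) so the sketch runs on its own:

```python
import torch
from einops import rearrange

# stubs standing in for the Transformer and its readout, purely illustrative
class StubReadout:
    def sample(self, logits):
        return logits.argmax(dim = -1)

class StubModel:
    action_readout = StubReadout()
    def __call__(self, one_state, past_action_id, **kwargs):
        return torch.randn(one_state.shape[0], 1, 7), None  # (logits, cache)

model = StubModel()
state = torch.randn(1, 16, 4)  # (batch, time, state dim), illustrative

cache = None
past_action_id = None

for one_state in state.unbind(dim = 1):
    one_state = rearrange(one_state, 'b d -> b 1 d')  # (b, d) -> (b, 1, d)
    logits, cache = model(one_state, past_action_id, return_cache = True)
    past_action_id = model.action_readout.sample(logits)  # feeds the next step
```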
@@ -0,0 +1,97 @@
+ # /// script
+ # dependencies = [
+ #     "fire",
+ #     "gymnasium",
+ #     "gymnasium[other]",
+ #     "memmap-replay-buffer>=0.0.10",
+ #     "metacontroller-pytorch",
+ #     "minigrid",
+ #     "tqdm"
+ # ]
+ # ///
+
+ from fire import Fire
+ from tqdm import tqdm
+ from shutil import rmtree
+
+ import torch
+
+ import gymnasium as gym
+ import minigrid
+
+ from memmap_replay_buffer import ReplayBuffer
+
+ # functions
+
+ def exists(v):
+     return v is not None
+
+ def default(v, d):
+     return v if exists(v) else d
+
+ def divisible_by(num, den):
+     return (num % den) == 0
+
+ # main
+
+ def main(
+     env_name = 'BabyAI-BossLevel-v0',
+     num_episodes = int(10e6),
+     max_timesteps = 500,
+     buffer_size = 5_000,
+     render_every_eps = 1_000,
+     video_folder = './recordings',
+     seed = None
+ ):
+
+     # environment
+
+     env = gym.make(env_name, render_mode = 'rgb_array')
+
+     rmtree(video_folder, ignore_errors = True)
+
+     env = gym.wrappers.RecordVideo(
+         env = env,
+         video_folder = video_folder,
+         name_prefix = 'babyai',
+         episode_trigger = lambda eps_num: divisible_by(eps_num, render_every_eps),
+         disable_logger = True
+     )
+
+     # replay
+
+     replay_buffer = ReplayBuffer(
+         './replay-data',
+         max_episodes = buffer_size,
+         max_timesteps = max_timesteps + 1,
+         fields = dict(
+             action = 'int',
+             state_image = ('float', (7, 7, 3)),
+             state_direction = 'int'
+         ),
+         overwrite = True,
+         circular = True
+     )
+
+     # rollouts
+
+     for _ in tqdm(range(num_episodes)):
+
+         state, *_ = env.reset(seed = seed)
+
+         for _ in range(max_timesteps):
+
+             action = torch.randint(0, 7, ())
+             next_state, reward, terminated, truncated, *_ = env.step(action.numpy())
+
+             done = terminated or truncated
+
+             if done:
+                 break
+
+             state = next_state
+
+ # running
+
+ if __name__ == '__main__':
+     Fire(main)
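The new data-gathering script carries inline dependency metadata (PEP 723), so it can be run directly with a PEP 723-aware runner. For example, assuming it were saved as `gather_data.py` (the filename is not shown in this diff), something like:

```shell
$ uv run gather_data.py --env_name 'BabyAI-BossLevel-v0' --render_every_eps 1000
```

Fire exposes the keyword arguments of `main` as command-line flags, so `num_episodes`, `max_timesteps`, `buffer_size`, `video_folder`, and `seed` can be overridden the same way.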