metacontroller-pytorch 0.0.14.tar.gz → 0.0.16.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metacontroller-pytorch
-Version: 0.0.14
+Version: 0.0.16
 Summary: Transformer Metacontroller
 Project-URL: Homepage, https://pypi.org/project/metacontroller/
 Project-URL: Repository, https://github.com/lucidrains/metacontroller
@@ -60,7 +60,7 @@ Implementation of the MetaController proposed in [Emergent temporal abstractions
 @misc{kobayashi2025emergenttemporalabstractionsautoregressive,
     title = {Emergent temporal abstractions in autoregressive models enable hierarchical reinforcement learning},
     author = {Seijin Kobayashi and Yanick Schimpf and Maximilian Schlegel and Angelika Steger and Maciej Wolczyk and Johannes von Oswald and Nino Scherrer and Kaitlin Maile and Guillaume Lajoie and Blake A. Richards and Rif A. Saurous and James Manyika and Blaise Agüera y Arcas and Alexander Meulemans and João Sacramento},
-    year={2025},
+    year = {2025},
     eprint = {2512.20605},
     archivePrefix = {arXiv},
     primaryClass = {cs.LG},
@@ -10,7 +10,7 @@ Implementation of the MetaController proposed in [Emergent temporal abstractions
 @misc{kobayashi2025emergenttemporalabstractionsautoregressive,
     title = {Emergent temporal abstractions in autoregressive models enable hierarchical reinforcement learning},
     author = {Seijin Kobayashi and Yanick Schimpf and Maximilian Schlegel and Angelika Steger and Maciej Wolczyk and Johannes von Oswald and Nino Scherrer and Kaitlin Maile and Guillaume Lajoie and Blake A. Richards and Rif A. Saurous and James Manyika and Blaise Agüera y Arcas and Alexander Meulemans and João Sacramento},
-    year={2025},
+    year = {2025},
     eprint = {2512.20605},
     archivePrefix = {arXiv},
     primaryClass = {cs.LG},
@@ -22,7 +22,7 @@ from x_transformers import Decoder
 from x_mlps_pytorch import Feedforwards
 from x_evolution import EvoStrategy

-from discrete_continuous_embed_readout import Embed, Readout
+from discrete_continuous_embed_readout import Embed, Readout, EmbedAndReadout

 from assoc_scan import AssocScan

@@ -234,30 +234,25 @@ class Transformer(Module):
         self,
         dim,
         *,
-        embed: Embed | dict,
+        state_embed_readout: dict,
+        action_embed_readout: dict,
         lower_body: Decoder | dict,
         upper_body: Decoder | dict,
-        readout: Readout | dict,
         meta_controller: MetaController | None = None
     ):
         super().__init__()

-        if isinstance(embed, dict):
-            embed = Embed(dim = dim, **embed)
-
         if isinstance(lower_body, dict):
             lower_body = Decoder(dim = dim, **lower_body)

         if isinstance(upper_body, dict):
             upper_body = Decoder(dim = dim, **upper_body)

-        if isinstance(readout, dict):
-            readout = Readout(dim = dim, **readout)
+        self.state_embed, self.state_readout = EmbedAndReadout(dim, **state_embed_readout)
+        self.action_embed, self.action_readout = EmbedAndReadout(dim, **action_embed_readout)

-        self.embed = embed
         self.lower_body = lower_body
         self.upper_body = upper_body
-        self.readout = readout

         # meta controller

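For orientation (illustrative sketch, not part of the diff): the single `embed`/`readout` pair is replaced by per-modality `state_embed_readout` and `action_embed_readout` dicts, each expanded through `EmbedAndReadout` into an embed/readout pair. The keyword contents below are taken from the updated test later in this diff (384-dim continuous states, 4 discrete actions).

```python
# Sketch of the new constructor call; keyword contents mirror the updated test in this diff.
from metacontroller.metacontroller import Transformer

model = Transformer(
    dim = 512,
    state_embed_readout = dict(num_continuous = 384),  # expanded via EmbedAndReadout
    action_embed_readout = dict(num_discrete = 4),     # expanded via EmbedAndReadout
    lower_body = dict(depth = 2),
    upper_body = dict(depth = 2)
)
```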
@@ -285,11 +280,13 @@ class Transformer(Module):

     def forward(
         self,
-        ids,
+        state,
+        action_ids,
         meta_controller: Module | None = None,
         cache: TransformerOutput | None = None,
         discovery_phase = False,
         meta_controller_temperature = 1.,
+        return_raw_action_dist = False,
         return_latents = False,
         return_cache = False,
     ):
@@ -297,21 +294,33 @@

         meta_controlling = exists(meta_controller)

+        behavioral_cloning = not meta_controlling and not return_raw_action_dist
+
         # by default, if meta controller is passed in, transformer is no grad

         lower_transformer_context = nullcontext if not meta_controlling else torch.no_grad
         meta_controller_context = nullcontext if meta_controlling else torch.no_grad
-        upper_transformer_context = nullcontext if meta_controlling and discovery_phase else torch.no_grad
+        upper_transformer_context = nullcontext if (not meta_controlling or discovery_phase) else torch.no_grad

         # handle cache

         lower_transformer_hiddens, meta_hiddens, upper_transformer_hiddens = cache.prev_hiddens if exists(cache) else ((None,) * 3)

+        # handle maybe behavioral cloning
+
+        if behavioral_cloning or (meta_controlling and discovery_phase):
+
+            state, target_state = state[:, :-1], state[:, 1:]
+            action_ids, target_action_ids = action_ids[:, :-1], action_ids[:, 1:]
+
         # transformer lower body

         with lower_transformer_context():

-            embed = self.embed(ids)
+            state_embed = self.state_embed(state)
+            action_embed = self.action_embed(action_ids)
+
+            embed = state_embed + action_embed

             residual_stream, next_lower_hiddens = self.lower_body(embed, cache = lower_transformer_hiddens, return_hiddens = True)

@@ -332,7 +341,23 @@ class Transformer(Module):

         # head readout

-        dist_params = self.readout(attended)
+        dist_params = self.action_readout(attended)
+
+        # maybe return behavior cloning loss
+
+        if behavioral_cloning:
+            state_dist_params = self.state_readout(attended)
+            state_clone_loss = self.state_readout.calculate_loss(state_dist_params, target_state)
+
+            action_clone_loss = self.action_readout.calculate_loss(dist_params, target_action_ids)
+
+            return state_clone_loss, action_clone_loss
+
+        elif meta_controlling and discovery_phase:
+
+            action_recon_loss = self.action_readout.calculate_loss(dist_params, target_action_ids)
+
+            return action_recon_loss, next_meta_hiddens.kl_loss

         # returning

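For orientation (illustrative sketch, not part of the diff): `forward` now takes `state` and `action_ids` separately, and its return value depends on the training phase. The three call patterns below mirror the updated test in this diff; the sketch reuses `model` from the sketch above, and the `MetaController` keyword arguments also come from that test.

```python
# Sketch of the three forward modes introduced by this diff.
import torch
from metacontroller.metacontroller import MetaController

state = torch.randn(1, 1024, 384)          # continuous states
actions = torch.randint(0, 4, (1, 1024))   # discrete action ids

# 1. behavioral cloning: no meta controller passed, returns next-step state and action clone losses
state_clone_loss, action_clone_loss = model(state, actions)

# 2. discovery phase: meta controller passed with discovery_phase = True,
#    returns the action reconstruction loss and the meta controller's KL term
meta_controller = MetaController(dim_latent = 512, switch_per_latent_dim = False)
action_recon_loss, kl_loss = model(state, actions, meta_controller = meta_controller, discovery_phase = True)

# 3. internal RL / inference: returns raw action distribution parameters (plus the cache on request)
dist_params, cache = model(state, actions, meta_controller = meta_controller, return_cache = True)
```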
@@ -1,6 +1,6 @@
 [project]
 name = "metacontroller-pytorch"
-version = "0.0.14"
+version = "0.0.16"
 description = "Transformer Metacontroller"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -0,0 +1,64 @@
+import pytest
+param = pytest.mark.parametrize
+
+import torch
+from metacontroller.metacontroller import Transformer, MetaController
+
+@param('action_discrete', (False, True))
+@param('switch_per_latent_dim', (False, True))
+def test_metacontroller(
+    action_discrete,
+    switch_per_latent_dim
+):
+
+    state = torch.randn(1, 1024, 384)
+
+    if action_discrete:
+        actions = torch.randint(0, 4, (1, 1024))
+        action_embed_readout = dict(num_discrete = 4)
+        assert_shape = (4,)
+    else:
+        actions = torch.randn(1, 1024, 8)
+        action_embed_readout = dict(num_continuous = 8)
+        assert_shape = (8, 2)
+
+    # behavioral cloning phase
+
+    model = Transformer(
+        dim = 512,
+        action_embed_readout = action_embed_readout,
+        state_embed_readout = dict(num_continuous = 384),
+        lower_body = dict(depth = 2,),
+        upper_body = dict(depth = 2,),
+    )
+
+    state_clone_loss, action_clone_loss = model(state, actions)
+    (state_clone_loss + 0.5 * action_clone_loss).backward()
+
+    # discovery and internal rl phase with meta controller
+
+    meta_controller = MetaController(
+        dim_latent = 512,
+        switch_per_latent_dim = switch_per_latent_dim
+    )
+
+    # discovery phase
+
+    (action_recon_loss, kl_loss) = model(state, actions, meta_controller = meta_controller, discovery_phase = True)
+    (action_recon_loss + kl_loss * 0.1).backward()
+
+    # internal rl
+
+    logits, cache = model(state, actions, meta_controller = meta_controller, return_cache = True)
+
+    assert logits.shape == (1, 1024, *assert_shape)
+
+    logits, cache = model(state, actions, meta_controller = meta_controller, return_cache = True, cache = cache)
+    logits, cache = model(state, actions, meta_controller = meta_controller, return_cache = True, cache = cache)
+
+    assert logits.shape == (1, 1, *assert_shape)
+
+    # evolutionary strategies over grpo
+
+    model.meta_controller = meta_controller
+    model.evolve(1, lambda _: 1., noise_population_size = 2)
@@ -1,39 +0,0 @@
-import pytest
-param = pytest.mark.parametrize
-
-import torch
-from metacontroller.metacontroller import Transformer, MetaController
-
-@param('discovery_phase', (False, True))
-@param('switch_per_latent_dim', (False, True))
-def test_metacontroller(
-    discovery_phase,
-    switch_per_latent_dim
-):
-
-    ids = torch.randint(0, 256, (1, 1024))
-
-    model = Transformer(
-        512,
-        embed = dict(num_discrete = 256),
-        lower_body = dict(depth = 2,),
-        upper_body = dict(depth = 2,),
-        readout = dict(num_discrete = 256)
-    )
-
-    meta_controller = MetaController(
-        512,
-        switch_per_latent_dim = switch_per_latent_dim
-    )
-
-    logits, cache = model(ids, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True)
-
-    assert logits.shape == (1, 1024, 256)
-
-    logits, cache = model(ids, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True, cache = cache)
-    logits, cache = model(ids, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True, cache = cache)
-
-    assert logits.shape == (1, 1, 256)
-
-    model.meta_controller = meta_controller
-    model.evolve(1, lambda _: 1., noise_population_size = 2)