metacontroller-pytorch 0.0.15__tar.gz → 0.0.17__tar.gz

This diff shows the changes between two publicly released versions of the package, as published to the public registry. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metacontroller-pytorch
-Version: 0.0.15
+Version: 0.0.17
 Summary: Transformer Metacontroller
 Project-URL: Homepage, https://pypi.org/project/metacontroller/
 Project-URL: Repository, https://github.com/lucidrains/metacontroller
@@ -60,7 +60,7 @@ Implementation of the MetaController proposed in [Emergent temporal abstractions
 @misc{kobayashi2025emergenttemporalabstractionsautoregressive,
     title = {Emergent temporal abstractions in autoregressive models enable hierarchical reinforcement learning},
     author = {Seijin Kobayashi and Yanick Schimpf and Maximilian Schlegel and Angelika Steger and Maciej Wolczyk and Johannes von Oswald and Nino Scherrer and Kaitlin Maile and Guillaume Lajoie and Blake A. Richards and Rif A. Saurous and James Manyika and Blaise Agüera y Arcas and Alexander Meulemans and João Sacramento},
-    year={2025},
+    year = {2025},
     eprint = {2512.20605},
     archivePrefix = {arXiv},
     primaryClass = {cs.LG},
@@ -10,7 +10,7 @@ Implementation of the MetaController proposed in [Emergent temporal abstractions
 @misc{kobayashi2025emergenttemporalabstractionsautoregressive,
     title = {Emergent temporal abstractions in autoregressive models enable hierarchical reinforcement learning},
     author = {Seijin Kobayashi and Yanick Schimpf and Maximilian Schlegel and Angelika Steger and Maciej Wolczyk and Johannes von Oswald and Nino Scherrer and Kaitlin Maile and Guillaume Lajoie and Blake A. Richards and Rif A. Saurous and James Manyika and Blaise Agüera y Arcas and Alexander Meulemans and João Sacramento},
-    year={2025},
+    year = {2025},
     eprint = {2512.20605},
     archivePrefix = {arXiv},
     primaryClass = {cs.LG},
@@ -57,7 +57,8 @@ MetaControllerOutput = namedtuple('MetaControllerOutput', (
     'prev_hiddens',
     'action_dist',
     'actions',
-    'kl_loss'
+    'kl_loss',
+    'switch_loss'
 ))

 class MetaController(Module):
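
The widened namedtuple means anything unpacking MetaControllerOutput positionally now receives five fields instead of four. A minimal sketch of the implication for downstream callers, with dummy values for illustration:

```python
from collections import namedtuple

import torch

# field order taken from the diff above
MetaControllerOutput = namedtuple('MetaControllerOutput', (
    'prev_hiddens',
    'action_dist',
    'actions',
    'kl_loss',
    'switch_loss'
))

out = MetaControllerOutput(None, None, None, torch.tensor(0.), torch.tensor(0.))

# positional unpacking must now account for the fifth field
hiddens, action_dist, actions, kl_loss, switch_loss = out
```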
@@ -173,7 +174,7 @@ class MetaController(Module):

         # need to encourage normal distribution

-        kl_loss = self.zero
+        kl_loss = switch_loss = self.zero

         if discovery_phase:
             mean, log_var = action_dist.unbind(dim = -1)
@@ -188,6 +189,10 @@ class MetaController(Module):
             kl_loss = kl_loss * switch_beta
             kl_loss = kl_loss.sum(dim = -1).mean()

+            # encourage less switching
+
+            switch_loss = switch_beta.mean()
+
         # maybe hard switch, then use associative scan

         if hard_switch:
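
Taken together, the two hunks above define the discovery-phase regularization: the per-dimension Gaussian KL is gated by the soft switch probabilities, and the new switch_loss penalizes the mean switch probability itself. A hedged sketch, with toy shapes and the standard-normal KL formula assumed for illustration:

```python
import torch

# toy shapes: (batch, time, latent dim); switch_beta holds soft switch
# probabilities in [0, 1]
mean = torch.randn(2, 16, 8)
log_var = torch.randn(2, 16, 8)
switch_beta = torch.rand(2, 16, 1)

# KL(N(mean, var) || N(0, 1)) per latent dimension
kl = 0.5 * (mean.pow(2) + log_var.exp() - log_var - 1.)

# pay the KL only where a switch occurs, as in the diff
kl_loss = (kl * switch_beta).sum(dim = -1).mean()

# encourage less switching
switch_loss = switch_beta.mean()
```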
@@ -220,7 +225,7 @@ class MetaController(Module):
             next_switch_gated_action
         )

-        return modified_residual_stream, MetaControllerOutput(next_hiddens, action_dist, sampled_action, kl_loss)
+        return modified_residual_stream, MetaControllerOutput(next_hiddens, action_dist, sampled_action, kl_loss, switch_loss)

 # main transformer, which is subsumed into the environment after behavioral cloning

@@ -308,7 +313,8 @@ class Transformer(Module):

         # handle maybe behavioral cloning

-        if behavioral_cloning:
+        if behavioral_cloning or (meta_controlling and discovery_phase):
+
             state, target_state = state[:, :-1], state[:, 1:]
             action_ids, target_action_ids = action_ids[:, :-1], action_ids[:, 1:]

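The widened condition means the discovery phase now reuses the behavioral-cloning teacher-forcing shift: inputs are every timestep but the last, targets every timestep but the first. A toy illustration of that slicing:

```python
import torch

state = torch.randn(1, 8, 4)              # (batch, time, state dim), toy sizes
action_ids = torch.randint(0, 5, (1, 8))  # (batch, time)

state, target_state = state[:, :-1], state[:, 1:]
action_ids, target_action_ids = action_ids[:, :-1], action_ids[:, 1:]

# each input position predicts the next timestep
assert state.shape[1] == target_state.shape[1] == 7
```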
@@ -352,6 +358,12 @@ class Transformer(Module):

             return state_clone_loss, action_clone_loss

+        elif meta_controlling and discovery_phase:
+
+            action_recon_loss = self.action_readout.calculate_loss(dist_params, target_action_ids)
+
+            return action_recon_loss, next_meta_hiddens.kl_loss, next_meta_hiddens.switch_loss
+
         # returning

         return_one = not (return_latents or return_cache)
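
In discovery phase the forward pass now returns a triple of losses rather than logits. A hedged sketch of consuming it, assuming model, state, and actions are constructed as in the test file further down; the 0.1 / 0.2 weights mirror the updated test rather than any prescribed setting:

```python
action_recon_loss, kl_loss, switch_loss = model(
    state,
    actions,
    meta_controller = meta_controller,
    discovery_phase = True
)

# weight and sum the three discovery-phase objectives, then backprop
loss = action_recon_loss + 0.1 * kl_loss + 0.2 * switch_loss
loss.backward()
```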
@@ -1,6 +1,6 @@
 [project]
 name = "metacontroller-pytorch"
-version = "0.0.15"
+version = "0.0.17"
 description = "Transformer Metacontroller"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -5,11 +5,9 @@ import torch
 from metacontroller.metacontroller import Transformer, MetaController

 @param('action_discrete', (False, True))
-@param('discovery_phase', (False, True))
 @param('switch_per_latent_dim', (False, True))
 def test_metacontroller(
     action_discrete,
-    discovery_phase,
     switch_per_latent_dim
 ):

@@ -24,7 +22,7 @@ def test_metacontroller(
     action_embed_readout = dict(num_continuous = 8)
     assert_shape = (8, 2)

-    # behavioral cloning pahse
+    # behavioral cloning phase

     model = Transformer(
         dim = 512,
@@ -44,14 +42,23 @@ def test_metacontroller(
         switch_per_latent_dim = switch_per_latent_dim
     )

-    logits, cache = model(state, actions, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True)
+    # discovery phase
+
+    (action_recon_loss, kl_loss, switch_loss) = model(state, actions, meta_controller = meta_controller, discovery_phase = True)
+    (action_recon_loss + kl_loss * 0.1 + switch_loss * 0.2).backward()
+
+    # internal rl
+
+    logits, cache = model(state, actions, meta_controller = meta_controller, return_cache = True)

     assert logits.shape == (1, 1024, *assert_shape)

-    logits, cache = model(state, actions, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True, cache = cache)
-    logits, cache = model(state, actions, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True, cache = cache)
+    logits, cache = model(state, actions, meta_controller = meta_controller, return_cache = True, cache = cache)
+    logits, cache = model(state, actions, meta_controller = meta_controller, return_cache = True, cache = cache)

     assert logits.shape == (1, 1, *assert_shape)

+    # evolutionary strategies over grpo
+
     model.meta_controller = meta_controller
     model.evolve(1, lambda _: 1., noise_population_size = 2)
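
The tail of the test sketches the third phase: evolution strategies in place of GRPO. A hedged variant with the test's constant reward swapped for a stand-in callable; evolve's arguments are taken verbatim from the test, and reward_fn here is hypothetical:

```python
def reward_fn(model):
    # hypothetical: roll out the policy in an environment and return the
    # scalar episode return; the test simply returns 1.
    return 1.

model.meta_controller = meta_controller
model.evolve(1, reward_fn, noise_population_size = 2)
```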