metacontroller-pytorch 0.0.9__tar.gz → 0.0.12__tar.gz

This diff compares publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: metacontroller-pytorch
- Version: 0.0.9
+ Version: 0.0.12
  Summary: Transformer Metacontroller
  Project-URL: Homepage, https://pypi.org/project/metacontroller/
  Project-URL: Repository, https://github.com/lucidrains/metacontroller
@@ -35,9 +35,10 @@ Classifier: Programming Language :: Python :: 3.9
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Requires-Python: >=3.9
  Requires-Dist: assoc-scan>=0.0.3
- Requires-Dist: discrete-continuous-embed-readout>=0.1.11
+ Requires-Dist: discrete-continuous-embed-readout>=0.1.12
  Requires-Dist: einops>=0.8.1
  Requires-Dist: einx>=0.3.0
+ Requires-Dist: loguru
  Requires-Dist: torch>=2.5
  Requires-Dist: x-evolution>=0.1.23
  Requires-Dist: x-mlps-pytorch
@@ -3,6 +3,7 @@ from contextlib import nullcontext

  from functools import partial
  from collections import namedtuple
+ from loguru import logger

  import torch
  from torch import nn, cat, stack, tensor
@@ -52,6 +53,13 @@ def straight_through(src, tgt):

  # meta controller

+ MetaControllerOutput = namedtuple('MetaControllerOutput', (
+     'prev_hiddens',
+     'action_dist',
+     'actions',
+     'kl_loss'
+ ))
+
  class MetaController(Module):
      def __init__(
          self,
@@ -107,9 +115,9 @@ class MetaController(Module):
          return [
              *self.bidirectional_temporal_compressor.parameters(),
              *self.emitter.parameters(),
-             *self.emitter_to_action_mean_log_var.parameters()
+             *self.emitter_to_action_mean_log_var.parameters(),
              *self.decoder.parameters(),
-             *self.switch_gating
+             *self.switch_gating.parameters()
          ]

      def internal_rl_parameters(self):
@@ -121,11 +129,23 @@ class MetaController(Module):
      def forward(
          self,
          residual_stream,
+         cache: MetaControllerOutput | None = None,
          discovery_phase = False,
-         hard_switch = False
+         hard_switch = False,
+         temperature = 1.
      ):

+         # destruct prev cache
+
+         prev_action_proposer_hidden, prev_switching_unit_gru_hidden, prev_switch_gated_hiddens = cache.prev_hiddens if exists(cache) else ((None,) * 3)
+
+         # getting proposed action for the two phases
+
+         next_action_proposer_hidden = None
+
          if discovery_phase:
+             logger.warning('meta controller cache being passed back in for discovery phase, which does not make sense given bidirectional encoder')
+
              temporal_compressed, _ = self.bidirectional_temporal_compressor(residual_stream)
              temporal_compressed = reduce(temporal_compressed, '... (two d) -> ... d', 'mean', two = 2)

@@ -133,48 +153,51 @@ class MetaController(Module):
              readout = self.emitter_to_action_mean_log_var

          else: # else internal rl phase
-             proposed_action_hidden, _ = self.action_proposer(residual_stream)
+
+             proposed_action_hidden, next_action_proposer_hidden = self.action_proposer(residual_stream, prev_action_proposer_hidden)
              readout = self.action_proposer_mean_log_var

          # sample from the gaussian as the action from the meta controller

          action_dist = readout(proposed_action_hidden)

-         sampled_action = readout.sample(action_dist)
+         sampled_action = readout.sample(action_dist, temperature = temperature)

          # switching unit timer

          batch, _, dim = sampled_action.shape

-         switching_unit_gru_out, switching_unit_gru_hidden = self.switching_unit(residual_stream)
+         switching_unit_gru_out, next_switching_unit_gru_hidden = self.switching_unit(residual_stream, prev_switching_unit_gru_hidden)

          switch_beta = self.to_switching_unit_beta(switching_unit_gru_out).sigmoid()

          # need to encourage normal distribution

-         vae_kl_loss = self.zero
+         kl_loss = self.zero

          if discovery_phase:
              mean, log_var = action_dist.unbind(dim = -1)

-             vae_kl_loss = (0.5 * (
+             kl_loss = (0.5 * (
                  log_var.exp()
                  + mean.square()
                  - log_var
                  - 1.
              ))

-             vae_kl_loss = vae_kl_loss * switch_beta
-             vae_kl_loss = vae_kl_loss.sum(dim = -1).mean()
+             kl_loss = kl_loss * switch_beta
+             kl_loss = kl_loss.sum(dim = -1).mean()

          # maybe hard switch, then use associative scan

          if hard_switch:
-             hard_switch = (switch_beta > 0.5).float()
-             switch_beta = straight_through(switch_beta, hard_switch)
+             hard_switch_beta = (switch_beta > 0.5).float()
+             switch_beta = straight_through(switch_beta, hard_switch_beta)

          forget = 1. - switch_beta
-         gated_action = self.switch_gating(switch_beta, sampled_action * forget)
+         gated_action = self.switch_gating(switch_beta, sampled_action * forget, prev = prev_switch_gated_hiddens)
+
+         next_switch_gated_action = gated_action[:, -1]

          # decoder

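For reference (not part of the diff), the renamed kl_loss in the discovery phase is the standard closed-form KL divergence between the emitted diagonal Gaussian action distribution and a unit Gaussian, computed per latent dimension:

    \mathrm{KL}\left(\mathcal{N}(\mu, \sigma^2)\,\middle\|\,\mathcal{N}(0, 1)\right) = \tfrac{1}{2}\left(\sigma^2 + \mu^2 - \log\sigma^2 - 1\right)

With log_var standing in for log sigma^2, this is exactly the 0.5 * (log_var.exp() + mean.square() - log_var - 1.) term above, which is then weighted by the switching gate beta before the sum and mean reduction.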
@@ -189,10 +212,23 @@ class MetaController(Module):

          modified_residual_stream = residual_stream + control_signal

-         return modified_residual_stream, action_dist, sampled_action, vae_kl_loss
+         # returning
+
+         next_hiddens = (
+             next_action_proposer_hidden,
+             next_switching_unit_gru_hidden,
+             next_switch_gated_action
+         )
+
+         return modified_residual_stream, MetaControllerOutput(next_hiddens, action_dist, sampled_action, kl_loss)

  # main transformer, which is subsumed into the environment after behavioral cloning

+ TransformerOutput = namedtuple('TransformerOutput', (
+     'residual_stream_latent',
+     'prev_hiddens'
+ ))
+
  class Transformer(Module):
      def __init__(
          self,
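A minimal usage sketch (not part of the diff) of the MetaControllerOutput cache round-trip introduced above, assuming a constructed meta_controller and a residual_stream tensor of shape (batch, seq, dim); field names follow the namedtuple in the diff:

    # first call in the internal RL phase, no cache yet
    modified_stream, out = meta_controller(residual_stream)

    # out.prev_hiddens holds the (action proposer, switching unit GRU, switch-gated) hiddens;
    # passing the whole MetaControllerOutput back as cache carries them into the next call
    modified_stream, out = meta_controller(residual_stream, cache = out)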
@@ -251,10 +287,13 @@ class Transformer(Module):
          self,
          ids,
          meta_controller: Module | None = None,
+         cache: TransformerOutput | None = None,
          discovery_phase = False,
-         return_latents = False,
          no_grad_transformer = None,
-         no_grad_meta_controller = None
+         no_grad_meta_controller = None,
+         meta_controller_temperature = 1.,
+         return_latents = False,
+         return_cache = False,
      ):
          meta_controller = default(meta_controller, self.meta_controller)
@@ -268,28 +307,32 @@ class Transformer(Module):
          transformer_context = torch.no_grad if no_grad_transformer else nullcontext
          meta_controller_context = torch.no_grad if no_grad_meta_controller else nullcontext

+         # handle cache
+
+         lower_transformer_hiddens, meta_hiddens, upper_transformer_hiddens = cache.prev_hiddens if exists(cache) else ((None,) * 3)
+
          # transformer lower body

          with transformer_context():

              embed = self.embed(ids)

-             residual_stream = self.lower_body(embed)
+             residual_stream, next_lower_hiddens = self.lower_body(embed, cache = lower_transformer_hiddens, return_hiddens = True)

          # meta controller acts on residual stream here

          with meta_controller_context():

              if exists(meta_controller):
-                 modified_residual_stream, action_dist, sampled_action, vae_aux_loss = meta_controller(residual_stream, discovery_phase = discovery_phase)
+                 modified_residual_stream, next_meta_hiddens = meta_controller(residual_stream, cache = meta_hiddens, discovery_phase = discovery_phase, temperature = meta_controller_temperature)
              else:
-                 modified_residual_stream, action_dist, sampled_action, vae_aux_loss = residual_stream, None, None, self.zero
+                 modified_residual_stream, next_meta_hiddens = residual_stream, None

          # modified residual stream sent back to transformer upper body

          with transformer_context():

-             attended = self.upper_body(modified_residual_stream)
+             attended, next_upper_hiddens = self.upper_body(modified_residual_stream, cache = upper_transformer_hiddens, return_hiddens = True)

          # head readout

@@ -297,7 +340,9 @@ class Transformer(Module):

          # returning

-         if not return_latents:
+         return_one = not (return_latents or return_cache)
+
+         if return_one:
              return dist_params

-         return dist_params, latents
+         return dist_params, TransformerOutput(residual_stream, (next_lower_hiddens, next_meta_hiddens, next_upper_hiddens))
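A minimal sketch (not part of the diff) of the new return convention, assuming model and ids already exist: when either return_latents or return_cache is set, the second return value is now the TransformerOutput namedtuple rather than a bare latents tensor.

    dist_params, out = model(ids, return_cache = True)

    latent = out.residual_stream_latent    # residual stream from the lower body, before meta controller modification
    hiddens = out.prev_hiddens             # (lower body, meta controller, upper body) hiddens

    # feeding the output back in as cache continues from the stored hiddens,
    # as the updated test below does
    dist_params, out = model(ids, cache = out, return_cache = True)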
@@ -1,6 +1,6 @@
  [project]
  name = "metacontroller-pytorch"
- version = "0.0.9"
+ version = "0.0.12"
  description = "Transformer Metacontroller"
  authors = [
      { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -25,9 +25,10 @@ classifiers=[

  dependencies = [
      "assoc-scan>=0.0.3",
+     "discrete-continuous-embed-readout>=0.1.12",
      "einx>=0.3.0",
      "einops>=0.8.1",
-     "discrete-continuous-embed-readout>=0.1.11",
+     "loguru",
      "torch>=2.5",
      "x-evolution>=0.1.23",
      "x-mlps-pytorch",
@@ -26,9 +26,14 @@ def test_metacontroller(
          switch_per_latent_dim = switch_per_latent_dim
      )

-     logits = model(ids, meta_controller = meta_controller, discovery_phase = discovery_phase)
+     logits, cache = model(ids, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True)

      assert logits.shape == (1, 1024, 256)

+     logits, cache = model(ids, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True, cache = cache)
+     logits, cache = model(ids, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True, cache = cache)
+
+     assert logits.shape == (1, 1, 256)
+
      model.meta_controller = meta_controller
      model.evolve(1, lambda _: 1., noise_population_size = 2)