metacontroller-pytorch 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
--- a/metacontroller/metacontroller.py
+++ b/metacontroller/metacontroller.py
@@ -61,6 +61,7 @@ MetaControllerOutput = namedtuple('MetaControllerOutput', (
     'input_residual_stream',
     'action_dist',
     'actions',
+    'switch_beta',
     'kl_loss',
     'switch_loss'
 ))
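Note: MetaControllerOutput is a plain namedtuple, so inserting 'switch_beta' between 'actions' and 'kl_loss' shifts the position of every later field. Callers that unpack the cache positionally need updating; field-name access keeps working. A minimal sketch of the name-based access (tensor shapes are placeholders, not taken from the package):

    from collections import namedtuple
    import torch

    # mirrors the 0.0.32 field order above
    MetaControllerOutput = namedtuple('MetaControllerOutput', (
        'prev_hiddens', 'input_residual_stream', 'action_dist',
        'actions', 'switch_beta', 'kl_loss', 'switch_loss'
    ))

    out = MetaControllerOutput(
        prev_hiddens = None,
        input_residual_stream = torch.randn(1, 4, 512),
        action_dist = torch.randn(1, 4, 32, 2),
        actions = torch.randn(1, 4, 32),
        switch_beta = torch.rand(1, 4, 1),  # the newly exposed switch gate values
        kl_loss = torch.tensor(0.),
        switch_loss = torch.tensor(0.)
    )

    # name-based access is insensitive to field insertions; out[4] changed meaning in 0.0.32
    print(out.switch_beta.shape)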
@@ -106,7 +107,6 @@ class MetaController(Module):
 
         self.switch_per_latent_dim = switch_per_latent_dim
 
-
         self.dim_latent = dim_latent
         self.switching_unit = GRU(dim_meta + dim_latent, dim_meta)
         self.to_switching_unit_beta = nn.Linear(dim_meta, dim_latent if switch_per_latent_dim else 1, bias = False)
@@ -146,12 +146,19 @@ class MetaController(Module):
             *self.action_proposer_mean_log_var.parameters()
         ]
 
+    def log_prob(
+        self,
+        action_dist,
+        sampled_latent_action
+    ):
+        return self.action_proposer_mean_log_var.log_prob(action_dist, sampled_latent_action)
+
     def forward(
         self,
         residual_stream,
         cache: MetaControllerOutput | None = None,
         discovery_phase = False,
-        hard_switch = False,
+        hard_switch = None,
         temperature = 1.,
         episode_lens: Tensor | None = None
     ):
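The new log_prob method exposes, for a cached rollout, the log-density of the sampled latent actions under the stored action distribution, which is what a policy-gradient update needs. A hedged sketch of one way it might be wired into a REINFORCE-style loss; meta_controller, the cache fields, and advantages are assumptions for illustration, not this package's documented API:

    import torch

    def reinforce_loss(meta_controller, cache, advantages):
        # re-score the cached latent actions under the cached distribution parameters
        log_probs = meta_controller.log_prob(cache.action_dist, cache.actions)

        # REINFORCE: raise the log-probability of actions with positive advantage
        return -(log_probs * advantages).mean()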
@@ -167,6 +174,8 @@ class MetaController(Module):
 
         meta_embed = self.model_to_meta(residual_stream)
 
+        hard_switch = default(hard_switch, not discovery_phase) # during the internal RL phase it needs to be a hard switch, so that only the actions emitted during the switch are reinforced
+
         if discovery_phase:
             logger.warning('meta controller cache being passed back in for discovery phase, which does not make sense given bidirectional encoder')
 
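With hard_switch now defaulting to None, forward derives it from the phase: soft switching during discovery (gradients must flow through the gate) and hard switching during the internal RL phase, per the comment above. A small sketch of that resolution in isolation; the body of default follows the common first-non-None convention, which matches its use here but is not copied from the package:

    def default(*args):
        # return the first argument that is not None
        for arg in args:
            if arg is not None:
                return arg
        return None

    # discovery phase: resolves to a soft (differentiable) switch
    assert default(None, not True) is False

    # internal RL phase: resolves to a hard switch
    assert default(None, not False) is True

    # an explicit caller-supplied value always wins
    assert default(False, not False) is False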
@@ -269,10 +278,16 @@ class MetaController(Module):
             sampled_latent_action[:, -1:]
         )
 
-        return control_signal, MetaControllerOutput(next_hiddens, residual_stream, action_dist, sampled_latent_action, kl_loss, switch_loss)
+        return control_signal, MetaControllerOutput(next_hiddens, residual_stream, action_dist, sampled_latent_action, switch_beta, kl_loss, switch_loss)
 
 # main transformer, which is subsumed into the environment after behavioral cloning
 
+Hiddens = namedtuple('Hiddens', (
+    'lower_body',
+    'meta_controller',
+    'upper_body'
+))
+
 TransformerOutput = namedtuple('TransformerOutput', (
     'residual_stream_latent',
     'prev_hiddens'
@@ -438,4 +453,4 @@ class Transformer(Module):
         if return_one:
             return dist_params
 
-        return dist_params, TransformerOutput(residual_stream, (next_lower_hiddens, next_meta_hiddens, next_upper_hiddens))
+        return dist_params, TransformerOutput(residual_stream, Hiddens(next_lower_hiddens, next_meta_hiddens, next_upper_hiddens))
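Wrapping the three hidden-state streams in the new Hiddens namedtuple keeps the return value tuple-compatible while naming each stream. A small sketch (shapes are placeholders):

    from collections import namedtuple
    import torch

    Hiddens = namedtuple('Hiddens', ('lower_body', 'meta_controller', 'upper_body'))

    hiddens = Hiddens(
        lower_body = torch.zeros(1, 512),
        meta_controller = torch.zeros(1, 256),
        upper_body = torch.zeros(1, 512)
    )

    # existing positional unpacking keeps working
    lower, meta, upper = hiddens

    # and named access documents which stream is which
    assert hiddens.meta_controller is meta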
--- a/metacontroller/metacontroller_with_binary_mapper.py
+++ b/metacontroller/metacontroller_with_binary_mapper.py
@@ -28,6 +28,8 @@ from torch_einops_utils.save_load import save_load
 
 from vector_quantize_pytorch import BinaryMapper
 
+from metacontroller.metacontroller import MetaControllerOutput
+
 # constants
 
 LinearNoBias = partial(Linear, bias = False)
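Importing MetaControllerOutput from metacontroller.metacontroller (and deleting the duplicate definition below) means both controller variants now return the same cache type. One consequence, sketched with a hypothetical helper that is not part of the package:

    from metacontroller.metacontroller import MetaControllerOutput

    def is_metacontroller_cache(cache) -> bool:
        # true for caches from MetaController and MetaControllerWithBinaryMapper alike
        return isinstance(cache, MetaControllerOutput)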
@@ -48,16 +50,10 @@ def default(*args):
 def straight_through(src, tgt):
     return tgt + src - src.detach()
 
-# meta controller
+def log(t, eps = 1e-20):
+    return t.clamp_min(eps).log()
 
-MetaControllerOutput = namedtuple('MetaControllerOutput', (
-    'prev_hiddens',
-    'input_residual_stream',
-    'action_dist',
-    'codes',
-    'kl_loss',
-    'switch_loss'
-))
+# meta controller
 
 @save_load()
 class MetaControllerWithBinaryMapper(Module):
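The new log helper clamps its input before taking the logarithm, so probabilities that reach exactly zero yield a large finite negative value rather than -inf, which would otherwise poison any downstream mean or gradient. A minimal demonstration:

    import torch

    def log(t, eps = 1e-20):
        # clamping avoids log(0) = -inf, keeping losses and gradients finite
        return t.clamp_min(eps).log()

    probs = torch.tensor([1., 0.5, 0.])

    print(probs.log())  # tensor([ 0.0000, -0.6931,    -inf])
    print(log(probs))   # tensor([  0.0000,  -0.6931, -46.0517])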
@@ -144,12 +140,29 @@ class MetaControllerWithBinaryMapper(Module):
             *self.proposer_to_binary_logits.parameters()
         ]
 
+    def log_prob(
+        self,
+        action_dist,
+        sampled_latent_action
+    ):
+        action_prob = action_dist.sigmoid()
+        probs = stack((action_prob, 1. - action_prob), dim = -1)
+        log_probs = log(probs)
+
+        indices = sampled_latent_action.argmax(dim = -1)
+        codes = self.binary_mapper.codes[indices].long()
+
+        codes = rearrange(codes, '... -> ... 1')
+        action_log_probs = log_probs.gather(-1, codes)
+
+        return rearrange(action_log_probs, '... 1 -> ...')
+
     def forward(
         self,
         residual_stream,
         cache: MetaControllerOutput | None = None,
         discovery_phase = False,
-        hard_switch = False,
+        hard_switch = None,
         temperature = 1.,
         episode_lens: Tensor | None = None
     ):
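This log_prob computes a per-bit Bernoulli log-probability: it stacks P(bit) and 1 - P(bit) along a trailing dimension, maps the sampled one-hot action back to its code bits, and gathers the matching log-probability for each bit. A self-contained toy sketch of the stack-and-gather pattern; the shapes and code table are invented for illustration:

    import torch

    # per-bit probabilities from a sigmoid over logits: batch of 2, 4 bits
    action_prob = torch.tensor([[0.9, 0.2, 0.7, 0.5],
                                [0.1, 0.8, 0.3, 0.6]])

    # index 0 selects action_prob, index 1 its complement,
    # matching the stack order in the method above
    probs = torch.stack((action_prob, 1. - action_prob), dim = -1)

    # toy code bits per sample (binary_mapper.codes supplies these in the package)
    codes = torch.tensor([[0, 1, 0, 1],
                          [1, 0, 1, 0]])

    log_probs = probs.clamp_min(1e-20).log()
    per_bit = log_probs.gather(-1, codes.unsqueeze(-1)).squeeze(-1)

    print(per_bit.shape)  # torch.Size([2, 4]): one log-probability per bit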
@@ -165,6 +178,8 @@ class MetaControllerWithBinaryMapper(Module):
 
         meta_embed = self.model_to_meta(residual_stream)
 
+        hard_switch = default(hard_switch, not discovery_phase) # during the internal RL phase it needs to be a hard switch, so that only the actions emitted during the switch are reinforced
+
         if discovery_phase:
             mask = maybe(lens_to_mask)(episode_lens, meta_embed.shape[1])
 
@@ -266,4 +281,4 @@ class MetaControllerWithBinaryMapper(Module):
             sampled_codes[:, -1:]
         )
 
-        return control_signal, MetaControllerOutput(next_hiddens, residual_stream, binary_logits, sampled_codes, kl_loss, switch_loss)
+        return control_signal, MetaControllerOutput(next_hiddens, residual_stream, binary_logits, sampled_codes, switch_beta, kl_loss, switch_loss)
--- a/metacontroller_pytorch-0.0.30.dist-info/METADATA
+++ b/metacontroller_pytorch-0.0.32.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metacontroller-pytorch
-Version: 0.0.30
+Version: 0.0.32
 Summary: Transformer Metacontroller
 Project-URL: Homepage, https://pypi.org/project/metacontroller/
 Project-URL: Repository, https://github.com/lucidrains/metacontroller
--- /dev/null
+++ b/metacontroller_pytorch-0.0.32.dist-info/RECORD
@@ -0,0 +1,8 @@
+metacontroller/__init__.py,sha256=lj7IOGpN--qMxJWbB-4SGqoPXG7Hd4mgtToTRSyTZ58,57
+metacontroller/metacontroller.py,sha256=somE9gX36c1d9hF2n8Qn4foRY8krHGodvrvulhkIGE8,15006
+metacontroller/metacontroller_with_binary_mapper.py,sha256=CTGK8ruQ3TkioVUwFTHdrbfzubaeuhSdXHfHtaDcwMY,8813
+metacontroller/metacontroller_with_resnet.py,sha256=YKHcazRZrrRParHRH-H_EPvT1-55LHKAs5pM6gwuT20,7394
+metacontroller_pytorch-0.0.32.dist-info/METADATA,sha256=hr08iXm6Mb-rnDu2xPrr9YQ6cwTtX1F79MfBYt54Y94,4747
+metacontroller_pytorch-0.0.32.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+metacontroller_pytorch-0.0.32.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+metacontroller_pytorch-0.0.32.dist-info/RECORD,,
--- a/metacontroller_pytorch-0.0.30.dist-info/RECORD
+++ /dev/null
@@ -1,8 +0,0 @@
-metacontroller/__init__.py,sha256=lj7IOGpN--qMxJWbB-4SGqoPXG7Hd4mgtToTRSyTZ58,57
-metacontroller/metacontroller.py,sha256=ydkL3gYW5WGXQdQOIJQ_gibJs74laIIx-v4DmcJHi7M,14497
-metacontroller/metacontroller_with_binary_mapper.py,sha256=OGal6dftRPBg_QT1LNDYejNGNlmh4MBvdM41FAQJp9Y,8153
-metacontroller/metacontroller_with_resnet.py,sha256=YKHcazRZrrRParHRH-H_EPvT1-55LHKAs5pM6gwuT20,7394
-metacontroller_pytorch-0.0.30.dist-info/METADATA,sha256=ghasc1GA0ZM-AZimY0FnGuRFsezVIcbI49V6TIOWeq4,4747
-metacontroller_pytorch-0.0.30.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-metacontroller_pytorch-0.0.30.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-metacontroller_pytorch-0.0.30.dist-info/RECORD,,