PyPI - metacontroller-pytorch - Versions diffs - 0.0.34__tar.gz → 0.0.35__tar.gz - Mend

metacontroller-pytorch 0.0.34.tar.gz → 0.0.35.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of metacontroller-pytorch might be problematic. Click here for more details.

Files changed (17)

{metacontroller_pytorch-0.0.34 → metacontroller_pytorch-0.0.35}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metacontroller-pytorch
-Version: 0.0.34
+Version: 0.0.35
 Summary: Transformer Metacontroller
 Project-URL: Homepage, https://pypi.org/project/metacontroller/
 Project-URL: Repository, https://github.com/lucidrains/metacontroller

{metacontroller_pytorch-0.0.34 → metacontroller_pytorch-0.0.35}/metacontroller/metacontroller.py RENAMED Viewed

@@ -329,6 +329,11 @@ class MetaController(Module):
             sampled_latent_action[:, -1:]
         )
+        # squeeze out the last dimension of switch_beta if single gate for all latent dimensions
+        if not self.switch_per_latent_dim:
+            switch_beta = rearrange(switch_beta, '... 1 -> ...')
         return control_signal, MetaControllerOutput(next_hiddens, residual_stream, action_dist, sampled_latent_action, switch_beta, kl_loss, switch_loss)
 # main transformer, which is subsumed into the environment after behavioral cloning

{metacontroller_pytorch-0.0.34 → metacontroller_pytorch-0.0.35}/metacontroller/metacontroller_with_binary_mapper.py RENAMED Viewed

@@ -296,4 +296,9 @@ class MetaControllerWithBinaryMapper(Module):
             sampled_codes[:, -1:]
         )
+        # squeeze out the last dimension of switch_beta if single gate for all codes
+        if not self.switch_per_code:
+            switch_beta = rearrange(switch_beta, '... 1 -> ...')
         return control_signal, MetaControllerOutput(next_hiddens, residual_stream, binary_logits, sampled_codes, switch_beta, kl_loss, switch_loss)

{metacontroller_pytorch-0.0.34 → metacontroller_pytorch-0.0.35}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "metacontroller-pytorch"
-version = "0.0.34"
+version = "0.0.35"
 description = "Transformer Metacontroller"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }

{metacontroller_pytorch-0.0.34 → metacontroller_pytorch-0.0.35}/tests/test_metacontroller.py RENAMED Viewed

@@ -2,6 +2,7 @@ import pytest
 param = pytest.mark.parametrize
 from pathlib import Path
+from functools import partial
 import torch
 from torch import cat
@@ -116,11 +117,11 @@ def test_metacontroller(
         # accumulate across time for the episode data
-        all_episodes.append(dict(
-            states = cat(states, dim = 1),
-            log_probs = cat(log_probs, dim = 1),
-            switch_betas = cat(switch_betas, dim = 1),
-            latent_actions = cat(latent_actions, dim = 1)
+        all_episodes.append((
+            cat(states, dim = 1),
+            cat(log_probs, dim = 1),
+            cat(switch_betas, dim = 1),
+            cat(latent_actions, dim = 1)
         ))
         all_rewards.append(torch.randn(1))
@@ -134,23 +135,19 @@ def test_metacontroller(
     # simulate a policy loss update over the entire group
-    group_states = cat([e['states'] for e in all_episodes], dim = 0)
-    group_log_probs = cat([e['log_probs'] for e in all_episodes], dim = 0)
-    group_latent_actions = cat([e['latent_actions'] for e in all_episodes], dim = 0)
-    group_switch_betas = cat([e['switch_betas'] for e in all_episodes], dim = 0)
+    group_states, group_log_probs, group_switch_betas, group_latent_actions = map(partial(cat, dim = 0), zip(*all_episodes))
-    if not use_binary_mapper_variant:
-        loss = policy_loss(
-            meta_controller,
-            group_states,
-            group_log_probs,
-            group_latent_actions,
-            advantages,
-            group_switch_betas == 1.,
-            episode_lens = episode_lens[:1].repeat(3) if exists(episode_lens) else None
-        )
+    loss = policy_loss(
+        meta_controller,
+        group_states,
+        group_log_probs,
+        group_latent_actions,
+        advantages,
+        group_switch_betas == 1.,
+        episode_lens = episode_lens[:1].repeat(3) if exists(episode_lens) else None
+    )
-        loss.backward()
+    loss.backward()
     # evolutionary strategies over grpo