metacontroller-pytorch 0.0.12__tar.gz → 0.0.15__tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in the public registry, and is provided for informational purposes only.
- {metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/PKG-INFO +1 -1
- {metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/metacontroller/metacontroller.py +36 -22
- {metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/pyproject.toml +1 -1
- metacontroller_pytorch-0.0.15/tests/test_metacontroller.py +57 -0
- metacontroller_pytorch-0.0.12/tests/test_metacontroller.py +0 -39
- {metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/.github/workflows/python-publish.yml +0 -0
- {metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/.github/workflows/test.yml +0 -0
- {metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/.gitignore +0 -0
- {metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/LICENSE +0 -0
- {metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/README.md +0 -0
- {metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/fig1.png +0 -0
- {metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/metacontroller/__init__.py +0 -0
{metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/metacontroller/metacontroller.py
RENAMED
@@ -22,7 +22,7 @@ from x_transformers import Decoder
 from x_mlps_pytorch import Feedforwards
 from x_evolution import EvoStrategy

-from discrete_continuous_embed_readout import Embed, Readout
+from discrete_continuous_embed_readout import Embed, Readout, EmbedAndReadout

 from assoc_scan import AssocScan

@@ -234,30 +234,25 @@ class Transformer(Module):
         self,
         dim,
         *,
-        embed: Embed | dict,
+        state_embed_readout: dict,
+        action_embed_readout: dict,
         lower_body: Decoder | dict,
         upper_body: Decoder | dict,
-        readout: Readout | dict,
         meta_controller: MetaController | None = None
     ):
         super().__init__()

-        if isinstance(embed, dict):
-            embed = Embed(dim = dim, **embed)
-
         if isinstance(lower_body, dict):
             lower_body = Decoder(dim = dim, **lower_body)

         if isinstance(upper_body, dict):
             upper_body = Decoder(dim = dim, **upper_body)

-        if isinstance(readout, dict):
-            readout = Readout(dim = dim, **readout)
+        self.state_embed, self.state_readout = EmbedAndReadout(dim, **state_embed_readout)
+        self.action_embed, self.action_readout = EmbedAndReadout(dim, **action_embed_readout)

-        self.embed = embed
         self.lower_body = lower_body
         self.upper_body = upper_body
-        self.readout = readout

         # meta controller

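In short, the Transformer constructor no longer takes separate embed / readout arguments; it now takes a state_embed_readout and an action_embed_readout config dict and builds paired embedding and readout modules for states and actions through EmbedAndReadout. A minimal construction sketch, with the dims and depths borrowed from the updated test further below (illustrative values, not defaults):

    model = Transformer(
        dim = 512,
        state_embed_readout = dict(num_continuous = 384),  # continuous state vectors of dim 384
        action_embed_readout = dict(num_discrete = 4),      # or dict(num_continuous = ...) for continuous actions
        lower_body = dict(depth = 2),
        upper_body = dict(depth = 2),
    )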
@@ -285,13 +280,13 @@

     def forward(
         self,
-        ids,
+        state,
+        action_ids,
         meta_controller: Module | None = None,
         cache: TransformerOutput | None = None,
         discovery_phase = False,
-        no_grad_transformer = None,
-        no_grad_meta_controller = None,
         meta_controller_temperature = 1.,
+        return_raw_action_dist = False,
         return_latents = False,
         return_cache = False,
     ):
@@ -299,23 +294,32 @@

         meta_controlling = exists(meta_controller)

-
+        behavioral_cloning = not meta_controlling and not return_raw_action_dist

-
-        no_grad_meta_controller = default(no_grad_meta_controller, no_grad_transformer) # by default, if transformer is eval no grad then meta controller is being learnt
+        # by default, if meta controller is passed in, transformer is no grad

-
-        meta_controller_context =
+        lower_transformer_context = nullcontext if not meta_controlling else torch.no_grad
+        meta_controller_context = nullcontext if meta_controlling else torch.no_grad
+        upper_transformer_context = nullcontext if (not meta_controlling or discovery_phase) else torch.no_grad

         # handle cache

         lower_transformer_hiddens, meta_hiddens, upper_transformer_hiddens = cache.prev_hiddens if exists(cache) else ((None,) * 3)

+        # handle maybe behavioral cloning
+
+        if behavioral_cloning:
+            state, target_state = state[:, :-1], state[:, 1:]
+            action_ids, target_action_ids = action_ids[:, :-1], action_ids[:, 1:]
+
         # transformer lower body

-        with
+        with lower_transformer_context():

-            embed = self.embed(ids)
+            state_embed = self.state_embed(state)
+            action_embed = self.action_embed(action_ids)
+
+            embed = state_embed + action_embed

             residual_stream, next_lower_hiddens = self.lower_body(embed, cache = lower_transformer_hiddens, return_hiddens = True)

@@ -330,13 +334,23 @@

         # modified residual stream sent back to transformer upper body

-        with
+        with upper_transformer_context():

             attended, next_upper_hiddens = self.upper_body(modified_residual_stream, cache = upper_transformer_hiddens, return_hiddens = True)

         # head readout

-        dist_params = self.readout(attended)
+        dist_params = self.action_readout(attended)
+
+        # maybe return behavior cloning loss
+
+        if behavioral_cloning:
+            state_dist_params = self.state_readout(attended)
+            state_clone_loss = self.state_readout.calculate_loss(state_dist_params, target_state)
+
+            action_clone_loss = self.action_readout.calculate_loss(dist_params, target_action_ids)
+
+            return state_clone_loss, action_clone_loss

         # returning

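Taken together, forward now has two modes, mirrored by the new test below: called with just states and action ids (no meta controller) it acts as a behavioral cloning step and returns a state cloning loss and an action cloning loss; called with a MetaController it returns the action readout's distribution parameters (plus the cache when return_cache = True). A rough usage sketch, assuming model, state, actions and meta_controller are set up as in that test:

    # behavioral cloning phase - no meta controller passed in
    state_clone_loss, action_clone_loss = model(state, actions)
    (state_clone_loss + 0.5 * action_clone_loss).backward()

    # discovery / internal RL phase - meta controller passed in
    logits, cache = model(state, actions, meta_controller = meta_controller, return_cache = True)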
metacontroller_pytorch-0.0.15/tests/test_metacontroller.py
ADDED
@@ -0,0 +1,57 @@
+import pytest
+param = pytest.mark.parametrize
+
+import torch
+from metacontroller.metacontroller import Transformer, MetaController
+
+@param('action_discrete', (False, True))
+@param('discovery_phase', (False, True))
+@param('switch_per_latent_dim', (False, True))
+def test_metacontroller(
+    action_discrete,
+    discovery_phase,
+    switch_per_latent_dim
+):
+
+    state = torch.randn(1, 1024, 384)
+
+    if action_discrete:
+        actions = torch.randint(0, 4, (1, 1024))
+        action_embed_readout = dict(num_discrete = 4)
+        assert_shape = (4,)
+    else:
+        actions = torch.randn(1, 1024, 8)
+        action_embed_readout = dict(num_continuous = 8)
+        assert_shape = (8, 2)
+
+    # behavioral cloning pahse
+
+    model = Transformer(
+        dim = 512,
+        action_embed_readout = action_embed_readout,
+        state_embed_readout = dict(num_continuous = 384),
+        lower_body = dict(depth = 2,),
+        upper_body = dict(depth = 2,),
+    )
+
+    state_clone_loss, action_clone_loss = model(state, actions)
+    (state_clone_loss + 0.5 * action_clone_loss).backward()
+
+    # discovery and internal rl phase with meta controller
+
+    meta_controller = MetaController(
+        dim_latent = 512,
+        switch_per_latent_dim = switch_per_latent_dim
+    )
+
+    logits, cache = model(state, actions, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True)
+
+    assert logits.shape == (1, 1024, *assert_shape)
+
+    logits, cache = model(state, actions, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True, cache = cache)
+    logits, cache = model(state, actions, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True, cache = cache)
+
+    assert logits.shape == (1, 1, *assert_shape)
+
+    model.meta_controller = meta_controller
+    model.evolve(1, lambda _: 1., noise_population_size = 2)
metacontroller_pytorch-0.0.12/tests/test_metacontroller.py
DELETED
@@ -1,39 +0,0 @@
-import pytest
-param = pytest.mark.parametrize
-
-import torch
-from metacontroller.metacontroller import Transformer, MetaController
-
-@param('discovery_phase', (False, True))
-@param('switch_per_latent_dim', (False, True))
-def test_metacontroller(
-    discovery_phase,
-    switch_per_latent_dim
-):
-
-    ids = torch.randint(0, 256, (1, 1024))
-
-    model = Transformer(
-        512,
-        embed = dict(num_discrete = 256),
-        lower_body = dict(depth = 2,),
-        upper_body = dict(depth = 2,),
-        readout = dict(num_discrete = 256)
-    )
-
-    meta_controller = MetaController(
-        512,
-        switch_per_latent_dim = switch_per_latent_dim
-    )
-
-    logits, cache = model(ids, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True)
-
-    assert logits.shape == (1, 1024, 256)
-
-    logits, cache = model(ids, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True, cache = cache)
-    logits, cache = model(ids, meta_controller = meta_controller, discovery_phase = discovery_phase, return_cache = True, cache = cache)
-
-    assert logits.shape == (1, 1, 256)
-
-    model.meta_controller = meta_controller
-    model.evolve(1, lambda _: 1., noise_population_size = 2)
{metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/.github/workflows/python-publish.yml
RENAMED
File without changes

{metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/.github/workflows/test.yml
RENAMED
File without changes

{metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/.gitignore
RENAMED
File without changes

{metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/LICENSE
RENAMED
File without changes

{metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/README.md
RENAMED
File without changes

{metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/fig1.png
RENAMED
File without changes

{metacontroller_pytorch-0.0.12 → metacontroller_pytorch-0.0.15}/metacontroller/__init__.py
RENAMED
File without changes