PyPI - metacontroller-pytorch - Versions diffs - 0.0.28__tar.gz → 0.0.30__tar.gz - Mend

metacontroller-pytorch 0.0.28__tar.gz → 0.0.30__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metacontroller-pytorch
-Version: 0.0.28
+Version: 0.0.30
 Summary: Transformer Metacontroller
 Project-URL: Homepage, https://pypi.org/project/metacontroller/
 Project-URL: Repository, https://github.com/lucidrains/metacontroller
@@ -67,6 +67,8 @@ $ pip install metacontroller-pytorch
 - [Pranoy](https://github.com/pranoyr) for submitting a pull request for fixing the previous latent action not being included in the inputs to the switching unit
+- [Diego Calanzone](https://github.com/ddidacus) for proposing testing on the BabyAI gridworld task, and for submitting the [pull request](https://github.com/lucidrains/metacontroller/pull/3) adding behavior cloning and discovery phase training for it!
 ## Citations
 ```bibtex
@@ -103,3 +105,5 @@ $ pip install metacontroller-pytorch
     url       = {https://arxiv.org/abs/2510.17558},
 }
 ```
+*Life can only be understood backwards; but it must be lived forwards* - Søren Kierkegaard

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/README.md RENAMED Viewed

@@ -14,6 +14,8 @@ $ pip install metacontroller-pytorch
 - [Pranoy](https://github.com/pranoyr) for submitting a pull request for fixing the previous latent action not being included in the inputs to the switching unit
+- [Diego Calanzone](https://github.com/ddidacus) for proposing testing on the BabyAI gridworld task, and for submitting the [pull request](https://github.com/lucidrains/metacontroller/pull/3) adding behavior cloning and discovery phase training for it!
 ## Citations
 ```bibtex
@@ -50,3 +52,5 @@ $ pip install metacontroller-pytorch
     url       = {https://arxiv.org/abs/2510.17558},
 }
 ```
+*Life can only be understood backwards; but it must be lived forwards* - Søren Kierkegaard

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/gather_babyai_trajs.py RENAMED Viewed

@@ -31,8 +31,13 @@ import gymnasium as gym
 from minigrid.utils.baby_ai_bot import BabyAIBot
 from minigrid.wrappers import FullyObsWrapper, SymbolicObsWrapper
+from gymnasium import spaces
+from gymnasium.core import ObservationWrapper
+from minigrid.core.constants import OBJECT_TO_IDX
 from memmap_replay_buffer import ReplayBuffer
 # helpers
 def exists(val):
@@ -41,6 +46,40 @@ def exists(val):
 def sample(prob):
     return random.random() < prob
+# wrapper, modified so the observation carries both the rgb image (for the policy) and the symbolic image (for the bot)
+class RGBImgPartialObsWrapper(ObservationWrapper):
+    """
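+    Wrapper that adds the partially observable (agent point-of-view) RGB image to the observation under the "rgb_image" key.
+    The existing entries, including the symbolic "image", are kept, so the agent can solve the gridworld in pixel space.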
+    """
+    def __init__(self, env, tile_size=1):
+        super().__init__(env)
+        # Rendering attributes for observations
+        self.tile_size = tile_size
+        symbolic_image_space = self.observation_space["image"]
+        obs_shape = env.observation_space.spaces["image"].shape
+        new_image_space = spaces.Box(
+            low=0,
+            high=255,
+            shape=(obs_shape[0] * tile_size, obs_shape[1] * tile_size, 3),
+            dtype="uint8",
+        )
+        self.observation_space = spaces.Dict(
+            {**self.observation_space.spaces, "image": symbolic_image_space, "rgb_image": new_image_space}
+        )
+    def observation(self, obs):
+        rgb_img_partial = self.unwrapped.get_frame(
+            tile_size=self.tile_size, agent_pov=True
+        )
+        return {**obs, "rgb_image": rgb_img_partial}
 # agent
 class BabyAIBotEpsilonGreedy:
@@ -72,6 +111,7 @@ def collect_single_episode(env_id, seed, num_steps, random_action_prob, state_sh
     env = gym.make(env_id, render_mode="rgb_array", highlight=False)
     env = FullyObsWrapper(env.unwrapped)
     env = SymbolicObsWrapper(env.unwrapped)
+    env = RGBImgPartialObsWrapper(env.unwrapped)
     try:
         state_obs, _ = env.reset(seed=seed)
@@ -88,7 +128,7 @@ def collect_single_episode(env_id, seed, num_steps, random_action_prob, state_sh
                 env.close()
                 return None, None, False, 0
-            episode_state[_step] = state_obs["image"]
+            episode_state[_step] = state_obs["rgb_image"] / 255. # normalized to the range 0 to 1
             episode_action[_step] = action
             state_obs, reward, terminated, truncated, info = env.step(action)
@@ -127,7 +167,8 @@ def collect_demonstrations(
     temp_env = gym.make(env_id)
     temp_env = FullyObsWrapper(temp_env.unwrapped)
     temp_env = SymbolicObsWrapper(temp_env.unwrapped)
-    state_shape = temp_env.observation_space['image'].shape
+    temp_env = RGBImgPartialObsWrapper(temp_env.unwrapped)
+    state_shape = temp_env.observation_space['rgb_image'].shape
     temp_env.close()
     logger.info(f"Detected state shape: {state_shape} for env {env_id}")

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/metacontroller/metacontroller.py RENAMED Viewed

@@ -58,6 +58,7 @@ def straight_through(src, tgt):
 MetaControllerOutput = namedtuple('MetaControllerOutput', (
     'prev_hiddens',
+    'input_residual_stream',
     'action_dist',
     'actions',
     'kl_loss',
@@ -268,7 +269,7 @@ class MetaController(Module):
             sampled_latent_action[:, -1:]
         )
-        return control_signal, MetaControllerOutput(next_hiddens, action_dist, sampled_latent_action, kl_loss, switch_loss)
+        return control_signal, MetaControllerOutput(next_hiddens, residual_stream, action_dist, sampled_latent_action, kl_loss, switch_loss)
 # main transformer, which is subsumed into the environment after behavioral cloning

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/metacontroller/metacontroller_with_binary_mapper.py RENAMED Viewed

@@ -52,6 +52,7 @@ def straight_through(src, tgt):
 MetaControllerOutput = namedtuple('MetaControllerOutput', (
     'prev_hiddens',
+    'input_residual_stream',
     'action_dist',
     'codes',
     'kl_loss',
@@ -265,4 +266,4 @@ class MetaControllerWithBinaryMapper(Module):
             sampled_codes[:, -1:]
         )
-        return control_signal, MetaControllerOutput(next_hiddens, binary_logits, sampled_codes, kl_loss, switch_loss)
+        return control_signal, MetaControllerOutput(next_hiddens, residual_stream, binary_logits, sampled_codes, kl_loss, switch_loss)

metacontroller_pytorch-0.0.30/metacontroller/metacontroller_with_resnet.py ADDED Viewed

@@ -0,0 +1,250 @@
+from typing import Any, List, Type, Union, Optional
+import torch
+from torch import Tensor
+from torch import nn
+from einops import rearrange
+from metacontroller.metacontroller import Transformer
+class TransformerWithResnetEncoder(Transformer):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.resnet_dim = kwargs["state_embed_readout"]["num_continuous"]
+        self.visual_encoder = resnet18(out_dim=self.resnet_dim)
+    def visual_encode(self, x: torch.Tensor) -> torch.Tensor:
+        b, t = x.shape[:2]
+        x = rearrange(x, 'b t h w c -> (b t) c h w')
+        h = self.visual_encoder(x)
+        h = rearrange(h, '(b t) d -> b t d', b=b, t=t, d = self.resnet_dim)
+        return h
+# resnet components taken from https://github.com/Lornatang/ResNet-PyTorch
+class _BasicBlock(nn.Module):
+    expansion: int = 1
+    def __init__(
+            self,
+            in_channels: int,
+            out_channels: int,
+            stride: int,
+            downsample: Optional[nn.Module] = None,
+            groups: int = 1,
+            base_channels: int = 64,
+    ) -> None:
+        super(_BasicBlock, self).__init__()
+        self.stride = stride
+        self.downsample = downsample
+        self.groups = groups
+        self.base_channels = base_channels
+        self.conv1 = nn.Conv2d(in_channels, out_channels, (3, 3), (stride, stride), (1, 1), bias=False)
+        self.bn1 = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU(True)
+        self.conv2 = nn.Conv2d(out_channels, out_channels, (3, 3), (1, 1), (1, 1), bias=False)
+        self.bn2 = nn.BatchNorm2d(out_channels)
+    def forward(self, x: Tensor) -> Tensor:
+        identity = x
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+        out = self.conv2(out)
+        out = self.bn2(out)
+        if self.downsample is not None:
+            identity = self.downsample(x)
+        out = torch.add(out, identity)
+        out = self.relu(out)
+        return out
+class _Bottleneck(nn.Module):
+    expansion: int = 4
+    def __init__(
+            self,
+            in_channels: int,
+            out_channels: int,
+            stride: int,
+            downsample: Optional[nn.Module] = None,
+            groups: int = 1,
+            base_channels: int = 64,
+    ) -> None:
+        super(_Bottleneck, self).__init__()
+        self.stride = stride
+        self.downsample = downsample
+        self.groups = groups
+        self.base_channels = base_channels
+        channels = int(out_channels * (base_channels / 64.0)) * groups
+        self.conv1 = nn.Conv2d(in_channels, channels, (1, 1), (1, 1), (0, 0), bias=False)
+        self.bn1 = nn.BatchNorm2d(channels)
+        self.conv2 = nn.Conv2d(channels, channels, (3, 3), (stride, stride), (1, 1), groups=groups, bias=False)
+        self.bn2 = nn.BatchNorm2d(channels)
+        self.conv3 = nn.Conv2d(channels, int(out_channels * self.expansion), (1, 1), (1, 1), (0, 0), bias=False)
+        self.bn3 = nn.BatchNorm2d(int(out_channels * self.expansion))
+        self.relu = nn.ReLU(True)
+    def forward(self, x: Tensor) -> Tensor:
+        identity = x
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out = self.relu(out)
+        out = self.conv3(out)
+        out = self.bn3(out)
+        if self.downsample is not None:
+            identity = self.downsample(x)
+        out = torch.add(out, identity)
+        out = self.relu(out)
+        return out
+class ResNet(nn.Module):
+    def __init__(
+            self,
+            arch_cfg: List[int],
+            block: Type[Union[_BasicBlock, _Bottleneck]],
+            groups: int = 1,
+            channels_per_group: int = 64,
+            out_dim: int = 1000,
+    ) -> None:
+        super(ResNet, self).__init__()
+        self.in_channels = 64
+        self.dilation = 1
+        self.groups = groups
+        self.base_channels = channels_per_group
+        self.conv1 = nn.Conv2d(3, self.in_channels, (7, 7), (2, 2), (3, 3), bias=False)
+        self.bn1 = nn.BatchNorm2d(self.in_channels)
+        self.relu = nn.ReLU(True)
+        self.maxpool = nn.MaxPool2d((3, 3), (2, 2), (1, 1))
+        self.layer1 = self._make_layer(arch_cfg[0], block, 64, 1)
+        self.layer2 = self._make_layer(arch_cfg[1], block, 128, 2)
+        self.layer3 = self._make_layer(arch_cfg[2], block, 256, 2)
+        self.layer4 = self._make_layer(arch_cfg[3], block, 512, 2)
+        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+        self.fc = nn.Linear(512 * block.expansion, out_dim)
+        # Initialize neural network weights
+        self._initialize_weights()
+    def _make_layer(
+            self,
+            repeat_times: int,
+            block: Type[Union[_BasicBlock, _Bottleneck]],
+            channels: int,
+            stride: int = 1,
+    ) -> nn.Sequential:
+        downsample = None
+        if stride != 1 or self.in_channels != channels * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(self.in_channels, channels * block.expansion, (1, 1), (stride, stride), (0, 0), bias=False),
+                nn.BatchNorm2d(channels * block.expansion),
+            )
+        layers = [
+            block(
+                self.in_channels,
+                channels,
+                stride,
+                downsample,
+                self.groups,
+                self.base_channels
+            )
+        ]
+        self.in_channels = channels * block.expansion
+        for _ in range(1, repeat_times):
+            layers.append(
+                block(
+                    self.in_channels,
+                    channels,
+                    1,
+                    None,
+                    self.groups,
+                    self.base_channels,
+                )
+            )
+        return nn.Sequential(*layers)
+    def forward(self, x: Tensor) -> Tensor:
+        out = self._forward_impl(x)
+        return out
+    # Support the torch.jit.script function
+    def _forward_impl(self, x: Tensor) -> Tensor:
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+        out = self.maxpool(out)
+        out = self.layer1(out)
+        out = self.layer2(out)
+        out = self.layer3(out)
+        out = self.layer4(out)
+        out = self.avgpool(out)
+        out = torch.flatten(out, 1)
+        out = self.fc(out)
+        return out
+    def _initialize_weights(self) -> None:
+        for module in self.modules():
+            if isinstance(module, nn.Conv2d):
+                nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
+            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(module.weight, 1)
+                nn.init.constant_(module.bias, 0)
+def resnet18(**kwargs: Any) -> ResNet:
+    model = ResNet([2, 2, 2, 2], _BasicBlock, **kwargs)
+    return model
+def resnet34(**kwargs: Any) -> ResNet:
+    model = ResNet([3, 4, 6, 3], _BasicBlock, **kwargs)
+    return model
+def resnet50(**kwargs: Any) -> ResNet:
+    model = ResNet([3, 4, 6, 3], _Bottleneck, **kwargs)
+    return model
+def resnet101(**kwargs: Any) -> ResNet:
+    model = ResNet([3, 4, 23, 3], _Bottleneck, **kwargs)
+    return model
+def resnet152(**kwargs: Any) -> ResNet:
+    model = ResNet([3, 8, 36, 3], _Bottleneck, **kwargs)
+    return model

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "metacontroller-pytorch"
-version = "0.0.28"
+version = "0.0.30"
 description = "Transformer Metacontroller"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }

metacontroller_pytorch-0.0.30/test_babyai_e2e.sh ADDED Viewed

@@ -0,0 +1,14 @@
+#!/bin/bash
+set -e
+# 1. Gather trajectories
+echo "Gathering trajectories..."
+uv run gather_babyai_trajs.py --num_seeds 1000 --num_episodes_per_seed 100 --output_dir end_to_end_trajectories --env_id BabyAI-MiniBossLevel-v0
+# 2. Behavioral cloning
+echo "Training behavioral cloning model..."
+uv run train_behavior_clone_babyai.py --cloning_epochs 10 --discovery_epochs 10 --batch_size 256 --input_dir end_to_end_trajectories --env_id BabyAI-MiniBossLevel-v0 --checkpoint_path end_to_end_model.pt --use_resnet
+# 3. Inference rollouts
+echo "Running inference rollouts..."
+uv run train_babyai.py --weights_path end_to_end_model.pt --env_name BabyAI-MiniBossLevel-v0 --num_episodes 5 --buffer_size 100 --max_timesteps 100

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/tests/test_metacontroller.py RENAMED Viewed

@@ -59,7 +59,7 @@ def test_metacontroller(
             dim_model = 512,
             dim_meta_controller = 256,
             switch_per_code = switch_per_latent_dim,
-            dim_code_bits = 8, # 2**8 = 256 codes
+            dim_code_bits = 8, # 2 ** 8 = 256 codes
         )
     # discovery phase

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/train_behavior_clone_babyai.py RENAMED Viewed

@@ -26,6 +26,7 @@ from memmap_replay_buffer import ReplayBuffer
 from einops import rearrange
 from metacontroller.metacontroller import Transformer
+from metacontroller.metacontroller_with_resnet import TransformerWithResnetEncoder
 import minigrid
 import gymnasium as gym
@@ -33,7 +34,8 @@ import gymnasium as gym
 def train(
     input_dir: str = "babyai-minibosslevel-trajectories",
     env_id: str = "BabyAI-MiniBossLevel-v0",
-    epochs: int = 10,
+    cloning_epochs: int = 10,
+    discovery_epochs: int = 10,
     batch_size: int = 32,
     lr: float = 1e-4,
     dim: int = 512,
@@ -44,7 +46,8 @@ def train(
     wandb_project: str = "metacontroller-babyai-bc",
     checkpoint_path: str = "transformer_bc.pt",
     state_loss_weight: float = 1.,
-    action_loss_weight: float = 1.
+    action_loss_weight: float = 1.,
+    use_resnet: bool = False
 ):
     # accelerator
@@ -54,7 +57,8 @@ def train(
         accelerator.init_trackers(
             wandb_project,
             config = {
-                "epochs": epochs,
+                "cloning_epochs": cloning_epochs,
+                "discovery_epochs": discovery_epochs,
                 "batch_size": batch_size,
                 "lr": lr,
                 "dim": dim,
@@ -78,12 +82,8 @@ def train(
     # state shape and action dimension
     # state: (B, T, H, W, C) or (B, T, D)
     state_shape = replay_buffer.shapes['state']
-    state_dim = int(torch.tensor(state_shape).prod().item())
-    # state shape and action dimension
-    # state: (B, T, H, W, C) or (B, T, D)
-    state_shape = replay_buffer.shapes['state']
-    state_dim = int(torch.tensor(state_shape).prod().item())
+    if use_resnet: state_dim = 256
+    else: state_dim = int(torch.tensor(state_shape).prod().item())
     # deduce num_actions from the environment
     minigrid.register_minigrid_envs()
@@ -94,8 +94,9 @@ def train(
     accelerator.print(f"Detected state_dim: {state_dim}, num_actions: {num_actions} from env: {env_id}")
     # transformer
-    model = Transformer(
+    transformer_class = TransformerWithResnetEncoder if use_resnet else Transformer
+    model = transformer_class(
         dim = dim,
         state_embed_readout = dict(num_continuous = state_dim),
         action_embed_readout = dict(num_discrete = num_actions),
@@ -112,13 +113,13 @@ def train(
     model, optim, dataloader = accelerator.prepare(model, optim, dataloader)
     # training
-    for epoch in range(epochs):
+    for epoch in range(cloning_epochs + discovery_epochs):
         model.train()
         total_state_loss = 0.
         total_action_loss = 0.
         progress_bar = tqdm(dataloader, desc = f"Epoch {epoch}", disable = not accelerator.is_local_main_process)
+        is_discovering = (epoch >= cloning_epochs) # discovery phase is BC with metacontroller tuning
         for batch in progress_bar:
             # batch is a NamedTuple (e.g. MemoryMappedBatch)
@@ -128,15 +129,20 @@ def train(
             actions = batch['action'].long()
             episode_lens = batch.get('_lens')
-            # flatten state: (B, T, 7, 7, 3) -> (B, T, 147)
-            states = rearrange(states, 'b t ... -> b t (...)')
+            # use resnet18 to embed visual observations
+            if use_resnet:
+                states = model.visual_encode(states)
+            else: # flatten state: (B, T, 7, 7, 3) -> (B, T, 147)
+                states = rearrange(states, 'b t ... -> b t (...)')
             with accelerator.accumulate(model):
-                state_loss, action_loss = model(states, actions, episode_lens = episode_lens)
+                state_loss, action_loss = model(states, actions, episode_lens = episode_lens, discovery_phase=is_discovering)
                 loss = state_loss * state_loss_weight + action_loss * action_loss_weight
                 accelerator.backward(loss)
+                grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                 optim.step()
                 optim.zero_grad()
@@ -148,7 +154,8 @@ def train(
             accelerator.log({
                 "state_loss": state_loss.item(),
                 "action_loss": action_loss.item(),
-                "total_loss": loss.item()
+                "total_loss": loss.item(),
+                "grad_norm": grad_norm.item()
             })
             progress_bar.set_postfix(

metacontroller_pytorch-0.0.28/test_babyai_e2e.sh DELETED Viewed

@@ -1,14 +0,0 @@
-#!/bin/bash
-set -e
-# 1. Gather trajectories
-echo "Gathering trajectories..."
-uv run gather_babyai_trajs.py --num_seeds 10 --num_episodes_per_seed 10 --output_dir end_to_end_trajectories --env_id BabyAI-MiniBossLevel-v0
-# 2. Behavioral cloning
-echo "Training behavioral cloning model..."
-uv run train_behavior_clone_babyai.py --epochs 1 --batch_size 16 --input_dir end_to_end_trajectories --env_id BabyAI-MiniBossLevel-v0 --checkpoint_path end_to_end_model.pt
-# 3. Inference rollouts
-echo "Running inference rollouts..."
-uv run train_babyai.py --weights_path end_to_end_model.pt --env_name BabyAI-MiniBossLevel-v0 --num_episodes 5 --buffer_size 100 --max_timesteps 100

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/.github/workflows/python-publish.yml RENAMED Viewed

File without changes

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/.github/workflows/test.yml RENAMED Viewed

File without changes

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/.gitignore RENAMED Viewed

File without changes

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/LICENSE RENAMED Viewed

File without changes

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/fig1.png RENAMED Viewed

File without changes

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/metacontroller/__init__.py RENAMED Viewed

File without changes

{metacontroller_pytorch-0.0.28 → metacontroller_pytorch-0.0.30}/train_babyai.py RENAMED Viewed

File without changes

metacontroller-pytorch 0.0.28__tar.gz → 0.0.30__tar.gz

metacontroller-pytorch 0.0.28__tar.gz → 0.0.30__tar.gz