locoformer 0.0.6__tar.gz → 0.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {locoformer-0.0.6 → locoformer-0.0.11}/.gitignore +2 -0
- {locoformer-0.0.6 → locoformer-0.0.11}/PKG-INFO +3 -2
- {locoformer-0.0.6 → locoformer-0.0.11}/README.md +1 -1
- {locoformer-0.0.6 → locoformer-0.0.11}/locoformer/locoformer.py +270 -10
- {locoformer-0.0.6 → locoformer-0.0.11}/pyproject.toml +2 -1
- locoformer-0.0.11/tests/test_locoformer.py +86 -0
- locoformer-0.0.11/train_gym.py +193 -0
- locoformer-0.0.6/tests/test_locoformer.py +0 -38
- {locoformer-0.0.6 → locoformer-0.0.11}/.github/workflows/python-publish.yml +0 -0
- {locoformer-0.0.6 → locoformer-0.0.11}/.github/workflows/test.yml +0 -0
- {locoformer-0.0.6 → locoformer-0.0.11}/LICENSE +0 -0
- {locoformer-0.0.6 → locoformer-0.0.11}/data/README.md +0 -0
- {locoformer-0.0.6 → locoformer-0.0.11}/data/enwik8.gz +0 -0
- {locoformer-0.0.6 → locoformer-0.0.11}/fig3.png +0 -0
- {locoformer-0.0.6 → locoformer-0.0.11}/locoformer/__init__.py +0 -0
- {locoformer-0.0.6 → locoformer-0.0.11}/train.py +0 -0

{locoformer-0.0.6 → locoformer-0.0.11}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: locoformer
-Version: 0.0.6
+Version: 0.0.11
 Summary: LocoFormer
 Project-URL: Homepage, https://pypi.org/project/locoformer/
 Project-URL: Repository, https://github.com/lucidrains/locoformer
```

```diff
@@ -35,6 +35,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9
 Requires-Dist: assoc-scan
+Requires-Dist: beartype
 Requires-Dist: einops>=0.8.0
 Requires-Dist: einx>=0.3.0
 Requires-Dist: rotary-embedding-torch
```

```diff
@@ -53,7 +54,7 @@ Description-Content-Type: text/markdown
 
 [LocoFormer - Generalist Locomotion via Long-Context Adaptation](https://generalist-locomotion.github.io/)
 
-The gist is they trained a simple Transformer-XL in simulation on robots with many different bodies (cross-embodiment). When transferring to the real-world, they noticed the robot now gains the ability to adapt to insults. The XL memories span across multiple trials, which allowed the robot to learn in-context adaptation.
+The gist is they trained a simple Transformer-XL in simulation on robots with many different bodies (cross-embodiment) with extreme domain randomization. When transferring to the real-world, they noticed the robot now gains the ability to adapt to insults. The XL memories span across multiple trials, which allowed the robot to learn in-context adaptation.
 
 ## Sponsors
 
```

{locoformer-0.0.6 → locoformer-0.0.11}/README.md

```diff
@@ -4,7 +4,7 @@
 
 [LocoFormer - Generalist Locomotion via Long-Context Adaptation](https://generalist-locomotion.github.io/)
 
-The gist is they trained a simple Transformer-XL in simulation on robots with many different bodies (cross-embodiment). When transferring to the real-world, they noticed the robot now gains the ability to adapt to insults. The XL memories span across multiple trials, which allowed the robot to learn in-context adaptation.
+The gist is they trained a simple Transformer-XL in simulation on robots with many different bodies (cross-embodiment) with extreme domain randomization. When transferring to the real-world, they noticed the robot now gains the ability to adapt to insults. The XL memories span across multiple trials, which allowed the robot to learn in-context adaptation.
 
 ## Sponsors
 
```

{locoformer-0.0.6 → locoformer-0.0.11}/locoformer/locoformer.py

```diff
@@ -1,11 +1,22 @@
 from __future__ import annotations
 from functools import partial
 
+from pathlib import Path
+from contextlib import contextmanager
+
+import numpy as np
+from numpy import ndarray
+from numpy.lib.format import open_memmap
+
+from beartype import beartype
+from beartype.door import is_bearable
+
 import torch
-from torch import nn, cat, stack, arange, is_tensor
+from torch import nn, cat, stack, arange, Tensor, tensor, is_tensor, from_numpy
 import torch.nn.functional as F
 from torch.nn import Module, ModuleList, Linear, RMSNorm, Identity, Sequential
 from torch.utils._pytree import tree_map
+from torch.utils.data import Dataset, DataLoader
 
 import einx
 from einops import rearrange, einsum
```

```diff
@@ -37,14 +48,18 @@ def tree_map_tensor(x, fn):
 def detach_all(x):
     return tree_map_tensor(x, lambda t: t.detach())
 
-def
-
-
-
-
-
+def pad_at_dim(
+    t,
+    pad: tuple[int, int],
+    dim = -1,
+    value = 0.
+):
+    if pad == (0, 0):
+        return t
 
-
+    dims_from_right = (- dim - 1) if dim < 0 else (t.ndim - dim - 1)
+    zeros = ((0, 0) * dims_from_right)
+    return F.pad(t, (*zeros, *pad), value = value)
 
 # generalized advantage estimate
 
```
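
The new `pad_at_dim` right-pads an arbitrary dimension by translating `dim` into `F.pad`'s right-to-left pair convention; `collate_var_time` in the next hunk leans on it to pad variable-length episodes along time. A quick semantic check (a standalone copy of the helper, for illustration only):

```python
import torch
import torch.nn.functional as F

# standalone copy of pad_at_dim from the hunk above, for illustration
def pad_at_dim(t, pad, dim = -1, value = 0.):
    if pad == (0, 0):
        return t

    # F.pad takes (left, right) pairs starting from the LAST dim, so emit
    # (0, 0) pairs for every dim to the right of the one being padded
    dims_from_right = (- dim - 1) if dim < 0 else (t.ndim - dim - 1)
    zeros = ((0, 0) * dims_from_right)
    return F.pad(t, (*zeros, *pad), value = value)

t = torch.randn(3, 8)                    # (time, feature)
padded = pad_at_dim(t, (0, 5), dim = 0)  # right-pad 5 steps on the time dim
assert padded.shape == (8, 8)            # only dim 0 grew; features untouched
```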

```diff
@@ -146,6 +161,208 @@ def create_sliding_mask(
     create_kwargs = dict(device = device) if exists(device) else dict()
     return create_block_mask(sliding_mask, B = None, H = None, Q_LEN = seq_len, KV_LEN = kv_seq_len, _compile = True, **create_kwargs)
 
+# data
+
+def collate_var_time(data):
+
+    datum = first(data)
+    keys = datum.keys()
+
+    all_tensors = zip(*[datum.values() for datum in data])
+
+    collated_values = []
+
+    for key, tensors in zip(keys, all_tensors):
+
+        # the episode lens have zero dimension - think of a cleaner way to handle this later
+
+        if key != '_lens':
+
+            times = [t.shape[0] for t in tensors]
+            max_time = max(times)
+            tensors = [pad_at_dim(t, (0, max_time - t.shape[0]), dim = 0) for t in tensors]
+
+        collated_values.append(stack(tensors))
+
+    return dict(zip(keys, collated_values))
+
+class ReplayDataset(Dataset):
+    def __init__(
+        self,
+        folder: str | Path,
+        fields: tuple[str, ...] | None = None
+    ):
+        if isinstance(folder, str):
+            folder = Path(folder)
+
+        episode_lens = folder / 'episode_lens.npy'
+        self.episode_lens = open_memmap(str(episode_lens), mode = 'r')
+
+        # get indices of non-zero lengthed episodes
+
+        nonzero_episodes = self.episode_lens > 0
+        self.indices = np.arange(self.episode_lens.shape[-1])[nonzero_episodes]
+
+        # get all data files
+
+        filepaths = [*folder.glob('*.data.npy')]
+        assert len(filepaths) > 0
+
+        fieldname_to_filepath = {path.name.split('.')[0]: path for path in filepaths}
+
+        fieldnames_from_files = set(fieldname_to_filepath.keys())
+
+        fields = default(fields, fieldnames_from_files)
+
+        self.memmaps = dict()
+
+        for field in fields:
+            assert field in fieldnames_from_files, f'invalid field {field} - must be one of {fieldnames_from_files}'
+
+            path = fieldname_to_filepath[field]
+
+            self.memmaps[field] = open_memmap(str(path), mode = 'r')
+
+    def __len__(self):
+        return len(self.indices)
+
+    def __getitem__(self, idx):
+        episode_index = self.indices[idx]
+
+        episode_len = self.episode_lens[episode_index]
+
+        data = {field: torch.from_numpy(memmap[episode_index, :episode_len]) for field, memmap in self.memmaps.items()}
+
+        data['_lens'] = tensor(episode_len)
+
+        return data
+
+class ReplayBuffer:
+
+    @beartype
+    def __init__(
+        self,
+        folder: str | Path,
+        max_episodes: int,
+        max_timesteps: int,
+        fields: dict[
+            str,
+            str | tuple[str, int | tuple[int, ...]]
+        ]
+    ):
+
+        # folder for data
+
+        if not isinstance(folder, Path):
+            folder = Path(folder)
+            folder.mkdir(exist_ok = True)
+
+        self.folder = folder
+        assert folder.is_dir()
+
+        # keeping track of episode length
+
+        episode_lens = folder / 'episode_lens.npy'
+
+        self.episode_index = 0
+        self.timestep_index = 0
+
+        self.max_episodes = max_episodes
+        self.max_timesteps = max_timesteps
+
+        self.episode_lens = open_memmap(str(episode_lens), mode = 'w+', dtype = np.int32, shape = (max_episodes,))
+
+        # create the memmap for individual data tracks
+
+        self.shapes = dict()
+        self.dtypes = dict()
+        self.memmaps = dict()
+        self.fieldnames = set(fields.keys())
+
+        for field_name, field_info in fields.items():
+
+            # some flexibility
+
+            field_info = (field_info, ()) if isinstance(field_info, str) else field_info
+
+            dtype_str, shape = field_info
+            assert dtype_str in {'int', 'float', 'bool'}
+
+            dtype = dict(int = np.int32, float = np.float32, bool = np.bool_)[dtype_str]
+
+            # memmap file
+
+            filepath = folder / f'{field_name}.data.npy'
+            memmap = open_memmap(str(filepath), mode = 'w+', dtype = dtype, shape = (max_episodes, max_timesteps, *shape))
+
+            self.memmaps[field_name] = memmap
+            self.shapes[field_name] = shape
+            self.dtypes[field_name] = dtype
+
+    def advance_episode(self):
+        self.episode_index = (self.episode_index + 1) % self.max_episodes
+        self.timestep_index = 0
+
+    def flush(self):
+        self.episode_lens[self.episode_index] = self.timestep_index
+
+        for memmap in self.memmaps.values():
+            memmap.flush()
+
+        self.episode_lens.flush()
+
+    @contextmanager
+    def one_episode(self):
+
+        yield
+
+        self.flush()
+        self.advance_episode()
+
+    @beartype
+    def store_datapoint(
+        self,
+        episode_index: int,
+        timestep_index: int,
+        name: str,
+        datapoint: Tensor | ndarray
+    ):
+        assert 0 <= episode_index < self.max_episodes
+        assert 0 <= timestep_index < self.max_timesteps
+
+        if is_tensor(datapoint):
+            datapoint = datapoint.detach().cpu().numpy()
+
+        assert name in self.fieldnames, f'invalid field name {name} - must be one of {self.fieldnames}'
+
+        assert datapoint.shape == self.shapes[name], f'invalid shape {datapoint.shape} - shape must be {self.shapes[name]}'
+
+        self.memmaps[name][self.episode_index, self.timestep_index] = datapoint
+
+    def store(
+        self,
+        **data
+    ):
+        assert is_bearable(data, dict[str, Tensor | ndarray])
+
+        assert not self.timestep_index >= self.max_timesteps, 'you exceeded the `max_timesteps` set on the replay buffer'
+
+        for name, datapoint in data.items():
+
+            self.store_datapoint(self.episode_index, self.timestep_index, name, datapoint)
+
+        self.timestep_index += 1
+
+    def dataset(self) -> Dataset:
+        self.flush()
+
+        return ReplayDataset(self.folder)
+
+    def dataloader(self, **kwargs) -> DataLoader:
+        self.flush()
+
+        return DataLoader(self.dataset(), collate_fn = collate_var_time, **kwargs)
+
 # transformer-xl with ppo
 
 class Attention(Module):
```
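
A hedged sketch of the on-disk layout `ReplayBuffer` commits to, as implied by the hunk above: one `episode_lens.npy` index plus one pre-allocated `<field>.data.npy` memmap of shape `(max_episodes, max_timesteps, *shape)` per field. Paths and sizes below are illustrative, not taken from the package:

```python
from pathlib import Path

import numpy as np
from numpy.lib.format import open_memmap

Path('replay').mkdir(exist_ok = True)

max_episodes, max_timesteps, dim_state = 4, 10, 8

# what ReplayBuffer would allocate for a field declared as state = ('float', (8,))
states = open_memmap('replay/state.data.npy', mode = 'w+', dtype = np.float32, shape = (max_episodes, max_timesteps, dim_state))
lens = open_memmap('replay/episode_lens.npy', mode = 'w+', dtype = np.int32, shape = (max_episodes,))

states[0, :3] = np.random.randn(3, dim_state)  # one 3-step episode in slot 0
lens[0] = 3

states.flush()
lens.flush()

# ReplayDataset later reads back only the first episode_lens[i] steps,
# and skips any episode whose recorded length is zero
readback = open_memmap('replay/state.data.npy', mode = 'r')
assert readback[0, :lens[0]].shape == (3, dim_state)
```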

```diff
@@ -386,11 +603,46 @@ class Locoformer(Module):
     def device(self):
         return next(self.parameters()).device
 
+    def actor_parameters(self):
+        return self.unembedder.parameters()
+
+    def critic_parameters(self):
+        if not exists(self.value_network):
+            return []
+
+        return self.value_network.parameters()
+
+    def wrap_env_functions(self, env):
+
+        def wrapped_reset(*args, **kwargs):
+            state, _ = env.reset(*args, **kwargs)
+
+            if isinstance(state, ndarray):
+                state = from_numpy(state)
+
+            return state, _
+
+        def wrapped_step(action, *args, **kwargs):
+            out = env.step(action.item(), *args, **kwargs)
+
+            def transform_output(el):
+                if isinstance(el, ndarray):
+                    return from_numpy(el)
+                elif isinstance(el, (int, bool, float)):
+                    return tensor(el)
+                else:
+                    return el
+
+            return tree_map(transform_output, out)
+
+        return wrapped_reset, wrapped_step
+
     def get_stateful_forward(
         self,
         initial_states: Tensor | None = None,
         inference_mode = False,
         has_batch_dim = False,
+        has_time_dim = False,
         **kwargs
     ):
         window_size = self.window_size
```
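
A hedged usage sketch for the new `wrap_env_functions`, mirroring what `train_gym.py` further down does; the env and the network dims here are illustrative, not from the diff:

```python
import gymnasium as gym

import torch
from torch import tensor

from locoformer.locoformer import Locoformer
from x_mlps_pytorch import MLP

env = gym.make('CartPole-v1')  # illustrative: 4 observation dims, 2 actions

model = Locoformer(
    embedder = MLP(4, 32, bias = False),
    unembedder = MLP(32, 2, bias = False),
    value_network = MLP(32, 1, bias = False),
    transformer = dict(dim = 32, depth = 1, window_size = 16)
)

env_reset, env_step = model.wrap_env_functions(env)

state, _ = env_reset()          # numpy observation arrives as a torch tensor
assert torch.is_tensor(state)

out = env_step(tensor(0))       # .item() is called on the action before env.step
next_state, reward, terminated, truncated, _ = out

assert torch.is_tensor(reward)  # floats / bools come back wrapped as tensors
```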

```diff
@@ -400,11 +652,14 @@ class Locoformer(Module):
         def stateful_forward(state: Tensor, **override_kwargs):
             nonlocal cache
 
-            # handle no batch, for easier time rolling out against envs
+            # handle no batch or time, for easier time rolling out against envs
 
             if not has_batch_dim:
                 state = rearrange(state, '... -> 1 ...')
 
+            if not has_time_dim:
+                state = rearrange(state, '... d -> ... 1 d')
+
             # forwards
 
             out, cache = self.forward(state, cache = cache, **{**kwargs, **override_kwargs})
```

```diff
@@ -416,7 +671,10 @@ class Locoformer(Module):
             if self.fixed_window_size or divisible_by(cache_len, window_size * 2):
                 cache = cache[..., -window_size:, :]
 
-            # maybe remove batch
+            # maybe remove batch or time
+
+            if not has_time_dim:
+                out = tree_map_tensor(out, lambda t: rearrange(t, '... 1 d -> ... d'))
 
             if not has_batch_dim:
                 out = tree_map_tensor(out, lambda t: rearrange(t, '1 ... -> ...'))
```
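
Together with the input-side change in the previous hunk, `has_time_dim = False` lets a rollout loop feed one raw observation per call and get back an un-batched, un-timed output. Continuing the sketch above:

```python
# one env step at a time: neither batch nor time dims on the way in or out
forward = model.get_stateful_forward(has_batch_dim = False, has_time_dim = False, inference_mode = True)

obs = torch.randn(4)         # a single observation (illustrative dims, as before)
logits = forward(obs)        # singleton batch and time axes added, then stripped
assert logits.shape == (2,)  # per-action logits for this one timestep
```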

```diff
@@ -450,6 +708,8 @@ class Locoformer(Module):
         return_values = False
     ):
 
+        state = state.to(self.device)
+
         tokens = self.embedder(state)
 
         embed, kv_cache = self.transformer(tokens, cache = cache, return_kv_cache = True)
```

{locoformer-0.0.6 → locoformer-0.0.11}/pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [project]
 name = "locoformer"
-version = "0.0.6"
+version = "0.0.11"
 description = "LocoFormer"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
```

```diff
@@ -27,6 +27,7 @@ classifiers=[
 
 dependencies = [
     "assoc-scan",
+    "beartype",
     "einx>=0.3.0",
     "einops>=0.8.0",
     "rotary-embedding-torch",
```

locoformer-0.0.11/tests/test_locoformer.py (new file)

```diff
@@ -0,0 +1,86 @@
+import pytest
+param = pytest.mark.parametrize
+
+import torch
+from x_mlps_pytorch import MLP
+
+from einops import rearrange
+
+def test_locoformer():
+    from locoformer.locoformer import Locoformer
+    from torch import nn
+
+    model = Locoformer(
+        embedder = nn.Embedding(256, 128),
+        unembedder = nn.Linear(128, 256, bias = False),
+        value_network = MLP(128, 32, 1),
+        transformer = dict(
+            dim = 128,
+            depth = 1,
+            window_size = 512
+        )
+    )
+
+    seq = torch.randint(0, 256, (3, 512))
+
+    (logits, values), cache = model(seq, return_values = True)
+    (logits, values), cache = model(seq, return_values = True, cache = cache)
+    (logits, values), cache = model(seq, return_values = True, cache = cache)
+
+    assert logits.shape == (3, 512, 256)
+
+    stateful_forward = model.get_stateful_forward(has_batch_dim = True, has_time_dim = True, return_values = True, inference_mode = True)
+
+    for state in seq.unbind(dim = -1):
+        state = rearrange(state, 'b -> b 1')
+
+        logits, values = stateful_forward(state)
+        assert logits.shape == (3, 1, 256)
+
+def test_replay():
+    from locoformer.locoformer import ReplayBuffer
+
+    replay_buffer = ReplayBuffer(
+        './replay_data',
+        max_episodes = 10_000,
+        max_timesteps = 501,
+        fields = dict(
+            state = ('float', (8,)),
+            action = 'int',
+            action_log_prob = 'float',
+            reward = 'float',
+            value = 'float',
+            done = 'bool'
+        )
+    )
+
+    lens = [3, 5, 4]
+
+    for episode_len in lens:
+        with replay_buffer.one_episode():
+            for _ in range(episode_len):
+                state = torch.randn((8,))
+                action = torch.randint(0, 4, ())
+                log_prob = torch.randn(())
+                reward = torch.randn(())
+                value = torch.randn(())
+                done = torch.randint(0, 2, ()).bool()
+
+                replay_buffer.store(
+                    state = state,
+                    action = action,
+                    action_log_prob = log_prob,
+                    reward = reward,
+                    value = value,
+                    done = done
+                )
+
+    dataset = replay_buffer.dataset()
+
+    assert len(dataset) == 3
+
+    assert torch.is_tensor(dataset[0]['state'])
+
+    dataloader = replay_buffer.dataloader(batch_size = 3)
+
+    assert next(iter(dataloader))['state'].shape[0] == 3
```
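
Continuing `test_replay` above: with episode lengths `[3, 5, 4]`, `collate_var_time` pads every episode up to the longest and keeps the true lengths under `_lens`. A sketch of the collated batch (the episode ordering assumes the default, unshuffled `DataLoader`):

```python
batch = next(iter(replay_buffer.dataloader(batch_size = 3)))

assert batch['state'].shape == (3, 5, 8)     # 3 episodes, padded to 5 steps, 8-dim states
assert batch['_lens'].tolist() == [3, 5, 4]  # real lengths, usable to mask out padding
```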

locoformer-0.0.11/train_gym.py (new file)

```diff
@@ -0,0 +1,193 @@
+# /// script
+# dependencies = [
+# "accelerate",
+# "fire",
+# "gymnasium[box2d]>=1.0.0",
+# "locoformer",
+# "moviepy",
+# "tqdm"
+# ]
+# ///
+
+from fire import Fire
+from shutil import rmtree
+from tqdm import tqdm
+from collections import deque
+
+from accelerate import Accelerator
+
+import gymnasium as gym
+
+import torch
+from torch import from_numpy, randint, tensor, stack
+import torch.nn.functional as F
+from torch.utils.data import TensorDataset, DataLoader
+from torch.optim import Adam
+
+from einops import rearrange
+
+from locoformer.locoformer import Locoformer, ReplayBuffer
+from x_mlps_pytorch import MLP
+
+# helper functions
+
+def exists(v):
+    return v is not None
+
+def divisible_by(num, den):
+    return (num % den) == 0
+
+def log(t, eps = 1e-20):
+    return t.clamp(min = eps).log()
+
+def gumbel_noise(t):
+    return -log(-log(torch.rand_like(t)))
+
+def gumbel_sample(logits, temperature = 1., eps = 1e-6):
+    noise = gumbel_noise(logits)
+    return ((logits / max(temperature, eps)) + noise).argmax(dim = -1)
+
+# main function
+
+def main(
+    env_name = 'LunarLander-v3',
+    num_episodes = 50_000,
+    max_timesteps = 500,
+    num_timestep_before_learn = 5000,
+    clear_video = True,
+    video_folder = 'recordings',
+    record_every_episode = 250,
+    discount_factor = 0.99,
+    learning_rate = 1e-4,
+    batch_size = 16,
+    epochs = 2
+):
+
+    # accelerate
+
+    accelerate = Accelerator()
+    device = accelerate.device
+
+    # environment
+
+    env = gym.make(env_name, render_mode = 'rgb_array')
+
+    if clear_video:
+        rmtree(video_folder, ignore_errors = True)
+
+    env = gym.wrappers.RecordVideo(
+        env = env,
+        video_folder = video_folder,
+        name_prefix = 'lunar-video',
+        episode_trigger = lambda eps: divisible_by(eps, record_every_episode),
+        disable_logger = True
+    )
+
+    dim_state = env.observation_space.shape[0]
+    num_actions = env.action_space.n
+
+    # memory
+
+    replay = ReplayBuffer(
+        'replay',
+        num_episodes,
+        max_timesteps,
+        fields = dict(
+            state = ('float', (dim_state,)),
+            action = 'int',
+            action_log_prob = 'float',
+            reward = 'float',
+            value = 'float',
+            done = 'bool'
+        )
+    )
+
+    # networks
+
+    locoformer = Locoformer(
+        embedder = MLP(dim_state, 64, bias = False),
+        unembedder = MLP(64, num_actions, bias = False),
+        value_network = MLP(64, 1, bias = False),
+        transformer = dict(
+            dim = 64,
+            dim_head = 32,
+            heads = 4,
+            depth = 4,
+            window_size = 16
+        )
+    ).to(device)
+
+    optim_actor = Adam([*locoformer.transformer.parameters(), *locoformer.actor_parameters()], lr = learning_rate)
+    optim_critic = Adam([*locoformer.transformer.parameters(), *locoformer.critic_parameters()], lr = learning_rate)
+
+    timesteps_learn = 0
+
+    # able to wrap the env for all values to torch tensors and back
+    # all environments should follow usual MDP interface, domain randomization should be given at instantiation
+
+    env_reset, env_step = locoformer.wrap_env_functions(env)
+
+    # loop
+
+    for _ in tqdm(range(num_episodes)):
+        state, *_ = env_reset()
+
+        timestep = 0
+
+        stateful_forward = locoformer.get_stateful_forward(has_batch_dim = False, has_time_dim = False, inference_mode = True)
+
+        with replay.one_episode():
+            while True:
+
+                # predict next action
+
+                action_logits, value = stateful_forward(state, return_values = True)
+
+                action = gumbel_sample(action_logits)
+
+                # pass to environment
+
+                next_state, reward, truncated, terminated, *_ = env_step(action)
+
+                # append to memory
+
+                done = truncated or terminated
+
+                # get log prob of action
+
+                action_log_prob = action_logits.gather(-1, rearrange(action, '-> 1'))
+                action_log_prob = rearrange(action_log_prob, '1 ->')
+
+                replay.store(
+                    state = state,
+                    action = action,
+                    action_log_prob = action_log_prob,
+                    reward = reward,
+                    value = value,
+                    done = done
+                )
+
+                # increment counters
+
+                timestep += 1
+                timesteps_learn += 1
+
+                # learn if hit the number of learn timesteps
+
+                if timesteps_learn >= num_timestep_before_learn:
+                    # todo - carry out learning
+
+                    timesteps_learn = 0
+                    memories.clear()
+
+                # break if done or exceed max timestep
+
+                if done or timestep >= max_timesteps:
+                    break
+
+                state = next_state
+
+# main
+
+if __name__ == '__main__':
+    Fire(main)
```
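
Note that the learning branch in the script above is still a stub: the `# todo - carry out learning` comment is followed by `memories.clear()`, but no `memories` is defined anywhere in this file, so the branch would raise a `NameError` once `num_timestep_before_learn` is reached. A hedged sketch of how that stub could consume the new replay buffer; the helper name and flow are assumptions, not the author's implementation:

```python
import torch

# hypothetical helper: iterate padded batches from the replay buffer together
# with a mask that marks real (non-padded) timesteps
def iterate_replay(replay, batch_size = 16):
    for batch in replay.dataloader(batch_size = batch_size, shuffle = True):
        lens = batch['_lens']                          # (b,) true episode lengths
        max_time = batch['state'].shape[1]             # padded time length
        mask = torch.arange(max_time) < lens[:, None]  # (b, t) - True on real steps
        yield batch, mask                              # PPO losses would be masked with `mask`
```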

locoformer-0.0.6/tests/test_locoformer.py (deleted)

```diff
@@ -1,38 +0,0 @@
-import pytest
-param = pytest.mark.parametrize
-
-import torch
-from x_mlps_pytorch import MLP
-
-from einops import rearrange
-
-def test_locoformer():
-    from locoformer.locoformer import Locoformer
-    from torch import nn
-
-    model = Locoformer(
-        embedder = nn.Embedding(256, 128),
-        unembedder = nn.Linear(128, 256, bias = False),
-        value_network = MLP(128, 32, 1),
-        transformer = dict(
-            dim = 128,
-            depth = 1,
-            window_size = 256
-        )
-    )
-
-    seq = torch.randint(0, 256, (3, 512))
-
-    (logits, values), cache = model(seq, return_values = True)
-    (logits, values), cache = model(seq, return_values = True, cache = cache)
-    (logits, values), cache = model(seq, return_values = True, cache = cache)
-
-    assert logits.shape == (3, 512, 256)
-
-    stateful_forward = model.get_stateful_forward(256, has_batch_dim = True, return_values = True, inference_mode = True)
-
-    for state in seq.unbind(dim = -1):
-        state = rearrange(state, 'b -> b 1')
-
-        logits, values = stateful_forward(state)
-        assert logits.shape == (3, 1, 256)
```