locoformer 0.0.29__py3-none-any.whl → 0.0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- locoformer/locoformer.py +325 -38
- {locoformer-0.0.29.dist-info → locoformer-0.0.43.dist-info}/METADATA +2 -1
- locoformer-0.0.43.dist-info/RECORD +6 -0
- {locoformer-0.0.29.dist-info → locoformer-0.0.43.dist-info}/WHEEL +1 -1
- locoformer-0.0.29.dist-info/RECORD +0 -6
- {locoformer-0.0.29.dist-info → locoformer-0.0.43.dist-info}/licenses/LICENSE +0 -0
locoformer/locoformer.py
CHANGED
@@ -1,11 +1,14 @@
 from __future__ import annotations
 from typing import Callable
-from functools import partial
+from types import SimpleNamespace
+from functools import partial, wraps
 
 from pathlib import Path
 from contextlib import contextmanager
 from collections import namedtuple
 
+from inspect import signature
+
 import numpy as np
 from numpy import ndarray
 from numpy.lib.format import open_memmap
@@ -17,7 +20,7 @@ import torch
 from torch import nn, cat, stack, arange, Tensor, tensor, is_tensor, from_numpy
 import torch.nn.functional as F
 from torch.nn import Module, ModuleList, Linear, RMSNorm, Identity, Sequential
-from torch.utils._pytree import tree_map
+from torch.utils._pytree import tree_map, tree_flatten, tree_unflatten
 from torch.utils.data import Dataset, DataLoader
 from torch.optim import Optimizer
 
@@ -31,6 +34,10 @@ from hl_gauss_pytorch import HLGaussLoss
 
 from assoc_scan import AssocScan
 
+from x_mlps_pytorch import MLP
+
+from x_evolution import EvoStrategy
+
 # constants
 
 LinearNoBias = partial(Linear, bias = False)
@@ -48,9 +55,40 @@ def default(v, d):
 def first(arr):
     return arr[0]
 
+def xnor(x, y):
+    return not (x ^ y)
+
 def divisible_by(num, den):
     return (num % den) == 0
 
+def get_param_names(fn):
+    parameters = signature(fn).parameters
+    return list(parameters.keys())
+
+def check_has_param_attr(
+    param_name,
+    param_attr,
+    default_value = None
+):
+    def decorator(fn):
+        sig = signature(fn)
+
+        @wraps(fn)
+        def inner(*args, **kwargs):
+
+            bound_args = sig.bind(*args, **kwargs).arguments
+
+            if not (
+                param_name in bound_args and
+                hasattr(bound_args[param_name], param_attr)
+            ):
+                return default_value
+
+            return fn(*args, **kwargs)
+
+        return inner
+    return decorator
+
 # tensor helpers
 
 def log(t, eps = 1e-20):
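
A quick usage sketch of the new `check_has_param_attr` guard (assuming the helpers above are importable from `locoformer.locoformer`; the function `track_v_xy` and its values are made up for illustration): the decorated function only runs when the named argument carries the required attribute, otherwise the call short-circuits to `default_value`.

    import torch
    from types import SimpleNamespace

    from locoformer.locoformer import check_has_param_attr  # assumed import path

    @check_has_param_attr('state', 'v_xy', default_value = 0.)
    def track_v_xy(state, command):
        return (state.v_xy - command.v_xy).norm(dim = -1)

    command = SimpleNamespace(v_xy = torch.ones(2))

    # state carries `v_xy`, so the wrapped function runs
    print(track_v_xy(state = SimpleNamespace(v_xy = torch.zeros(2)), command = command))  # tensor(1.4142)

    # state lacks `v_xy`, so the guard returns the default instead of raising
    print(track_v_xy(state = SimpleNamespace(), command = command))  # 0.0
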
@@ -78,10 +116,87 @@ def pad_at_dim(
 def normalize(t, eps = 1e-5):
     return (t - t.mean()) / t.std().clamp_min(eps)
 
+def tensor_to_dict(
+    t: Tensor,
+    config: tuple[tuple[str, int] | str],
+    dim = -1,
+    return_dottable = True
+):
+    config = tuple((c, 1) if isinstance(c, str) else c for c in config)
+
+    names, sizes = zip(*config)
+    assert sum(sizes) == t.shape[dim]
+
+    t = t.split(sizes, dim = dim)
+    tensor_dict = dict(zip(names, t))
+
+    if not return_dottable:
+        return tensor_dict
+
+    return SimpleNamespace(**tensor_dict)
+
 def calc_entropy(logits):
     prob = logits.softmax(dim = -1)
     return -(prob * log(prob)).sum(dim = -1)
 
+# reward functions - A.2
+
+@check_has_param_attr('state', 'v_xy')
+@check_has_param_attr('command', 'v_xy')
+def reward_linear_velocity_command_tracking(
+    state,
+    command,
+    s1 = 1.
+):
+    error = (state.v_xy - command.v_xy).norm(dim = -1).pow(2)
+    return torch.exp(-error / s1)
+
+@check_has_param_attr('state', 'w_z')
+@check_has_param_attr('command', 'w_z')
+def reward_angular_velocity_command_tracking(
+    state,
+    command,
+    s2 = 1.
+):
+    error = (state.w_z - command.w_z).norm(dim = -1).pow(2)
+    return torch.exp(-error / s2)
+
+@check_has_param_attr('state', 'v_z')
+def reward_base_linear_velocity_penalty(
+    state
+):
+    return -state.v_z.norm(dim = -1).pow(2)
+
+@check_has_param_attr('state', 'w_xy')
+def reward_base_angular_velocity_penalty(
+    state
+):
+    return -state.w_xy.norm(dim = -1).pow(2)
+
+@check_has_param_attr('state', 'x_z')
+def reward_base_height_penalty(
+    state,
+    x_z_nominal = 0.27
+):
+    return -(state.x_z - x_z_nominal).norm(dim = -1).pow(2)
+
+@check_has_param_attr('state', 'joint_q')
+def reward_joint_acceleration_penalty(
+    state
+):
+    return -state.joint_q.norm(dim = -1).pow(2)
+
+@check_has_param_attr('state', 'tau')
+def reward_torque_penalty(
+    state
+):
+    return -state.tau.norm(dim = -1).pow(2)
+
+def reward_alive(
+    state
+):
+    return 1.
+
 # generalized advantage estimate
 
 @torch.no_grad()
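
A minimal sketch of how `tensor_to_dict` slices a flat observation into the named, dot-accessible fields these reward functions expect (same import-path assumption as above; the 6-dim field layout below is hypothetical):

    import torch
    from locoformer.locoformer import tensor_to_dict, reward_linear_velocity_command_tracking

    # hypothetical proprioceptive slice: planar velocity, vertical velocity, angular rates
    state = tensor_to_dict(torch.randn(6), (('v_xy', 2), 'v_z', ('w_xy', 2), 'w_z'))
    command = tensor_to_dict(torch.tensor([0.5, 0.]), (('v_xy', 2),))

    print(state.v_xy)  # tensor of shape (2,)
    print(reward_linear_velocity_command_tracking(state = state, command = command))
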
@@ -266,7 +381,8 @@ class RemappedReplayDataset(Dataset):
         self,
         dataset: ReplayDataset,
         episode_mapping: Tensor | list[list[int]],
-        shuffle_episodes = False
+        shuffle_episodes = False,
+        num_trials_select = None
     ):
         assert len(dataset) > 0
         self.dataset = dataset
@@ -278,6 +394,10 @@ class RemappedReplayDataset(Dataset):
         self.episode_mapping = episode_mapping
         self.shuffle_episodes = shuffle_episodes
 
+        assert not (exists(num_trials_select) and num_trials_select >= 1)
+        self.sub_select_trials = exists(num_trials_select)
+        self.num_trials_select = num_trials_select
+
     def __len__(self):
         return len(self.episode_mapping)
 
@@ -290,10 +410,22 @@ class RemappedReplayDataset(Dataset):
 
         assert not is_empty(episode_indices)
 
-        if
+        # shuffle the episode indices if either shuffle episodes is turned on, or `num_trial_select` passed in (for sub selecting episodes from a set)
+
+        if (
+            episode_indices.numel() > 1 and
+            (self.shuffle_episodes or self.sub_select_trials)
+        ):
             num_episodes = len(episode_indices)
             episode_indices = episode_indices[torch.randperm(num_episodes)]
 
+        # crop out the episodes
+
+        if self.sub_select_trials:
+            episode_indices = episode_indices[:self.num_trials_select]
+
+        # now select out the episode data and merge along time
+
         episode_data = [self.dataset[i] for i in episode_indices.tolist()]
 
         episode_lens = stack([data.pop('_lens') for data in episode_data])
@@ -368,6 +500,10 @@ class ReplayBuffer:
         # memmap file
 
         filepath = folder / f'{field_name}.data.npy'
+
+        if isinstance(shape, int):
+            shape = (shape,)
+
         memmap = open_memmap(str(filepath), mode = 'w+', dtype = dtype, shape = (max_episodes, max_timesteps, *shape))
 
         self.memmaps[field_name] = memmap
@@ -463,6 +599,70 @@ class ReplayBuffer:
 
         return DataLoader(self.dataset(episode_mapping), batch_size = batch_size, collate_fn = collate_var_time, **kwargs)
 
+# normalization + conditioning (needed for the commands to the robot)
+
+class MaybeAdaRMSNormWrapper(Module):
+    def __init__(
+        self,
+        fn: Module,
+        dim,
+        dim_cond = None
+    ):
+        super().__init__()
+        condition = exists(dim_cond)
+
+        self.fn = fn
+        self.norm = nn.RMSNorm(dim, elementwise_affine = not condition)
+
+        self.accept_condition = condition
+
+        if condition:
+            self.to_gamma = LinearNoBias(dim_cond, dim)
+            self.to_ada_norm_zero = nn.Linear(dim_cond, dim)
+
+            nn.init.zeros_(self.to_gamma.weight)
+            nn.init.zeros_(self.to_ada_norm_zero.weight)
+            nn.init.constant_(self.to_ada_norm_zero.bias, -5.)
+
+    def forward(
+        self,
+        x,
+        cond = None,
+        **kwargs
+    ):
+
+        need_cond = self.accept_condition
+
+        assert xnor(exists(cond), need_cond)
+
+        prenormed = self.norm(x)
+
+        if need_cond:
+            if cond.ndim == 2:
+                cond = rearrange(cond, 'b d -> b 1 d')
+
+            scale_in = self.to_gamma(cond)
+            prenormed = prenormed * (scale_in + 1.)
+
+        all_fn_out = self.fn(prenormed, **kwargs)
+
+        if not need_cond:
+            return all_fn_out
+
+        # function may return multiple args
+
+        (out, *rest), tree_spec = tree_flatten(all_fn_out)
+
+        if need_cond:
+            scale_out = self.to_ada_norm_zero(cond).sigmoid()
+            out = out * scale_out
+
+        # restore
+
+        all_fn_out = tree_unflatten((out, *rest), tree_spec)
+
+        return all_fn_out
+
 # transformer-xl with ppo
 
 class Attention(Module):
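
A small sketch of the wrapper in both modes (hypothetical dimensions; only the call pattern mirrors the diff): unconditioned it acts as a plain pre-RMSNorm around the wrapped module, conditioned it adds a learned input scale and a near-zero-initialized sigmoid gate on the output.

    import torch
    from torch import nn

    from locoformer.locoformer import MaybeAdaRMSNormWrapper  # assumed import path

    dim, dim_cond = 16, 8  # hypothetical sizes

    # unconditioned: behaves as pre-RMSNorm -> fn
    block = MaybeAdaRMSNormWrapper(nn.Linear(dim, dim), dim = dim)
    out = block(torch.randn(2, 4, dim))

    # conditioned: cond scales the normalized input and gates the output (gate starts near zero)
    cond_block = MaybeAdaRMSNormWrapper(nn.Linear(dim, dim), dim = dim, dim_cond = dim_cond)
    cond_out = cond_block(torch.randn(2, 4, dim), cond = torch.randn(2, dim_cond))

    print(out.shape, cond_out.shape)  # torch.Size([2, 4, 16]) torch.Size([2, 4, 16])
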
@@ -472,15 +672,12 @@ class Attention(Module):
         window_size,
         dim_head = 64,
         heads = 8,
-        pre_rmsnorm = True,
         fixed_window_size = False,
         accept_value_residual = False
     ):
         super().__init__()
         self.scale = dim_head ** -0.5
 
-        self.norm = RMSNorm(dim) if pre_rmsnorm else Identity()
-
         self.split_heads = Rearrange('b n (h d) -> b h n d', h = heads)
         self.merge_heads = Rearrange('b h n d -> b n (h d)')
 
@@ -524,8 +721,6 @@
 
         device = tokens.device
 
-        tokens = self.norm(tokens)
-
         q, k, v = (self.to_q(tokens), *self.to_kv(tokens).chunk(2, dim = -1))
 
         q, k, v = map(self.split_heads, (q, k, v))
@@ -614,19 +809,26 @@ class TransformerXL(Module):
         dim_head = 64,
         heads = 8,
         expansion_factor = 4.,
+        dim_cond = None,
         final_norm = True,
         fixed_window_size = False,
     ):
         super().__init__()
 
+        condition = exists(dim_cond)
+
+        self.to_cond_tokens = MLP(dim_cond, dim * 2, activate_last = True) if exists(dim_cond) else None
+
+        norm_fn = partial(MaybeAdaRMSNormWrapper, dim = dim, dim_cond = (dim * 2) if condition else None)
+
         layers = ModuleList([])
 
         for i in range(depth):
             is_first = i == 0
 
-            attn = Attention(dim = dim, dim_head = dim_head, heads = heads, fixed_window_size = fixed_window_size, window_size = window_size, accept_value_residual = not is_first)
+            attn = norm_fn(Attention(dim = dim, dim_head = dim_head, heads = heads, fixed_window_size = fixed_window_size, window_size = window_size, accept_value_residual = not is_first))
 
-            ff = FeedForward(dim = dim, expansion_factor = expansion_factor)
+            ff = norm_fn(FeedForward(dim = dim, expansion_factor = expansion_factor))
 
             layers.append(ModuleList([
                 attn, ff
@@ -644,20 +846,32 @@
         self,
         x,
         cache = None,
-        return_kv_cache = False
+        return_kv_cache = False,
+        condition: Tensor | None = None
     ):
 
+        # cache and residuals
+
         cache = default(cache, (None,) * len(self.layers))
 
         next_kv_caches = []
         value_residual = None
 
+        # handle condition
+
+        cond_tokens = None
+        if exists(condition):
+            assert exists(self.to_cond_tokens)
+            cond_tokens = self.to_cond_tokens(condition)
+
+        # layers
+
         for (attn, ff), kv_cache in zip(self.layers, cache):
 
-            attn_out, (next_kv_cache, values) = attn(x, value_residual = value_residual, kv_cache = kv_cache, return_kv_cache = True)
+            attn_out, (next_kv_cache, values) = attn(x, cond = cond_tokens, value_residual = value_residual, kv_cache = kv_cache, return_kv_cache = True)
 
             x = attn_out + x
-            x = ff(x) + x
+            x = ff(x, cond = cond_tokens) + x
 
             next_kv_caches.append(next_kv_cache)
             value_residual = default(value_residual, values)
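
A sketch of the conditioning path these two hunks introduce, without instantiating the full TransformerXL (the token and command widths below are hypothetical; the `MLP(dim_cond, dim * 2, activate_last = True)` call and the `cond =` keyword mirror the diff):

    import torch
    from torch import nn
    from functools import partial

    from x_mlps_pytorch import MLP
    from locoformer.locoformer import MaybeAdaRMSNormWrapper  # assumed import path

    dim, dim_cond = 64, 12  # hypothetical token and command widths

    # command -> (dim * 2) cond tokens, shared by every wrapped block
    to_cond_tokens = MLP(dim_cond, dim * 2, activate_last = True)
    norm_fn = partial(MaybeAdaRMSNormWrapper, dim = dim, dim_cond = dim * 2)

    # stand-in feedforward block, wrapped the same way attn and ff are wrapped above
    ff = norm_fn(nn.Sequential(nn.Linear(dim, dim * 4), nn.GELU(), nn.Linear(dim * 4, dim)))

    x = torch.randn(2, 16, dim)          # (batch, time, dim) tokens
    command = torch.randn(2, dim_cond)   # e.g. target velocities for the robot

    cond_tokens = to_cond_tokens(command)
    x = ff(x, cond = cond_tokens) + x    # residual, as in TransformerXL.forward
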
@@ -678,7 +892,7 @@
 class Locoformer(Module):
     def __init__(
         self,
-        embedder: Module,
+        embedder: Module | ModuleList | list[Module],
         unembedder: Module,
         transformer: dict | TransformerXL,
         discount_factor = 0.999,
@@ -686,10 +900,10 @@
         ppo_eps_clip = 0.2,
         ppo_entropy_weight = 0.01,
         ppo_value_clip = 0.4,
-        dim_value_input = None,
+        dim_value_input = None, # needs to be set for value network to be available
         value_network: Module = nn.Identity(),
         reward_range: tuple[float, float] | None = None,
-        reward_shaping_fns: list[Callable[
+        reward_shaping_fns: list[Callable[..., float | Tensor]] | None = None,
         num_reward_bins = 32,
         hl_gauss_loss_kwargs = dict(),
         value_loss_weight = 0.5,
@@ -704,7 +918,15 @@
 
         self.transformer = transformer
 
+        # handle state embedder
+
+        if isinstance(embedder, list):
+            embedder = ModuleList(embedder)
+
         self.embedder = embedder
+
+        # unembed state to actions or ssl predictions
+
         self.unembedder = unembedder
 
         self.fixed_window_size = transformer.fixed_window_size
@@ -746,7 +968,7 @@
 
         self.use_spo = use_spo
 
-        # maybe recurrent kv cache
+        # maybe recurrent kv cache, from Ding et al. https://arxiv.org/abs/2012.15688
 
         self.recurrent_kv_cache = recurrent_kv_cache
 
@@ -772,6 +994,14 @@
 
         return self.to_value_pred.parameters()
 
+    def evolve(
+        self,
+        environment,
+        **kwargs
+    ):
+        evo_strat = EvoStrategy(self, environment = environment, **kwargs)
+        evo_strat()
+
     def ppo(
         self,
         state,
@@ -781,6 +1011,8 @@
         old_value,
         mask,
         episode_lens,
+        condition: Tensor | None = None,
+        state_type: int | None = None,
         actor_optim: Optimizer | None = None,
         critic_optim: Optimizer | None = None
     ):
@@ -794,18 +1026,25 @@
 
         advantage = normalize(advantage)
 
+        data_tensors = (
+            state,
+            action,
+            old_action_log_prob,
+            reward,
+            old_value,
+            mask,
+            advantage,
+            returns
+        )
+
+        has_condition = exists(condition)
+
+        if exists(condition):
+            data_tensors = (*data_tensors, condition)
+
         windowed_tensors = [
             t.split(window_size, dim = 1) for t in
-            (
-                state,
-                action,
-                old_action_log_prob,
-                reward,
-                old_value,
-                mask,
-                advantage,
-                returns
-            )
+            data_tensors
         ]
 
         mean_actor_loss = self.zero.clone()
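
The PPO update then splits every tensor, including the optional condition, into truncated windows along time; a tiny illustration of that `t.split(window_size, dim = 1)` pattern (all shapes below are made up):

    import torch

    state = torch.randn(2, 10, 4)        # (batch, time, dim_state)
    condition = torch.randn(2, 10, 8)     # (batch, time, dim_condition)

    window_size = 4
    windowed = [t.split(window_size, dim = 1) for t in (state, condition)]

    for state_chunk, cond_chunk in zip(*windowed):
        print(state_chunk.shape, cond_chunk.shape)
    # torch.Size([2, 4, 4]) torch.Size([2, 4, 8])
    # torch.Size([2, 4, 4]) torch.Size([2, 4, 8])
    # torch.Size([2, 2, 4]) torch.Size([2, 2, 8])
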
@@ -823,10 +1062,14 @@
             old_value,
             mask,
             advantage,
-            returns
+            returns,
+            *rest
         ) in zip(*windowed_tensors):
 
-
+            if has_condition:
+                condition, = rest
+
+            (action_logits, value_logits), cache = self.forward(state, condition = condition, state_type = state_type, cache = cache, detach_cache = True, return_values = True, return_raw_value_logits = True)
             entropy = calc_entropy(action_logits)
 
             action = rearrange(action, 'b t -> b t 1')
@@ -882,16 +1125,33 @@
 
         return mean_actor_loss.detach(), mean_critic_loss.detach()
 
-    def
+    def state_and_command_to_rewards(
         self,
-        state
+        state,
+        commands = None
     ) -> Tensor:
 
         assert self.has_reward_shaping
 
-        rewards = [
+        rewards = []
+
+        for fn in self.reward_shaping_fns:
+            param_names = get_param_names(fn)
+            param_names = set(param_names) & {'state', 'command'}
+
+            if param_names == {'state'}: # only state
+                reward = fn(state = state)
+            elif param_names == {'state', 'command'}: # state and command
+                reward = fn(state = state, command = commands)
+            else:
+                raise ValueError('invalid number of arguments for reward shaping function')
+
+            rewards.append(reward)
+
+        # cast to Tensor if returns a float, just make it flexible for researcher
 
         rewards = [tensor(reward) if not is_tensor(reward) else reward for reward in rewards]
+
         return stack(rewards)
 
     def wrap_env_functions(self, env):
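
The dispatch in `state_and_command_to_rewards` keys off each reward function's parameter names; a quick check of that logic (same import-path assumption as the earlier sketches):

    from locoformer.locoformer import (
        get_param_names,
        reward_alive,
        reward_base_height_penalty,
        reward_linear_velocity_command_tracking,
    )

    for fn in (reward_alive, reward_base_height_penalty, reward_linear_velocity_command_tracking):
        print(fn.__name__, sorted(set(get_param_names(fn)) & {'state', 'command'}))

    # reward_alive ['state']                                       -> dispatched as fn(state = state)
    # reward_base_height_penalty ['state']                         -> dispatched as fn(state = state)
    # reward_linear_velocity_command_tracking ['command', 'state'] -> dispatched as fn(state = state, command = commands)
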
@@ -921,7 +1181,7 @@
         if not self.has_reward_shaping:
             return env_step_out_torch
 
-        shaped_rewards = self.
+        shaped_rewards = self.state_and_command_to_rewards(env_step_out_torch)
 
         return env_step_out_torch, shaped_rewards
 
@@ -940,20 +1200,36 @@
 
         cache = None
 
-        def stateful_forward(
+        def stateful_forward(
+            state: Tensor,
+            condition: Tensor | None = None,
+            state_type: int | None = None,
+            **override_kwargs
+        ):
             nonlocal cache
 
+            state = state.to(self.device)
+
+            if exists(condition):
+                condition = condition.to(self.device)
+
             # handle no batch or time, for easier time rolling out against envs
 
             if not has_batch_dim:
                 state = rearrange(state, '... -> 1 ...')
 
+                if exists(condition):
+                    condition = rearrange(condition, '... -> 1 ...')
+
             if not has_time_dim:
                 state = state.unsqueeze(state_time_dim)
 
+                if exists(condition):
+                    condition = rearrange(condition, '... d -> ... 1 d')
+
             # forwards
 
-            out, cache = self.forward(state, cache = cache, **{**kwargs, **override_kwargs})
+            out, cache = self.forward(state, condition = condition, state_type = state_type, cache = cache, **{**kwargs, **override_kwargs})
 
             # maybe remove batch or time
 
@@ -988,6 +1264,8 @@
         self,
         state: Tensor,
         cache: Cache | None = None,
+        condition: Tensor | None = None,
+        state_type: int | None = None,
         detach_cache = False,
         return_values = False,
         return_raw_value_logits = False
@@ -995,7 +1273,16 @@
 
         state = state.to(self.device)
 
-
+        # determine which function to invoke for state to token for transformer
+
+        state_to_token = self.embedder
+
+        if exists(state_type):
+            state_to_token = self.embedder[state_type]
+
+        # embed
+
+        tokens = state_to_token(state)
 
         # time
 
@@ -1015,7 +1302,7 @@
 
         # attention
 
-        embed, kv_cache = self.transformer(tokens, cache = prev_kv_cache, return_kv_cache = True)
+        embed, kv_cache = self.transformer(tokens, condition = condition, cache = prev_kv_cache, return_kv_cache = True)
 
         # unembed to actions - in language models this would be the next state
 

{locoformer-0.0.29.dist-info → locoformer-0.0.43.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: locoformer
-Version: 0.0.29
+Version: 0.0.43
 Summary: LocoFormer
 Project-URL: Homepage, https://pypi.org/project/locoformer/
 Project-URL: Repository, https://github.com/lucidrains/locoformer
@@ -41,6 +41,7 @@ Requires-Dist: einx>=0.3.0
 Requires-Dist: hl-gauss-pytorch>=0.2.0
 Requires-Dist: rotary-embedding-torch
 Requires-Dist: torch>=2.4
+Requires-Dist: x-evolution
 Requires-Dist: x-mlps-pytorch
 Provides-Extra: examples
 Requires-Dist: accelerate; extra == 'examples'

locoformer-0.0.43.dist-info/RECORD

@@ -0,0 +1,6 @@
+locoformer/__init__.py,sha256=XctsMGEZSR4mVl75fhds_1BtS5qGFiiItTDV7CmCt_I,45
+locoformer/locoformer.py,sha256=5gQTtseqs92K9ee9HJ1gEqhm8MFPFDFXPnoPxLnf8Nw,37531
+locoformer-0.0.43.dist-info/METADATA,sha256=Vgx50wEmRpwrGxoOntARE2oU7g5TdqcM2ZUvrpOBjIk,3283
+locoformer-0.0.43.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+locoformer-0.0.43.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+locoformer-0.0.43.dist-info/RECORD,,

locoformer-0.0.29.dist-info/RECORD

@@ -1,6 +0,0 @@
-locoformer/__init__.py,sha256=XctsMGEZSR4mVl75fhds_1BtS5qGFiiItTDV7CmCt_I,45
-locoformer/locoformer.py,sha256=Tr_1btuoTZ0huXeDcAeuHxTPaVeCUEGc5iLvMYGDLck,29982
-locoformer-0.0.29.dist-info/METADATA,sha256=5Fi3EOsgpBvpzAFVZQyrlink-HcHE8EgFl10Y5l8mqM,3256
-locoformer-0.0.29.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-locoformer-0.0.29.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-locoformer-0.0.29.dist-info/RECORD,,

{locoformer-0.0.29.dist-info → locoformer-0.0.43.dist-info}/licenses/LICENSE
File without changes