locoformer 0.0.30__tar.gz → 0.0.37__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: locoformer
-Version: 0.0.30
+Version: 0.0.37
 Summary: LocoFormer
 Project-URL: Homepage, https://pypi.org/project/locoformer/
 Project-URL: Repository, https://github.com/lucidrains/locoformer
@@ -41,6 +41,7 @@ Requires-Dist: einx>=0.3.0
 Requires-Dist: hl-gauss-pytorch>=0.2.0
 Requires-Dist: rotary-embedding-torch
 Requires-Dist: torch>=2.4
+Requires-Dist: x-evolution
 Requires-Dist: x-mlps-pytorch
 Provides-Extra: examples
 Requires-Dist: accelerate; extra == 'examples'
@@ -1,11 +1,14 @@
 from __future__ import annotations
 from typing import Callable
+from types import SimpleNamespace
 from functools import partial

 from pathlib import Path
 from contextlib import contextmanager
 from collections import namedtuple

+from inspect import signature
+
 import numpy as np
 from numpy import ndarray
 from numpy.lib.format import open_memmap
@@ -31,6 +34,10 @@ from hl_gauss_pytorch import HLGaussLoss

 from assoc_scan import AssocScan

+from x_mlps_pytorch import MLP
+
+from x_evolution import EvoStrategy
+
 # constants

 LinearNoBias = partial(Linear, bias = False)
@@ -54,6 +61,10 @@ def xnor(x, y):
 def divisible_by(num, den):
     return (num % den) == 0

+def get_param_names(fn):
+    parameters = signature(fn).parameters
+    return list(parameters.keys())
+
 # tensor helpers

 def log(t, eps = 1e-20):
@@ -81,10 +92,99 @@ def pad_at_dim(
 def normalize(t, eps = 1e-5):
     return (t - t.mean()) / t.std().clamp_min(eps)

+def tensor_to_dict(
+    t: Tensor,
+    config: tuple[tuple[str, int] | str],
+    dim = -1,
+    return_dottable = True
+):
+    config = tuple((c, 1) if isinstance(c, str) else c for c in config)
+
+    names, sizes = zip(*config)
+    assert sum(sizes) == t.shape[dim]
+
+    t = t.split(sizes, dim = dim)
+    tensor_dict = dict(zip(names, t))
+
+    if not return_dottable:
+        return tensor_dict
+
+    return SimpleNamespace(**tensor_dict)
+
 def calc_entropy(logits):
     prob = logits.softmax(dim = -1)
     return -(prob * log(prob)).sum(dim = -1)

+# reward functions - A.2
+
+def reward_linear_velocity_command_tracking(
+    state,
+    command,
+    s1 = 1.
+):
+    if not (hasattr(state, 'v_xy') and hasattr(command, 'v_xy')):
+        return 0.
+
+    error = (state.v_xy - command.v_xy).norm(dim = -1).pow(2)
+    return torch.exp(-error / s1)
+
+def reward_angular_velocity_command_tracking(
+    state,
+    command,
+    s2 = 1.
+):
+    if not (hasattr(state, 'w_z') and hasattr(command, 'w_z')):
+        return 0.
+
+    error = (state.w_z - command.w_z).norm(dim = -1).pow(2)
+    return torch.exp(-error / s2)
+
+def reward_base_linear_velocity_penalty(
+    state
+):
+    if not hasattr(state, 'v_z'):
+        return 0.
+
+    return -state.v_z.norm(dim = -1).pow(2)
+
+def reward_base_angular_velocity_penalty(
+    state
+):
+    if not hasattr(state, 'w_xy'):
+        return 0.
+
+    return -state.w_xy.norm(dim = -1).pow(2)
+
+def reward_base_height_penalty(
+    state,
+    x_z_nominal = 0.27
+):
+    if not hasattr(state, 'x_z'):
+        return 0.
+
+    return -(state.x_z - x_z_nominal).norm(dim = -1).pow(2)
+
+def reward_joint_acceleration_penalty(
+    state
+):
+    if not hasattr(state, 'joint_q'):
+        return 0.
+
+    return -state.joint_q.norm(dim = -1).pow(2)
+
+def reward_torque_penalty(
+    state
+):
+    if not hasattr(state, 'tau'):
+        return 0.
+
+    return -state.tau.norm(dim = -1).pow(2)
+
+def reward_alive(
+    state
+):
+    return 1.
+
 # generalized advantage estimate

 @torch.no_grad()
@@ -487,8 +587,8 @@ class MaybeAdaRMSNormWrapper(Module):
         self.to_gamma = LinearNoBias(dim_cond, dim)
         self.to_ada_norm_zero = nn.Linear(dim_cond, dim)

-        nn.init.zeros_(self.to_gamma.weight, 0.)
-        nn.init.zeros_(self.to_ada_norm_zero.weight, 0.)
+        nn.init.zeros_(self.to_gamma.weight)
+        nn.init.zeros_(self.to_ada_norm_zero.weight)
         nn.init.constant_(self.to_ada_norm_zero.bias, -5.)

     def forward(
@@ -499,6 +599,7 @@ class MaybeAdaRMSNormWrapper(Module):
     ):

         need_cond = self.accept_condition
+
         assert xnor(exists(cond), need_cond)

         prenormed = self.norm(x)
@@ -683,7 +784,9 @@ class TransformerXL(Module):

         condition = exists(dim_cond)

-        norm_fn = partial(MaybeAdaRMSNormWrapper, dim = dim, dim_cond = dim_cond)
+        self.to_cond_tokens = MLP(dim_cond, dim * 2, activate_last = True) if exists(dim_cond) else None
+
+        norm_fn = partial(MaybeAdaRMSNormWrapper, dim = dim, dim_cond = (dim * 2) if condition else None)

         layers = ModuleList([])

@@ -710,20 +813,32 @@ class TransformerXL(Module):
         self,
         x,
         cache = None,
-        return_kv_cache = False
+        return_kv_cache = False,
+        condition: Tensor | None = None
     ):

+        # cache and residuals
+
         cache = default(cache, (None,) * len(self.layers))

         next_kv_caches = []
         value_residual = None

+        # handle condition
+
+        cond_tokens = None
+        if exists(condition):
+            assert exists(self.to_cond_tokens)
+            cond_tokens = self.to_cond_tokens(condition)
+
+        # layers
+
         for (attn, ff), kv_cache in zip(self.layers, cache):

-            attn_out, (next_kv_cache, values) = attn(x, value_residual = value_residual, kv_cache = kv_cache, return_kv_cache = True)
+            attn_out, (next_kv_cache, values) = attn(x, cond = cond_tokens, value_residual = value_residual, kv_cache = kv_cache, return_kv_cache = True)

             x = attn_out + x
-            x = ff(x) + x
+            x = ff(x, cond = cond_tokens) + x

             next_kv_caches.append(next_kv_cache)
             value_residual = default(value_residual, values)
@@ -752,10 +867,10 @@ class Locoformer(Module):
         ppo_eps_clip = 0.2,
         ppo_entropy_weight = 0.01,
         ppo_value_clip = 0.4,
-        dim_value_input = None, # needs to be set for value network to be available
+        dim_value_input = None, # needs to be set for value network to be available
         value_network: Module = nn.Identity(),
         reward_range: tuple[float, float] | None = None,
-        reward_shaping_fns: list[Callable[[Tensor], float | Tensor]] | None = None,
+        reward_shaping_fns: list[Callable[..., float | Tensor]] | None = None,
         num_reward_bins = 32,
         hl_gauss_loss_kwargs = dict(),
         value_loss_weight = 0.5,
@@ -838,6 +953,14 @@ class Locoformer(Module):

         return self.to_value_pred.parameters()

+    def evolve(
+        self,
+        environment,
+        **kwargs
+    ):
+        evo_strat = EvoStrategy(self, environment = environment, **kwargs)
+        evo_strat()
+
     def ppo(
         self,
         state,
@@ -948,16 +1071,33 @@ class Locoformer(Module):

         return mean_actor_loss.detach(), mean_critic_loss.detach()

-    def state_to_rewards(
+    def state_and_command_to_rewards(
         self,
-        state
+        state,
+        commands = None
     ) -> Tensor:

         assert self.has_reward_shaping

-        rewards = [fn(state) for fn in self.reward_shaping_fns]
+        rewards = []
+
+        for fn in self.reward_shaping_fns:
+            param_names = get_param_names(fn)
+            param_names = set(param_names) & {'state', 'command'}
+
+            if param_names == {'state'}: # only state
+                reward = fn(state = state)
+            elif param_names == {'state', 'command'}: # state and command
+                reward = fn(state = state, command = commands)
+            else:
+                raise ValueError('invalid number of arguments for reward shaping function')
+
+            rewards.append(reward)
+
+        # cast to Tensor if returns a float, just make it flexible for researcher

         rewards = [tensor(reward) if not is_tensor(reward) else reward for reward in rewards]
+
         return stack(rewards)

     def wrap_env_functions(self, env):
@@ -987,7 +1127,7 @@ class Locoformer(Module):
             if not self.has_reward_shaping:
                 return env_step_out_torch

-            shaped_rewards = self.state_to_rewards(env_step_out_torch)
+            shaped_rewards = self.state_and_command_to_rewards(env_step_out_torch)

             return env_step_out_torch, shaped_rewards

@@ -1006,7 +1146,11 @@ class Locoformer(Module):

         cache = None

-        def stateful_forward(state: Tensor, **override_kwargs):
+        def stateful_forward(
+            state: Tensor,
+            condition: Tensor | None = None,
+            **override_kwargs
+        ):
             nonlocal cache

             # handle no batch or time, for easier time rolling out against envs
@@ -1014,12 +1158,18 @@ class Locoformer(Module):
             if not has_batch_dim:
                 state = rearrange(state, '... -> 1 ...')

+                if exists(condition):
+                    condition = rearrange(condition, '... -> 1 ...')
+
             if not has_time_dim:
                 state = state.unsqueeze(state_time_dim)

+                if exists(condition):
+                    condition = rearrange(condition, '... d -> ... 1 d')
+
             # forwards

-            out, cache = self.forward(state, cache = cache, **{**kwargs, **override_kwargs})
+            out, cache = self.forward(state, condition = condition, cache = cache, **{**kwargs, **override_kwargs})

             # maybe remove batch or time

@@ -1054,6 +1204,7 @@ class Locoformer(Module):
         self,
         state: Tensor,
         cache: Cache | None = None,
+        condition: Tensor | None = None,
         detach_cache = False,
         return_values = False,
         return_raw_value_logits = False
@@ -1081,7 +1232,7 @@ class Locoformer(Module):

         # attention

-        embed, kv_cache = self.transformer(tokens, cache = prev_kv_cache, return_kv_cache = True)
+        embed, kv_cache = self.transformer(tokens, condition = condition, cache = prev_kv_cache, return_kv_cache = True)

         # unembed to actions - in language models this would be the next state

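The main behavioral change above is that reward shaping functions may now take a command in addition to the state: state_and_command_to_rewards inspects each function's signature via get_param_names and only passes the command to functions that declare a `command` parameter. The following standalone sketch is not part of the package; the toy state, command, helper name, and reward functions here are invented purely to illustrate that dispatch pattern under the same signature-inspection idea.

# Standalone illustration of the signature-based reward dispatch.
# Everything below is hypothetical example code, not the package API.

from inspect import signature
from types import SimpleNamespace

import torch

def get_param_names(fn):
    # same idea as the helper in the diff: read parameter names off the signature
    return list(signature(fn).parameters.keys())

# two toy shaping functions: one reads only the state, one also needs the command
def height_penalty(state):
    return -(state.x_z - 0.27).pow(2)

def velocity_tracking(state, command):
    return torch.exp(-(state.v_xy - command.v_xy).norm(dim = -1).pow(2))

def shaped_rewards(fns, state, command = None):
    rewards = []
    for fn in fns:
        names = set(get_param_names(fn)) & {'state', 'command'}

        if names == {'state'}:
            reward = fn(state = state)
        elif names == {'state', 'command'}:
            reward = fn(state = state, command = command)
        else:
            raise ValueError('shaping functions must accept `state` and optionally `command`')

        rewards.append(torch.as_tensor(reward))

    return torch.stack(rewards)

state = SimpleNamespace(x_z = torch.tensor(0.30), v_xy = torch.tensor([0.5, 0.0]))
command = SimpleNamespace(v_xy = torch.tensor([0.4, 0.0]))

print(shaped_rewards([height_penalty, velocity_tracking], state, command))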
@@ -1,6 +1,6 @@
 [project]
 name = "locoformer"
-version = "0.0.30"
+version = "0.0.37"
 description = "LocoFormer"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -33,6 +33,7 @@ dependencies = [
     "hl-gauss-pytorch>=0.2.0",
     "rotary-embedding-torch",
     "torch>=2.4",
+    "x-evolution",
     "x-mlps-pytorch",
 ]

@@ -10,8 +10,10 @@ from einops import rearrange
 from locoformer.locoformer import Locoformer

 @param('recurrent_kv_cache', (False, True))
+@param('has_commands', (False, True))
 def test_locoformer(
-    recurrent_kv_cache
+    recurrent_kv_cache,
+    has_commands
 ):

     model = Locoformer(
@@ -24,24 +26,31 @@ def test_locoformer(
         transformer = dict(
             dim = 128,
             depth = 1,
-            window_size = 512
+            window_size = 512,
+            dim_cond = 2 if has_commands else None
         )
     )

     seq = torch.randint(0, 256, (3, 512))

-    (logits, values), cache = model(seq, return_values = True)
-    (logits, values), cache = model(seq, return_values = True, cache = cache)
-    (logits, values), cache = model(seq, return_values = True, cache = cache)
+    commands = None
+    if has_commands:
+        commands = torch.randn(3, 512, 2)
+
+    (logits, values), cache = model(seq, condition = commands, return_values = True)
+    (logits, values), cache = model(seq, condition = commands, return_values = True, cache = cache)
+    (logits, values), cache = model(seq, condition = commands, return_values = True, cache = cache)

     assert logits.shape == (3, 512, 256)

     stateful_forward = model.get_stateful_forward(has_batch_dim = True, has_time_dim = True, return_values = True, inference_mode = True)

+    inference_command = torch.randn(1, 1, 2) if has_commands else None
+
     for state in seq.unbind(dim = -1):
         state = rearrange(state, 'b -> b 1')

-        logits, values = stateful_forward(state)
+        logits, values = stateful_forward(state, condition = inference_command)
         assert logits.shape == (3, 1, 256)

 def test_replay():
@@ -117,7 +126,7 @@ def test_reward_shaping():
         reward_range = (-100., 100.),
         reward_shaping_fns = [
             lambda state: (state[3] - 2.5).pow(2).mean(),
-            lambda state: state[4:6].norm(dim = -1)
+            lambda state, command: state[4:6].norm(dim = -1)
         ],
         transformer = dict(
             dim = 128,
@@ -145,3 +154,29 @@ def test_reward_shaping():
     _, rewards = step_fn(3)

     assert len(rewards) == 2
+
+def test_tensor_to_dict():
+    state = torch.randn(1, 3, 5)
+    config = (('xyz', 3), 'vx', 'vy')
+
+    from locoformer.locoformer import tensor_to_dict
+
+    state_dict = tensor_to_dict(state, config)
+    assert hasattr(state_dict, 'xyz') and state_dict.xyz.shape == (1, 3, 3)
+
+def test_evo():
+
+    model = Locoformer(
+        embedder = nn.Embedding(256, 128),
+        unembedder = nn.Linear(128, 256, bias = False),
+        value_network = MLP(128, 64, 32),
+        dim_value_input = 32,
+        reward_range = (-100., 100.),
+        transformer = dict(
+            dim = 128,
+            depth = 1,
+            window_size = 512,
+        )
+    )
+
+    model.evolve(lambda model: 1., num_generations = 1)
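As a reading aid for how the pieces added in this release could fit together, here is a hypothetical sketch. The field layout (v_xy, w_z, x_z) and its sizes are assumptions made only for this example; tensor_to_dict and the two reward functions themselves are the ones defined in the diff above. A flat observation tensor is split into a dottable namespace whose attribute names match what the A.2 reward functions look up via hasattr.

# Hypothetical wiring of tensor_to_dict with the A.2 reward shaping functions.
# The observation layout below is invented for illustration.

import torch
from locoformer.locoformer import (
    tensor_to_dict,
    reward_linear_velocity_command_tracking,
    reward_base_height_penalty
)

# a flat 4-dim observation: planar velocity (2), yaw rate (1), base height (1)
obs = torch.randn(4)
state = tensor_to_dict(obs, (('v_xy', 2), 'w_z', 'x_z'))

# commands share the same dottable layout
command = tensor_to_dict(torch.tensor([0.5, 0.0, 0.1]), (('v_xy', 2), 'w_z'))

print(reward_linear_velocity_command_tracking(state, command))  # exp(-||v_xy error||^2 / s1)
print(reward_base_height_penalty(state))                        # -(x_z - 0.27)^2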
8 files without changes