locoformer 0.0.7__py3-none-any.whl → 0.0.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- locoformer/locoformer.py +600 -37
- {locoformer-0.0.7.dist-info → locoformer-0.0.29.dist-info}/METADATA +4 -2
- locoformer-0.0.29.dist-info/RECORD +6 -0
- locoformer-0.0.7.dist-info/RECORD +0 -6
- {locoformer-0.0.7.dist-info → locoformer-0.0.29.dist-info}/WHEEL +0 -0
- {locoformer-0.0.7.dist-info → locoformer-0.0.29.dist-info}/licenses/LICENSE +0 -0
locoformer/locoformer.py
CHANGED
@@ -1,11 +1,25 @@
 from __future__ import annotations
+from typing import Callable
 from functools import partial

+from pathlib import Path
+from contextlib import contextmanager
+from collections import namedtuple
+
+import numpy as np
+from numpy import ndarray
+from numpy.lib.format import open_memmap
+
+from beartype import beartype
+from beartype.door import is_bearable
+
 import torch
-from torch import nn, cat, stack, arange, Tensor, is_tensor
+from torch import nn, cat, stack, arange, Tensor, tensor, is_tensor, from_numpy
 import torch.nn.functional as F
 from torch.nn import Module, ModuleList, Linear, RMSNorm, Identity, Sequential
 from torch.utils._pytree import tree_map
+from torch.utils.data import Dataset, DataLoader
+from torch.optim import Optimizer

 import einx
 from einops import rearrange, einsum
@@ -13,10 +27,16 @@ from einops.layers.torch import Rearrange

 from rotary_embedding_torch import RotaryEmbedding

+from hl_gauss_pytorch import HLGaussLoss
+
 from assoc_scan import AssocScan

+# constants
+
 LinearNoBias = partial(Linear, bias = False)

+Cache = namedtuple('Cache', ('curr_timestep', 'kv_cache')) # (int, Tensor)
+
 # helper functions

 def exists(v):
@@ -31,20 +51,36 @@ def first(arr):
 def divisible_by(num, den):
     return (num % den) == 0

+# tensor helpers
+
+def log(t, eps = 1e-20):
+    return t.clamp_min(eps).log()
+
+def is_empty(t):
+    return t.numel() == 0
+
 def tree_map_tensor(x, fn):
     return tree_map(lambda t: t if not is_tensor(t) else fn(t), x)

-def
-
+def pad_at_dim(
+    t,
+    pad: tuple[int, int],
+    dim = -1,
+    value = 0.
+):
+    if pad == (0, 0):
+        return t

-
-
+    dims_from_right = (- dim - 1) if dim < 0 else (t.ndim - dim - 1)
+    zeros = ((0, 0) * dims_from_right)
+    return F.pad(t, (*zeros, *pad), value = value)

-
-
-        combined_cache.append(next_cache)
+def normalize(t, eps = 1e-5):
+    return (t - t.mean()) / t.std().clamp_min(eps)

-
+def calc_entropy(logits):
+    prob = logits.softmax(dim = -1)
+    return -(prob * log(prob)).sum(dim = -1)

 # generalized advantage estimate

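The new tensor helpers are small but load-bearing: `pad_at_dim` backs the variable-length episode collation added further down. A quick sketch of its behavior, assuming the module import path `locoformer.locoformer`:

```python
import torch
from locoformer.locoformer import pad_at_dim

t = torch.ones(2, 3)

# pad five zeros onto the end of dimension 0; other dims untouched
padded = pad_at_dim(t, (0, 5), dim = 0)
assert padded.shape == (7, 3)
```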
@@ -52,7 +88,7 @@ def combine_kv_cache(cache1, cache2):
 def calc_gae(
     rewards,
     values,
-    masks,
+    masks = None,
     gamma = 0.99,
     lam = 0.95,
     use_accelerated = None
@@ -63,6 +99,9 @@ def calc_gae(
     values = F.pad(values, (0, 1), value = 0.)
     values, values_next = values[..., :-1], values[..., 1:]

+    if not exists(masks):
+        masks = torch.ones_like(values)
+
     delta = rewards + gamma * values_next * masks - values
     gates = gamma * lam * masks

@@ -72,7 +111,7 @@ def calc_gae(

     returns = gae + values

-    return returns
+    return gae, returns

 # transformer-xl mask w/ flex attn

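`calc_gae` now makes `masks` optional and returns the advantage alongside the bootstrapped returns, so call sites unpack two values. A minimal sketch with hypothetical reward and value tensors:

```python
import torch
from locoformer.locoformer import calc_gae

# hypothetical (batch, time) rewards and value predictions
rewards = torch.randn(1, 8)
values = torch.randn(1, 8)

# masks may now be omitted (defaults to all ones), and gae is returned too
gae, returns = calc_gae(rewards, values, gamma = 0.99, lam = 0.95)
assert gae.shape == returns.shape == rewards.shape
```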
@@ -114,8 +153,8 @@ def create_xl_mask(
     # handle intra-episodic attention if needed

     if exists(episode_ids):
-        q_episode =
-        k_episode =
+        q_episode = episode_ids[b, q + offset]
+        k_episode = episode_ids[b, k]

         intra_episode_mask = q_episode == k_episode
         mask = mask & intra_episode_mask
@@ -146,6 +185,284 @@ def create_sliding_mask(
     create_kwargs = dict(device = device) if exists(device) else dict()
     return create_block_mask(sliding_mask, B = None, H = None, Q_LEN = seq_len, KV_LEN = kv_seq_len, _compile = True, **create_kwargs)

+# data
+
+def collate_var_time(data):
+
+    datum = first(data)
+    keys = datum.keys()
+
+    all_tensors = zip(*[datum.values() for datum in data])
+
+    collated_values = []
+
+    for key, tensors in zip(keys, all_tensors):
+
+        # the episode lens have zero dimension - think of a cleaner way to handle this later
+
+        if key != '_lens':
+
+            times = [t.shape[0] for t in tensors]
+            max_time = max(times)
+            tensors = [pad_at_dim(t, (0, max_time - t.shape[0]), dim = 0) for t in tensors]
+
+        collated_values.append(stack(tensors))
+
+    return dict(zip(keys, collated_values))
+
+class ReplayDataset(Dataset):
+    def __init__(
+        self,
+        folder: str | Path,
+        fields: tuple[str, ...] | None = None
+    ):
+        if isinstance(folder, str):
+            folder = Path(folder)
+
+        episode_lens = folder / 'episode_lens.npy'
+        self.episode_lens = open_memmap(str(episode_lens), mode = 'r')
+
+        # get indices of non-zero lengthed episodes
+
+        nonzero_episodes = self.episode_lens > 0
+        self.indices = np.arange(self.episode_lens.shape[-1])[nonzero_episodes]
+
+        # get all data files
+
+        filepaths = [*folder.glob('*.data.npy')]
+        assert len(filepaths) > 0
+
+        fieldname_to_filepath = {path.name.split('.')[0]: path for path in filepaths}
+
+        fieldnames_from_files = set(fieldname_to_filepath.keys())
+
+        fields = default(fields, fieldnames_from_files)
+
+        self.memmaps = dict()
+
+        for field in fields:
+            assert field in fieldnames_from_files, f'invalid field {field} - must be one of {fieldnames_from_files}'
+
+            path = fieldname_to_filepath[field]
+
+            self.memmaps[field] = open_memmap(str(path), mode = 'r')
+
+    def __len__(self):
+        return len(self.indices)
+
+    def __getitem__(self, idx):
+        episode_index = self.indices[idx]
+
+        episode_len = self.episode_lens[episode_index]
+
+        data = {field: from_numpy(memmap[episode_index, :episode_len].copy()) for field, memmap in self.memmaps.items()}
+
+        data['_lens'] = tensor(episode_len)
+
+        return data
+
+class RemappedReplayDataset(Dataset):
+    def __init__(
+        self,
+        dataset: ReplayDataset,
+        episode_mapping: Tensor | list[list[int]],
+        shuffle_episodes = False
+    ):
+        assert len(dataset) > 0
+        self.dataset = dataset
+
+        if is_tensor(episode_mapping):
+            assert episode_mapping.dtype in (torch.int, torch.long) and episode_mapping.ndim == 2
+            episode_mapping = episode_mapping.tolist()
+
+        self.episode_mapping = episode_mapping
+        self.shuffle_episodes = shuffle_episodes
+
+    def __len__(self):
+        return len(self.episode_mapping)
+
+    def __getitem__(self, idx):
+
+        episode_indices = self.episode_mapping[idx]
+
+        episode_indices = tensor(episode_indices)
+        episode_indices = episode_indices[(episode_indices >= 0) & (episode_indices < len(self.dataset))]
+
+        assert not is_empty(episode_indices)
+
+        if self.shuffle_episodes and episode_indices.numel() > 1:
+            num_episodes = len(episode_indices)
+            episode_indices = episode_indices[torch.randperm(num_episodes)]
+
+        episode_data = [self.dataset[i] for i in episode_indices.tolist()]
+
+        episode_lens = stack([data.pop('_lens') for data in episode_data])
+
+        keys = first(episode_data).keys()
+
+        values = [list(data.values()) for data in episode_data]
+
+        values = [cat(field_values) for field_values in zip(*values)] # concat across time
+
+        multi_episode_data = dict(zip(keys, values))
+
+        multi_episode_data['_lens'] = episode_lens.sum()
+
+        multi_episode_data['_episode_indices'] = cat([torch.full((episode_len,), episode_index) for episode_len, episode_index in zip(episode_lens, episode_indices)])
+
+        return multi_episode_data
+
+class ReplayBuffer:
+
+    @beartype
+    def __init__(
+        self,
+        folder: str | Path,
+        max_episodes: int,
+        max_timesteps: int,
+        fields: dict[
+            str,
+            str | tuple[str, int | tuple[int, ...]]
+        ]
+    ):
+
+        # folder for data
+
+        if not isinstance(folder, Path):
+            folder = Path(folder)
+        folder.mkdir(exist_ok = True)
+
+        self.folder = folder
+        assert folder.is_dir()
+
+        # keeping track of episode length
+
+        episode_lens = folder / 'episode_lens.npy'
+
+        self.episode_index = 0
+        self.timestep_index = 0
+
+        self.max_episodes = max_episodes
+        self.max_timesteps = max_timesteps
+
+        self.episode_lens = open_memmap(str(episode_lens), mode = 'w+', dtype = np.int32, shape = (max_episodes,))
+
+        # create the memmap for individual data tracks
+
+        self.shapes = dict()
+        self.dtypes = dict()
+        self.memmaps = dict()
+        self.fieldnames = set(fields.keys())
+
+        for field_name, field_info in fields.items():
+
+            # some flexibility
+
+            field_info = (field_info, ()) if isinstance(field_info, str) else field_info
+
+            dtype_str, shape = field_info
+            assert dtype_str in {'int', 'float', 'bool'}
+
+            dtype = dict(int = np.int32, float = np.float32, bool = np.bool_)[dtype_str]
+
+            # memmap file
+
+            filepath = folder / f'{field_name}.data.npy'
+            memmap = open_memmap(str(filepath), mode = 'w+', dtype = dtype, shape = (max_episodes, max_timesteps, *shape))
+
+            self.memmaps[field_name] = memmap
+            self.shapes[field_name] = shape
+            self.dtypes[field_name] = dtype
+
+        self.memory_namedtuple = namedtuple('Memory', list(fields.keys()))
+
+    def __len__(self):
+        return (self.episode_lens > 0).sum().item()
+
+    def reset_(self):
+        self.episode_lens[:] = 0
+        self.episode_index = 0
+        self.timestep_index = 0
+
+    def advance_episode(self):
+        self.episode_index = (self.episode_index + 1) % self.max_episodes
+        self.timestep_index = 0
+
+    def flush(self):
+        self.episode_lens[self.episode_index] = self.timestep_index
+
+        for memmap in self.memmaps.values():
+            memmap.flush()
+
+        self.episode_lens.flush()
+
+    @contextmanager
+    def one_episode(self):
+
+        yield
+
+        self.flush()
+        self.advance_episode()
+
+    @beartype
+    def store_datapoint(
+        self,
+        episode_index: int,
+        timestep_index: int,
+        name: str,
+        datapoint: Tensor | ndarray
+    ):
+        assert 0 <= episode_index < self.max_episodes
+        assert 0 <= timestep_index < self.max_timesteps
+
+        if is_tensor(datapoint):
+            datapoint = datapoint.detach().cpu().numpy()
+
+        assert name in self.fieldnames, f'invalid field name {name} - must be one of {self.fieldnames}'
+
+        assert datapoint.shape == self.shapes[name], f'invalid shape {datapoint.shape} - shape must be {self.shapes[name]}'
+
+        self.memmaps[name][self.episode_index, self.timestep_index] = datapoint
+
+    def store(
+        self,
+        **data
+    ):
+        assert is_bearable(data, dict[str, Tensor | ndarray])
+
+        assert not self.timestep_index >= self.max_timesteps, 'you exceeded the `max_timesteps` set on the replay buffer'
+
+        for name, datapoint in data.items():
+
+            self.store_datapoint(self.episode_index, self.timestep_index, name, datapoint)
+
+        self.timestep_index += 1
+
+        return self.memory_namedtuple(**data)
+
+    def dataset(
+        self,
+        episode_mapping: Tensor | list[list[int]] | None = None,
+    ) -> Dataset:
+        self.flush()
+
+        dataset = ReplayDataset(self.folder)
+
+        if not exists(episode_mapping):
+            return dataset
+
+        return RemappedReplayDataset(dataset, episode_mapping)
+
+    def dataloader(
+        self,
+        batch_size,
+        episode_mapping: Tensor | list[list[int]] | None = None,
+        **kwargs
+    ) -> DataLoader:
+        self.flush()
+
+        return DataLoader(self.dataset(episode_mapping), batch_size = batch_size, collate_fn = collate_var_time, **kwargs)
+
 # transformer-xl with ppo

 class Attention(Module):
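The replay machinery above is new in this release: `ReplayBuffer` persists per-field memmaps to disk, `ReplayDataset` reads non-empty episodes back, and `collate_var_time` zero-pads episodes of different lengths into a batch. A minimal usage sketch; the folder, field names, and shapes here are hypothetical, not prescribed by the package:

```python
import torch
from locoformer.locoformer import ReplayBuffer

buffer = ReplayBuffer(
    folder = './replay-data',       # episode_lens.npy and <field>.data.npy memmaps live here
    max_episodes = 4,
    max_timesteps = 16,
    fields = dict(
        state = ('float', (5,)),    # (dtype string, per-timestep shape)
        action = 'int',             # a bare dtype string means a scalar per timestep
        reward = 'float'
    )
)

# each `store` call writes one timestep; `one_episode` flushes then advances the episode pointer

with buffer.one_episode():
    for _ in range(10):
        buffer.store(
            state = torch.randn(5),
            action = torch.randint(0, 3, ()),
            reward = torch.randn(())
        )

# episodes of varying length are zero-padded and stacked by `collate_var_time`

dataloader = buffer.dataloader(batch_size = 2)
```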
@@ -204,7 +521,6 @@ class Attention(Module):
         return_kv_cache = False,
     ):
         seq_len = tokens.shape[-2]
-        assert seq_len <= self.window_size

         device = tokens.device

@@ -365,7 +681,21 @@ class Locoformer(Module):
         embedder: Module,
         unembedder: Module,
         transformer: dict | TransformerXL,
-
+        discount_factor = 0.999,
+        gae_lam = 0.95,
+        ppo_eps_clip = 0.2,
+        ppo_entropy_weight = 0.01,
+        ppo_value_clip = 0.4,
+        dim_value_input = None, # needs to be set for value network to be available
+        value_network: Module = nn.Identity(),
+        reward_range: tuple[float, float] | None = None,
+        reward_shaping_fns: list[Callable[[Tensor], float | Tensor]] | None = None,
+        num_reward_bins = 32,
+        hl_gauss_loss_kwargs = dict(),
+        value_loss_weight = 0.5,
+        calc_gae_kwargs: dict = dict(),
+        recurrent_kv_cache = True,
+        use_spo = False # simple policy optimization https://arxiv.org/abs/2401.16025 - Levine's group (PI) verified it is more stable than PPO
     ):
         super().__init__()

@@ -377,11 +707,58 @@ class Locoformer(Module):
         self.embedder = embedder
         self.unembedder = unembedder

-        self.value_network = value_network
-
         self.fixed_window_size = transformer.fixed_window_size
         self.window_size = transformer.window_size

+        # determine value network, using HL Gauss Layer
+
+        self.to_value_pred = None
+
+        if exists(dim_value_input):
+            assert exists(reward_range)
+
+            self.to_value_pred = nn.Sequential(
+                value_network,
+                LinearNoBias(dim_value_input, num_reward_bins)
+            )
+
+            reward_min, reward_max = reward_range
+
+            self.hl_gauss_loss = HLGaussLoss(
+                min_value = reward_min,
+                max_value = reward_max,
+                num_bins = num_reward_bins,
+                **hl_gauss_loss_kwargs
+            )
+
+        # ppo related
+
+        self.discount_factor = discount_factor
+        self.gae_lam = gae_lam
+        self.ppo_eps_clip = ppo_eps_clip
+        self.ppo_entropy_weight = ppo_entropy_weight
+        self.ppo_value_clip = ppo_value_clip
+        self.value_loss_weight = value_loss_weight
+
+        self.calc_gae_kwargs = calc_gae_kwargs
+
+        # maybe use spo
+
+        self.use_spo = use_spo
+
+        # maybe recurrent kv cache (todo: find and cite this paper from ages ago)
+
+        self.recurrent_kv_cache = recurrent_kv_cache
+
+        # reward shaping function
+
+        self.has_reward_shaping = exists(reward_shaping_fns)
+        self.reward_shaping_fns = reward_shaping_fns
+
+        # loss related
+
+        self.register_buffer('zero', tensor(0.), persistent = False)
+
     @property
     def device(self):
         return next(self.parameters()).device
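The scalar value head from 0.0.7 is replaced by a distributional one: the critic now emits logits over `num_reward_bins`, and `HLGaussLoss` serves double duty, as a classification loss when given a target and as a logits-to-scalar decoder when given logits alone (both call forms appear later in this diff). A standalone sketch under that assumed `hl-gauss-pytorch` API:

```python
import torch
from hl_gauss_pytorch import HLGaussLoss

hl_gauss = HLGaussLoss(min_value = -10., max_value = 10., num_bins = 32)

value_logits = torch.randn(2, 8, 32)    # (batch, time, num_reward_bins)
returns = torch.randn(2, 8)

loss = hl_gauss(value_logits, returns)  # cross entropy against gaussian-smoothed bin targets
values = hl_gauss(value_logits)         # decode logits back to scalar values, shape (2, 8)
```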
@@ -390,7 +767,165 @@ class Locoformer(Module):
         return self.unembedder.parameters()

     def critic_parameters(self):
-
+        if not exists(self.to_value_pred):
+            return []
+
+        return self.to_value_pred.parameters()
+
+    def ppo(
+        self,
+        state,
+        action,
+        old_action_log_prob,
+        reward,
+        old_value,
+        mask,
+        episode_lens,
+        actor_optim: Optimizer | None = None,
+        critic_optim: Optimizer | None = None
+    ):
+        window_size = self.window_size
+        total_learnable_tokens = mask.sum().item()
+
+        seq_len = state.shape[1]
+        gae_mask = einx.less('j, i -> i j', arange(seq_len, device = self.device), episode_lens)
+
+        advantage, returns = calc_gae(reward, old_value, masks = gae_mask, lam = self.gae_lam, gamma = self.discount_factor, **self.calc_gae_kwargs)
+
+        advantage = normalize(advantage)
+
+        windowed_tensors = [
+            t.split(window_size, dim = 1) for t in
+            (
+                state,
+                action,
+                old_action_log_prob,
+                reward,
+                old_value,
+                mask,
+                advantage,
+                returns
+            )
+        ]
+
+        mean_actor_loss = self.zero.clone()
+        mean_critic_loss = self.zero.clone()
+
+        # learn across windows
+
+        cache = None
+
+        for (
+            state,
+            action,
+            old_action_log_prob,
+            reward,
+            old_value,
+            mask,
+            advantage,
+            returns
+        ) in zip(*windowed_tensors):
+
+            (action_logits, value_logits), cache = self.forward(state, cache = cache, detach_cache = True, return_values = True, return_raw_value_logits = True)
+            entropy = calc_entropy(action_logits)
+
+            action = rearrange(action, 'b t -> b t 1')
+            log_prob = action_logits.gather(-1, action)
+            log_prob = rearrange(log_prob, 'b t 1 -> b t')
+
+            # update actor, classic clipped surrogate loss
+
+            eps_clip = self.ppo_eps_clip
+            ratio = (log_prob - old_action_log_prob).exp()
+
+            if self.use_spo:
+                actor_loss = -(ratio * advantage - (advantage.abs() * (ratio - 1.).square()) / (2 * eps_clip))
+            else:
+                actor_loss = -torch.min(ratio * advantage, ratio.clamp(1. - eps_clip, 1. + eps_clip) * advantage)
+
+            actor_loss = actor_loss - self.ppo_entropy_weight * entropy
+
+            windowed_actor_loss = actor_loss[mask].sum() / total_learnable_tokens
+            windowed_actor_loss.backward(retain_graph = True)
+
+            # update critic
+
+            value_loss = self.hl_gauss_loss(value_logits, returns, reduction = 'none')
+
+            value_clip = self.ppo_value_clip
+            value = self.hl_gauss_loss(value_logits)
+
+            clipped_value = old_value + (value - old_value).clamp(-value_clip, value_clip)
+            clipped_value_loss = self.hl_gauss_loss(clipped_value, returns, reduction = 'none')
+
+            critic_loss = torch.maximum(value_loss, clipped_value_loss) * self.value_loss_weight
+
+            windowed_critic_loss = critic_loss[mask].sum() / total_learnable_tokens
+            windowed_critic_loss.backward(retain_graph = True)
+
+            # accumulate
+
+            mean_actor_loss.add_(windowed_actor_loss)
+            mean_critic_loss.add_(windowed_critic_loss)
+
+        # optimizer update
+
+        if exists(actor_optim):
+            actor_optim.step()
+            actor_optim.zero_grad()
+
+        if exists(critic_optim):
+            critic_optim.step()
+            critic_optim.zero_grad()
+
+        # return losses for logging
+
+        return mean_actor_loss.detach(), mean_critic_loss.detach()
+
+    def state_to_rewards(
+        self,
+        state
+    ) -> Tensor:
+
+        assert self.has_reward_shaping
+
+        rewards = [fn(state) for fn in self.reward_shaping_fns]
+
+        rewards = [tensor(reward) if not is_tensor(reward) else reward for reward in rewards]
+        return stack(rewards)
+
+    def wrap_env_functions(self, env):
+
+        def transform_output(el):
+            if isinstance(el, ndarray):
+                return from_numpy(el)
+            elif isinstance(el, (int, bool, float)):
+                return tensor(el)
+            else:
+                return el
+
+        def wrapped_reset(*args, **kwargs):
+            env_reset_out = env.reset(*args, **kwargs)
+
+            return tree_map(transform_output, env_reset_out)
+
+        def wrapped_step(action, *args, **kwargs):
+
+            if is_tensor(action):
+                action = action.item()
+
+            env_step_out = env.step(action, *args, **kwargs)
+
+            env_step_out_torch = tree_map(transform_output, env_step_out)
+
+            if not self.has_reward_shaping:
+                return env_step_out_torch
+
+            shaped_rewards = self.state_to_rewards(env_step_out_torch)
+
+            return env_step_out_torch, shaped_rewards
+
+        return wrapped_reset, wrapped_step

     def get_stateful_forward(
         self,
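The new `ppo` method consumes a full (batch, time) rollout, computes GAE once over the whole sequence, then replays it window by window through the XL cache, stepping the optimizers at the end. A sketch of how it might be driven, assuming a configured `model` with a value head and a `buffer` as in the earlier sketch; the `log_prob`/`value`/`mask` field names are hypothetical user-chosen replay fields, and the actor parameter group is a stand-in:

```python
from torch.optim import Adam

critic_optim = Adam(model.critic_parameters(), lr = 3e-4)
actor_optim = Adam(model.parameters(), lr = 3e-4)  # stand-in; use the actual actor parameter group

for batch in buffer.dataloader(batch_size = 2):
    actor_loss, critic_loss = model.ppo(
        batch['state'],
        batch['action'],
        batch['log_prob'],   # old action log probs, stored at rollout time
        batch['reward'],
        batch['value'],      # old value predictions, stored at rollout time
        batch['mask'],
        batch['_lens'],      # per-sample episode lengths, added by the collation
        actor_optim = actor_optim,
        critic_optim = critic_optim
    )
```

Rollouts themselves can be gathered with the new `wrap_env_functions`, which converts numpy and scalar outputs of `env.reset` / `env.step` to tensors and, when `reward_shaping_fns` is set, appends shaped rewards to each step's output.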
@@ -398,6 +933,7 @@ class Locoformer(Module):
         inference_mode = False,
         has_batch_dim = False,
         has_time_dim = False,
+        state_time_dim = 1,
         **kwargs
     ):
         window_size = self.window_size
@@ -413,23 +949,16 @@
             state = rearrange(state, '... -> 1 ...')

         if not has_time_dim:
-            state =
+            state = state.unsqueeze(state_time_dim)

         # forwards

         out, cache = self.forward(state, cache = cache, **{**kwargs, **override_kwargs})

-        # handle cache
-
-        cache_len = cache.shape[-2]
-
-        if self.fixed_window_size or divisible_by(cache_len, window_size * 2):
-            cache = cache[..., -window_size:, :]
-
         # maybe remove batch or time

         if not has_time_dim:
-            out = tree_map_tensor(out, lambda t:
+            out = tree_map_tensor(out, lambda t: t.squeeze(state_time_dim))

         if not has_batch_dim:
             out = tree_map_tensor(out, lambda t: rearrange(t, '1 ... -> ...'))
@@ -458,14 +987,35 @@
     def forward(
         self,
         state: Tensor,
-        cache:
+        cache: Cache | None = None,
         detach_cache = False,
-        return_values = False
+        return_values = False,
+        return_raw_value_logits = False
     ):

+        state = state.to(self.device)
+
         tokens = self.embedder(state)

-
+        # time
+
+        time = tokens.shape[-2]
+
+        # destruct the cache for the current timestep and the cache
+
+        prev_kv_cache = None
+        timestep_start = 0
+
+        if exists(cache):
+            timestep_start, prev_kv_cache = cache
+
+        # an assert - make sure during training or inference, forward never gets anything that crosses the window segment boundary, to open up some possibilities with extending memory
+
+        assert ((timestep_start % self.window_size) + time) <= self.window_size
+
+        # attention
+
+        embed, kv_cache = self.transformer(tokens, cache = prev_kv_cache, return_kv_cache = True)

 # unembed to actions - in language models this would be the next state

@@ -476,21 +1026,34 @@
         # maybe detach cache

         if detach_cache:
-            kv_cache =
+            kv_cache = kv_cache.detach()

         # handle returning of values

         if return_values:
-            assert exists(self.
+            assert exists(self.to_value_pred)

-            values = self.
+            values = self.to_value_pred(embed)

-            if
-
-                values = rearrange(values, '... 1 -> ...')
+            if not return_raw_value_logits:
+                values = self.hl_gauss_loss(values) # converts the value logits to scalar values

             out = (out, values)

         # output and cache

-
+        next_timestep = time + timestep_start
+
+        # handle curtailing kv cache at the right intervals
+
+        window_size = self.window_size
+
+        if self.fixed_window_size or divisible_by(next_timestep, window_size * 2):
+            kv_cache = kv_cache[..., -window_size:, :]
+
+        # maybe recurrent cache - shift the kv cache from one layer above to the one below, for extending on receptive field of past
+
+        if self.recurrent_kv_cache and divisible_by(next_timestep, window_size):
+            kv_cache = torch.roll(kv_cache, shifts = -1, dims = 0)
+
+        return out, (next_timestep, kv_cache)
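`forward` now threads a `(next_timestep, kv_cache)` tuple (the fields of the `Cache` namedtuple) instead of raw key/values, and the cache trimming that previously lived in `get_stateful_forward` happens inside `forward`, along with the optional layer-rolled recurrent cache at window boundaries. A sketch of single-step inference under that contract, assuming a configured `model` and a hypothetical flat state of shape `(5,)`:

```python
import torch

cache = None

for _ in range(3):
    state = torch.randn(1, 1, 5)  # (batch, time = 1, state dim)

    # each call advances the timestep counter and returns the trimmed kv cache
    action_logits, cache = model.forward(state, cache = cache)

next_timestep, kv_cache = cache
assert next_timestep == 3
```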
{locoformer-0.0.7.dist-info → locoformer-0.0.29.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: locoformer
-Version: 0.0.7
+Version: 0.0.29
 Summary: LocoFormer
 Project-URL: Homepage, https://pypi.org/project/locoformer/
 Project-URL: Repository, https://github.com/lucidrains/locoformer
@@ -35,8 +35,10 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9
 Requires-Dist: assoc-scan
+Requires-Dist: beartype
 Requires-Dist: einops>=0.8.0
 Requires-Dist: einx>=0.3.0
+Requires-Dist: hl-gauss-pytorch>=0.2.0
 Requires-Dist: rotary-embedding-torch
 Requires-Dist: torch>=2.4
 Requires-Dist: x-mlps-pytorch
@@ -53,7 +55,7 @@ Description-Content-Type: text/markdown

 [LocoFormer - Generalist Locomotion via Long-Context Adaptation](https://generalist-locomotion.github.io/)

-The gist is they trained a simple Transformer-XL in simulation on robots with many different bodies (cross-embodiment)
+The gist is they trained a simple Transformer-XL in simulation on robots with many different bodies (cross-embodiment) and extreme domain randomization. When transferred to the real world, the robot gained the ability to adapt to insults. The XL memories span multiple trials, which allowed the robot to learn in-context adaptation.

 ## Sponsors

locoformer-0.0.29.dist-info/RECORD
ADDED
@@ -0,0 +1,6 @@
+locoformer/__init__.py,sha256=XctsMGEZSR4mVl75fhds_1BtS5qGFiiItTDV7CmCt_I,45
+locoformer/locoformer.py,sha256=Tr_1btuoTZ0huXeDcAeuHxTPaVeCUEGc5iLvMYGDLck,29982
+locoformer-0.0.29.dist-info/METADATA,sha256=5Fi3EOsgpBvpzAFVZQyrlink-HcHE8EgFl10Y5l8mqM,3256
+locoformer-0.0.29.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+locoformer-0.0.29.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+locoformer-0.0.29.dist-info/RECORD,,
locoformer-0.0.7.dist-info/RECORD
REMOVED
@@ -1,6 +0,0 @@
-locoformer/__init__.py,sha256=XctsMGEZSR4mVl75fhds_1BtS5qGFiiItTDV7CmCt_I,45
-locoformer/locoformer.py,sha256=lJQs0CKr9iztF8tie1FRUVEItCt-IZbIILQqKcgK2sI,13142
-locoformer-0.0.7.dist-info/METADATA,sha256=PZ_phKV3t4Bha0GnUB5HPmE9w8A5fvNevsuN532Ls3s,3193
-locoformer-0.0.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-locoformer-0.0.7.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-locoformer-0.0.7.dist-info/RECORD,,
{locoformer-0.0.7.dist-info → locoformer-0.0.29.dist-info}/WHEEL
File without changes

{locoformer-0.0.7.dist-info → locoformer-0.0.29.dist-info}/licenses/LICENSE
File without changes