evolutionary-policy-optimization 0.0.53__tar.gz → 0.0.55__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/PKG-INFO +1 -1
- evolutionary_policy_optimization-0.0.55/evolutionary_policy_optimization/distributed.py +88 -0
- {evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/evolutionary_policy_optimization/epo.py +52 -5
- {evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/pyproject.toml +1 -1
- {evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/.github/workflows/python-publish.yml +0 -0
- {evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/.github/workflows/test.yml +0 -0
- {evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/.gitignore +0 -0
- {evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/LICENSE +0 -0
- {evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/README.md +0 -0
- {evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/evolutionary_policy_optimization/__init__.py +0 -0
- {evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/evolutionary_policy_optimization/experimental.py +0 -0
- {evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/evolutionary_policy_optimization/mock_env.py +0 -0
- {evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/requirements.txt +0 -0
- {evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/tests/test_epo.py +0 -0
{evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.53
+Version: 0.0.55
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
evolutionary_policy_optimization-0.0.55/evolutionary_policy_optimization/distributed.py
ADDED
@@ -0,0 +1,88 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.autograd import Function
+
+import torch.distributed as dist
+
+import einx
+from einops import rearrange
+
+def exists(val):
+    return val is not None
+
+def default(val, d):
+    return val if exists(val) else d
+
+def divisible_by(num, den):
+    return (num % den) == 0
+
+def pad_dim_to(t, length, dim = 0):
+    pad_length = length - t.shape[dim]
+    zero_pairs = (-dim - 1) if dim < 0 else (t.ndim - dim - 1)
+    return F.pad(t, (*((0, 0) * zero_pairs), 0, pad_length))
+
+def is_distributed():
+    return dist.is_initialized() and dist.get_world_size() > 1
+
+def get_world_and_rank():
+    if not is_distributed():
+        return 1, 0
+
+    return dist.get_world_size(), dist.get_rank()
+
+def maybe_sync_seed(device, max_size = int(1e6)):
+    rand_int = torch.randint(0, max_size, (), device = device)
+
+    if is_distributed():
+        dist.all_reduce(rand_int)
+
+    return rand_int.item()
+
+def maybe_barrier():
+    if not is_distributed():
+        return
+
+    dist.barrier()
+
+def all_gather_same_dim(t):
+    t = t.contiguous()
+    world_size = dist.get_world_size()
+    gathered_tensors = [torch.empty_like(t, device = t.device, dtype = t.dtype) for i in range(world_size)]
+    dist.all_gather(gathered_tensors, t)
+    return gathered_tensors
+
+def gather_sizes(t, *, dim):
+    size = torch.tensor(t.shape[dim], device = t.device, dtype = torch.long)
+    sizes = all_gather_same_dim(size)
+    return torch.stack(sizes)
+
+def has_only_one_value(t):
+    return (t == t[0]).all()
+
+def all_gather_variable_dim(t, dim = 0, sizes = None):
+    device, rank, world_size = t.device, dist.get_rank(), dist.get_world_size()
+
+    if not exists(sizes):
+        sizes = gather_sizes(t, dim = dim)
+
+    if has_only_one_value(sizes):
+        gathered_tensors = all_gather_same_dim(t)
+        gathered_tensors = torch.cat(gathered_tensors, dim = dim)
+        return gathered_tensors, sizes
+
+    max_size = sizes.amax().item()
+
+    padded_t = pad_dim_to(t, max_size, dim = dim)
+    gathered_tensors = all_gather_same_dim(padded_t)
+
+    gathered_tensors = torch.cat(gathered_tensors, dim = dim)
+    seq = torch.arange(max_size, device = device)
+
+    mask = einx.less('j i -> (i j)', seq, sizes)
+    seq = torch.arange(mask.shape[-1], device = device)
+    indices = seq[mask]
+
+    gathered_tensors = gathered_tensors.index_select(dim, indices)
+
+    return gathered_tensors, sizes
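The new `distributed.py` module is a thin wrapper around `torch.distributed`: the `maybe_*` helpers fall back to single-process defaults when no process group is initialized, while the `all_gather_*` helpers require one. The following is a minimal sketch, not part of the package: a hypothetical demo script (script name, launch command, and gloo/CPU backend are all assumptions) showing how the helpers behave across two ranks, assuming version 0.0.55 is installed.

```python
# sketch_distributed_helpers.py -- hypothetical demo script (not part of the package)
# run as: torchrun --nproc_per_node=2 sketch_distributed_helpers.py
# assumes evolutionary-policy-optimization >= 0.0.55 and the CPU-only gloo backend

import torch
import torch.distributed as dist

from evolutionary_policy_optimization.distributed import (
    get_world_and_rank,
    maybe_sync_seed,
    all_gather_variable_dim,
    maybe_barrier
)

dist.init_process_group(backend = 'gloo')

world_size, rank = get_world_and_rank()

# each rank draws a random int locally; the all-reduce sums them, so every rank ends up with the same seed
seed = maybe_sync_seed(device = torch.device('cpu'))
print(f'rank {rank}/{world_size}: shared seed {seed}')

# each rank contributes a different number of rows (rank + 1);
# the helper pads to the max size, all-gathers, then strips the padding rows
local = torch.full((rank + 1, 3), float(rank))
gathered, sizes = all_gather_variable_dim(local, dim = 0)

print(f'rank {rank}: gathered shape {tuple(gathered.shape)}, sizes {sizes.tolist()}')  # (3, 3), [1, 2]

maybe_barrier()
dist.destroy_process_group()
```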
{evolutionary_policy_optimization-0.0.53 → evolutionary_policy_optimization-0.0.55}/evolutionary_policy_optimization/epo.py
RENAMED
@@ -1,21 +1,31 @@
 from __future__ import annotations
 
-from functools import partial, wraps
 from pathlib import Path
+from math import ceil
+from functools import partial, wraps
 from collections import namedtuple
 from random import randrange
 
 import torch
 from torch import nn, cat, stack, is_tensor, tensor, Tensor
 import torch.nn.functional as F
+import torch.distributed as dist
 from torch.nn import Linear, Module, ModuleList
 from torch.utils.data import TensorDataset, DataLoader
-from torch.utils._pytree import tree_map
+from torch.utils._pytree import tree_map
 
 import einx
 from einops import rearrange, repeat, einsum, pack
 from einops.layers.torch import Rearrange
 
+from evolutionary_policy_optimization.distributed import (
+    is_distributed,
+    get_world_and_rank,
+    maybe_sync_seed,
+    all_gather_variable_dim,
+    maybe_barrier
+)
+
 from assoc_scan import AssocScan
 
 from adam_atan2_pytorch import AdoptAtan2
@@ -822,6 +832,15 @@ class Agent(Module):
 
         fitness_scores = self.get_fitness_scores(cumulative_rewards, memories)
 
+        # stack memories
+
+        memories = map(stack, zip(*memories))
+
+        maybe_barrier()
+
+        if is_distributed():
+            memories = map(partial(all_gather_variable_dim, dim = 0), memories)
+
         (
             episode_ids,
             states,
@@ -831,7 +850,7 @@ class Agent(Module):
             rewards,
             values,
             dones
-        ) =
+        ) = memories
 
         advantages = self.calc_gae(
             rewards[:-1],
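In 0.0.55 the per-timestep memory tuples are stacked before the unpacking site, so each field can be all-gathered across ranks first. Below is a standalone illustration of what `map(stack, zip(*memories))` produces; the three fields and their shapes are made up for illustration, and the real tuple carries more fields, as the unpacking above shows.

```python
# standalone illustration of `memories = map(stack, zip(*memories))`;
# the field names/shapes here are hypothetical, the real memory tuple has more fields
import torch
from torch import stack

memories = [
    (torch.tensor(0), torch.randn(4), torch.tensor(1.0)),  # step 0: (episode_id, state, reward)
    (torch.tensor(0), torch.randn(4), torch.tensor(0.5)),  # step 1
]

# zip(*memories) transposes the list of per-step tuples into one sequence per field,
# then stack turns each sequence into a single tensor along a new leading (time) dim
episode_ids, states, rewards = map(stack, zip(*memories))

print(episode_ids.shape, states.shape, rewards.shape)
# torch.Size([2]) torch.Size([2, 4]) torch.Size([2])
```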
@@ -1035,6 +1054,30 @@ class EPO(Module):
         self.episodes_per_latent = episodes_per_latent
         self.max_episode_length = max_episode_length
 
+        self.register_buffer('dummy', tensor(0))
+
+    @property
+    def device(self):
+        return self.dummy.device
+
+    def latents_for_machine(self):
+        num_latents = self.num_latents
+
+        world_size, rank = get_world_and_rank()
+
+        assert num_latents >= world_size, 'number of latents must be greater than world size for now'
+        assert rank < world_size
+
+        num_latents_per_machine = ceil(num_latents / world_size)
+
+        for i in range(num_latents_per_machine):
+            latent_id = rank * num_latents_per_machine + i
+
+            if latent_id >= num_latents:
+                continue
+
+            yield i
+
     @torch.no_grad()
     def forward(
         self,
@@ -1050,19 +1093,23 @@ class EPO(Module):
 
         cumulative_rewards = torch.zeros((self.num_latents))
 
+        latent_ids_gen = self.latents_for_machine()
+
         for episode_id in tqdm(range(self.episodes_per_latent), desc = 'episode'):
 
+            maybe_barrier()
+
             # maybe fix seed for environment across all latents
 
             env_reset_kwargs = dict()
 
             if fix_seed_across_latents:
-                seed =
+                seed = maybe_sync_seed(device = self.device)
                 env_reset_kwargs = dict(seed = seed)
 
             # for each latent (on a single machine for now)
 
-            for latent_id in tqdm(
+            for latent_id in tqdm(latent_ids_gen, desc = 'latent'):
                 time = 0
 
                 # initial state
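The new `latents_for_machine` generator block-partitions the latent population across ranks with `ceil(num_latents / world_size)`, yielding the loop index `i` and skipping positions past the end of the population, while `maybe_sync_seed` keeps the environment seed identical across ranks within each episode. Below is a standalone sketch of the partitioning logic; the 8-latent / 3-rank numbers are arbitrary illustration values.

```python
# standalone copy of the partitioning logic in EPO.latents_for_machine;
# num_latents = 8 and world_size = 3 are arbitrary illustration values
from math import ceil

def latents_for_machine(num_latents, world_size, rank):
    num_latents_per_machine = ceil(num_latents / world_size)

    for i in range(num_latents_per_machine):
        latent_id = rank * num_latents_per_machine + i

        # the last rank simply gets a shorter block when the split is uneven
        if latent_id >= num_latents:
            continue

        yield i  # note: the generator yields the local index i, mirroring the source

for rank in range(3):
    print(rank, list(latents_for_machine(8, 3, rank)))

# 0 [0, 1, 2]
# 1 [0, 1, 2]
# 2 [0, 1]
```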