evolutionary-policy-optimization 0.0.52__tar.gz → 0.0.54__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/PKG-INFO +1 -1
- evolutionary_policy_optimization-0.0.54/evolutionary_policy_optimization/distributed.py +82 -0
- {evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/evolutionary_policy_optimization/epo.py +60 -5
- {evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/pyproject.toml +1 -1
- {evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/.github/workflows/python-publish.yml +0 -0
- {evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/.github/workflows/test.yml +0 -0
- {evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/.gitignore +0 -0
- {evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/LICENSE +0 -0
- {evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/README.md +0 -0
- {evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/evolutionary_policy_optimization/__init__.py +0 -0
- {evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/evolutionary_policy_optimization/experimental.py +0 -0
- {evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/evolutionary_policy_optimization/mock_env.py +0 -0
- {evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/requirements.txt +0 -0
- {evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/tests/test_epo.py +0 -0
{evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.52
+Version: 0.0.54
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
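Aside from the version bump above, PKG-INFO is unchanged, so pinning `evolutionary-policy-optimization==0.0.54` (or `==0.0.52` for the previous release) in a requirements file selects exactly the versions being compared here.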
evolutionary_policy_optimization-0.0.54/evolutionary_policy_optimization/distributed.py
ADDED
@@ -0,0 +1,82 @@
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Function

import torch.distributed as dist

import einx
from einops import rearrange

def exists(val):
    return val is not None

def default(val, d):
    return val if exists(val) else d

def divisible_by(num, den):
    return (num % den) == 0

def pad_dim_to(t, length, dim = 0):
    pad_length = length - t.shape[dim]
    zero_pairs = (-dim - 1) if dim < 0 else (t.ndim - dim - 1)
    return F.pad(t, (*((0, 0) * zero_pairs), 0, pad_length))

def is_distributed():
    return dist.is_initialized() and dist.get_world_size() > 1

def maybe_sync_seed(device, max_size = int(1e6)):
    rand_int = torch.randint(0, max_size, (), device = device)

    if is_distributed():
        dist.all_reduce(rand_int)

    return rand_int.item()

def maybe_barrier():
    if not is_distributed():
        return

    dist.barrier()

def all_gather_same_dim(t):
    t = t.contiguous()
    world_size = dist.get_world_size()
    gathered_tensors = [torch.empty_like(t, device = t.device, dtype = t.dtype) for i in range(world_size)]
    dist.all_gather(gathered_tensors, t)
    return gathered_tensors

def gather_sizes(t, *, dim):
    size = torch.tensor(t.shape[dim], device = t.device, dtype = torch.long)
    sizes = all_gather_same_dim(size)
    return torch.stack(sizes)

def has_only_one_value(t):
    return (t == t[0]).all()

def all_gather_variable_dim(t, dim = 0, sizes = None):
    device, rank, world_size = t.device, dist.get_rank(), dist.get_world_size()

    if not exists(sizes):
        sizes = gather_sizes(t, dim = dim)

    if has_only_one_value(sizes):
        gathered_tensors = all_gather_same_dim(t)
        gathered_tensors = torch.cat(gathered_tensors, dim = dim)
        return gathered_tensors, sizes

    max_size = sizes.amax().item()

    padded_t = pad_dim_to(t, max_size, dim = dim)
    gathered_tensors = all_gather_same_dim(padded_t)

    gathered_tensors = torch.cat(gathered_tensors, dim = dim)
    seq = torch.arange(max_size, device = device)

    mask = einx.less('j i -> (i j)', seq, sizes)
    seq = torch.arange(mask.shape[-1], device = device)
    indices = seq[mask]

    gathered_tensors = gathered_tensors.index_select(dim, indices)

    return gathered_tensors, sizes
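For orientation, here is a minimal usage sketch of the new helpers; it is not part of the package, and the script name, the two-process `torchrun` launch, the `gloo` backend, and the tensor shapes are all assumptions for illustration:

```python
# demo.py — hypothetical sketch; launch with: torchrun --nproc_per_node=2 demo.py
import torch
import torch.distributed as dist

from evolutionary_policy_optimization.distributed import (
    maybe_sync_seed,
    maybe_barrier,
    all_gather_variable_dim
)

dist.init_process_group('gloo')  # cpu-only backend, enough for the sketch
rank = dist.get_rank()

# each rank contributes a different number of rows along dim 0
local = torch.randn(rank + 1, 4)

# pads the ragged tensors to a common length, all-gathers, then strips the padding back out
gathered, sizes = all_gather_variable_dim(local, dim = 0)
assert gathered.shape[0] == sizes.sum().item()

# every rank ends up with the same random seed (used below for environment resets)
seed = maybe_sync_seed(device = local.device)

maybe_barrier()  # no-op when not running distributed
dist.destroy_process_group()
```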
{evolutionary_policy_optimization-0.0.52 → evolutionary_policy_optimization-0.0.54}/evolutionary_policy_optimization/epo.py
RENAMED
@@ -8,14 +8,22 @@ from random import randrange
 import torch
 from torch import nn, cat, stack, is_tensor, tensor, Tensor
 import torch.nn.functional as F
+import torch.distributed as dist
 from torch.nn import Linear, Module, ModuleList
 from torch.utils.data import TensorDataset, DataLoader
-from torch.utils._pytree import tree_map
+from torch.utils._pytree import tree_map

 import einx
 from einops import rearrange, repeat, einsum, pack
 from einops.layers.torch import Rearrange

+from evolutionary_policy_optimization.distributed import (
+    is_distributed,
+    maybe_sync_seed,
+    all_gather_variable_dim,
+    maybe_barrier
+)
+
 from assoc_scan import AssocScan

 from adam_atan2_pytorch import AdoptAtan2
@@ -360,10 +368,11 @@ class LatentGenePool(Module):
         frac_natural_selected = 0.25,  # number of least fit genes to remove from the pool
         frac_elitism = 0.1,            # frac of population to preserve from being noised
         frac_migrate = 0.1,            # frac of population, excluding elites, that migrate between islands randomly. will use a designated set migration pattern (since for some reason using random it seems to be worse for me)
-        migrate_every = 100,           # how many steps before a migration between islands
         mutation_strength = 1.,        # factor to multiply to gaussian noise as mutation to latents
         should_run_genetic_algorithm: Module | None = None, # eq (3) in paper
         default_should_run_ga_gamma = 1.5,
+        migrate_every = 100,           # how many steps before a migration between islands
+        apply_genetic_algorithm_every = 2 # how many steps before crossover + mutation happens for genes
     ):
         super().__init__()

@@ -413,7 +422,10 @@ class LatentGenePool(Module):
         self.should_run_genetic_algorithm = should_run_genetic_algorithm

         self.can_migrate = num_islands > 1
+
         self.migrate_every = migrate_every
+        self.apply_genetic_algorithm_every = apply_genetic_algorithm_every
+
         self.register_buffer('step', tensor(1))

     def get_distance(self):
@@ -483,6 +495,10 @@ class LatentGenePool(Module):
     ):
         device = self.latents.device

+        if not divisible_by(self.step.item(), self.apply_genetic_algorithm_every):
+            self.advance_step_()
+            return
+
         """
         i - islands
         p - population
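To make the effect of the new gate concrete, here is a tiny standalone sketch (names mirror the diff; a plain `step` integer stands in for the module's `step` buffer and `advance_step_`): with the default `apply_genetic_algorithm_every = 2` and the step buffer initialised to 1, crossover and mutation only run on even steps, while the counter always advances.

```python
# hypothetical illustration of the gating logic above
def divisible_by(num, den):
    return (num % den) == 0

step = 1                             # mirrors register_buffer('step', tensor(1))
apply_genetic_algorithm_every = 2

for _ in range(6):
    if not divisible_by(step, apply_genetic_algorithm_every):
        step += 1                    # stands in for self.advance_step_()
        continue

    print(f'crossover + mutation at step {step}')  # prints at steps 2, 4, 6
    step += 1
```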
@@ -814,6 +830,15 @@ class Agent(Module):

         fitness_scores = self.get_fitness_scores(cumulative_rewards, memories)

+        # stack memories
+
+        memories = map(stack, zip(*memories))
+
+        maybe_barrier()
+
+        if is_distributed():
+            memories = map(partial(all_gather_variable_dim, dim = 0), memories)
+
         (
             episode_ids,
             states,
@@ -823,7 +848,7 @@ class Agent(Module):
             rewards,
             values,
             dones
-        ) =
+        ) = memories

         advantages = self.calc_gae(
             rewards[:-1],
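For intuition about the memory handling above, a small sketch (the three fields and their shapes are made up; the real memories carry more fields, as the destructuring above shows) of what `map(stack, zip(*memories))` does before the optional all-gather: it transposes a list of per-timestep tuples into one stacked tensor per field.

```python
import torch
from torch import stack

# three fake timesteps, each a (state, reward, done) tuple
memories = [
    (torch.randn(4), torch.tensor(1.0), torch.tensor(False)),
    (torch.randn(4), torch.tensor(0.5), torch.tensor(False)),
    (torch.randn(4), torch.tensor(2.0), torch.tensor(True)),
]

states, rewards, dones = map(stack, zip(*memories))

print(states.shape)   # torch.Size([3, 4])
print(rewards.shape)  # torch.Size([3])
print(dones.shape)    # torch.Size([3])
```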
@@ -1027,6 +1052,32 @@ class EPO(Module):
         self.episodes_per_latent = episodes_per_latent
         self.max_episode_length = max_episode_length

+        self.register_buffer('dummy', tensor(0))
+
+    @property
+    def device(self):
+        return self.dummy.device
+
+    def latents_for_machine(self):
+        num_latents = self.num_latents
+
+        if not is_distributed():
+            return list(range(self.num_latents))
+
+        world_size, rank = dist.get_world_size(), dist.get_rank()
+        assert num_latents >= world_size, 'number of latents must be greater than world size for now'
+        assert rank < world_size
+
+        pad_id = -1
+        num_latents_rounded_up = ceil(num_latents / world_size) * world_size
+        latent_ids = torch.arange(num_latents_rounded_up)
+        latent_ids[latent_ids >= num_latents] = pad_id
+
+        latent_ids = rearrange(latent_ids, '(world latents) -> world latents', world = world_size)
+        out = latent_ids[rank]
+
+        return out[out != pad_id].tolist()
+
     @torch.no_grad()
     def forward(
         self,
@@ -1042,19 +1093,23 @@ class EPO(Module):

         cumulative_rewards = torch.zeros((self.num_latents))

+        latent_ids = self.latents_for_machine()
+
         for episode_id in tqdm(range(self.episodes_per_latent), desc = 'episode'):

+            maybe_barrier()
+
             # maybe fix seed for environment across all latents

             env_reset_kwargs = dict()

             if fix_seed_across_latents:
-                seed =
+                seed = maybe_sync_seed(device = self.device)
                 env_reset_kwargs = dict(seed = seed)

             # for each latent (on a single machine for now)

-            for latent_id in tqdm(
+            for latent_id in tqdm(latent_ids, desc = 'latent'):
                 time = 0

                 # initial state
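The per-machine latent split consumed by the new `for latent_id in tqdm(latent_ids, ...)` loop can be checked in isolation; a sketch of the arithmetic from `latents_for_machine`, assuming 10 latents spread over a world size of 4 (numbers chosen only for illustration):

```python
from math import ceil

import torch
from einops import rearrange

num_latents, world_size = 10, 4
pad_id = -1

# round the latent count up to a multiple of world size, mark the padding ids
num_latents_rounded_up = ceil(num_latents / world_size) * world_size  # 12
latent_ids = torch.arange(num_latents_rounded_up)
latent_ids[latent_ids >= num_latents] = pad_id

# one row of latent ids per rank, padding stripped before use
latent_ids = rearrange(latent_ids, '(world latents) -> world latents', world = world_size)

for rank in range(world_size):
    out = latent_ids[rank]
    print(rank, out[out != pad_id].tolist())

# 0 [0, 1, 2]
# 1 [3, 4, 5]
# 2 [6, 7, 8]
# 3 [9]
```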
All remaining files are unchanged between 0.0.52 and 0.0.54: .github/workflows/python-publish.yml, .github/workflows/test.yml, .gitignore, LICENSE, README.md, evolutionary_policy_optimization/__init__.py, evolutionary_policy_optimization/experimental.py, evolutionary_policy_optimization/mock_env.py, requirements.txt, and tests/test_epo.py.