evolutionary-policy-optimization 0.0.54.tar.gz → 0.0.56.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/PKG-INFO +1 -1
- {evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/evolutionary_policy_optimization/distributed.py +6 -0
- {evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/evolutionary_policy_optimization/epo.py +18 -15
- {evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/evolutionary_policy_optimization/experimental.py +26 -1
- {evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/pyproject.toml +1 -1
- {evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/.github/workflows/python-publish.yml +0 -0
- {evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/.github/workflows/test.yml +0 -0
- {evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/.gitignore +0 -0
- {evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/LICENSE +0 -0
- {evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/README.md +0 -0
- {evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/evolutionary_policy_optimization/__init__.py +0 -0
- {evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/evolutionary_policy_optimization/mock_env.py +0 -0
- {evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/requirements.txt +0 -0
- {evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/tests/test_epo.py +0 -0
{evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.54
+Version: 0.0.56
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
{evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/evolutionary_policy_optimization/distributed.py
RENAMED
@@ -25,6 +25,12 @@ def pad_dim_to(t, length, dim = 0):
 def is_distributed():
     return dist.is_initialized() and dist.get_world_size() > 1
 
+def get_world_and_rank():
+    if not is_distributed():
+        return 1, 0
+
+    return dist.get_world_size(), dist.get_rank()
+
 def maybe_sync_seed(device, max_size = int(1e6)):
     rand_int = torch.randint(0, max_size, (), device = device)
 
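A quick sanity check of the new helper outside of a distributed launch (a minimal sketch; under a multi-process `torchrun` launch it would return the real world size and rank instead):

from evolutionary_policy_optimization.distributed import get_world_and_rank

# no process group has been initialized here, so is_distributed() is False
# and the helper falls back to a single-machine view of the world
world_size, rank = get_world_and_rank()

assert (world_size, rank) == (1, 0)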
{evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/evolutionary_policy_optimization/epo.py
RENAMED
@@ -1,7 +1,8 @@
 from __future__ import annotations
 
-from functools import partial, wraps
 from pathlib import Path
+from math import ceil
+from functools import partial, wraps
 from collections import namedtuple
 from random import randrange
 
@@ -19,6 +20,7 @@ from einops.layers.torch import Rearrange
 
 from evolutionary_policy_optimization.distributed import (
     is_distributed,
+    get_world_and_rank,
     maybe_sync_seed,
     all_gather_variable_dim,
     maybe_barrier
@@ -372,13 +374,16 @@ class LatentGenePool(Module):
         should_run_genetic_algorithm: Module | None = None, # eq (3) in paper
         default_should_run_ga_gamma = 1.5,
         migrate_every = 100, # how many steps before a migration between islands
-        apply_genetic_algorithm_every = 2
+        apply_genetic_algorithm_every = 2, # how many steps before crossover + mutation happens for genes
+        init_latent_fn: Callable = None
     ):
         super().__init__()
 
         maybe_l2norm = l2norm if l2norm_latent else identity
 
-        latents = torch.randn(num_latents, dim_latent)
+        init_fn = default(init_latent_fn, torch.randn)
+
+        latents = init_fn((num_latents, dim_latent))
 
         if l2norm_latent:
             latents = maybe_l2norm(latents, dim = -1)
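The new `init_latent_fn` hook replaces the hard-coded `torch.randn`: the pool calls `init_fn((num_latents, dim_latent))`, so any callable taking a shape tuple works. A hedged sketch of a custom init (assuming `LatentGenePool` is importable from the package root and the remaining constructor arguments keep their defaults):

import torch

from evolutionary_policy_optimization import LatentGenePool

# initialize latents uniformly in [-1, 1) instead of the default gaussian;
# the callable receives the full (num_latents, dim_latent) shape tuple
pool = LatentGenePool(
    num_latents = 128,
    dim_latent = 32,
    init_latent_fn = lambda shape: torch.rand(shape) * 2 - 1
)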
@@ -1061,22 +1066,20 @@ class EPO(Module):
     def latents_for_machine(self):
         num_latents = self.num_latents
 
-        if not is_distributed():
-            return list(range(self.num_latents))
+        world_size, rank = get_world_and_rank()
 
-        world_size, rank = dist.get_world_size(), dist.get_rank()
         assert num_latents >= world_size, 'number of latents must be greater than world size for now'
         assert rank < world_size
 
-
-
-
-
+        num_latents_per_machine = ceil(num_latents / world_size)
+
+        for i in range(num_latents_per_machine):
+            latent_id = rank * num_latents_per_machine + i
 
-
-
+            if latent_id >= num_latents:
+                continue
 
-
+            yield i
 
     @torch.no_grad()
     def forward(
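The replacement body shards latents contiguously: each rank takes a chunk of `ceil(num_latents / world_size)` ids, skipping any id that runs past the end on the last rank. A standalone sketch of that arithmetic (yielding the global `latent_id` to make the chunking visible, where the generator above yields the local index `i`):

from math import ceil

def shard(num_latents, world_size, rank):
    # contiguous chunk per rank; the last rank may get a short chunk
    per_machine = ceil(num_latents / world_size)

    for i in range(per_machine):
        latent_id = rank * per_machine + i

        if latent_id >= num_latents:
            continue

        yield latent_id

# 10 latents across 4 ranks -> chunks of 3, with only latent 9 left for rank 3
assert list(shard(10, 4, 0)) == [0, 1, 2]
assert list(shard(10, 4, 3)) == [9]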
@@ -1093,7 +1096,7 @@ class EPO(Module):
 
         cumulative_rewards = torch.zeros((self.num_latents))
 
-
+        latent_ids_gen = self.latents_for_machine()
 
         for episode_id in tqdm(range(self.episodes_per_latent), desc = 'episode'):
 
@@ -1109,7 +1112,7 @@ class EPO(Module):
 
             # for each latent (on a single machine for now)
 
-            for latent_id in tqdm(
+            for latent_id in tqdm(latent_ids_gen, desc = 'latent'):
                 time = 0
 
                 # initial state
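One subtlety with the switch from a list to a generator: `latent_ids_gen` is created once, before the episode loop, and a generator can only be consumed once, so episodes after the first would find it already exhausted. A minimal illustration of the underlying Python behavior:

def latent_ids():
    yield from range(3)

gen = latent_ids()

assert list(gen) == [0, 1, 2]  # the first pass consumes the generator
assert list(gen) == []         # later passes find it exhausted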
{evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/evolutionary_policy_optimization/experimental.py
RENAMED
@@ -39,9 +39,34 @@ def crossover_weights(w1, w2, transpose = False):
 
     return out
 
+def mutate_weight(
+    w,
+    transpose = False,
+    mutation_strength = 1.
+):
+
+    if transpose:
+        w = w.transpose(-1, -2)
+
+    rank = min(w.shape[1:])
+    assert rank >= 2
+
+    u, s, v = torch.svd(w)
+    u = u + torch.randn_like(u) * mutation_strength
+    v = v + torch.randn_like(v) * mutation_strength
+
+    out = u @ torch.diag_embed(s) @ v.mT
+
+    if transpose:
+        out = out.transpose(-1, -2)
+
+    return out
+
 if __name__ == '__main__':
     w1 = torch.randn(32, 16)
     w2 = torch.randn(32, 16)
-    child = crossover_weights(w1, w2)
+
+    child = crossover_weights(w1, w2)
+    mutated_w1 = mutate_weight(w1)
 
     assert child.shape == w2.shape
{evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/.github/workflows/python-publish.yml
RENAMED
File without changes

{evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/.github/workflows/test.yml
RENAMED
File without changes

{evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/.gitignore
RENAMED
File without changes

{evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/LICENSE
RENAMED
File without changes

{evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/README.md
RENAMED
File without changes

{evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/evolutionary_policy_optimization/__init__.py
RENAMED
File without changes

{evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/evolutionary_policy_optimization/mock_env.py
RENAMED
File without changes

{evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/requirements.txt
RENAMED
File without changes

{evolutionary_policy_optimization-0.0.54 → evolutionary_policy_optimization-0.0.56}/tests/test_epo.py
RENAMED
File without changes