evolutionary-policy-optimization 0.0.54__py3-none-any.whl → 0.0.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,12 @@ def pad_dim_to(t, length, dim = 0):
 def is_distributed():
     return dist.is_initialized() and dist.get_world_size() > 1
 
+def get_world_and_rank():
+    if not is_distributed():
+        return 1, 0
+
+    return dist.get_world_size(), dist.get_rank()
+
 def maybe_sync_seed(device, max_size = int(1e6)):
     rand_int = torch.randint(0, max_size, (), device = device)
 
@@ -1,7 +1,8 @@
 from __future__ import annotations
 
-from functools import partial, wraps
 from pathlib import Path
+from math import ceil
+from functools import partial, wraps
 from collections import namedtuple
 from random import randrange
 
@@ -19,6 +20,7 @@ from einops.layers.torch import Rearrange
 
 from evolutionary_policy_optimization.distributed import (
     is_distributed,
+    get_world_and_rank,
     maybe_sync_seed,
     all_gather_variable_dim,
     maybe_barrier
@@ -372,13 +374,16 @@ class LatentGenePool(Module):
         should_run_genetic_algorithm: Module | None = None, # eq (3) in paper
         default_should_run_ga_gamma = 1.5,
         migrate_every = 100, # how many steps before a migration between islands
-        apply_genetic_algorithm_every = 2 # how many steps before crossover + mutation happens for genes
+        apply_genetic_algorithm_every = 2, # how many steps before crossover + mutation happens for genes
+        init_latent_fn: Callable = None
     ):
         super().__init__()
 
         maybe_l2norm = l2norm if l2norm_latent else identity
 
-        latents = torch.randn(num_latents, dim_latent)
+        init_fn = default(init_latent_fn, torch.randn)
+
+        latents = init_fn((num_latents, dim_latent))
 
         if l2norm_latent:
             latents = maybe_l2norm(latents, dim = -1)
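`LatentGenePool` now takes an `init_latent_fn` callable; when it is not supplied, `default(init_latent_fn, torch.randn)` falls back to a standard normal init. Note that the initializer is called with the shape as a single tuple, `init_fn((num_latents, dim_latent))`, so a custom function must accept one shape tuple rather than unpacked dimensions. A standalone sketch of the pattern (the `make_latents` wrapper and the uniform initializer are illustrative, not package API):

```python
import torch

def default(value, fallback):
    # same convention as the library helper: use the fallback when value is None
    return value if value is not None else fallback

def make_latents(num_latents, dim_latent, init_latent_fn = None):
    # init_latent_fn receives the full shape as one tuple, matching init_fn((num_latents, dim_latent))
    init_fn = default(init_latent_fn, torch.randn)
    return init_fn((num_latents, dim_latent))

# default: gaussian latents
latents = make_latents(128, 32)

# custom: e.g. uniform in [-1, 1)
uniform_latents = make_latents(128, 32, init_latent_fn = lambda shape: torch.rand(shape) * 2 - 1)

print(latents.shape, uniform_latents.shape)  # torch.Size([128, 32]) twice
```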
@@ -1061,22 +1066,20 @@ class EPO(Module):
     def latents_for_machine(self):
         num_latents = self.num_latents
 
-        if not is_distributed():
-            return list(range(self.num_latents))
+        world_size, rank = get_world_and_rank()
 
-        world_size, rank = dist.get_world_size(), dist.get_rank()
         assert num_latents >= world_size, 'number of latents must be greater than world size for now'
         assert rank < world_size
 
-        pad_id = -1
-        num_latents_rounded_up = ceil(num_latents / world_size) * world_size
-        latent_ids = torch.arange(num_latents_rounded_up)
-        latent_ids[latent_ids >= num_latents] = pad_id
+        num_latents_per_machine = ceil(num_latents / world_size)
+
+        for i in range(num_latents_per_machine):
+            latent_id = rank * num_latents_per_machine + i
 
-        latent_ids = rearrange(latent_ids, '(world latents) -> world latents', world = world_size)
-        out = latent_ids[rank]
+            if latent_id >= num_latents:
+                continue
 
-        return out[out != pad_id].tolist()
+            yield i
 
     @torch.no_grad()
     def forward(
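`latents_for_machine` now derives its shard from `get_world_and_rank` and yields lazily instead of materializing a padded id tensor and rearranging it per rank. The partition is a simple ceil split: each rank covers `ceil(num_latents / world_size)` consecutive slots and skips slots that run past `num_latents`. A self-contained sketch of that partition; it yields the global `latent_id` for clarity, whereas the released method yields the local loop index `i`:

```python
from math import ceil

def latents_for_rank(num_latents, world_size, rank):
    # ceil split: each rank owns a contiguous block of at most ceil(num_latents / world_size) ids
    per_machine = ceil(num_latents / world_size)

    for i in range(per_machine):
        latent_id = rank * per_machine + i

        # the last rank's block may run past the total count; skip the overhang
        if latent_id >= num_latents:
            continue

        yield latent_id

# e.g. 10 latents over 4 ranks -> blocks of 3, 3, 3, 1
assert [list(latents_for_rank(10, 4, r)) for r in range(4)] == [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
```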
@@ -1093,7 +1096,7 @@ class EPO(Module):
 
         cumulative_rewards = torch.zeros((self.num_latents))
 
-        latent_ids = self.latents_for_machine()
+        latent_ids_gen = self.latents_for_machine()
 
         for episode_id in tqdm(range(self.episodes_per_latent), desc = 'episode'):
 
@@ -1109,7 +1112,7 @@ class EPO(Module):
 
             # for each latent (on a single machine for now)
 
-            for latent_id in tqdm(latent_ids, desc = 'latent'):
+            for latent_id in tqdm(latent_ids_gen, desc = 'latent'):
                 time = 0
 
                 # initial state
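The rename to `latent_ids_gen` makes explicit that `latents_for_machine` now returns a generator rather than a list, bound once before the episode loop. Generators in Python are single-pass: once iterated to completion, further iterations yield nothing unless the function is called again. The snippet below is a plain-Python illustration of that behavior, not code from the package:

```python
def ids():
    yield from range(3)

gen = ids()

print(list(gen))   # [0, 1, 2]
print(list(gen))   # [] -- the generator is already exhausted

# re-calling the function produces a fresh generator each time
print(list(ids())) # [0, 1, 2]
```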
@@ -39,9 +39,34 @@ def crossover_weights(w1, w2, transpose = False):
 
     return out
 
+def mutate_weight(
+    w,
+    transpose = False,
+    mutation_strength = 1.
+):
+
+    if transpose:
+        w = w.transpose(-1, -2)
+
+    rank = min(w2.shape[1:])
+    assert rank >= 2
+
+    u, s, v = torch.svd(w)
+    u = u + torch.randn_like(u) * mutation_strength
+    v = v + torch.randn_like(v) * mutation_strength
+
+    out = u @ torch.diag_embed(s) @ v.mT
+
+    if transpose:
+        out = out.transpose(-1, -2)
+
+    return out
+
 if __name__ == '__main__':
     w1 = torch.randn(32, 16)
     w2 = torch.randn(32, 16)
-    child = crossover_weights(w2, w2)
+
+    child = crossover_weights(w1, w2)
+    mutated_w1 = mutate_weight(w1)
 
     assert child.shape == w2.shape
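The new `mutate_weight` (in `experimental.py`, alongside `crossover_weights`) perturbs a weight matrix in SVD space: it keeps the singular values and adds Gaussian noise of scale `mutation_strength` to the singular vectors before recomposing the matrix. A self-contained sketch of the same idea, assuming the rank check is meant to be taken over `w` itself (the released body reads `w2.shape`, a name only defined in the `__main__` block):

```python
import torch

def mutate_weight_sketch(w, transpose = False, mutation_strength = 1.):
    # operate on the (out, in) orientation if the stored weight is transposed
    if transpose:
        w = w.transpose(-1, -2)

    # need at least a rank-2 factorization for the perturbation to be meaningful
    assert min(w.shape[-2:]) >= 2

    # decompose, jiggle the singular vectors, keep the spectrum, then recompose
    u, s, v = torch.svd(w)
    u = u + torch.randn_like(u) * mutation_strength
    v = v + torch.randn_like(v) * mutation_strength
    out = u @ torch.diag_embed(s) @ v.mT

    if transpose:
        out = out.transpose(-1, -2)

    return out

w = torch.randn(32, 16)
assert mutate_weight_sketch(w, mutation_strength = 0.1).shape == w.shape
```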
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.54
+Version: 0.0.56
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -0,0 +1,9 @@
+evolutionary_policy_optimization/__init__.py,sha256=0q0aBuFgWi06MLMD8FiHzBYQ3_W4LYWrwmCtF3u5H2A,201
+evolutionary_policy_optimization/distributed.py,sha256=7KgZdeS_wxBHo_du9XZFB1Cu318J-Bp66Xdr6Log_20,2423
+evolutionary_policy_optimization/epo.py,sha256=N7xmO3CRXeaJAy-2rysZg-DBvkZCZB2ySJT7Iq__r6w,35217
+evolutionary_policy_optimization/experimental.py,sha256=-IgqjJ_Wk_CMB1y9YYWpoYqTG9GZHAS6kbRdTluVevg,1563
+evolutionary_policy_optimization/mock_env.py,sha256=202KJ5g57wQvOzhGYzgHfBa7Y2do5uuDvl5kFg5o73g,934
+evolutionary_policy_optimization-0.0.56.dist-info/METADATA,sha256=o2-1eCh8MuQVd0SH0GiUBBIAcqdK7cceuiu093cuEA4,6213
+evolutionary_policy_optimization-0.0.56.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.56.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.56.dist-info/RECORD,,
@@ -1,9 +0,0 @@
-evolutionary_policy_optimization/__init__.py,sha256=0q0aBuFgWi06MLMD8FiHzBYQ3_W4LYWrwmCtF3u5H2A,201
-evolutionary_policy_optimization/distributed.py,sha256=lSSf_vB04NgVJFBh2n36cGuKZWgOpp8PnPpLDmHT6nU,2296
-evolutionary_policy_optimization/epo.py,sha256=5QJj_l4pihbSdRk1aZnE2dUyWlaqb_VjIKo6Azzksgs,35292
-evolutionary_policy_optimization/experimental.py,sha256=9FrJGviLESlYysHI3i83efT9g2ZB9ha4u3K9HXN98_w,1100
-evolutionary_policy_optimization/mock_env.py,sha256=202KJ5g57wQvOzhGYzgHfBa7Y2do5uuDvl5kFg5o73g,934
-evolutionary_policy_optimization-0.0.54.dist-info/METADATA,sha256=phQq8QaMT7TQQG2Sqz1BW4E1dln1HU10DMExwRvGGkg,6213
-evolutionary_policy_optimization-0.0.54.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.0.54.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.0.54.dist-info/RECORD,,