evolutionary-policy-optimization 0.1.6-py3-none-any.whl → 0.1.8-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- evolutionary_policy_optimization/epo.py +67 -8
- {evolutionary_policy_optimization-0.1.6.dist-info → evolutionary_policy_optimization-0.1.8.dist-info}/METADATA +11 -1
- {evolutionary_policy_optimization-0.1.6.dist-info → evolutionary_policy_optimization-0.1.8.dist-info}/RECORD +5 -5
- {evolutionary_policy_optimization-0.1.6.dist-info → evolutionary_policy_optimization-0.1.8.dist-info}/WHEEL +0 -0
- {evolutionary_policy_optimization-0.1.6.dist-info → evolutionary_policy_optimization-0.1.8.dist-info}/licenses/LICENSE +0 -0
evolutionary_policy_optimization/epo.py

@@ -207,13 +207,60 @@ def mutation(
 ):
     mutations = torch.randn_like(latents)

-    mutated = latents + mutations * mutation_strength
+    if is_tensor(mutation_strength):
+        mutations = einx.multiply('b, b ...', mutation_strength, mutations)
+    else:
+        mutations *= mutation_strength
+
+    mutated = latents + mutations

     if not l2norm_output:
         return mutated

     return l2norm(mutated)

+# drawing mutation strengths from power law distribution
+# proposed by https://arxiv.org/abs/1703.03334
+
+class PowerLawDist(Module):
+    def __init__(
+        self,
+        values: Tensor | list[float] | None = None,
+        bins = None,
+        beta = 1.5,
+    ):
+        super().__init__()
+        assert beta > 1.
+
+        assert exists(bins) or exists(values)
+
+        if exists(values):
+            if not is_tensor(values):
+                values = tensor(values)
+
+            assert values.ndim == 1
+            bins = values.shape[0]
+
+        self.beta = beta
+
+        cdf = torch.linspace(1, bins, bins).pow(-beta).cumsum(dim = -1)
+        cdf = cdf / cdf[-1]
+
+        self.register_buffer('cdf', cdf)
+        self.register_buffer('values', values)
+
+    def forward(self, shape):
+        device = self.cdf.device
+
+        uniform = torch.rand(shape, device = device)
+
+        sampled = torch.searchsorted(self.cdf, uniform)
+
+        if not exists(self.values):
+            return sampled
+
+        return self.values[sampled]
+
 # simple MLP networks, but with latent variables
 # the latent variables are the "genes" with the rest of the network as the scaffold for "gene expression" - as suggested in the paper

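The new `PowerLawDist` samples a mutation strength per latent by inverting a discretized power-law CDF: bin `k` carries mass proportional to `k^-beta`, so small strengths dominate while large ones appear occasionally. A minimal standalone sketch of that sampling step (only the math comes from the hunk above; the variable names here are illustrative):

```python
import torch

beta = 1.5
values = torch.linspace(1, 5, 10)   # candidate mutation strengths
bins = values.shape[0]

# P(bin k) ~ k^-beta for k = 1..bins, normalized into a CDF
cdf = torch.linspace(1, bins, bins).pow(-beta).cumsum(dim = -1)
cdf = cdf / cdf[-1]

# invert the discrete CDF with one uniform draw per population member
uniform = torch.rand((8,))
indices = torch.searchsorted(cdf, uniform).clamp(max = bins - 1)  # clamp guards a float rounding edge case

print(values[indices])  # mostly small strengths, occasionally large - the heavy tail of the fast GA
```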
@@ -369,7 +416,6 @@ class Critic(Module):
         hl_gauss_loss = self.to_pred.hl_gauss_loss

         self.maybe_bins_to_value = hl_gauss_loss if not use_regression else identity
-        self.maybe_value_to_bins = hl_gauss_loss.transform_to_logprobs if not use_regression else identity
         self.loss_fn = hl_gauss_loss if not use_regression else F.mse_loss

     def forward_for_loss(
@@ -386,7 +432,7 @@ class Critic(Module):

         clipped_value = old_values + (value - old_values).clamp(1. - eps_clip, 1. + eps_clip)

-        loss = self.loss_fn(
+        loss = self.loss_fn(logits, target, reduction = 'none')
         clipped_loss = self.loss_fn(clipped_value, target, reduction = 'none')

         return torch.max(loss, clipped_loss).mean()
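The two `Critic` hunks fix the clipped value loss: both an unclipped and a clipped prediction are scored against the target, and the elementwise maximum is kept as a PPO-style pessimistic bound. A sketch of that combination for the plain regression branch (`loss_fn = F.mse_loss`); the HL-Gauss branch works the same way, scoring `logits` instead:

```python
import torch
import torch.nn.functional as F

value      = torch.randn(4)   # current critic prediction
old_values = torch.randn(4)   # prediction recorded at rollout time
target     = torch.randn(4)   # value target
eps_clip   = 0.4              # illustrative clip width

# keep the new prediction within a band around the old one, as in the hunk above
clipped_value = old_values + (value - old_values).clamp(1. - eps_clip, 1. + eps_clip)

loss         = F.mse_loss(value, target, reduction = 'none')
clipped_loss = F.mse_loss(clipped_value, target, reduction = 'none')

# pessimistic combination: take the worse loss per element, then average
critic_loss = torch.max(loss, clipped_loss).mean()
```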
@@ -442,6 +488,8 @@ class LatentGenePool(Module):
         frac_elitism = 0.1,                    # frac of population to preserve from being noised
         frac_migrate = 0.1,                    # frac of population, excluding elites, that migrate between islands randomly. will use a designated set migration pattern (since for some reason using random it seems to be worse for me)
         mutation_strength = 1.,                # factor to multiply to gaussian noise as mutation to latents
+        fast_genetic_algorithm = False,
+        fast_ga_values = torch.linspace(1, 5, 10),
         should_run_genetic_algorithm: Module | None = None, # eq (3) in paper
         default_should_run_ga_gamma = 1.5,
         migrate_every = 100,                   # how many steps before a migration between islands
@@ -489,6 +537,8 @@ class LatentGenePool(Module):
         self.crossover_random = crossover_random

         self.mutation_strength = mutation_strength
+        self.mutation_strength_sampler = PowerLawDist(fast_ga_values) if fast_genetic_algorithm else None
+
         self.num_elites = int(frac_elitism * latents_per_island)
         self.has_elites = self.num_elites > 0

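Wiring of the two new constructor flags: when `fast_genetic_algorithm` is on, the pool holds a `PowerLawDist` over `fast_ga_values` and draws one strength per latent; otherwise the fixed scalar is used. A hypothetical illustration, assuming the `PowerLawDist` class from the first hunk is in scope:

```python
import torch

fast_genetic_algorithm = True
fast_ga_values = torch.linspace(1, 5, 10)   # default candidate strengths from the diff
mutation_strength = 1.                      # scalar fallback

sampler = PowerLawDist(fast_ga_values) if fast_genetic_algorithm else None

population = 16
if sampler is not None:
    strengths = sampler((population,))      # tensor: one heavy-tailed strength per latent
else:
    strengths = mutation_strength           # single scalar shared by the whole pool
```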
@@ -657,9 +707,14 @@ class LatentGenePool(Module):
         if self.has_elites:
             genes, elites = genes[:, :-self.num_elites], genes[:, -self.num_elites:]

-        # 5. mutate with gaussian noise
+        # 5. mutate with gaussian noise
+
+        if exists(self.mutation_strength_sampler):
+            mutation_strength = self.mutation_strength_sampler(genes.shape[:1])
+        else:
+            mutation_strength = self.mutation_strength

-        genes = mutation(genes, mutation_strength =
+        genes = mutation(genes, mutation_strength = mutation_strength)

         # 6. maybe migration

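With a tensor of strengths, `mutation` now scales each population member's gaussian noise individually via `einx.multiply('b, b ...')`. A plain-torch equivalent of that broadcast, for illustration only:

```python
import torch

genes = torch.randn(16, 32)          # (population, dim_latent)
strengths = torch.rand(16) * 4 + 1   # one mutation strength per member

noise = torch.randn_like(genes)

# 'b, b ...' -> reshape strengths to (16, 1) so they broadcast over the latent dims
noise = strengths.view(-1, *((1,) * (genes.ndim - 1))) * noise

mutated = genes + noise
```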
@@ -844,7 +899,11 @@ class Agent(Module):

         dummy = tensor(0)

+        self.clip_grad_norm_ = nn.utils.clip_grad_norm_
+
         if wrap_with_accelerate:
+            self.clip_grad_norm_ = self.accelerate.clip_grad_norm_
+
             (
                 self.actor,
                 self.critic,
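`Agent` now binds the clipping function once at construction: plain `nn.utils.clip_grad_norm_` by default, replaced by `accelerate`'s method (same `(parameters, max_norm)` call shape) when wrapping is enabled, so the three update sites below all call a single name. A minimal hypothetical sketch of the pattern:

```python
import torch
from torch import nn

class TinyAgent:
    def __init__(self, accelerator = None):
        self.net = nn.Linear(4, 4)

        # default: torch's in-place gradient norm clipping
        self.clip_grad_norm_ = nn.utils.clip_grad_norm_

        if accelerator is not None:
            # accelerate's version also unscales gradients under mixed precision
            self.clip_grad_norm_ = accelerator.clip_grad_norm_

    def update(self, loss, max_grad_norm = 0.5):
        loss.backward()
        self.clip_grad_norm_(self.net.parameters(), max_grad_norm)
```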
@@ -1071,7 +1130,7 @@ class Agent(Module):
             actor_loss.backward()

             if exists(self.has_grad_clip):
-                self.
+                self.clip_grad_norm_(self.actor.parameters(), self.max_grad_norm)

             self.actor_optim.step()
             self.actor_optim.zero_grad()
@@ -1089,7 +1148,7 @@ class Agent(Module):
             critic_loss.backward()

             if exists(self.has_grad_clip):
-                self.
+                self.clip_grad_norm_(self.critic.parameters(), self.max_grad_norm)

             self.critic_optim.step()
             self.critic_optim.zero_grad()
@@ -1113,7 +1172,7 @@ class Agent(Module):
             (diversity_loss * self.diversity_aux_loss_weight).backward()

             if exists(self.has_grad_clip):
-                self.
+                self.clip_grad_norm_(self.latent_gene_pool.parameters(), self.max_grad_norm)

             self.latent_optim.step()
             self.latent_optim.zero_grad()
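The actor, critic, and latent updates above share one backward/clip/step pattern; a generic sketch (names illustrative, not the package's API):

```python
def optim_step(loss, parameters, optim, clip_grad_norm_, max_grad_norm = None):
    loss.backward()

    if max_grad_norm is not None:
        clip_grad_norm_(parameters, max_grad_norm)

    optim.step()
    optim.zero_grad()
```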
{evolutionary_policy_optimization-0.1.6.dist-info → evolutionary_policy_optimization-0.1.8.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.1.6
+Version: 0.1.8
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -196,4 +196,14 @@ agent.load('./agent.pt')
 }
 ```

+```bibtex
+@article{Doerr2017FastGA,
+    title   = {Fast genetic algorithms},
+    author  = {Benjamin Doerr and Huu Phuoc Le and R{\'e}gis Makhmara and Ta Duy Nguyen},
+    journal = {Proceedings of the Genetic and Evolutionary Computation Conference},
+    year    = {2017},
+    url     = {https://api.semanticscholar.org/CorpusID:16196841}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
{evolutionary_policy_optimization-0.1.6.dist-info → evolutionary_policy_optimization-0.1.8.dist-info}/RECORD

@@ -1,10 +1,10 @@
 evolutionary_policy_optimization/__init__.py,sha256=NyiYDYU7DlpmOTM7xiBQET3r1WwX0ebrgMCBLSQrW3c,288
 evolutionary_policy_optimization/distributed.py,sha256=7KgZdeS_wxBHo_du9XZFB1Cu318J-Bp66Xdr6Log_20,2423
 evolutionary_policy_optimization/env_wrappers.py,sha256=bDL06o9_b1iW6k3fw2xifnOnYlzs643tdW6Yv2gsIdw,803
-evolutionary_policy_optimization/epo.py,sha256=
+evolutionary_policy_optimization/epo.py,sha256=Ua0o4Xe-Z6gy76-nbB1yKndePGurSwW_otXXrrJWhgc,44835
 evolutionary_policy_optimization/experimental.py,sha256=-IgqjJ_Wk_CMB1y9YYWpoYqTG9GZHAS6kbRdTluVevg,1563
 evolutionary_policy_optimization/mock_env.py,sha256=TLyyRm6tOD0Kdn9QqJJQriaSnsR-YmNQHo4OohmZFG4,1410
-evolutionary_policy_optimization-0.1.6.dist-info/METADATA,sha256=
-evolutionary_policy_optimization-0.1.6.dist-info/WHEEL,sha256=
-evolutionary_policy_optimization-0.1.6.dist-info/licenses/LICENSE,sha256=
-evolutionary_policy_optimization-0.1.6.dist-info/RECORD,,
+evolutionary_policy_optimization-0.1.8.dist-info/METADATA,sha256=tEVMyHVZjknJMQ0QEIVJhMj6QTDYW5Uqcq6nqa7LHpo,7088
+evolutionary_policy_optimization-0.1.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.1.8.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.1.8.dist-info/RECORD,,