evolutionary-policy-optimization 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -207,13 +207,60 @@ def mutation(
 ):
     mutations = torch.randn_like(latents)

-    mutated = latents + mutations * mutation_strength
+    if is_tensor(mutation_strength):
+        mutations = einx.multiply('b, b ...', mutation_strength, mutations)
+    else:
+        mutations *= mutation_strength
+
+    mutated = latents + mutations

     if not l2norm_output:
         return mutated

     return l2norm(mutated)

+# drawing mutation strengths from power law distribution
+# proposed by https://arxiv.org/abs/1703.03334
+
+class PowerLawDist(Module):
+    def __init__(
+        self,
+        values: Tensor | list[float] | None = None,
+        bins = None,
+        beta = 1.5,
+    ):
+        super().__init__()
+        assert beta > 1.
+
+        assert exists(bins) or exists(values)
+
+        if exists(values):
+            if not is_tensor(values):
+                values = tensor(values)
+
+            assert values.ndim == 1
+            bins = values.shape[0]
+
+        self.beta = beta
+
+        cdf = torch.linspace(1, bins, bins).pow(-beta).cumsum(dim = -1)
+        cdf = cdf / cdf[-1]
+
+        self.register_buffer('cdf', cdf)
+        self.register_buffer('values', values)
+
+    def forward(self, shape):
+        device = self.cdf.device
+
+        uniform = torch.rand(shape, device = device)
+
+        sampled = torch.searchsorted(self.cdf, uniform)
+
+        if not exists(self.values):
+            return sampled
+
+        return self.values[sampled]
+
 # simple MLP networks, but with latent variables
 # the latent variables are the "genes" with the rest of the network as the scaffold for "gene expression" - as suggested in the paper

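The new `PowerLawDist` module draws mutation strengths from a power law over a fixed set of candidate values, so most draws stay small while occasional large mutations remain possible (the fast genetic algorithms idea cited above). Below is a minimal standalone sketch of the same sampling scheme, assuming only `torch`; the function name and sizes are illustrative and not part of the package API.

```python
import torch

def sample_power_law(values: torch.Tensor, num_samples: int, beta: float = 1.5) -> torch.Tensor:
    # probability of drawing the i-th candidate (1-indexed) is proportional to i ** -beta,
    # so early (small) values are common and later (large) values are rare
    bins = values.shape[0]
    cdf = torch.arange(1, bins + 1, dtype = torch.float).pow(-beta).cumsum(dim = -1)
    cdf = cdf / cdf[-1]

    # invert the cdf with uniform samples, then index into the candidate values
    uniform = torch.rand(num_samples)
    indices = torch.searchsorted(cdf, uniform)
    return values[indices]

# one mutation strength per member of a hypothetical population of 8 latents
strengths = sample_power_law(torch.linspace(1, 5, 10), num_samples = 8)
```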
@@ -377,18 +424,38 @@ class Critic(Module):
         latent,
         old_values,
         target,
-        eps_clip = 0.4
+        eps_clip = 0.4,
+        use_improved = True
     ):
         logits = self.forward(state, latent, return_logits = True)

         value = self.maybe_bins_to_value(logits)

-        clipped_value = old_values + (value - old_values).clamp(1. - eps_clip, 1. + eps_clip)
+        if use_improved:
+            clipped_target = target.clamp(-eps_clip, eps_clip)
+
+            old_values_lo = old_values - eps_clip
+            old_values_hi = old_values + eps_clip
+
+            is_between = lambda lo, hi: (lo < value) & (value < hi)

-        loss = self.loss_fn(logits, target, reduction = 'none')
-        clipped_loss = self.loss_fn(clipped_value, target, reduction = 'none')
+            clipped_loss = self.loss_fn(logits, clipped_target, reduction = 'none')
+            loss = self.loss_fn(logits, target, reduction = 'none')

-        return torch.max(loss, clipped_loss).mean()
+            value_loss = torch.where(
+                is_between(target, old_values_lo) | is_between(old_values_hi, target),
+                0.,
+                torch.min(loss, clipped_loss)
+            )
+        else:
+            clipped_value = old_values + (value - old_values).clamp(1. - eps_clip, 1. + eps_clip)
+
+            loss = self.loss_fn(logits, target, reduction = 'none')
+            clipped_loss = self.loss_fn(clipped_value, target, reduction = 'none')
+
+            value_loss = torch.max(loss, clipped_loss)
+
+        return value_loss.mean()

     def forward(
         self,
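To see how the two branches above differ, here is a small self-contained sketch with plain mean-squared error standing in for the critic's `loss_fn` and a value tensor standing in for the logits; all numbers are made up and only trace the computation as written in the diff.

```python
import torch

value      = torch.tensor([0.2, 1.4, -0.3])   # current critic predictions
old_values = torch.tensor([0.0, 1.0,  0.0])   # predictions from the rollout policy
target     = torch.tensor([0.5, 2.0, -1.0])   # advantages + old_values
eps_clip   = 0.4

loss_fn = lambda pred, tgt: (pred - tgt) ** 2

# improved variant (use_improved = True): the loss is dropped wherever the prediction
# already sits between the target and the nearer edge of the trust region around old_values
clipped_target = target.clamp(-eps_clip, eps_clip)
is_between = lambda lo, hi: (lo < value) & (value < hi)

improved = torch.where(
    is_between(target, old_values - eps_clip) | is_between(old_values + eps_clip, target),
    torch.tensor(0.),
    torch.min(loss_fn(value, target), loss_fn(value, clipped_target))
).mean()

# original variant (use_improved = False): pessimistic max of the unclipped and
# clipped losses, mirroring the clipping exactly as written in the hunk above
clipped_value = old_values + (value - old_values).clamp(1. - eps_clip, 1. + eps_clip)
classic = torch.max(loss_fn(value, target), loss_fn(clipped_value, target)).mean()
```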
@@ -441,6 +508,8 @@ class LatentGenePool(Module):
        frac_elitism = 0.1,                                  # frac of population to preserve from being noised
        frac_migrate = 0.1,                                  # frac of population, excluding elites, that migrate between islands randomly. will use a designated set migration pattern (since for some reason using random it seems to be worse for me)
        mutation_strength = 1.,                              # factor to multiply to gaussian noise as mutation to latents
+       fast_genetic_algorithm = False,
+       fast_ga_values = torch.linspace(1, 5, 10),
        should_run_genetic_algorithm: Module | None = None,  # eq (3) in paper
        default_should_run_ga_gamma = 1.5,
        migrate_every = 100,                                 # how many steps before a migration between islands
@@ -488,6 +557,8 @@ class LatentGenePool(Module):
        self.crossover_random = crossover_random

        self.mutation_strength = mutation_strength
+       self.mutation_strength_sampler = PowerLawDist(fast_ga_values) if fast_genetic_algorithm else None
+
        self.num_elites = int(frac_elitism * latents_per_island)
        self.has_elites = self.num_elites > 0

@@ -656,9 +727,14 @@ class LatentGenePool(Module):
        if self.has_elites:
            genes, elites = genes[:, :-self.num_elites], genes[:, -self.num_elites:]

-       # 5. mutate with gaussian noise - todo: add drawing the mutation rate from exponential distribution, from the fast genetic algorithms paper from 2017
+       # 5. mutate with gaussian noise
+
+       if exists(self.mutation_strength_sampler):
+           mutation_strength = self.mutation_strength_sampler(genes.shape[:1])
+       else:
+           mutation_strength = self.mutation_strength

-       genes = mutation(genes, mutation_strength = self.mutation_strength)
+       genes = mutation(genes, mutation_strength = mutation_strength)

        # 6. maybe migration

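Putting the pieces together, the power-law mutation strengths are opt-in via the two new `LatentGenePool` constructor arguments. A hedged usage sketch follows; `num_latents` and `dim_latent` are assumed pre-existing constructor arguments and may not match the actual signature.

```python
import torch
from evolutionary_policy_optimization import LatentGenePool

# only fast_genetic_algorithm / fast_ga_values are new in this release;
# the pool sizes below are hypothetical
latent_pool = LatentGenePool(
    num_latents = 128,              # assumed existing argument
    dim_latent = 32,                # assumed existing argument
    fast_genetic_algorithm = True,  # draw per-latent mutation strengths from the power law
    fast_ga_values = torch.linspace(1, 5, 10)
)
```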
@@ -770,6 +846,7 @@ class Agent(Module):
        critic_loss_kwargs: dict = dict(
            eps_clip = 0.4
        ),
+       use_improved_critic_loss = True,
        ema_kwargs: dict = dict(),
        actor_optim_kwargs: dict = dict(),
        critic_optim_kwargs: dict = dict(),
@@ -815,6 +892,8 @@ class Agent(Module):
        self.actor_loss = partial(actor_loss, **actor_loss_kwargs)
        self.critic_loss_kwargs = critic_loss_kwargs

+       self.use_improved_critic_loss = use_improved_critic_loss
+
        # fitness score related

        self.get_fitness_scores = get_fitness_scores
@@ -1086,6 +1165,7 @@ class Agent(Module):
                latents,
                old_values = old_values,
                target = advantages + old_values,
+               use_improved = self.use_improved_critic_loss,
                **self.critic_loss_kwargs
            )

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.1.7
+Version: 0.1.9
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -196,4 +196,23 @@ agent.load('./agent.pt')
 }
 ```

+```bibtex
+@article{Doerr2017FastGA,
+    title   = {Fast genetic algorithms},
+    author  = {Benjamin Doerr and Huu Phuoc Le and R{\'e}gis Makhmara and Ta Duy Nguyen},
+    journal = {Proceedings of the Genetic and Evolutionary Computation Conference},
+    year    = {2017},
+    url     = {https://api.semanticscholar.org/CorpusID:16196841}
+}
+```
+
+```bibtex
+@article{Lee2024AnalysisClippedCritic,
+    title   = {On Analysis of Clipped Critic Loss in Proximal Policy Gradient},
+    author  = {Yongjin Lee and Moonyoung Chung},
+    journal = {Authorea},
+    year    = {2024}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
@@ -1,10 +1,10 @@
 evolutionary_policy_optimization/__init__.py,sha256=NyiYDYU7DlpmOTM7xiBQET3r1WwX0ebrgMCBLSQrW3c,288
 evolutionary_policy_optimization/distributed.py,sha256=7KgZdeS_wxBHo_du9XZFB1Cu318J-Bp66Xdr6Log_20,2423
 evolutionary_policy_optimization/env_wrappers.py,sha256=bDL06o9_b1iW6k3fw2xifnOnYlzs643tdW6Yv2gsIdw,803
-evolutionary_policy_optimization/epo.py,sha256=5rOygXAfbb4dmjfseBcHgxHPpTFNMrrMDrY9IsJuZ28,43381
+evolutionary_policy_optimization/epo.py,sha256=9GfSvOz6SwjAuZyhyvsLHPY8b2svMQlM3BRjilwsQ-g,45717
 evolutionary_policy_optimization/experimental.py,sha256=-IgqjJ_Wk_CMB1y9YYWpoYqTG9GZHAS6kbRdTluVevg,1563
 evolutionary_policy_optimization/mock_env.py,sha256=TLyyRm6tOD0Kdn9QqJJQriaSnsR-YmNQHo4OohmZFG4,1410
-evolutionary_policy_optimization-0.1.7.dist-info/METADATA,sha256=yc_7LIYTbAhc7disU0o4ep-xVT1Ku3_nEF01yHcUzDE,6742
-evolutionary_policy_optimization-0.1.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.1.7.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.1.7.dist-info/RECORD,,
+evolutionary_policy_optimization-0.1.9.dist-info/METADATA,sha256=y5w_NwtKNQ07HeYa5r6hcPn7RsqDpehMmt5vj6mTESQ,7316
+evolutionary_policy_optimization-0.1.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.1.9.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.1.9.dist-info/RECORD,,