evolutionary-policy-optimization 0.1.9__tar.gz → 0.1.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/PKG-INFO +12 -1
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/README.md +11 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/epo.py +25 -5
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/pyproject.toml +1 -1
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/tests/test_epo.py +5 -2
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/.github/workflows/python-publish.yml +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/.github/workflows/test.yml +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/.gitignore +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/LICENSE +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/__init__.py +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/distributed.py +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/env_wrappers.py +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/experimental.py +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/mock_env.py +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/requirements.txt +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/train_gym.py +0 -0
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.1.9
+Version: 0.1.12
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -215,4 +215,15 @@ agent.load('./agent.pt')
 }
 ```
 
+```bibtex
+@article{Ash2019OnTD,
+    title   = {On the Difficulty of Warm-Starting Neural Network Training},
+    author  = {Jordan T. Ash and Ryan P. Adams},
+    journal = {ArXiv},
+    year    = {2019},
+    volume  = {abs/1910.08475},
+    url     = {https://api.semanticscholar.org/CorpusID:204788802}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/README.md
RENAMED
@@ -162,4 +162,15 @@ agent.load('./agent.pt')
 }
 ```
 
+```bibtex
+@article{Ash2019OnTD,
+    title   = {On the Difficulty of Warm-Starting Neural Network Training},
+    author  = {Jordan T. Ash and Ryan P. Adams},
+    journal = {ArXiv},
+    year    = {2019},
+    volume  = {abs/1910.08475},
+    url     = {https://api.semanticscholar.org/CorpusID:204788802}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
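The citation added to both PKG-INFO and README.md above is the paper behind the new `shrink_and_perturb_` helper that appears in `epo.py` below: Ash & Adams (2019) restore plasticity when warm-starting training by shrinking every weight toward zero and perturbing it with fresh noise, i.e. `p ← (1 - shrink_factor) * p + perturb_factor * ε` with `ε ~ N(0, I)`.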
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/epo.py
RENAMED
@@ -146,6 +146,24 @@ def temp_batch_dim(fn):
 
     return inner
 
+# plasticity related
+
+def shrink_and_perturb_(
+    module,
+    shrink_factor = 0.5,
+    perturb_factor = 0.01
+):
+    # Shrink & Perturb
+    # Ash et al. https://arxiv.org/abs/1910.08475
+
+    assert 0. <= shrink_factor <= 1.
+
+    for p in module.parameters():
+        noise = torch.randn_like(p.data)
+        p.data.mul_(1. - shrink_factor).add_(noise * perturb_factor)
+
+    return module
+
 # fitness related
 
 def get_fitness_scores(
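A minimal usage sketch of the new helper, assuming only that the argument is a `torch.nn.Module` (the function touches nothing but `module.parameters()`); the small network here is a stand-in for illustration:

```python
import torch
from torch import nn

from evolutionary_policy_optimization.epo import shrink_and_perturb_

# stand-in network; in practice this would be the EPO agent / actor / critic
net = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 4))

# scale all weights toward zero, then add small gaussian noise,
# restoring plasticity for continued training (Ash et al. 2019)
shrink_and_perturb_(net, shrink_factor = 0.5, perturb_factor = 0.01)
```

The trailing underscore follows the PyTorch convention for in-place operations: `mul_` / `add_` mutate the weights directly, and the module is returned only for convenient chaining.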
@@ -431,6 +449,8 @@ class Critic(Module):
 
         value = self.maybe_bins_to_value(logits)
 
+        loss_fn = partial(self.loss_fn, reduction = 'none')
+
         if use_improved:
             clipped_target = target.clamp(-eps_clip, eps_clip)
 
@@ -439,8 +459,8 @@ class Critic(Module):
 
             is_between = lambda lo, hi: (lo < value) & (value < hi)
 
-            clipped_loss = self.loss_fn(logits, clipped_target, reduction = 'none')
-            loss = self.loss_fn(logits, target, reduction = 'none')
+            clipped_loss = loss_fn(logits, clipped_target)
+            loss = loss_fn(logits, target)
 
             value_loss = torch.where(
                 is_between(target, old_values_lo) | is_between(old_values_hi, target),
@@ -448,10 +468,10 @@ class Critic(Module):
                 torch.min(loss, clipped_loss)
             )
         else:
-            clipped_value = old_values + (value - old_values).clamp(
+            clipped_value = old_values + (value - old_values).clamp(-eps_clip, eps_clip)
 
-            loss = self.loss_fn(logits, target, reduction = 'none')
-            clipped_loss = self.loss_fn(clipped_value, target, reduction = 'none')
+            loss = loss_fn(logits, target)
+            clipped_loss = loss_fn(clipped_value, target)
 
             value_loss = torch.max(loss, clipped_loss)
 
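For orientation: the `else` branch restored above is the conventional PPO-style clipped value loss, while the `use_improved` branch clips the target instead and selects between losses with `torch.where` / `torch.min`. Below is a hedged, self-contained sketch of the conventional branch, with plain MSE standing in for the critic's `self.loss_fn` (the actual critic passes `logits`, suggesting the loss is computed on binned value logits) and an arbitrary `eps_clip`:

```python
import torch
import torch.nn.functional as F

def clipped_value_loss(value, old_values, target, eps_clip = 0.4):
    # keep the new value estimate within eps_clip of the old estimate
    clipped_value = old_values + (value - old_values).clamp(-eps_clip, eps_clip)

    loss = F.mse_loss(value, target, reduction = 'none')
    clipped_loss = F.mse_loss(clipped_value, target, reduction = 'none')

    # pessimistic elementwise max over clipped / unclipped losses,
    # mirroring PPO's clipped surrogate for the policy
    return torch.max(loss, clipped_loss).mean()

# example usage with random tensors
value, old_values, target = torch.randn(3, 8).unbind()
print(clipped_value_loss(value, old_values, target))
```

Taking the elementwise `max` is the pessimistic choice: the critic cannot lower its loss by exploiting the clipping, only by genuinely improving its estimate.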
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/tests/test_epo.py
RENAMED
@@ -1,10 +1,11 @@
 import pytest
 
 import torch
-from evolutionary_policy_optimization import (
+from evolutionary_policy_optimization.epo import (
     LatentGenePool,
     Actor,
-    Critic
+    Critic,
+    shrink_and_perturb_
 )
 
 @pytest.mark.parametrize('latent_ids', (2, (2, 4)))
@@ -128,3 +129,5 @@ def test_e2e_with_mock_env(
 
     agent.save('./agent.pt', overwrite = True)
     agent.load('./agent.pt')
+
+    shrink_and_perturb_(agent)
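Note the test's imports moved from the package root to the `epo` submodule; whether the root `__init__.py` re-exports `shrink_and_perturb_` is not visible in this diff, so the submodule import is the safe form. The new call at the end only exercises the helper for side effects; a stricter check, sketched here as a hypothetical test that is not part of the repository, would assert the in-place update actually moved the weights:

```python
import torch
from torch import nn

from evolutionary_policy_optimization.epo import shrink_and_perturb_

def test_shrink_and_perturb_mutates_parameters():
    net = nn.Linear(4, 4)
    before = [p.detach().clone() for p in net.parameters()]

    shrink_and_perturb_(net)

    # gaussian noise makes an unchanged parameter vanishingly unlikely
    assert all(
        not torch.equal(b, p)
        for b, p in zip(before, net.parameters())
    )
```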
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/.github/workflows/python-publish.yml
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/.github/workflows/test.yml
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/.gitignore
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/LICENSE
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/__init__.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/distributed.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/env_wrappers.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/experimental.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/mock_env.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/requirements.txt
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/train_gym.py
RENAMED
File without changes