evolutionary-policy-optimization 0.1.9__tar.gz → 0.1.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/PKG-INFO +12 -1
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/README.md +11 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/epo.py +25 -5
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/pyproject.toml +1 -1
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/tests/test_epo.py +5 -2
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/.github/workflows/python-publish.yml +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/.github/workflows/test.yml +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/.gitignore +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/LICENSE +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/__init__.py +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/distributed.py +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/env_wrappers.py +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/experimental.py +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/mock_env.py +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/requirements.txt +0 -0
- {evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/train_gym.py +0 -0
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.1.9
+Version: 0.1.12
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -215,4 +215,15 @@ agent.load('./agent.pt')
 }
 ```
 
+```bibtex
+@article{Ash2019OnTD,
+    title   = {On the Difficulty of Warm-Starting Neural Network Training},
+    author  = {Jordan T. Ash and Ryan P. Adams},
+    journal = {ArXiv},
+    year    = {2019},
+    volume  = {abs/1910.08475},
+    url     = {https://api.semanticscholar.org/CorpusID:204788802}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/README.md
RENAMED
@@ -162,4 +162,15 @@ agent.load('./agent.pt')
 }
 ```
 
+```bibtex
+@article{Ash2019OnTD,
+    title   = {On the Difficulty of Warm-Starting Neural Network Training},
+    author  = {Jordan T. Ash and Ryan P. Adams},
+    journal = {ArXiv},
+    year    = {2019},
+    volume  = {abs/1910.08475},
+    url     = {https://api.semanticscholar.org/CorpusID:204788802}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
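The citation added to both PKG-INFO and README.md above is the paper behind the new `shrink_and_perturb_` helper that appears in `epo.py` below: Ash & Adams (2019) restore plasticity when warm-starting training by shrinking every weight toward zero and perturbing it with fresh noise, i.e. `p ← (1 - shrink_factor) * p + perturb_factor * ε` with `ε ~ N(0, I)`.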
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/epo.py
RENAMED
@@ -146,6 +146,24 @@ def temp_batch_dim(fn):
 
     return inner
 
+# plasticity related
+
+def shrink_and_perturb_(
+    module,
+    shrink_factor = 0.5,
+    perturb_factor = 0.01
+):
+    # Shrink & Perturb
+    # Ash et al. https://arxiv.org/abs/1910.08475
+
+    assert 0. <= shrink_factor <= 1.
+
+    for p in module.parameters():
+        noise = torch.randn_like(p.data)
+        p.data.mul_(1. - shrink_factor).add_(noise * perturb_factor)
+
+    return module
+
 # fitness related
 
 def get_fitness_scores(
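A minimal usage sketch of the new helper, assuming only that the argument is a `torch.nn.Module` (the function touches nothing but `module.parameters()`); the small network here is a stand-in for illustration:

```python
import torch
from torch import nn

from evolutionary_policy_optimization.epo import shrink_and_perturb_

# stand-in network; in practice this would be the EPO agent / actor / critic
net = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 4))

# scale all weights toward zero, then add small gaussian noise,
# restoring plasticity for continued training (Ash et al. 2019)
shrink_and_perturb_(net, shrink_factor = 0.5, perturb_factor = 0.01)
```

The trailing underscore follows the PyTorch convention for in-place operations: `mul_` / `add_` mutate the weights directly, and the module is returned only for convenient chaining.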
@@ -431,6 +449,8 @@ class Critic(Module):
 
         value = self.maybe_bins_to_value(logits)
 
+        loss_fn = partial(self.loss_fn, reduction = 'none')
+
         if use_improved:
             clipped_target = target.clamp(-eps_clip, eps_clip)
 
@@ -439,8 +459,8 @@ class Critic(Module):
 
             is_between = lambda lo, hi: (lo < value) & (value < hi)
 
-            clipped_loss = self.loss_fn(logits, clipped_target, reduction = 'none')
-            loss = self.loss_fn(logits, target, reduction = 'none')
+            clipped_loss = loss_fn(logits, clipped_target)
+            loss = loss_fn(logits, target)
 
             value_loss = torch.where(
                 is_between(target, old_values_lo) | is_between(old_values_hi, target),
@@ -448,10 +468,10 @@ class Critic(Module):
                 torch.min(loss, clipped_loss)
             )
         else:
-            clipped_value = old_values + (value - old_values).clamp(
+            clipped_value = old_values + (value - old_values).clamp(-eps_clip, eps_clip)
 
-            loss = self.loss_fn(logits, target, reduction = 'none')
-            clipped_loss = self.loss_fn(clipped_value, target, reduction = 'none')
+            loss = loss_fn(logits, target)
+            clipped_loss = loss_fn(clipped_value, target)
 
             value_loss = torch.max(loss, clipped_loss)
 
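For orientation: the `else` branch restored above is the conventional PPO-style clipped value loss, while the `use_improved` branch clips the target instead and selects between losses with `torch.where` / `torch.min`. Below is a hedged, self-contained sketch of the conventional branch, with plain MSE standing in for the critic's `self.loss_fn` (the actual critic passes `logits`, suggesting the loss is computed on binned value logits) and an arbitrary `eps_clip`:

```python
import torch
import torch.nn.functional as F

def clipped_value_loss(value, old_values, target, eps_clip = 0.4):
    # keep the new value estimate within eps_clip of the old estimate
    clipped_value = old_values + (value - old_values).clamp(-eps_clip, eps_clip)

    loss = F.mse_loss(value, target, reduction = 'none')
    clipped_loss = F.mse_loss(clipped_value, target, reduction = 'none')

    # pessimistic elementwise max over clipped / unclipped losses,
    # mirroring PPO's clipped surrogate for the policy
    return torch.max(loss, clipped_loss).mean()

# example usage with random tensors
value, old_values, target = torch.randn(3, 8).unbind()
print(clipped_value_loss(value, old_values, target))
```

Taking the elementwise `max` is the pessimistic choice: the critic cannot lower its loss by exploiting the clipping, only by genuinely improving its estimate.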
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/tests/test_epo.py
RENAMED
@@ -1,10 +1,11 @@
 import pytest
 
 import torch
-from evolutionary_policy_optimization import (
+from evolutionary_policy_optimization.epo import (
     LatentGenePool,
     Actor,
-    Critic
+    Critic,
+    shrink_and_perturb_
 )
 
 @pytest.mark.parametrize('latent_ids', (2, (2, 4)))
@@ -128,3 +129,5 @@ def test_e2e_with_mock_env(
 
     agent.save('./agent.pt', overwrite = True)
     agent.load('./agent.pt')
+
+    shrink_and_perturb_(agent)
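Note the test's imports moved from the package root to the `epo` submodule; whether the root `__init__.py` re-exports `shrink_and_perturb_` is not visible in this diff, so the submodule import is the safe form. The new call at the end only exercises the helper for side effects; a stricter check, sketched here as a hypothetical test that is not part of the repository, would assert the in-place update actually moved the weights:

```python
import torch
from torch import nn

from evolutionary_policy_optimization.epo import shrink_and_perturb_

def test_shrink_and_perturb_mutates_parameters():
    net = nn.Linear(4, 4)
    before = [p.detach().clone() for p in net.parameters()]

    shrink_and_perturb_(net)

    # gaussian noise makes an unchanged parameter vanishingly unlikely
    assert all(
        not torch.equal(b, p)
        for b, p in zip(before, net.parameters())
    )
```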
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/.github/workflows/python-publish.yml
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/.github/workflows/test.yml
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/.gitignore
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/LICENSE
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/__init__.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/distributed.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/env_wrappers.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/experimental.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/evolutionary_policy_optimization/mock_env.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/requirements.txt
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.9 → evolutionary_policy_optimization-0.1.12}/train_gym.py
RENAMED
File without changes