evolutionary-policy-optimization 0.2.11__tar.gz → 0.2.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/PKG-INFO +15 -4
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/README.md +14 -3
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/evolutionary_policy_optimization/epo.py +2 -2
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/pyproject.toml +1 -1
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/.github/workflows/lint.yml +0 -0
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/.github/workflows/python-publish.yml +0 -0
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/.github/workflows/test.yml +0 -0
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/.gitignore +0 -0
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/LICENSE +0 -0
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/evolutionary_policy_optimization/__init__.py +0 -0
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/evolutionary_policy_optimization/distributed.py +0 -0
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/evolutionary_policy_optimization/env_wrappers.py +0 -0
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/evolutionary_policy_optimization/experimental.py +0 -0
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/evolutionary_policy_optimization/mock_env.py +0 -0
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/requirements.txt +0 -0
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/tests/test_epo.py +0 -0
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/train_crossover_weight_space.py +0 -0
- {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/train_gym.py +0 -0
{evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: evolutionary-policy-optimization
|
3
|
-
Version: 0.2.11
|
3
|
+
Version: 0.2.12
|
4
4
|
Summary: EPO - Pytorch
|
5
5
|
Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
|
6
6
|
Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
|
@@ -87,10 +87,10 @@ latent_pool = LatentGenePool(
|
|
87
87
|
dim_latent = 32,
|
88
88
|
)
|
89
89
|
|
90
|
-
state = torch.randn(1,
|
90
|
+
state = torch.randn(1, 32)
|
91
91
|
|
92
|
-
actor = Actor(
|
93
|
-
critic = Critic(
|
92
|
+
actor = Actor(dim_state = 32, dim = 256, mlp_depth = 2, num_actions = 4, dim_latent = 32)
|
93
|
+
critic = Critic(dim_state = 32, dim = 256, mlp_depth = 3, dim_latent = 32)
|
94
94
|
|
95
95
|
latent = latent_pool(latent_id = 2)
|
96
96
|
|
@@ -289,4 +289,15 @@ That's it
|
|
289
289
|
}
|
290
290
|
```
|
291
291
|
|
292
|
+
```bibtex
|
293
|
+
@article{Jiang2022GeneralIR,
|
294
|
+
title = {General intelligence requires rethinking exploration},
|
295
|
+
author = {Minqi Jiang and Tim Rocktaschel and Edward Grefenstette},
|
296
|
+
journal = {Royal Society Open Science},
|
297
|
+
year = {2022},
|
298
|
+
volume = {10},
|
299
|
+
url = {https://api.semanticscholar.org/CorpusID:253523156}
|
300
|
+
}
|
301
|
+
```
|
302
|
+
|
292
303
|
*Evolution is cleverer than you are.* - Leslie Orgel
|
{evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/README.md
RENAMED
@@ -30,10 +30,10 @@ latent_pool = LatentGenePool(
|
|
30
30
|
dim_latent = 32,
|
31
31
|
)
|
32
32
|
|
33
|
-
state = torch.randn(1,
|
33
|
+
state = torch.randn(1, 32)
|
34
34
|
|
35
|
-
actor = Actor(
|
36
|
-
critic = Critic(
|
35
|
+
actor = Actor(dim_state = 32, dim = 256, mlp_depth = 2, num_actions = 4, dim_latent = 32)
|
36
|
+
critic = Critic(dim_state = 32, dim = 256, mlp_depth = 3, dim_latent = 32)
|
37
37
|
|
38
38
|
latent = latent_pool(latent_id = 2)
|
39
39
|
|
@@ -232,4 +232,15 @@ That's it
|
|
232
232
|
}
|
233
233
|
```
|
234
234
|
|
235
|
+
```bibtex
|
236
|
+
@article{Jiang2022GeneralIR,
|
237
|
+
title = {General intelligence requires rethinking exploration},
|
238
|
+
author = {Minqi Jiang and Tim Rocktaschel and Edward Grefenstette},
|
239
|
+
journal = {Royal Society Open Science},
|
240
|
+
year = {2022},
|
241
|
+
volume = {10},
|
242
|
+
url = {https://api.semanticscholar.org/CorpusID:253523156}
|
243
|
+
}
|
244
|
+
```
|
245
|
+
|
235
246
|
*Evolution is cleverer than you are.* - Leslie Orgel
|
@@ -644,11 +644,11 @@ class Critic(Module):
|
|
644
644
|
loss_fn = partial(self.loss_fn, reduction = 'none')
|
645
645
|
|
646
646
|
if use_improved:
|
647
|
-
clipped_target = target.clamp(-eps_clip, eps_clip)
|
648
|
-
|
649
647
|
old_values_lo = old_values - eps_clip
|
650
648
|
old_values_hi = old_values + eps_clip
|
651
649
|
|
650
|
+
clipped_target = target.clamp(old_values_lo, old_values_hi)
|
651
|
+
|
652
652
|
def is_between(lo, hi):
|
653
653
|
return (lo < value) & (value < hi)
|
654
654
|
|
File without changes
|
File without changes
|
File without changes
|
{evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/.gitignore
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/requirements.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
{evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.12}/train_gym.py
RENAMED
File without changes
|