evolutionary-policy-optimization 0.2.10-py3-none-any.whl → 0.2.12-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- evolutionary_policy_optimization/distributed.py +3 -1
- evolutionary_policy_optimization/epo.py +2 -2
- {evolutionary_policy_optimization-0.2.10.dist-info → evolutionary_policy_optimization-0.2.12.dist-info}/METADATA +15 -4
- {evolutionary_policy_optimization-0.2.10.dist-info → evolutionary_policy_optimization-0.2.12.dist-info}/RECORD +6 -6
- {evolutionary_policy_optimization-0.2.10.dist-info → evolutionary_policy_optimization-0.2.12.dist-info}/WHEEL +0 -0
- {evolutionary_policy_optimization-0.2.10.dist-info → evolutionary_policy_optimization-0.2.12.dist-info}/licenses/LICENSE +0 -0
evolutionary_policy_optimization/distributed.py

@@ -61,6 +61,8 @@ def has_only_one_value(t):
     return (t == t[0]).all()
 
 def all_gather_variable_dim(t, dim = 0, sizes = None):
+    device = t.device
+
     if not exists(sizes):
         sizes = gather_sizes(t, dim = dim)
 
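The added `device = t.device` makes the input tensor's device available to the `torch.arange(..., device = device)` calls that appear in the next hunk. A minimal standalone sketch of why that matters (the tensor here is a made-up stand-in; under distributed training it would typically be a CUDA tensor):

```python
import torch

# made-up stand-in for the tensor being gathered; in real use this may live on a GPU
t = torch.randn(4, 3)

# capture the input's device once, as the patched function now does
device = t.device

# helper tensors built on the same device, so comparisons and indexing against
# tensors derived from `t` cannot raise a CPU/GPU device-mismatch error
seq = torch.arange(t.shape[0], device = device)
```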
@@ -77,7 +79,7 @@ def all_gather_variable_dim(t, dim = 0, sizes = None):
     gathered_tensors = torch.cat(gathered_tensors, dim = dim)
     seq = torch.arange(max_size, device = device)
 
-    mask = einx.less('j i -> (i j)', seq, sizes)
+    mask = einx.less('j, i -> (i j)', seq, sizes)
     seq = torch.arange(mask.shape[-1], device = device)
     indices = seq[mask]
 
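For reference, a minimal sketch of what the corrected mask expression computes, written with plain torch broadcasting instead of einx (the concrete `sizes` values are made up for illustration):

```python
import torch

max_size = 5
sizes = torch.tensor([3, 5, 2])   # per-rank lengths along the gather dim (the 'i' axis)
seq = torch.arange(max_size)      # positions 0 .. max_size - 1 (the 'j' axis)

# einx.less('j, i -> (i j)', seq, sizes) compares every position against every size and
# flattens the result; the broadcasted plain-torch equivalent is:
mask = (seq[None, :] < sizes[:, None]).reshape(-1)

# True marks real elements, False marks the padding used to reach max_size
print(mask)
# tensor([ True,  True,  True, False, False,
#          True,  True,  True,  True,  True,
#          True,  True, False, False, False])
```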
evolutionary_policy_optimization/epo.py

@@ -644,11 +644,11 @@ class Critic(Module):
         loss_fn = partial(self.loss_fn, reduction = 'none')
 
         if use_improved:
-            clipped_target = target.clamp(-eps_clip, eps_clip)
-
             old_values_lo = old_values - eps_clip
             old_values_hi = old_values + eps_clip
 
+            clipped_target = target.clamp(old_values_lo, old_values_hi)
+
             def is_between(lo, hi):
                 return (lo < value) & (value < hi)
 
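In effect the fix clamps the value target into a band of width `eps_clip` around the critic's old value estimate, rather than into a fixed band around zero. A self-contained sketch of just that clamping step, with made-up numbers and a plain MSE standing in for `self.loss_fn`:

```python
import torch
import torch.nn.functional as F

value      = torch.randn(4)   # current value prediction
old_values = torch.randn(4)   # value prediction recorded at rollout time
target     = torch.randn(4)   # bootstrapped value target
eps_clip   = 0.4

old_values_lo = old_values - eps_clip
old_values_hi = old_values + eps_clip

# clamp the target to within eps_clip of the old value estimate,
# not to the fixed interval [-eps_clip, eps_clip] as before the fix
clipped_target = target.clamp(old_values_lo, old_values_hi)

# elementwise loss against the clipped target; the real Critic goes on to use the
# is_between masks defined right after this line, which this sketch omits
loss = F.mse_loss(value, clipped_target)
```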
{evolutionary_policy_optimization-0.2.10.dist-info → evolutionary_policy_optimization-0.2.12.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.2.10
+Version: 0.2.12
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -87,10 +87,10 @@ latent_pool = LatentGenePool(
     dim_latent = 32,
 )
 
-state = torch.randn(1,
+state = torch.randn(1, 32)
 
-actor = Actor(
-critic = Critic(
+actor = Actor(dim_state = 32, dim = 256, mlp_depth = 2, num_actions = 4, dim_latent = 32)
+critic = Critic(dim_state = 32, dim = 256, mlp_depth = 3, dim_latent = 32)
 
 latent = latent_pool(latent_id = 2)
 
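This hunk of the README example ends at the latent lookup. A hypothetical continuation under stated assumptions (that `Actor`, `Critic` and `LatentGenePool` are importable from the package top level, that `num_latents` is a constructor argument with 128 as a made-up value, and that the actor/critic forward passes take the state plus a latent, none of which is shown in this hunk) might look like:

```python
import torch
from evolutionary_policy_optimization import LatentGenePool, Actor, Critic  # assumed import path

# assumption: num_latents is a constructor argument; 128 is a made-up value
latent_pool = LatentGenePool(num_latents = 128, dim_latent = 32)

state = torch.randn(1, 32)

actor = Actor(dim_state = 32, dim = 256, mlp_depth = 2, num_actions = 4, dim_latent = 32)
critic = Critic(dim_state = 32, dim = 256, mlp_depth = 3, dim_latent = 32)

latent = latent_pool(latent_id = 2)

# assumption: forward passes take (state, latent); this is not shown in the excerpt above
action_logits = actor(state, latent)
value = critic(state, latent)
```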
@@ -289,4 +289,15 @@ That's it
 }
 ```
 
+```bibtex
+@article{Jiang2022GeneralIR,
+    title   = {General intelligence requires rethinking exploration},
+    author  = {Minqi Jiang and Tim Rocktaschel and Edward Grefenstette},
+    journal = {Royal Society Open Science},
+    year    = {2022},
+    volume  = {10},
+    url     = {https://api.semanticscholar.org/CorpusID:253523156}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
{evolutionary_policy_optimization-0.2.10.dist-info → evolutionary_policy_optimization-0.2.12.dist-info}/RECORD

@@ -1,10 +1,10 @@
 evolutionary_policy_optimization/__init__.py,sha256=NyiYDYU7DlpmOTM7xiBQET3r1WwX0ebrgMCBLSQrW3c,288
-evolutionary_policy_optimization/distributed.py,sha256=
+evolutionary_policy_optimization/distributed.py,sha256=clN8Bvhb6JIQy2F8FTF312B0RN3iYoPkKjZreBtAwks,2366
 evolutionary_policy_optimization/env_wrappers.py,sha256=bDL06o9_b1iW6k3fw2xifnOnYlzs643tdW6Yv2gsIdw,803
-evolutionary_policy_optimization/epo.py,sha256=
+evolutionary_policy_optimization/epo.py,sha256=9zOGucA3jWODOdtk9lBfUAaScOnbVfTLqYV8_x1BhKk,53144
 evolutionary_policy_optimization/experimental.py,sha256=QZG0__wwFqHN_LJK7e-mHxlIL1mwjlvG6o6bcOpeAKg,6166
 evolutionary_policy_optimization/mock_env.py,sha256=TLyyRm6tOD0Kdn9QqJJQriaSnsR-YmNQHo4OohmZFG4,1410
-evolutionary_policy_optimization-0.2.
-evolutionary_policy_optimization-0.2.
-evolutionary_policy_optimization-0.2.
-evolutionary_policy_optimization-0.2.
+evolutionary_policy_optimization-0.2.12.dist-info/METADATA,sha256=tZIKqUx-akzbJZWd0h0hM9F8FWbq6SjpJ7Vqstk4dNo,9972
+evolutionary_policy_optimization-0.2.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.2.12.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.2.12.dist-info/RECORD,,