evolutionary-policy-optimization 0.0.58__tar.gz → 0.0.60__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/PKG-INFO +2 -2
- {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/evolutionary_policy_optimization/epo.py +6 -7
- {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/pyproject.toml +3 -3
- {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/.github/workflows/python-publish.yml +0 -0
- {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/.github/workflows/test.yml +0 -0
- {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/.gitignore +0 -0
- {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/LICENSE +0 -0
- {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/README.md +0 -0
- {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/evolutionary_policy_optimization/__init__.py +0 -0
- {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/evolutionary_policy_optimization/distributed.py +0 -0
- {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/evolutionary_policy_optimization/experimental.py +0 -0
- {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/evolutionary_policy_optimization/mock_env.py +0 -0
- {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/requirements.txt +0 -0
- {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/tests/test_epo.py +0 -0
{evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.58
+Version: 0.0.60
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -35,7 +35,7 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9
 Requires-Dist: adam-atan2-pytorch
-Requires-Dist: assoc-scan
+Requires-Dist: assoc-scan>=0.0.2
 Requires-Dist: einops>=0.8.1
 Requires-Dist: einx>=0.3.0
 Requires-Dist: ema-pytorch>=0.7.7
{evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/evolutionary_policy_optimization/epo.py
RENAMED
@@ -127,15 +127,9 @@ def calc_generalized_advantage_estimate(
     delta = rewards + gamma * values_next * masks - values
     gates = gamma * lam * masks

-    gates, delta = gates[..., :, None], delta[..., :, None]
-
     scan = AssocScan(reverse = True, use_accelerated = use_accelerated)

-    gae = scan(gates, delta)
-
-    gae = gae[..., :, 0]
-
-    return gae
+    return scan(gates, delta)

 # evolution related functions

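The functional change above is only in how the associative scan is invoked: with assoc-scan >= 0.0.2 the scan runs over the trailing time dimension directly, so the unsqueeze to `[..., :, None]` and the squeeze back are dropped and the scan output is returned as the GAE. A minimal sketch of the recurrence the reverse scan computes (illustrative only, not the library code; tensor names and shapes are assumed):

```python
# Sketch of the recurrence behind calc_generalized_advantage_estimate:
#   gae[t] = delta[t] + gates[t] * gae[t + 1],  with gae[T] = 0
# The library evaluates the same thing in parallel via
# AssocScan(reverse = True)(gates, delta); this loop is just a readable reference.

import torch

def gae_reference(delta: torch.Tensor, gates: torch.Tensor) -> torch.Tensor:
    # delta, gates: Float[..., T], sharing the trailing time dimension
    out = torch.zeros_like(delta)
    running = torch.zeros_like(delta[..., 0])
    for t in reversed(range(delta.shape[-1])):   # right-to-left over time
        running = delta[..., t] + gates[..., t] * running
        out[..., t] = running
    return out

if __name__ == '__main__':
    delta = torch.randn(2, 8)                    # hypothetical (groups, timesteps)
    gates = torch.full((2, 8), 0.99 * 0.95)      # gamma * lam, no masking
    print(gae_reference(delta, gates).shape)     # torch.Size([2, 8])
```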
@@ -958,7 +952,10 @@ def actor_loss(
     advantages, # Float[b]
     eps_clip = 0.2,
     entropy_weight = .01,
+    eps = 1e-5
 ):
+    batch = logits.shape[0]
+
     log_probs = gather_log_prob(logits, actions)

     ratio = (log_probs - old_log_probs).exp()
@@ -967,6 +964,8 @@ def actor_loss(

     clipped_ratio = ratio.clamp(min = 1. - eps_clip, max = 1. + eps_clip)

+    advantages = F.layer_norm(advantages, (batch,), eps = eps)
+
     actor_loss = -torch.min(clipped_ratio * advantages, ratio * advantages)

     # add entropy loss for exploration
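The actor_loss change adds per-batch advantage normalization before the clipped PPO surrogate. For a 1-D Float[b] tensor, `F.layer_norm(advantages, (batch,), eps = eps)` is whitening with the biased variance, i.e. (advantages - mean) / sqrt(var + eps). A minimal sketch under that assumption (illustrative only, not the package's code):

```python
# Sketch: the layer_norm call used in 0.0.60 reduces to classic advantage
# whitening when the input is a 1-D batch of advantages.

import torch
import torch.nn.functional as F

def normalize_advantages(advantages: torch.Tensor, eps: float = 1e-5) -> torch.Tensor:
    batch = advantages.shape[0]
    return F.layer_norm(advantages, (batch,), eps = eps)

advantages = torch.randn(16)
normed = normalize_advantages(advantages)

# same result by hand (layer_norm uses the biased variance)
manual = (advantages - advantages.mean()) / torch.sqrt(advantages.var(unbiased = False) + 1e-5)
assert torch.allclose(normed, manual, atol = 1e-5)
```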
{evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/pyproject.toml
RENAMED
@@ -1,6 +1,6 @@
 [project]
 name = "evolutionary-policy-optimization"
-version = "0.0.58"
+version = "0.0.60"
 description = "EPO - Pytorch"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -15,7 +15,7 @@ keywords = [
     'genetic algorithms',
     'robotics'
 ]
-classifiers=[
+classifiers = [
     'Development Status :: 4 - Beta',
     'Intended Audience :: Developers',
     'Topic :: Scientific/Engineering :: Artificial Intelligence',
@@ -25,7 +25,7 @@ classifiers=[

 dependencies = [
     "adam-atan2-pytorch",
-    'assoc-scan',
+    'assoc-scan>=0.0.2',
     'einx>=0.3.0',
     'einops>=0.8.1',
     'ema-pytorch>=0.7.7',
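Both metadata files now pin assoc-scan to >= 0.0.2, matching the simplified scan call in epo.py above. A small, hypothetical check of an installed environment against the new lower bound (assumes the third-party `packaging` helper is available; illustrative only):

```python
# Hypothetical environment check against the lower bound declared by 0.0.60.
from importlib.metadata import version      # stdlib
from packaging.version import Version       # assumption: packaging is installed

installed = Version(version('assoc-scan'))
assert installed >= Version('0.0.2'), f'assoc-scan {installed} is below the 0.0.2 floor'
```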