evolutionary_policy_optimization-0.0.58-py3-none-any.whl → evolutionary_policy_optimization-0.0.61-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolutionary_policy_optimization/epo.py +8 -7
- {evolutionary_policy_optimization-0.0.58.dist-info → evolutionary_policy_optimization-0.0.61.dist-info}/METADATA +2 -2
- {evolutionary_policy_optimization-0.0.58.dist-info → evolutionary_policy_optimization-0.0.61.dist-info}/RECORD +5 -5
- {evolutionary_policy_optimization-0.0.58.dist-info → evolutionary_policy_optimization-0.0.61.dist-info}/WHEEL +0 -0
- {evolutionary_policy_optimization-0.0.58.dist-info → evolutionary_policy_optimization-0.0.61.dist-info}/licenses/LICENSE +0 -0
evolutionary_policy_optimization/epo.py

@@ -127,15 +127,9 @@ def calc_generalized_advantage_estimate(
     delta = rewards + gamma * values_next * masks - values
     gates = gamma * lam * masks
 
-    gates, delta = gates[..., :, None], delta[..., :, None]
-
     scan = AssocScan(reverse = True, use_accelerated = use_accelerated)
 
-    gae = scan(gates, delta)
-
-    gae = gae[..., :, 0]
-
-    return gae
+    return scan(gates, delta)
 
 # evolution related functions
 
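This change collapses the unsqueeze/scan/squeeze dance into a single scan(gates, delta) call. The recurrence being computed is gae_t = delta_t + (gamma * lam * mask_t) * gae_{t+1}, with gae_T = 0. A minimal loop-based sketch (illustrative, not from the package) that should produce the same result as the reverse associative scan over the trailing time dimension:

    import torch

    def gae_reference(rewards, values, values_next, masks, gamma = 0.99, lam = 0.95):
        # plain reverse-loop GAE over the last (time) dimension; the
        # associative scan computes the same first-order linear
        # recurrence, just in parallel
        delta = rewards + gamma * values_next * masks - values
        gates = gamma * lam * masks

        out = torch.empty_like(delta)
        gae = torch.zeros_like(delta[..., -1])

        for t in reversed(range(delta.shape[-1])):
            gae = delta[..., t] + gates[..., t] * gae
            out[..., t] = gae

        return out

The removed [..., :, None] / [..., :, 0] reshaping suggests earlier assoc-scan releases expected an extra trailing feature dimension; the assoc-scan>=0.0.2 floor added in METADATA below lines up with the scan now accepting the time-trailing tensors directly.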
@@ -845,6 +839,8 @@ class Agent(Module):
         if is_distributed():
             memories = map(partial(all_gather_variable_dim, dim = 0), memories)
 
+            fitness_scores = all_gather_variable_dim(fitness_scores, dim = 0)
+
         (
             episode_ids,
             states,
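Previously only the memories were gathered across ranks, so each rank presumably saw just its local shard of fitness scores; gathering the scores as well lets the evolutionary selection operate on the whole population. A rough stand-in for the repo's all_gather_variable_dim helper using plain torch.distributed primitives, assuming every rank holds the same number of scores (the real helper also handles unequal lengths):

    import torch
    import torch.distributed as dist

    def gather_scores(fitness_scores: torch.Tensor) -> torch.Tensor:
        # no-op outside a distributed run, mirroring the is_distributed() guard
        if not (dist.is_available() and dist.is_initialized()):
            return fitness_scores

        # collect each rank's scores, then concatenate along the batch dim
        gathered = [torch.empty_like(fitness_scores) for _ in range(dist.get_world_size())]
        dist.all_gather(gathered, fitness_scores)
        return torch.cat(gathered, dim = 0)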
@@ -958,7 +954,10 @@ def actor_loss(
     advantages, # Float[b]
     eps_clip = 0.2,
     entropy_weight = .01,
+    eps = 1e-5
 ):
+    batch = logits.shape[0]
+
     log_probs = gather_log_prob(logits, actions)
 
     ratio = (log_probs - old_log_probs).exp()
@@ -967,6 +966,8 @@ def actor_loss(
 
     clipped_ratio = ratio.clamp(min = 1. - eps_clip, max = 1. + eps_clip)
 
+    advantages = F.layer_norm(advantages, (batch,), eps = eps)
+
     actor_loss = -torch.min(clipped_ratio * advantages, ratio * advantages)
 
     # add entropy loss for exploration
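The new eps argument and batch variable from the previous hunk feed a layer norm over the batch dimension, standardizing advantages to zero mean and unit variance before the clipped PPO surrogate. With no affine parameters, F.layer_norm on a 1d tensor reduces to the familiar manual normalization (biased variance, eps added inside the square root). A quick illustrative equivalence check, not taken from the package:

    import torch
    import torch.nn.functional as F

    advantages = torch.randn(8)
    batch = advantages.shape[0]

    # what the diff adds: layer norm across the batch of advantages
    normed = F.layer_norm(advantages, (batch,), eps = 1e-5)

    # the same thing by hand: biased variance, eps before the sqrt
    manual = (advantages - advantages.mean()) / (advantages.var(unbiased = False) + 1e-5).sqrt()

    assert torch.allclose(normed, manual, atol = 1e-6)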
{evolutionary_policy_optimization-0.0.58.dist-info → evolutionary_policy_optimization-0.0.61.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.58
+Version: 0.0.61
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -35,7 +35,7 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9
 Requires-Dist: adam-atan2-pytorch
-Requires-Dist: assoc-scan
+Requires-Dist: assoc-scan>=0.0.2
 Requires-Dist: einops>=0.8.1
 Requires-Dist: einx>=0.3.0
 Requires-Dist: ema-pytorch>=0.7.7
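The new floor on assoc-scan presumably tracks the epo.py change above: calc_generalized_advantage_estimate now hands gates and delta to AssocScan without the trailing singleton dimension, which releases before 0.0.2 appear not to have supported.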
{evolutionary_policy_optimization-0.0.58.dist-info → evolutionary_policy_optimization-0.0.61.dist-info}/RECORD

@@ -1,9 +1,9 @@
 evolutionary_policy_optimization/__init__.py,sha256=0q0aBuFgWi06MLMD8FiHzBYQ3_W4LYWrwmCtF3u5H2A,201
 evolutionary_policy_optimization/distributed.py,sha256=7KgZdeS_wxBHo_du9XZFB1Cu318J-Bp66Xdr6Log_20,2423
-evolutionary_policy_optimization/epo.py,sha256=…
+evolutionary_policy_optimization/epo.py,sha256=kFT49rJdcmaDehfpx3YyhYhvAcp7S-gRWDkS2y20Q2Y,35377
 evolutionary_policy_optimization/experimental.py,sha256=-IgqjJ_Wk_CMB1y9YYWpoYqTG9GZHAS6kbRdTluVevg,1563
 evolutionary_policy_optimization/mock_env.py,sha256=202KJ5g57wQvOzhGYzgHfBa7Y2do5uuDvl5kFg5o73g,934
-evolutionary_policy_optimization-0.0.58.dist-info/METADATA,sha256=…
-evolutionary_policy_optimization-0.0.58.dist-info/WHEEL,sha256=…
-evolutionary_policy_optimization-0.0.58.dist-info/licenses/LICENSE,sha256=…
-evolutionary_policy_optimization-0.0.58.dist-info/RECORD,,
+evolutionary_policy_optimization-0.0.61.dist-info/METADATA,sha256=3IbcY9kg71P6lTNxZaRBw3IYfDjcK4uTJJaFRD0Skwg,6220
+evolutionary_policy_optimization-0.0.61.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.61.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.61.dist-info/RECORD,,
{evolutionary_policy_optimization-0.0.58.dist-info → evolutionary_policy_optimization-0.0.61.dist-info}/WHEEL and /licenses/LICENSE: files without changes