evolutionary-policy-optimization 0.0.58.tar.gz → 0.0.60.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (14)
  1. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/PKG-INFO +2 -2
  2. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/evolutionary_policy_optimization/epo.py +6 -7
  3. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/pyproject.toml +3 -3
  4. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/.github/workflows/python-publish.yml +0 -0
  5. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/.github/workflows/test.yml +0 -0
  6. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/.gitignore +0 -0
  7. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/LICENSE +0 -0
  8. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/README.md +0 -0
  9. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/evolutionary_policy_optimization/__init__.py +0 -0
  10. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/evolutionary_policy_optimization/distributed.py +0 -0
  11. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/evolutionary_policy_optimization/experimental.py +0 -0
  12. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/evolutionary_policy_optimization/mock_env.py +0 -0
  13. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/requirements.txt +0 -0
  14. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.60}/tests/test_epo.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.58
+Version: 0.0.60
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -35,7 +35,7 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9
 Requires-Dist: adam-atan2-pytorch
-Requires-Dist: assoc-scan
+Requires-Dist: assoc-scan>=0.0.2
 Requires-Dist: einops>=0.8.1
 Requires-Dist: einx>=0.3.0
 Requires-Dist: ema-pytorch>=0.7.7
evolutionary_policy_optimization/epo.py

@@ -127,15 +127,9 @@ def calc_generalized_advantage_estimate(
     delta = rewards + gamma * values_next * masks - values
     gates = gamma * lam * masks
 
-    gates, delta = gates[..., :, None], delta[..., :, None]
-
     scan = AssocScan(reverse = True, use_accelerated = use_accelerated)
 
-    gae = scan(gates, delta)
-
-    gae = gae[..., :, 0]
-
-    return gae
+    return scan(gates, delta)
 
 # evolution related functions
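The dropped lines only unsqueezed a trailing feature dimension before the scan and squeezed it back afterwards; removing them (together with the assoc-scan>=0.0.2 floor pinned below) lets AssocScan consume the time-last tensors directly. For reference, a minimal sequential sketch of the recursion the reverse associative scan evaluates — a standalone illustration, not code from the package:

import torch

def gae_reference(rewards, values, values_next, masks, gamma = 0.99, lam = 0.95):
    # gae_t = delta_t + gamma * lam * mask_t * gae_{t+1}, evaluated right to left
    delta = rewards + gamma * values_next * masks - values
    gates = gamma * lam * masks

    gae = torch.zeros_like(delta[..., -1])
    out = torch.empty_like(delta)

    for t in reversed(range(delta.shape[-1])):
        gae = delta[..., t] + gates[..., t] * gae
        out[..., t] = gae

    return out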
@@ -958,7 +952,10 @@ def actor_loss(
     advantages, # Float[b]
     eps_clip = 0.2,
     entropy_weight = .01,
+    eps = 1e-5
 ):
+    batch = logits.shape[0]
+
     log_probs = gather_log_prob(logits, actions)
 
     ratio = (log_probs - old_log_probs).exp()
@@ -967,6 +964,8 @@ def actor_loss(
 
     clipped_ratio = ratio.clamp(min = 1. - eps_clip, max = 1. + eps_clip)
 
+    advantages = F.layer_norm(advantages, (batch,), eps = eps)
+
     actor_loss = -torch.min(clipped_ratio * advantages, ratio * advantages)
 
     # add entropy loss for exploration
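With no weight or bias passed, the new F.layer_norm call is plain whitening of the advantages across the batch: subtract the batch mean, divide by the batch standard deviation, with eps added to the (biased) variance before the square root. A standalone sketch of that equivalence, for illustration only:

import torch
import torch.nn.functional as F

advantages = torch.randn(8)
eps = 1e-5

# layer_norm over the batch dimension, no affine parameters
normed = F.layer_norm(advantages, (advantages.shape[0],), eps = eps)

# manual whitening with the same biased variance and eps placement
manual = (advantages - advantages.mean()) / (advantages.var(unbiased = False) + eps).sqrt()

assert torch.allclose(normed, manual, atol = 1e-6)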
pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "evolutionary-policy-optimization"
-version = "0.0.58"
+version = "0.0.60"
 description = "EPO - Pytorch"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -15,7 +15,7 @@ keywords = [
     'genetic algorithms',
     'robotics'
 ]
-classifiers=[
+classifiers = [
     'Development Status :: 4 - Beta',
     'Intended Audience :: Developers',
     'Topic :: Scientific/Engineering :: Artificial Intelligence',
@@ -25,7 +25,7 @@ classifiers=[
 
 dependencies = [
     "adam-atan2-pytorch",
-    'assoc-scan',
+    'assoc-scan>=0.0.2',
     'einx>=0.3.0',
     'einops>=0.8.1',
     'ema-pytorch>=0.7.7',
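Both manifests now carry the same assoc-scan floor, matching the simplified scan call in epo.py. A quick way to confirm the resolved versions after upgrading — ordinary importlib.metadata usage, shown only for illustration:

import importlib.metadata as m

# after: pip install --upgrade evolutionary-policy-optimization==0.0.60
print(m.version('evolutionary-policy-optimization'))  # 0.0.60
print(m.version('assoc-scan'))                        # satisfies the new >=0.0.2 floor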