evolutionary-policy-optimization 0.0.58__tar.gz → 0.0.61__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (14)
  1. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.61}/PKG-INFO +2 -2
  2. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.61}/evolutionary_policy_optimization/epo.py +8 -7
  3. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.61}/pyproject.toml +3 -3
  4. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.61}/.github/workflows/python-publish.yml +0 -0
  5. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.61}/.github/workflows/test.yml +0 -0
  6. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.61}/.gitignore +0 -0
  7. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.61}/LICENSE +0 -0
  8. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.61}/README.md +0 -0
  9. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.61}/evolutionary_policy_optimization/__init__.py +0 -0
  10. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.61}/evolutionary_policy_optimization/distributed.py +0 -0
  11. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.61}/evolutionary_policy_optimization/experimental.py +0 -0
  12. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.61}/evolutionary_policy_optimization/mock_env.py +0 -0
  13. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.61}/requirements.txt +0 -0
  14. {evolutionary_policy_optimization-0.0.58 → evolutionary_policy_optimization-0.0.61}/tests/test_epo.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: evolutionary-policy-optimization
- Version: 0.0.58
+ Version: 0.0.61
  Summary: EPO - Pytorch
  Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
  Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -35,7 +35,7 @@ Classifier: Programming Language :: Python :: 3.8
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Requires-Python: >=3.9
  Requires-Dist: adam-atan2-pytorch
- Requires-Dist: assoc-scan
+ Requires-Dist: assoc-scan>=0.0.2
  Requires-Dist: einops>=0.8.1
  Requires-Dist: einx>=0.3.0
  Requires-Dist: ema-pytorch>=0.7.7
evolutionary_policy_optimization/epo.py

@@ -127,15 +127,9 @@ def calc_generalized_advantage_estimate(
  delta = rewards + gamma * values_next * masks - values
  gates = gamma * lam * masks

- gates, delta = gates[..., :, None], delta[..., :, None]
-
  scan = AssocScan(reverse = True, use_accelerated = use_accelerated)

- gae = scan(gates, delta)
-
- gae = gae[..., :, 0]
-
- return gae
+ return scan(gates, delta)

  # evolution related functions
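This hunk simplifies calc_generalized_advantage_estimate: gates and delta are now handed to AssocScan directly, without adding and then stripping a trailing singleton dimension, which lines up with the assoc-scan>=0.0.2 bump elsewhere in this diff. For reference, the reversed associative scan computes the standard GAE recurrence A_t = delta_t + gamma * lam * mask_t * A_{t+1}; the sketch below is a plain backward loop (illustrative only, not the package's code, and the name gae_reference is made up) that the scan output can be checked against.

```python
# Hedged sketch: a backward-loop reference for the GAE recurrence that the
# reversed associative scan above computes. Illustrative, not the package's code.
import torch

def gae_reference(rewards, values, values_next, masks, gamma = 0.99, lam = 0.95):
    # delta_t = r_t + gamma * V(s_{t+1}) * mask_t - V(s_t)
    delta = rewards + gamma * values_next * masks - values
    gates = gamma * lam * masks

    gae = torch.zeros_like(delta)
    running = torch.zeros_like(delta[..., 0])

    # A_t = delta_t + gamma * lam * mask_t * A_{t+1}, evaluated right to left
    for t in reversed(range(delta.shape[-1])):
        running = delta[..., t] + gates[..., t] * running
        gae[..., t] = running

    return gae
```

Because this first-order linear recurrence is associative over (gate, delta) pairs, the scan produces the same values while parallelizing over the time dimension.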
 
@@ -845,6 +839,8 @@ class Agent(Module):
  if is_distributed():
  memories = map(partial(all_gather_variable_dim, dim = 0), memories)

+ fitness_scores = all_gather_variable_dim(fitness_scores, dim = 0)
+
  (
  episode_ids,
  states,
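In this Agent hunk, fitness_scores is now all-gathered across ranks alongside the rolled-out memories, so every process sees the full population's scores before learning. all_gather_variable_dim is the package's helper from distributed.py; as a rough mental model only (an assumption, not that helper's actual implementation), a variable-size dim-0 all-gather pads each rank's tensor to the largest per-rank length, gathers, then trims:

```python
# Minimal sketch of an all-gather along dim 0 for tensors whose dim-0 sizes
# differ per rank. Assumes torch.distributed is initialized; the function name
# gather_variable_dim0 is illustrative, not the package's API.
import torch
import torch.distributed as dist

def gather_variable_dim0(t: torch.Tensor) -> torch.Tensor:
    world_size = dist.get_world_size()

    # share each rank's length along dim 0
    local_len = torch.tensor([t.shape[0]], device = t.device)
    lens = [torch.zeros_like(local_len) for _ in range(world_size)]
    dist.all_gather(lens, local_len)
    lens = torch.cat(lens)

    # pad to the maximum length so the collective sees equal shapes
    max_len = int(lens.max())
    padded = torch.zeros((max_len, *t.shape[1:]), device = t.device, dtype = t.dtype)
    padded[:t.shape[0]] = t

    gathered = [torch.zeros_like(padded) for _ in range(world_size)]
    dist.all_gather(gathered, padded)

    # trim the padding back off and concatenate in rank order
    return torch.cat([g[:int(l)] for g, l in zip(gathered, lens)], dim = 0)
```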
@@ -958,7 +954,10 @@ def actor_loss(
  advantages, # Float[b]
  eps_clip = 0.2,
  entropy_weight = .01,
+ eps = 1e-5
  ):
+ batch = logits.shape[0]
+
  log_probs = gather_log_prob(logits, actions)

  ratio = (log_probs - old_log_probs).exp()
@@ -967,6 +966,8 @@ def actor_loss(

  clipped_ratio = ratio.clamp(min = 1. - eps_clip, max = 1. + eps_clip)

+ advantages = F.layer_norm(advantages, (batch,), eps = eps)
+
  actor_loss = -torch.min(clipped_ratio * advantages, ratio * advantages)

  # add entropy loss for exploration
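The two actor_loss hunks are one change: the new eps argument and the batch size feed an F.layer_norm over the batch dimension, which standardizes the advantages to zero mean and unit variance before they enter the clipped PPO objective. A small self-contained check of that equivalence, assuming a 1-D Float[b] advantage tensor as in the signature above:

```python
# Hedged check: layer_norm over the batch dimension of a 1-D advantage vector
# equals the usual (adv - mean) / sqrt(var + eps) advantage normalization.
import torch
import torch.nn.functional as F

advantages = torch.randn(8)          # Float[b], as in the actor_loss signature
batch = advantages.shape[0]
eps = 1e-5

normed = F.layer_norm(advantages, (batch,), eps = eps)
manual = (advantages - advantages.mean()) / (advantages.var(unbiased = False) + eps).sqrt()

assert torch.allclose(normed, manual, atol = 1e-6)
```

Per-batch advantage normalization is a common PPO stabilizer; writing it as F.layer_norm is simply a compact way to express the same standardization.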
pyproject.toml

@@ -1,6 +1,6 @@
  [project]
  name = "evolutionary-policy-optimization"
- version = "0.0.58"
+ version = "0.0.61"
  description = "EPO - Pytorch"
  authors = [
  { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -15,7 +15,7 @@ keywords = [
  'genetic algorithms',
  'robotics'
  ]
- classifiers=[
+ classifiers = [
  'Development Status :: 4 - Beta',
  'Intended Audience :: Developers',
  'Topic :: Scientific/Engineering :: Artificial Intelligence',
@@ -25,7 +25,7 @@ classifiers=[

  dependencies = [
  "adam-atan2-pytorch",
- 'assoc-scan',
+ 'assoc-scan>=0.0.2',
  'einx>=0.3.0',
  'einops>=0.8.1',
  'ema-pytorch>=0.7.7',