evolutionary-policy-optimization 0.2.11__tar.gz → 0.2.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18)
  1. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/PKG-INFO +15 -4
  2. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/README.md +14 -3
  3. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/evolutionary_policy_optimization/epo.py +5 -5
  4. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/pyproject.toml +1 -1
  5. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/.github/workflows/lint.yml +0 -0
  6. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/.github/workflows/python-publish.yml +0 -0
  7. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/.github/workflows/test.yml +0 -0
  8. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/.gitignore +0 -0
  9. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/LICENSE +0 -0
  10. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/evolutionary_policy_optimization/__init__.py +0 -0
  11. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/evolutionary_policy_optimization/distributed.py +0 -0
  12. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/evolutionary_policy_optimization/env_wrappers.py +0 -0
  13. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/evolutionary_policy_optimization/experimental.py +0 -0
  14. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/evolutionary_policy_optimization/mock_env.py +0 -0
  15. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/requirements.txt +0 -0
  16. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/tests/test_epo.py +0 -0
  17. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/train_crossover_weight_space.py +0 -0
  18. {evolutionary_policy_optimization-0.2.11 → evolutionary_policy_optimization-0.2.14}/train_gym.py +0 -0
--- evolutionary_policy_optimization-0.2.11/PKG-INFO
+++ evolutionary_policy_optimization-0.2.14/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.2.11
+Version: 0.2.14
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -87,10 +87,10 @@ latent_pool = LatentGenePool(
     dim_latent = 32,
 )
 
-state = torch.randn(1, 512)
+state = torch.randn(1, 32)
 
-actor = Actor(512, dim_hiddens = (256, 128), num_actions = 4, dim_latent = 32)
-critic = Critic(512, dim_hiddens = (256, 128, 64), dim_latent = 32)
+actor = Actor(dim_state = 32, dim = 256, mlp_depth = 2, num_actions = 4, dim_latent = 32)
+critic = Critic(dim_state = 32, dim = 256, mlp_depth = 3, dim_latent = 32)
 
 latent = latent_pool(latent_id = 2)
 
@@ -289,4 +289,15 @@ That's it
 }
 ```
 
+```bibtex
+@article{Jiang2022GeneralIR,
+    title = {General intelligence requires rethinking exploration},
+    author = {Minqi Jiang and Tim Rocktaschel and Edward Grefenstette},
+    journal = {Royal Society Open Science},
+    year = {2022},
+    volume = {10},
+    url = {https://api.semanticscholar.org/CorpusID:253523156}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
--- evolutionary_policy_optimization-0.2.11/README.md
+++ evolutionary_policy_optimization-0.2.14/README.md
@@ -30,10 +30,10 @@ latent_pool = LatentGenePool(
     dim_latent = 32,
 )
 
-state = torch.randn(1, 512)
+state = torch.randn(1, 32)
 
-actor = Actor(512, dim_hiddens = (256, 128), num_actions = 4, dim_latent = 32)
-critic = Critic(512, dim_hiddens = (256, 128, 64), dim_latent = 32)
+actor = Actor(dim_state = 32, dim = 256, mlp_depth = 2, num_actions = 4, dim_latent = 32)
+critic = Critic(dim_state = 32, dim = 256, mlp_depth = 3, dim_latent = 32)
 
 latent = latent_pool(latent_id = 2)
 
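The Actor and Critic constructors changed shape in 0.2.14: the positional state dimension plus `dim_hiddens` tuple gives way to explicit `dim_state`, `dim`, and `mlp_depth` keywords (the same edit lands in both PKG-INFO and README.md). Below is a minimal sketch of the new call pattern, stitched together from the changed README lines; the `num_latents` value and the `actor(state, latent)` / `critic(state, latent)` forward calls are assumptions based on README context outside this diff:

```python
import torch
from evolutionary_policy_optimization import LatentGenePool, Actor, Critic

latent_pool = LatentGenePool(
    num_latents = 128,  # assumed pool size; not part of this diff
    dim_latent = 32,
)

state = torch.randn(1, 32)

# 0.2.14 signature: dim_state / dim / mlp_depth replace the old
# positional state dim + dim_hiddens tuple
actor = Actor(dim_state = 32, dim = 256, mlp_depth = 2, num_actions = 4, dim_latent = 32)
critic = Critic(dim_state = 32, dim = 256, mlp_depth = 3, dim_latent = 32)

latent = latent_pool(latent_id = 2)

# forward calls assumed from README sections outside this diff
logits = actor(state, latent)
value = critic(state, latent)
```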
@@ -232,4 +232,15 @@ That's it
 }
 ```
 
+```bibtex
+@article{Jiang2022GeneralIR,
+    title = {General intelligence requires rethinking exploration},
+    author = {Minqi Jiang and Tim Rocktaschel and Edward Grefenstette},
+    journal = {Royal Society Open Science},
+    year = {2022},
+    volume = {10},
+    url = {https://api.semanticscholar.org/CorpusID:253523156}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
--- evolutionary_policy_optimization-0.2.11/evolutionary_policy_optimization/epo.py
+++ evolutionary_policy_optimization-0.2.14/evolutionary_policy_optimization/epo.py
@@ -589,9 +589,9 @@ class Critic(Module):
         use_regression = False,
         state_norm: StateNorm | None = None,
         hl_gauss_loss_kwargs: dict = dict(
-            min_value = -100.,
-            max_value = 100.,
-            num_bins = 200
+            min_value = -10.,
+            max_value = 10.,
+            num_bins = 250
         )
     ):
         super().__init__()
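The new HL-Gauss defaults trade a wide value support for a fine one: the range shrinks from [-100, 100] to [-10, 10] while the bin count rises from 200 to 250. Assuming the bins evenly tile the support, that narrows each bin from 1.0 to 0.08:

```python
# bin width = (max_value - min_value) / num_bins, assuming evenly tiled bins
old_width = (100. - -100.) / 200  # 1.0  under the 0.2.11 defaults
new_width = (10. - -10.) / 250    # 0.08 under the 0.2.14 defaults
```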
@@ -644,11 +644,11 @@ class Critic(Module):
         loss_fn = partial(self.loss_fn, reduction = 'none')
 
         if use_improved:
-            clipped_target = target.clamp(-eps_clip, eps_clip)
-
             old_values_lo = old_values - eps_clip
             old_values_hi = old_values + eps_clip
 
+            clipped_target = target.clamp(old_values_lo, old_values_hi)
+
             def is_between(lo, hi):
                 return (lo < value) & (value < hi)
 
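The reordering above is a bug fix rather than a cosmetic move: 0.2.11 clamped the value target to a fixed [-eps_clip, eps_clip] band around zero, while 0.2.14 clamps it to a band around the old value prediction, the usual PPO-style value clipping. A standalone sketch of the corrected behavior (the `eps_clip` value here is illustrative, not taken from the package):

```python
import torch

def clipped_value_target(target, old_values, eps_clip = 0.4):
    # 0.2.11 behavior was target.clamp(-eps_clip, eps_clip): a fixed
    # band around zero that ignores where the old value prediction sits
    old_values_lo = old_values - eps_clip
    old_values_hi = old_values + eps_clip
    return target.clamp(old_values_lo, old_values_hi)

target = torch.tensor([3.0, -2.0, 0.5])
old_values = torch.tensor([2.5, -1.0, 0.4])
print(clipped_value_target(target, old_values))
# tensor([ 2.9000, -1.4000,  0.5000]) -- each target pulled to within
# eps_clip of its corresponding old value
```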
--- evolutionary_policy_optimization-0.2.11/pyproject.toml
+++ evolutionary_policy_optimization-0.2.14/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "evolutionary-policy-optimization"
-version = "0.2.11"
+version = "0.2.14"
 description = "EPO - Pytorch"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }