evolutionary-policy-optimization 0.0.40__tar.gz → 0.0.41__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (13)
  1. {evolutionary_policy_optimization-0.0.40 → evolutionary_policy_optimization-0.0.41}/PKG-INFO +22 -1
  2. {evolutionary_policy_optimization-0.0.40 → evolutionary_policy_optimization-0.0.41}/README.md +21 -0
  3. {evolutionary_policy_optimization-0.0.40 → evolutionary_policy_optimization-0.0.41}/evolutionary_policy_optimization/epo.py +13 -1
  4. {evolutionary_policy_optimization-0.0.40 → evolutionary_policy_optimization-0.0.41}/pyproject.toml +1 -1
  5. {evolutionary_policy_optimization-0.0.40 → evolutionary_policy_optimization-0.0.41}/.github/workflows/python-publish.yml +0 -0
  6. {evolutionary_policy_optimization-0.0.40 → evolutionary_policy_optimization-0.0.41}/.github/workflows/test.yml +0 -0
  7. {evolutionary_policy_optimization-0.0.40 → evolutionary_policy_optimization-0.0.41}/.gitignore +0 -0
  8. {evolutionary_policy_optimization-0.0.40 → evolutionary_policy_optimization-0.0.41}/LICENSE +0 -0
  9. {evolutionary_policy_optimization-0.0.40 → evolutionary_policy_optimization-0.0.41}/evolutionary_policy_optimization/__init__.py +0 -0
  10. {evolutionary_policy_optimization-0.0.40 → evolutionary_policy_optimization-0.0.41}/evolutionary_policy_optimization/experimental.py +0 -0
  11. {evolutionary_policy_optimization-0.0.40 → evolutionary_policy_optimization-0.0.41}/evolutionary_policy_optimization/mock_env.py +0 -0
  12. {evolutionary_policy_optimization-0.0.40 → evolutionary_policy_optimization-0.0.41}/requirements.txt +0 -0
  13. {evolutionary_policy_optimization-0.0.40 → evolutionary_policy_optimization-0.0.41}/tests/test_epo.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: evolutionary-policy-optimization
3
- Version: 0.0.40
3
+ Version: 0.0.41
4
4
  Summary: EPO - Pytorch
5
5
  Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
6
6
  Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -162,4 +162,25 @@ agent.load('./agent.pt')
162
162
  }
163
163
  ```
164
164
 
165
+ ```bibtex
166
+ @inproceedings{Khadka2018EvolutionGuidedPG,
167
+ title = {Evolution-Guided Policy Gradient in Reinforcement Learning},
168
+ author = {Shauharda Khadka and Kagan Tumer},
169
+ booktitle = {Neural Information Processing Systems},
170
+ year = {2018},
171
+ url = {https://api.semanticscholar.org/CorpusID:53096951}
172
+ }
173
+ ```
174
+
175
+ ```bibtex
176
+ @article{Fortunato2017NoisyNF,
177
+ title = {Noisy Networks for Exploration},
178
+ author = {Meire Fortunato and Mohammad Gheshlaghi Azar and Bilal Piot and Jacob Menick and Ian Osband and Alex Graves and Vlad Mnih and R{\'e}mi Munos and Demis Hassabis and Olivier Pietquin and Charles Blundell and Shane Legg},
179
+ journal = {ArXiv},
180
+ year = {2017},
181
+ volume = {abs/1706.10295},
182
+ url = {https://api.semanticscholar.org/CorpusID:5176587}
183
+ }
184
+ ```
185
+
165
186
  *Evolution is cleverer than you are.* - Leslie Orgel
@@ -110,4 +110,25 @@ agent.load('./agent.pt')
110
110
  }
111
111
  ```
112
112
 
113
+ ```bibtex
114
+ @inproceedings{Khadka2018EvolutionGuidedPG,
115
+ title = {Evolution-Guided Policy Gradient in Reinforcement Learning},
116
+ author = {Shauharda Khadka and Kagan Tumer},
117
+ booktitle = {Neural Information Processing Systems},
118
+ year = {2018},
119
+ url = {https://api.semanticscholar.org/CorpusID:53096951}
120
+ }
121
+ ```
122
+
123
+ ```bibtex
124
+ @article{Fortunato2017NoisyNF,
125
+ title = {Noisy Networks for Exploration},
126
+ author = {Meire Fortunato and Mohammad Gheshlaghi Azar and Bilal Piot and Jacob Menick and Ian Osband and Alex Graves and Vlad Mnih and R{\'e}mi Munos and Demis Hassabis and Olivier Pietquin and Charles Blundell and Shane Legg},
127
+ journal = {ArXiv},
128
+ year = {2017},
129
+ volume = {abs/1706.10295},
130
+ url = {https://api.semanticscholar.org/CorpusID:5176587}
131
+ }
132
+ ```
133
+
113
134
  *Evolution is cleverer than you are.* - Leslie Orgel
@@ -798,6 +798,10 @@ class Agent(Module):
798
798
 
799
799
  latents = self.latent_gene_pool(latent_id = latent_gene_ids)
800
800
 
801
+ orig_latents = latents
802
+ latents = latents.detach()
803
+ latents.requires_grad_()
804
+
801
805
  # learn actor
802
806
 
803
807
  logits = self.actor(states, latents)
@@ -822,6 +826,14 @@ class Agent(Module):
822
826
  self.critic_optim.step()
823
827
  self.critic_optim.zero_grad()
824
828
 
829
+ # maybe update latents, if not frozen
830
+
831
+ if not self.latent_gene_pool.frozen_latents:
832
+ orig_latents.backward(latents.grad)
833
+
834
+ self.latent_optim.step()
835
+ self.latent_optim.zero_grad()
836
+
825
837
  # apply evolution
826
838
 
827
839
  self.latent_gene_pool.genetic_algorithm_step(fitness_scores)
@@ -951,7 +963,7 @@ class EPO(Module):
951
963
 
952
964
  done = tensor(False)
953
965
 
954
- while time < self.max_episode_length:
966
+ while time < self.max_episode_length and not done:
955
967
 
956
968
  # sample action
957
969
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "evolutionary-policy-optimization"
3
- version = "0.0.40"
3
+ version = "0.0.41"
4
4
  description = "EPO - Pytorch"
5
5
  authors = [
6
6
  { name = "Phil Wang", email = "lucidrains@gmail.com" }