evolutionary-policy-optimization 0.0.36__py3-none-any.whl → 0.0.37__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to its public registry. It is provided for informational purposes only.
@@ -743,9 +743,11 @@ class Agent(Module):
             old_values
         ) in dataloader:
 
+            latents = self.latent_gene_pool(latent_gene_ids)
+
             # learn actor
 
-            logits = self.actor(states)
+            logits = self.actor(states, latents)
 
             actor_loss = self.actor_loss(logits, log_probs, actions, advantages)
 
             actor_loss.backward()
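The hunk above threads a per-sample latent, looked up from the gene pool by `latent_gene_ids`, into the actor's forward pass. Below is a minimal sketch of what that conditioning could look like, assuming the gene pool behaves like an embedding lookup and the actor concatenates the latent onto the state. Only the calls `self.latent_gene_pool(latent_gene_ids)` and `self.actor(states, latents)` come from the diff; every other name, shape, and module is a hypothetical stand-in, not the package's actual implementation.

```python
import torch
from torch import nn

# Hypothetical shapes and modules for illustration only. The calls that appear
# in the diff -- latent_gene_pool(latent_gene_ids) and actor(states, latents) --
# are taken from it; everything else is assumed.

state_dim, latent_dim, num_actions, num_latents = 512, 32, 5, 128

class LatentConditionedActor(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim + latent_dim, 256),
            nn.SiLU(),
            nn.Linear(256, num_actions)
        )

    def forward(self, states, latents):
        # condition the policy on the evolved latent by simple concatenation
        return self.net(torch.cat((states, latents), dim = -1))

latent_gene_pool = nn.Embedding(num_latents, latent_dim)   # one latent "gene" per population member
actor = LatentConditionedActor()

states = torch.randn(8, state_dim)                          # batch of observations
latent_gene_ids = torch.randint(0, num_latents, (8,))       # which gene each sample was collected under

latents = latent_gene_pool(latent_gene_ids)                 # look up the latent for each sample
logits = actor(states, latents)                             # policy logits now depend on (state, latent)
```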
@@ -754,7 +756,12 @@ class Agent(Module):
 
             # learn critic with maybe classification loss
 
-            critic_loss = self.critic(states, advantages + old_values)
+            critic_loss = self.critic(
+                states,
+                latents,
+                targets = advantages + old_values
+            )
+
             critic_loss.backward()
 
             self.critic_optim.step()
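The second hunk makes the same latent available to the critic and passes the value target through a keyword `targets` argument, with the critic returning its loss directly (the source comment notes it may be a classification loss). A sketch under the same assumptions as above, using plain MSE regression purely as a placeholder for whatever loss the real critic computes:

```python
import torch
import torch.nn.functional as F
from torch import nn

# Stand-in critic with the call signature shown in the diff:
# critic(states, latents, targets = ...) returns a loss during training.
# MSE regression is only a placeholder for the "maybe classification" loss.

class LatentConditionedCritic(nn.Module):
    def __init__(self, state_dim = 512, latent_dim = 32):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim + latent_dim, 256),
            nn.SiLU(),
            nn.Linear(256, 1)
        )

    def forward(self, states, latents, targets = None):
        values = self.net(torch.cat((states, latents), dim = -1)).squeeze(-1)

        if targets is None:
            return values                       # inference: value estimates

        return F.mse_loss(values, targets)      # training: return the loss directly

critic = LatentConditionedCritic()

states     = torch.randn(8, 512)
latents    = torch.randn(8, 32)
advantages = torch.randn(8)
old_values = torch.randn(8)

# mirrors the updated call in the diff: the value target is advantages + old values
critic_loss = critic(states, latents, targets = advantages + old_values)
critic_loss.backward()
```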
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.36
+Version: 0.0.37
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -1,8 +1,8 @@
 evolutionary_policy_optimization/__init__.py,sha256=Qavcia0n13jjaWIS_LPW7QrxSLT_BBeKujCjF9kQjbA,133
-evolutionary_policy_optimization/epo.py,sha256=wRqGjoiksWY33BQc9jypJbKWroHm3i_aEPNx1twVjWk,25819
+evolutionary_policy_optimization/epo.py,sha256=onIGNWHg1EGQwJ9TfkkJ8Yz8_S-BPoaqrxJwq54BXp0,25992
 evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
 evolutionary_policy_optimization/mock_env.py,sha256=3xrd-gwjZeVd_sEvxIyX0lppnMWcfQGOapO-XjKmExI,816
-evolutionary_policy_optimization-0.0.36.dist-info/METADATA,sha256=WQpJa1PuiQx1qANilbJ0E7tZoKHDm2wAvjMccQoPH5Q,4992
-evolutionary_policy_optimization-0.0.36.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.0.36.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.0.36.dist-info/RECORD,,
+evolutionary_policy_optimization-0.0.37.dist-info/METADATA,sha256=nPWBCvx02MHWdKu5cEoPmHFMFKhwepOfStkXIXR2NHc,4992
+evolutionary_policy_optimization-0.0.37.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.37.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.37.dist-info/RECORD,,