evolutionary-policy-optimization 0.0.36__py3-none-any.whl → 0.0.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolutionary_policy_optimization/epo.py +9 -2
- {evolutionary_policy_optimization-0.0.36.dist-info → evolutionary_policy_optimization-0.0.37.dist-info}/METADATA +1 -1
- {evolutionary_policy_optimization-0.0.36.dist-info → evolutionary_policy_optimization-0.0.37.dist-info}/RECORD +5 -5
- {evolutionary_policy_optimization-0.0.36.dist-info → evolutionary_policy_optimization-0.0.37.dist-info}/WHEEL +0 -0
- {evolutionary_policy_optimization-0.0.36.dist-info → evolutionary_policy_optimization-0.0.37.dist-info}/licenses/LICENSE +0 -0
@@ -743,9 +743,11 @@ class Agent(Module):
|
|
743
743
|
old_values
|
744
744
|
) in dataloader:
|
745
745
|
|
746
|
+
latents = self.latent_gene_pool(latent_gene_ids)
|
747
|
+
|
746
748
|
# learn actor
|
747
749
|
|
748
|
-
logits = self.actor(states)
|
750
|
+
logits = self.actor(states, latents)
|
749
751
|
actor_loss = self.actor_loss(logits, log_probs, actions, advantages)
|
750
752
|
|
751
753
|
actor_loss.backward()
|
@@ -754,7 +756,12 @@ class Agent(Module):
|
|
754
756
|
|
755
757
|
# learn critic with maybe classification loss
|
756
758
|
|
757
|
-
critic_loss = self.critic(
|
759
|
+
critic_loss = self.critic(
|
760
|
+
states,
|
761
|
+
latents,
|
762
|
+
targets = advantages + old_values
|
763
|
+
)
|
764
|
+
|
758
765
|
critic_loss.backward()
|
759
766
|
|
760
767
|
self.critic_optim.step()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: evolutionary-policy-optimization
|
3
|
-
Version: 0.0.36
|
3
|
+
Version: 0.0.37
|
4
4
|
Summary: EPO - Pytorch
|
5
5
|
Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
|
6
6
|
Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
|
@@ -1,8 +1,8 @@
|
|
1
1
|
evolutionary_policy_optimization/__init__.py,sha256=Qavcia0n13jjaWIS_LPW7QrxSLT_BBeKujCjF9kQjbA,133
|
2
|
-
evolutionary_policy_optimization/epo.py,sha256=
|
2
|
+
evolutionary_policy_optimization/epo.py,sha256=onIGNWHg1EGQwJ9TfkkJ8Yz8_S-BPoaqrxJwq54BXp0,25992
|
3
3
|
evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
|
4
4
|
evolutionary_policy_optimization/mock_env.py,sha256=3xrd-gwjZeVd_sEvxIyX0lppnMWcfQGOapO-XjKmExI,816
|
5
|
-
evolutionary_policy_optimization-0.0.
|
6
|
-
evolutionary_policy_optimization-0.0.36.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
7
|
-
evolutionary_policy_optimization-0.0.36.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
|
8
|
-
evolutionary_policy_optimization-0.0.36.dist-info/RECORD,,
|
5
|
+
evolutionary_policy_optimization-0.0.37.dist-info/METADATA,sha256=nPWBCvx02MHWdKu5cEoPmHFMFKhwepOfStkXIXR2NHc,4992
|
6
|
+
evolutionary_policy_optimization-0.0.37.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
7
|
+
evolutionary_policy_optimization-0.0.37.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
|
8
|
+
evolutionary_policy_optimization-0.0.37.dist-info/RECORD,,
|
File without changes
|