dreamer4 0.0.99__tar.gz → 0.0.101__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dreamer4 might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dreamer4
3
- Version: 0.0.99
3
+ Version: 0.0.101
4
4
  Summary: Dreamer 4
5
5
  Project-URL: Homepage, https://pypi.org/project/dreamer4/
6
6
  Project-URL: Repository, https://github.com/lucidrains/dreamer4
@@ -1900,7 +1900,8 @@ class DynamicsWorldModel(Module):
1900
1900
  gae_lambda = 0.95,
1901
1901
  ppo_eps_clip = 0.2,
1902
1902
  pmpo_pos_to_neg_weight = 0.5, # pos and neg equal weight
1903
- pmpo_kl_div_loss_weight = 1.,
1903
+ pmpo_reverse_kl = True,
1904
+ pmpo_kl_div_loss_weight = .3,
1904
1905
  value_clip = 0.4,
1905
1906
  policy_entropy_weight = .01,
1906
1907
  gae_use_accelerated = False
@@ -2108,6 +2109,7 @@ class DynamicsWorldModel(Module):
2108
2109
 
2109
2110
  self.pmpo_pos_to_neg_weight = pmpo_pos_to_neg_weight
2110
2111
  self.pmpo_kl_div_loss_weight = pmpo_kl_div_loss_weight
2112
+ self.pmpo_reverse_kl = pmpo_reverse_kl
2111
2113
 
2112
2114
  # rewards related
2113
2115
 
@@ -2578,11 +2580,18 @@ class DynamicsWorldModel(Module):
2578
2580
  # take care of kl
2579
2581
 
2580
2582
  if self.pmpo_kl_div_loss_weight > 0.:
2583
+
2581
2584
  new_unembedded_actions = self.action_embedder.unembed(policy_embed, pred_head_index = 0)
2582
2585
 
2586
+ kl_div_inputs, kl_div_targets = new_unembedded_actions, old_action_unembeds
2587
+
2583
2588
  # mentioned that the "reverse direction for the prior KL" was used
2589
+ # made optional, since instability was observed in a toy task
2590
+
2591
+ if self.pmpo_reverse_kl:
2592
+ kl_div_inputs, kl_div_targets = kl_div_targets, kl_div_inputs
2584
2593
 
2585
- discrete_kl_div, continuous_kl_div = self.action_embedder.kl_div(old_action_unembeds, new_unembedded_actions)
2594
+ discrete_kl_div, continuous_kl_div = self.action_embedder.kl_div(kl_div_inputs, kl_div_targets)
2586
2595
 
2587
2596
  # accumulate discrete and continuous kl div
2588
2597
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dreamer4"
3
- version = "0.0.99"
3
+ version = "0.0.101"
4
4
  description = "Dreamer 4"
5
5
  authors = [
6
6
  { name = "Phil Wang", email = "lucidrains@gmail.com" }
File without changes
File without changes
File without changes
File without changes
File without changes