dreamer4 0.0.100__py3-none-any.whl → 0.0.102__py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
dreamer4/dreamer4.py CHANGED
@@ -1900,7 +1900,9 @@ class DynamicsWorldModel(Module):
         gae_lambda = 0.95,
         ppo_eps_clip = 0.2,
         pmpo_pos_to_neg_weight = 0.5, # pos and neg equal weight
+        pmpo_reverse_kl = True,
         pmpo_kl_div_loss_weight = .3,
+        normalize_advantages = None,
         value_clip = 0.4,
         policy_entropy_weight = .01,
         gae_use_accelerated = False
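
The two new hyperparameters above thread through the hunks below: pmpo_reverse_kl toggles the direction of the KL penalty, and normalize_advantages = None is a tri-state flag resolved at update time. A minimal sketch of that None-sentinel resolution, assuming a lucidrains-style default helper (the helper body is an assumption; it is not shown in this diff):

# sketch of the None-sentinel pattern behind normalize_advantages;
# the default helper below is assumed, in the style this repo uses

def default(value, fallback):
    # return the explicit value when given, otherwise the fallback
    return value if value is not None else fallback

use_pmpo = True
normalize_advantages = None  # None means "decide based on use_pmpo"

# resolves to False here, since PMPO only consumes the advantage sign
normalize_advantages = default(normalize_advantages, not use_pmpo)
print(normalize_advantages)  # False

Leaving the default at None keeps the old behavior (PMPO skips normalization, PPO-style updates normalize) while still letting the caller force either choice.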
@@ -2108,6 +2110,7 @@ class DynamicsWorldModel(Module):
 
         self.pmpo_pos_to_neg_weight = pmpo_pos_to_neg_weight
         self.pmpo_kl_div_loss_weight = pmpo_kl_div_loss_weight
+        self.pmpo_reverse_kl = pmpo_reverse_kl
 
         # rewards related
 
@@ -2423,6 +2426,7 @@ class DynamicsWorldModel(Module):
         value_optim: Optimizer | None = None,
         only_learn_policy_value_heads = True, # in the paper, they do not finetune the entire dynamics model, they just learn the heads
         use_pmpo = True,
+        normalize_advantages = None,
         eps = 1e-6
     ):
 
@@ -2505,16 +2509,19 @@ class DynamicsWorldModel(Module):
         else:
             advantage = returns - old_values
 
-        # apparently they just use the sign of the advantage
+        # if using pmpo, do not normalize advantages, but can be overridden
+
+        normalize_advantages = default(normalize_advantages, not use_pmpo)
+
+        if normalize_advantages:
+            advantage = F.layer_norm(advantage, advantage.shape, eps = eps)
+
         # https://arxiv.org/abs/2410.04166v1
 
         if use_pmpo:
             pos_advantage_mask = advantage >= 0.
             neg_advantage_mask = ~pos_advantage_mask
 
-        else:
-            advantage = F.layer_norm(advantage, advantage.shape, eps = eps)
-
         # replay for the action logits and values
         # but only do so if fine tuning the entire world model for RL
 
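For orientation on the relocated normalization: F.layer_norm over the full tensor shape standardizes the advantages to zero mean and unit variance, which the PMPO path can skip because it only consumes each advantage's sign. A small self-contained sketch, with a made-up advantage shape:

import torch
import torch.nn.functional as F

advantage = torch.randn(4, 16)  # hypothetical (batch, time) advantages

# layer_norm over the entire shape gives zero mean, unit variance overall
normalized = F.layer_norm(advantage, advantage.shape, eps = 1e-6)

# PMPO only uses the sign, which normalization does not change,
# hence the new default of skipping it when use_pmpo is set
pos_advantage_mask = advantage >= 0.
neg_advantage_mask = ~pos_advantage_mask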
@@ -2578,11 +2585,18 @@ class DynamicsWorldModel(Module):
         # take care of kl
 
         if self.pmpo_kl_div_loss_weight > 0.:
+
             new_unembedded_actions = self.action_embedder.unembed(policy_embed, pred_head_index = 0)
 
+            kl_div_inputs, kl_div_targets = new_unembedded_actions, old_action_unembeds
+
             # mentioned that the "reverse direction for the prior KL" was used
+            # make optional, as observed instability in toy task
+
+            if self.pmpo_reverse_kl:
+                kl_div_inputs, kl_div_targets = kl_div_targets, kl_div_inputs
 
-            discrete_kl_div, continuous_kl_div = self.action_embedder.kl_div(old_action_unembeds, new_unembedded_actions)
+            discrete_kl_div, continuous_kl_div = self.action_embedder.kl_div(kl_div_inputs, kl_div_targets)
 
             # accumulate discrete and continuous kl div
 
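The swap above flips which distribution is the input and which is the target of action_embedder.kl_div, i.e. the direction of the prior KL penalty. KL divergence is asymmetric, so the two directions constrain drift from the old policy differently, which matches the noted instability in one direction. A sketch of both directions for a hypothetical discrete action head (the argument-order convention of action_embedder.kl_div is not visible in this diff, so the mapping here is illustrative only):

import torch
from torch.distributions import Categorical, kl_divergence

# hypothetical logits for the old (prior) and new (current) policies
old_logits = torch.randn(2, 6)
new_logits = torch.randn(2, 6)

prior = Categorical(logits = old_logits)
current = Categorical(logits = new_logits)

# the two possible directions of the penalty; they are not equal in general
kl_new_to_old = kl_divergence(current, prior)  # KL(current || prior)
kl_old_to_new = kl_divergence(prior, current)  # KL(prior || current)

pmpo_reverse_kl = True  # mirrors the new hyperparameter's default

# the diff selects a direction by swapping the argument pair, as sketched here
kl_penalty = (kl_new_to_old if pmpo_reverse_kl else kl_old_to_new).mean()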
dreamer4-0.0.102.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dreamer4
-Version: 0.0.100
+Version: 0.0.102
 Summary: Dreamer 4
 Project-URL: Homepage, https://pypi.org/project/dreamer4/
 Project-URL: Repository, https://github.com/lucidrains/dreamer4
dreamer4-0.0.102.dist-info/RECORD ADDED
@@ -0,0 +1,8 @@
+dreamer4/__init__.py,sha256=Jssh1obzDRtTfBLZl36kXge1cIQlMjf_8DyjPulvKSk,183
+dreamer4/dreamer4.py,sha256=3qeVN3qdvx7iPxA0OBXw_yy5Re6rX6FIKITH9bp6RBs,119202
+dreamer4/mocks.py,sha256=TfqOB_Gq6N_GggBYwa6ZAJQx38ntlYbXZe23Ne4jshw,2502
+dreamer4/trainers.py,sha256=JsnJwQJcbI_75KBTNddG6b7QVkO6LD1N_HQiVe-VnCM,15087
+dreamer4-0.0.102.dist-info/METADATA,sha256=xxVL1sFimb0azSD5sDOEzugY7rBT6oDek4YdiIS8m18,3066
+dreamer4-0.0.102.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+dreamer4-0.0.102.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+dreamer4-0.0.102.dist-info/RECORD,,
dreamer4-0.0.100.dist-info/RECORD DELETED
@@ -1,8 +0,0 @@
-dreamer4/__init__.py,sha256=Jssh1obzDRtTfBLZl36kXge1cIQlMjf_8DyjPulvKSk,183
-dreamer4/dreamer4.py,sha256=9r2qDg6SpCe6Y2MWzI44o369t1a4b_LhfQSI_FK5WHQ,118665
-dreamer4/mocks.py,sha256=TfqOB_Gq6N_GggBYwa6ZAJQx38ntlYbXZe23Ne4jshw,2502
-dreamer4/trainers.py,sha256=JsnJwQJcbI_75KBTNddG6b7QVkO6LD1N_HQiVe-VnCM,15087
-dreamer4-0.0.100.dist-info/METADATA,sha256=-hOF9eyycsndS5u8-i6o9IikCDracHn0mIIv_g5dLRo,3066
-dreamer4-0.0.100.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-dreamer4-0.0.100.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-dreamer4-0.0.100.dist-info/RECORD,,