dreamer4 0.0.85__tar.gz → 0.0.88__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dreamer4 might be problematic.
- {dreamer4-0.0.85 → dreamer4-0.0.88}/PKG-INFO +1 -1
- {dreamer4-0.0.85 → dreamer4-0.0.88}/dreamer4/dreamer4.py +28 -19
- {dreamer4-0.0.85 → dreamer4-0.0.88}/pyproject.toml +1 -1
- {dreamer4-0.0.85 → dreamer4-0.0.88}/.github/workflows/python-publish.yml +0 -0
- {dreamer4-0.0.85 → dreamer4-0.0.88}/.github/workflows/test.yml +0 -0
- {dreamer4-0.0.85 → dreamer4-0.0.88}/.gitignore +0 -0
- {dreamer4-0.0.85 → dreamer4-0.0.88}/LICENSE +0 -0
- {dreamer4-0.0.85 → dreamer4-0.0.88}/README.md +0 -0
- {dreamer4-0.0.85 → dreamer4-0.0.88}/dreamer4/__init__.py +0 -0
- {dreamer4-0.0.85 → dreamer4-0.0.88}/dreamer4/mocks.py +0 -0
- {dreamer4-0.0.85 → dreamer4-0.0.88}/dreamer4/trainers.py +0 -0
- {dreamer4-0.0.85 → dreamer4-0.0.88}/dreamer4-fig2.png +0 -0
- {dreamer4-0.0.85 → dreamer4-0.0.88}/tests/test_dreamer.py +0 -0
{dreamer4-0.0.85 → dreamer4-0.0.88}/dreamer4/dreamer4.py

@@ -77,7 +77,7 @@ class Experience:
     latents: Tensor
     video: Tensor | None = None
     proprio: Tensor | None = None
-    agent_embed: Tensor | None = None
+    agent_embed: Tensor | None = None
     rewards: Tensor | None = None
     actions: tuple[Tensor, Tensor] | None = None
     log_probs: tuple[Tensor, Tensor] | None = None
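The hunk above keeps an optional agent_embed tensor on the Experience container so agent embeddings can ride along with the rest of the rollout data. A minimal, self-contained sketch of such a record, using only the fields visible in the hunk (the package's real class contains more than this):

# Illustrative sketch only, not the package's actual class definition.

from dataclasses import dataclass

import torch
from torch import Tensor


@dataclass
class ExperienceSketch:
    latents: Tensor
    video: Tensor | None = None
    proprio: Tensor | None = None
    agent_embed: Tensor | None = None  # cached agent embeddings from the rollout
    rewards: Tensor | None = None
    actions: tuple[Tensor, Tensor] | None = None
    log_probs: tuple[Tensor, Tensor] | None = None


# example: an experience whose agent embeddings were stored during rollout
exp = ExperienceSketch(
    latents = torch.randn(1, 8, 16),
    agent_embed = torch.randn(1, 8, 32),
)
assert exp.agent_embed is not None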
@@ -2255,7 +2255,7 @@ class DynamicsWorldModel(Module):
 video = cat((video, next_frame), dim = 2)
 rewards = safe_cat((rewards, reward), dim = 1)

-acc_agent_embed = safe_cat((acc_agent_embed,
+acc_agent_embed = safe_cat((acc_agent_embed, one_agent_embed), dim = 1)

 # package up one experience for learning

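The rollout code above grows acc_agent_embed with safe_cat. A hedged sketch of a None-tolerant concatenation helper in that spirit follows; the package's own safe_cat may differ in signature and behavior:

import torch
from torch import Tensor


def safe_cat_sketch(tensors: tuple[Tensor | None, ...], dim: int = 1) -> Tensor | None:
    # drop missing pieces so an accumulator can start out as None
    present = [t for t in tensors if t is not None]

    if len(present) == 0:
        return None

    if len(present) == 1:
        return present[0]

    return torch.cat(present, dim = dim)


# usage: growing an accumulator one step at a time
acc = None
for _ in range(3):
    step_embed = torch.randn(2, 1, 8)   # (batch, time = 1, dim)
    acc = safe_cat_sketch((acc, step_embed), dim = 1)

assert acc is not None and acc.shape == (2, 3, 8)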
@@ -2295,6 +2295,7 @@ class DynamicsWorldModel(Module):
 old_log_probs = experience.log_probs
 old_values = experience.values
 rewards = experience.rewards
+agent_embeds = experience.agent_embed

 step_size = experience.step_size
 agent_index = experience.agent_index
@@ -2374,32 +2375,38 @@ class DynamicsWorldModel(Module):
 advantage = F.layer_norm(advantage, advantage.shape, eps = eps)

 # replay for the action logits and values
+# but only do so if fine tuning the entire world model for RL

 discrete_actions, continuous_actions = actions

-
-
-
-
-    step_sizes = step_size,
-    rewards = rewards,
-    discrete_actions = discrete_actions,
-    continuous_actions = continuous_actions,
-    latent_is_noised = True,
-    return_pred_only = True,
-    return_intermediates = True
-)
+if (
+    not only_learn_policy_value_heads or
+    not exists(agent_embeds)
+):

-
+    with world_model_forward_context():
+        _, (agent_embeds, _) = self.forward(
+            latents = latents,
+            signal_levels = self.max_steps - 1,
+            step_sizes = step_size,
+            rewards = rewards,
+            discrete_actions = discrete_actions,
+            continuous_actions = continuous_actions,
+            latent_is_noised = True,
+            return_pred_only = True,
+            return_intermediates = True
+        )
+
+    agent_embeds = agent_embeds[..., agent_index, :]

 # maybe detach agent embed

 if only_learn_policy_value_heads:
-
+    agent_embeds = agent_embeds.detach()

 # ppo

-policy_embed = self.policy_head(
+policy_embed = self.policy_head(agent_embeds)

 log_probs, entropies = self.action_embedder.log_probs(policy_embed, pred_head_index = 0, discrete_targets = discrete_actions, continuous_targets = continuous_actions, return_entropies = True)

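The hunk above is the core of this release: the PPO update replays the world model only when the whole model is being fine tuned for RL or no cached agent embeddings exist, and detaches the embeddings when only the policy and value heads are trained. The sketch below shows that control flow in isolation; the module and argument names (world_model, get_agent_embeds, the tensor shapes) are placeholders, not the package's API:

import torch
from torch import Tensor, nn


def get_agent_embeds(
    world_model: nn.Module,          # placeholder for the dynamics world model
    latents: Tensor,
    cached_agent_embeds: Tensor | None,
    only_learn_policy_value_heads: bool
) -> Tensor:

    if only_learn_policy_value_heads and cached_agent_embeds is not None:
        # heads-only fine tuning: reuse the embeddings stored during rollout
        agent_embeds = cached_agent_embeds
    else:
        # full fine tuning (or no cache): replay the world model
        agent_embeds = world_model(latents)

    if only_learn_policy_value_heads:
        # keep gradients inside the heads; do not backprop into the world model
        agent_embeds = agent_embeds.detach()

    return agent_embeds


# usage with a stand-in world model
world_model = nn.Linear(16, 32)
latents = torch.randn(2, 8, 16)
cached = torch.randn(2, 8, 32)

embeds = get_agent_embeds(world_model, latents, cached, only_learn_policy_value_heads = True)
assert not embeds.requires_grad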
@@ -2448,7 +2455,7 @@ class DynamicsWorldModel(Module):

 # value loss

-value_bins = self.value_head(
+value_bins = self.value_head(agent_embeds)
 values = self.reward_encoder.bins_to_scalar_value(value_bins)

 clipped_values = old_values + (values - old_values).clamp(-self.value_clip, self.value_clip)
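The value branch above clips the new value predictions around the old ones before computing the loss, as in PPO. Below is a minimal sketch of the clipped value loss that such a line typically feeds into; the exact loss combination and clip value in the package may differ:

import torch
from torch import Tensor
import torch.nn.functional as F


def clipped_value_loss(
    values: Tensor,        # new value predictions
    old_values: Tensor,    # value predictions stored at rollout time
    returns: Tensor,       # bootstrapped return targets
    value_clip: float = 0.4
) -> Tensor:
    clipped_values = old_values + (values - old_values).clamp(-value_clip, value_clip)

    loss_unclipped = F.mse_loss(values, returns, reduction = 'none')
    loss_clipped = F.mse_loss(clipped_values, returns, reduction = 'none')

    # take the pessimistic (larger) of the two errors, as in PPO
    return torch.maximum(loss_unclipped, loss_clipped).mean()


# usage
values, old_values, returns = torch.randn(3, 4, 8).unbind(dim = 0)
loss = clipped_value_loss(values, old_values, returns)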
@@ -2665,7 +2672,9 @@ class DynamicsWorldModel(Module):

 # maybe store agent embed

-
+if store_agent_embed:
+    one_agent_embed = agent_embed[:, -1:, agent_index]
+    acc_agent_embed = safe_cat((acc_agent_embed, one_agent_embed), dim = 1)

 # decode the agent actions if needed
