dreamer4 0.0.101__tar.gz → 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dreamer4 might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dreamer4
- Version: 0.0.101
+ Version: 0.1.0
  Summary: Dreamer 4
  Project-URL: Homepage, https://pypi.org/project/dreamer4/
  Project-URL: Repository, https://github.com/lucidrains/dreamer4
@@ -53,11 +53,75 @@ Description-Content-Type: text/markdown
  
  <img src="./dreamer4-fig2.png" width="400px"></img>
  
- ## Dreamer 4 (wip)
+ ## Dreamer 4
  
  Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v1) for his [Dreamer](https://danijar.com/project/dreamer4/) line of work
  
- [Temporary Discord](https://discord.gg/MkACrrkrYR)
+ ## Install
+ 
+ ```bash
+ $ pip install dreamer4-pytorch
+ ```
+ 
+ ## Usage
+ 
+ ```python
+ import torch
+ from dreamer4 import VideoTokenizer, DynamicsWorldModel
+ 
+ # video tokenizer, learned through MAE + lpips
+ 
+ tokenizer = VideoTokenizer(
+     dim = 512,
+     dim_latent = 32,
+     patch_size = 32,
+     image_height = 256,
+     image_width = 256
+ )
+ 
+ # dynamics world model
+ 
+ dynamics = DynamicsWorldModel(
+     dim = 512,
+     dim_latent = 32,
+     video_tokenizer = tokenizer,
+     num_discrete_actions = 4,
+     num_residual_streams = 1
+ )
+ 
+ # state, action, rewards
+ 
+ video = torch.randn(2, 3, 10, 256, 256)
+ discrete_actions = torch.randint(0, 4, (2, 10, 1))
+ rewards = torch.randn(2, 10)
+ 
+ # learn dynamics / behavior cloned model
+ 
+ loss = dynamics(
+     video = video,
+     rewards = rewards,
+     discrete_actions = discrete_actions
+ )
+ 
+ loss.backward()
+ 
+ # do the above with much data
+ 
+ # then generate dreams
+ 
+ dreams = dynamics.generate(
+     10,
+     batch_size = 2,
+     return_decoded_video = True,
+     return_for_policy_optimization = True
+ )
+ 
+ # learn from the dreams
+ 
+ actor_loss, critic_loss = dynamics.learn_from_experience(dreams)
+ 
+ (actor_loss + critic_loss).backward()
+ ```
  
  ## Citation
  
@@ -72,3 +136,5 @@ Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v
      url = {https://arxiv.org/abs/2509.24527},
  }
  ```
+ 
+ *the conquest of nature is to be achieved through number and measure* - angels to Descartes, in a dream, the story goes.
@@ -0,0 +1,87 @@
+ <img src="./dreamer4-fig2.png" width="400px"></img>
+ 
+ ## Dreamer 4
+ 
+ Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v1) for his [Dreamer](https://danijar.com/project/dreamer4/) line of work
+ 
+ ## Install
+ 
+ ```bash
+ $ pip install dreamer4-pytorch
+ ```
+ 
+ ## Usage
+ 
+ ```python
+ import torch
+ from dreamer4 import VideoTokenizer, DynamicsWorldModel
+ 
+ # video tokenizer, learned through MAE + lpips
+ 
+ tokenizer = VideoTokenizer(
+     dim = 512,
+     dim_latent = 32,
+     patch_size = 32,
+     image_height = 256,
+     image_width = 256
+ )
+ 
+ # dynamics world model
+ 
+ dynamics = DynamicsWorldModel(
+     dim = 512,
+     dim_latent = 32,
+     video_tokenizer = tokenizer,
+     num_discrete_actions = 4,
+     num_residual_streams = 1
+ )
+ 
+ # state, action, rewards
+ 
+ video = torch.randn(2, 3, 10, 256, 256)
+ discrete_actions = torch.randint(0, 4, (2, 10, 1))
+ rewards = torch.randn(2, 10)
+ 
+ # learn dynamics / behavior cloned model
+ 
+ loss = dynamics(
+     video = video,
+     rewards = rewards,
+     discrete_actions = discrete_actions
+ )
+ 
+ loss.backward()
+ 
+ # do the above with much data
+ 
+ # then generate dreams
+ 
+ dreams = dynamics.generate(
+     10,
+     batch_size = 2,
+     return_decoded_video = True,
+     return_for_policy_optimization = True
+ )
+ 
+ # learn from the dreams
+ 
+ actor_loss, critic_loss = dynamics.learn_from_experience(dreams)
+ 
+ (actor_loss + critic_loss).backward()
+ ```
+ 
+ ## Citation
+ 
+ ```bibtex
+ @misc{hafner2025trainingagentsinsidescalable,
+     title = {Training Agents Inside of Scalable World Models},
+     author = {Danijar Hafner and Wilson Yan and Timothy Lillicrap},
+     year = {2025},
+     eprint = {2509.24527},
+     archivePrefix = {arXiv},
+     primaryClass = {cs.AI},
+     url = {https://arxiv.org/abs/2509.24527},
+ }
+ ```
+ 
+ *the conquest of nature is to be achieved through number and measure* - angels to Descartes, in a dream, the story goes.
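Reading off the shapes in the usage example above: `video` appears to be `(batch, channels, frames, height, width)` with the spatial size matching the tokenizer's `image_height` / `image_width`, `discrete_actions` is `(batch, frames, 1)` with values below `num_discrete_actions`, and `rewards` is `(batch, frames)`.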
@@ -1902,6 +1902,7 @@ class DynamicsWorldModel(Module):
          pmpo_pos_to_neg_weight = 0.5, # pos and neg equal weight
          pmpo_reverse_kl = True,
          pmpo_kl_div_loss_weight = .3,
+         normalize_advantages = None,
          value_clip = 0.4,
          policy_entropy_weight = .01,
          gae_use_accelerated = False
@@ -2425,8 +2426,10 @@
          value_optim: Optimizer | None = None,
          only_learn_policy_value_heads = True, # in the paper, they do not finetune the entire dynamics model, they just learn the heads
          use_pmpo = True,
+         normalize_advantages = None,
          eps = 1e-6
      ):
+         assert isinstance(experience, Experience)
  
          latents = experience.latents
          actions = experience.actions
@@ -2439,7 +2442,7 @@
          step_size = experience.step_size
          agent_index = experience.agent_index
  
-         assert all([*map(exists, (old_log_probs, actions, old_values, rewards, step_size))]), 'the generations need to contain the log probs, values, and rewards for policy optimization'
+         assert all([*map(exists, (old_log_probs, actions, old_values, rewards, step_size))]), 'the generations need to contain the log probs, values, and rewards for policy optimization - world_model.generate(..., return_log_probs_and_values = True)'
  
          batch, time = latents.shape[0], latents.shape[1]
  
@@ -2507,16 +2510,19 @@
          else:
              advantage = returns - old_values
  
-         # apparently they just use the sign of the advantage
+         # if using pmpo, do not normalize advantages, but can be overridden
+ 
+         normalize_advantages = default(normalize_advantages, not use_pmpo)
+ 
+         if normalize_advantages:
+             advantage = F.layer_norm(advantage, advantage.shape, eps = eps)
+ 
          # https://arxiv.org/abs/2410.04166v1
  
          if use_pmpo:
              pos_advantage_mask = advantage >= 0.
              neg_advantage_mask = ~pos_advantage_mask
  
-         else:
-             advantage = F.layer_norm(advantage, advantage.shape, eps = eps)
- 
          # replay for the action logits and values
          # but only do so if fine tuning the entire world model for RL
  
@@ -2689,12 +2695,22 @@
          return_rewards_per_frame = False,
          return_agent_actions = False,
          return_log_probs_and_values = False,
+         return_for_policy_optimization = False,
          return_time_kv_cache = False,
          store_agent_embed = True,
          store_old_action_unembeds = True
  
      ): # (b t n d) | (b c t h w)
  
+         # handy flag for returning generations for rl
+ 
+         if return_for_policy_optimization:
+             return_agent_actions |= True
+             return_log_probs_and_values |= True
+             return_rewards_per_frame |= True
+ 
+         # more variables
+ 
          has_proprio = self.has_proprio
          was_training = self.training
          self.eval()
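As a side note on the hunk above: `return_for_policy_optimization` simply forces on the three flags needed for policy optimization. A self-contained sketch with a hypothetical helper (`resolve_generate_flags` is not part of the library) that mirrors the `|=` lines:

```python
# hypothetical helper, mirroring the flag-forcing `|=` lines in the hunk above

def resolve_generate_flags(
    return_agent_actions = False,
    return_log_probs_and_values = False,
    return_rewards_per_frame = False,
    return_for_policy_optimization = False
):
    if return_for_policy_optimization:
        # the convenience flag implies everything needed for learn_from_experience
        return_agent_actions = True
        return_log_probs_and_values = True
        return_rewards_per_frame = True

    return return_agent_actions, return_log_probs_and_values, return_rewards_per_frame

assert resolve_generate_flags(return_for_policy_optimization = True) == (True, True, True)
assert resolve_generate_flags() == (False, False, False)
```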
@@ -2764,6 +2780,19 @@
  
          curr_time_steps = latents.shape[1]
  
+         # determine whether to take an extra step if
+         # (1) using time kv cache
+         # (2) decoding anything off agent embedding (rewards, actions, etc)
+ 
+         take_extra_step = (
+             use_time_kv_cache or
+             return_rewards_per_frame or
+             store_agent_embed or
+             return_agent_actions
+         )
+ 
+         # prepare noised latent / proprio inputs
+ 
          noised_latent = randn((batch_size, 1, self.num_video_views, *latent_shape), device = self.device)
  
          noised_proprio = None
@@ -2771,7 +2800,10 @@
          if has_proprio:
              noised_proprio = randn((batch_size, 1, self.dim_proprio), device = self.device)
  
-         for step in range(num_steps):
+         # denoising steps
+ 
+         for step in range(num_steps + int(take_extra_step)):
+ 
              is_last_step = (step + 1) == num_steps
  
              signal_levels = full((batch_size, 1), step * step_size, dtype = torch.long, device = self.device)
@@ -2814,6 +2846,11 @@
              if use_time_kv_cache and is_last_step:
                  time_kv_cache = next_time_kv_cache
  
+             # early break if taking an extra step for agent embedding off cleaned latents for decoding
+ 
+             if take_extra_step and is_last_step:
+                 break
+ 
              # maybe proprio
  
              if has_proprio:
@@ -3016,7 +3053,7 @@
          latent_is_noised = False,
          return_all_losses = False,
          return_intermediates = False,
-         add_autoregressive_action_loss = False,
+         add_autoregressive_action_loss = True,
          update_loss_ema = None,
          latent_has_view_dim = False
      ):
@@ -1,6 +1,6 @@
  [project]
  name = "dreamer4"
- version = "0.0.101"
+ version = "0.1.0"
  description = "Dreamer 4"
  authors = [
      { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -1,21 +0,0 @@
- <img src="./dreamer4-fig2.png" width="400px"></img>
- 
- ## Dreamer 4 (wip)
- 
- Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v1) for his [Dreamer](https://danijar.com/project/dreamer4/) line of work
- 
- [Temporary Discord](https://discord.gg/MkACrrkrYR)
- 
- ## Citation
- 
- ```bibtex
- @misc{hafner2025trainingagentsinsidescalable,
-     title = {Training Agents Inside of Scalable World Models},
-     author = {Danijar Hafner and Wilson Yan and Timothy Lillicrap},
-     year = {2025},
-     eprint = {2509.24527},
-     archivePrefix = {arXiv},
-     primaryClass = {cs.AI},
-     url = {https://arxiv.org/abs/2509.24527},
- }
- ```