dreamer4 0.0.102__tar.gz → 0.1.0__tar.gz

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of dreamer4 might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dreamer4
- Version: 0.0.102
+ Version: 0.1.0
  Summary: Dreamer 4
  Project-URL: Homepage, https://pypi.org/project/dreamer4/
  Project-URL: Repository, https://github.com/lucidrains/dreamer4
@@ -53,11 +53,75 @@ Description-Content-Type: text/markdown

  <img src="./dreamer4-fig2.png" width="400px"></img>

- ## Dreamer 4 (wip)
+ ## Dreamer 4

  Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v1) for his [Dreamer](https://danijar.com/project/dreamer4/) line of work

- [Temporary Discord](https://discord.gg/MkACrrkrYR)
+ ## Install
+
+ ```bash
+ $ pip install dreamer4-pytorch
+ ```
+
+ ## Usage
+
+ ```python
+ import torch
+ from dreamer4 import VideoTokenizer, DynamicsWorldModel
+
+ # video tokenizer, learned through MAE + lpips
+
+ tokenizer = VideoTokenizer(
+     dim = 512,
+     dim_latent = 32,
+     patch_size = 32,
+     image_height = 256,
+     image_width = 256
+ )
+
+ # dynamics world model
+
+ dynamics = DynamicsWorldModel(
+     dim = 512,
+     dim_latent = 32,
+     video_tokenizer = tokenizer,
+     num_discrete_actions = 4,
+     num_residual_streams = 1
+ )
+
+ # state, action, rewards
+
+ video = torch.randn(2, 3, 10, 256, 256)
+ discrete_actions = torch.randint(0, 4, (2, 10, 1))
+ rewards = torch.randn(2, 10)
+
+ # learn dynamics / behavior cloned model
+
+ loss = dynamics(
+     video = video,
+     rewards = rewards,
+     discrete_actions = discrete_actions
+ )
+
+ loss.backward()
+
+ # do the above with much data
+
+ # then generate dreams
+
+ dreams = dynamics.generate(
+     10,
+     batch_size = 2,
+     return_decoded_video = True,
+     return_for_policy_optimization = True
+ )
+
+ # learn from the dreams
+
+ actor_loss, critic_loss = dynamics.learn_from_experience(dreams)
+
+ (actor_loss + critic_loss).backward()
+ ```

  ## Citation

@@ -72,3 +136,5 @@ Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v
      url = {https://arxiv.org/abs/2509.24527},
  }
  ```
+
+ *the conquest of nature is to be achieved through number and measure* - angels to Descartes, in a dream, the story goes.
@@ -0,0 +1,87 @@
+ <img src="./dreamer4-fig2.png" width="400px"></img>
+
+ ## Dreamer 4
+
+ Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v1) for his [Dreamer](https://danijar.com/project/dreamer4/) line of work
+
+ ## Install
+
+ ```bash
+ $ pip install dreamer4-pytorch
+ ```
+
+ ## Usage
+
+ ```python
+ import torch
+ from dreamer4 import VideoTokenizer, DynamicsWorldModel
+
+ # video tokenizer, learned through MAE + lpips
+
+ tokenizer = VideoTokenizer(
+     dim = 512,
+     dim_latent = 32,
+     patch_size = 32,
+     image_height = 256,
+     image_width = 256
+ )
+
+ # dynamics world model
+
+ dynamics = DynamicsWorldModel(
+     dim = 512,
+     dim_latent = 32,
+     video_tokenizer = tokenizer,
+     num_discrete_actions = 4,
+     num_residual_streams = 1
+ )
+
+ # state, action, rewards
+
+ video = torch.randn(2, 3, 10, 256, 256)
+ discrete_actions = torch.randint(0, 4, (2, 10, 1))
+ rewards = torch.randn(2, 10)
+
+ # learn dynamics / behavior cloned model
+
+ loss = dynamics(
+     video = video,
+     rewards = rewards,
+     discrete_actions = discrete_actions
+ )
+
+ loss.backward()
+
+ # do the above with much data
+
+ # then generate dreams
+
+ dreams = dynamics.generate(
+     10,
+     batch_size = 2,
+     return_decoded_video = True,
+     return_for_policy_optimization = True
+ )
+
+ # learn from the dreams
+
+ actor_loss, critic_loss = dynamics.learn_from_experience(dreams)
+
+ (actor_loss + critic_loss).backward()
+ ```
+
+ ## Citation
+
+ ```bibtex
+ @misc{hafner2025trainingagentsinsidescalable,
+     title = {Training Agents Inside of Scalable World Models},
+     author = {Danijar Hafner and Wilson Yan and Timothy Lillicrap},
+     year = {2025},
+     eprint = {2509.24527},
+     archivePrefix = {arXiv},
+     primaryClass = {cs.AI},
+     url = {https://arxiv.org/abs/2509.24527},
+ }
+ ```
+
+ *the conquest of nature is to be achieved through number and measure* - angels to Descartes, in a dream, the story goes.
@@ -2429,6 +2429,7 @@ class DynamicsWorldModel(Module):
          normalize_advantages = None,
          eps = 1e-6
      ):
+         assert isinstance(experience, Experience)

          latents = experience.latents
          actions = experience.actions
@@ -2441,7 +2442,7 @@ class DynamicsWorldModel(Module):
          step_size = experience.step_size
          agent_index = experience.agent_index

-         assert all([*map(exists, (old_log_probs, actions, old_values, rewards, step_size))]), 'the generations need to contain the log probs, values, and rewards for policy optimization'
+         assert all([*map(exists, (old_log_probs, actions, old_values, rewards, step_size))]), 'the generations need to contain the log probs, values, and rewards for policy optimization - world_model.generate(..., return_log_probs_and_values = True)'

          batch, time = latents.shape[0], latents.shape[1]

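For orientation, the tightened `Experience` check and the expanded assertion message above point at how the input is meant to be produced: by the world model's own `generate`. A minimal sketch, mirroring the README usage (the step count, batch size, and the `dynamics` instance come from the Usage example above and are illustrative only):

```python
# hedged sketch, continuing from the README Usage example above
# generating with return_for_policy_optimization = True yields an Experience
# that carries the log probs, values and rewards the assertion checks for
dreams = dynamics.generate(
    10,
    batch_size = 2,
    return_for_policy_optimization = True
)

actor_loss, critic_loss = dynamics.learn_from_experience(dreams)
(actor_loss + critic_loss).backward()
```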
@@ -2694,12 +2695,22 @@ class DynamicsWorldModel(Module):
          return_rewards_per_frame = False,
          return_agent_actions = False,
          return_log_probs_and_values = False,
+         return_for_policy_optimization = False,
          return_time_kv_cache = False,
          store_agent_embed = True,
          store_old_action_unembeds = True

      ): # (b t n d) | (b c t h w)

+         # handy flag for returning generations for rl
+
+         if return_for_policy_optimization:
+             return_agent_actions |= True
+             return_log_probs_and_values |= True
+             return_rewards_per_frame |= True
+
+         # more variables
+
          has_proprio = self.has_proprio
          was_training = self.training
          self.eval()
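Per the hunk above, the new `return_for_policy_optimization` argument is only a convenience switch that folds in the individual return flags, so the two calls below should be interchangeable (a hedged sketch; the argument values are illustrative and `dynamics` is the instance from the README example):

```python
# hedged sketch: the convenience flag...
dreams = dynamics.generate(
    10,
    batch_size = 2,
    return_for_policy_optimization = True
)

# ...should behave the same as enabling the three flags it folds in
dreams = dynamics.generate(
    10,
    batch_size = 2,
    return_agent_actions = True,
    return_log_probs_and_values = True,
    return_rewards_per_frame = True
)
```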
@@ -2769,6 +2780,19 @@

          curr_time_steps = latents.shape[1]

+         # determine whether to take an extra step if
+         # (1) using time kv cache
+         # (2) decoding anything off agent embedding (rewards, actions, etc)
+
+         take_extra_step = (
+             use_time_kv_cache or
+             return_rewards_per_frame or
+             store_agent_embed or
+             return_agent_actions
+         )
+
+         # prepare noised latent / proprio inputs
+
          noised_latent = randn((batch_size, 1, self.num_video_views, *latent_shape), device = self.device)

          noised_proprio = None
@@ -2776,7 +2800,10 @@
          if has_proprio:
              noised_proprio = randn((batch_size, 1, self.dim_proprio), device = self.device)

-         for step in range(num_steps):
+         # denoising steps
+
+         for step in range(num_steps + int(take_extra_step)):
+
              is_last_step = (step + 1) == num_steps

              signal_levels = full((batch_size, 1), step * step_size, dtype = torch.long, device = self.device)
@@ -2819,6 +2846,11 @@
              if use_time_kv_cache and is_last_step:
                  time_kv_cache = next_time_kv_cache

+             # early break if taking an extra step for agent embedding off cleaned latents for decoding
+
+             if take_extra_step and is_last_step:
+                 break
+
              # maybe proprio

              if has_proprio:
@@ -3021,7 +3053,7 @@
          latent_is_noised = False,
          return_all_losses = False,
          return_intermediates = False,
-         add_autoregressive_action_loss = False,
+         add_autoregressive_action_loss = True,
          update_loss_ema = None,
          latent_has_view_dim = False
      ):
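Note the behavior change above: `add_autoregressive_action_loss` now defaults to `True` in the training forward pass. If the previous behavior is wanted, it can presumably be disabled per call; a hedged sketch reusing the tensors from the README Usage example, assuming the signature above belongs to the forward call shown there:

```python
# hedged sketch, continuing from the README Usage example above
# passing add_autoregressive_action_loss = False restores the pre-0.1.0 default
loss = dynamics(
    video = video,
    rewards = rewards,
    discrete_actions = discrete_actions,
    add_autoregressive_action_loss = False
)

loss.backward()
```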
@@ -1,6 +1,6 @@
  [project]
  name = "dreamer4"
- version = "0.0.102"
+ version = "0.1.0"
  description = "Dreamer 4"
  authors = [
      { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -1,21 +0,0 @@
- <img src="./dreamer4-fig2.png" width="400px"></img>
-
- ## Dreamer 4 (wip)
-
- Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v1) for his [Dreamer](https://danijar.com/project/dreamer4/) line of work
-
- [Temporary Discord](https://discord.gg/MkACrrkrYR)
-
- ## Citation
-
- ```bibtex
- @misc{hafner2025trainingagentsinsidescalable,
-     title = {Training Agents Inside of Scalable World Models},
-     author = {Danijar Hafner and Wilson Yan and Timothy Lillicrap},
-     year = {2025},
-     eprint = {2509.24527},
-     archivePrefix = {arXiv},
-     primaryClass = {cs.AI},
-     url = {https://arxiv.org/abs/2509.24527},
- }
- ```
All other files in the package are unchanged.