dreamer4 0.0.102__tar.gz → 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dreamer4 might be problematic.
- {dreamer4-0.0.102 → dreamer4-0.1.0}/PKG-INFO +69 -3
- dreamer4-0.1.0/README.md +87 -0
- {dreamer4-0.0.102 → dreamer4-0.1.0}/dreamer4/dreamer4.py +35 -3
- {dreamer4-0.0.102 → dreamer4-0.1.0}/pyproject.toml +1 -1
- dreamer4-0.0.102/README.md +0 -21
- {dreamer4-0.0.102 → dreamer4-0.1.0}/.github/workflows/python-publish.yml +0 -0
- {dreamer4-0.0.102 → dreamer4-0.1.0}/.github/workflows/test.yml +0 -0
- {dreamer4-0.0.102 → dreamer4-0.1.0}/.gitignore +0 -0
- {dreamer4-0.0.102 → dreamer4-0.1.0}/LICENSE +0 -0
- {dreamer4-0.0.102 → dreamer4-0.1.0}/dreamer4/__init__.py +0 -0
- {dreamer4-0.0.102 → dreamer4-0.1.0}/dreamer4/mocks.py +0 -0
- {dreamer4-0.0.102 → dreamer4-0.1.0}/dreamer4/trainers.py +0 -0
- {dreamer4-0.0.102 → dreamer4-0.1.0}/dreamer4-fig2.png +0 -0
- {dreamer4-0.0.102 → dreamer4-0.1.0}/tests/test_dreamer.py +0 -0
{dreamer4-0.0.102 → dreamer4-0.1.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dreamer4
-Version: 0.0.102
+Version: 0.1.0
 Summary: Dreamer 4
 Project-URL: Homepage, https://pypi.org/project/dreamer4/
 Project-URL: Repository, https://github.com/lucidrains/dreamer4
@@ -53,11 +53,75 @@ Description-Content-Type: text/markdown
 
 <img src="./dreamer4-fig2.png" width="400px"></img>
 
-## Dreamer 4 (wip)
+## Dreamer 4
 
 Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v1) for his [Dreamer](https://danijar.com/project/dreamer4/) line of work
 
-[Temporary Discord](https://discord.gg/MkACrrkrYR)
+## Install
+
+```bash
+$ pip install dreamer4-pytorch
+```
+
+## Usage
+
+```python
+import torch
+from dreamer4 import VideoTokenizer, DynamicsWorldModel
+
+# video tokenizer, learned through MAE + lpips
+
+tokenizer = VideoTokenizer(
+    dim = 512,
+    dim_latent = 32,
+    patch_size = 32,
+    image_height = 256,
+    image_width = 256
+)
+
+# dynamics world model
+
+dynamics = DynamicsWorldModel(
+    dim = 512,
+    dim_latent = 32,
+    video_tokenizer = tokenizer,
+    num_discrete_actions = 4,
+    num_residual_streams = 1
+)
+
+# state, action, rewards
+
+video = torch.randn(2, 3, 10, 256, 256)
+discrete_actions = torch.randint(0, 4, (2, 10, 1))
+rewards = torch.randn(2, 10)
+
+# learn dynamics / behavior cloned model
+
+loss = dynamics(
+    video = video,
+    rewards = rewards,
+    discrete_actions = discrete_actions
+)
+
+loss.backward()
+
+# do the above with much data
+
+# then generate dreams
+
+dreams = dynamics.generate(
+    10,
+    batch_size = 2,
+    return_decoded_video = True,
+    return_for_policy_optimization = True
+)
+
+# learn from the dreams
+
+actor_loss, critic_loss = dynamics.learn_from_experience(dreams)
+
+(actor_loss + critic_loss).backward()
+```
 
 ## Citation
 
@@ -72,3 +136,5 @@ Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v
     url = {https://arxiv.org/abs/2509.24527},
 }
 ```
+
+*the conquest of nature is to be achieved through number and measure* - angels to Descartes, in a dream, the story goes.
dreamer4-0.1.0/README.md
ADDED
@@ -0,0 +1,87 @@
+<img src="./dreamer4-fig2.png" width="400px"></img>
+
+## Dreamer 4
+
+Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v1) for his [Dreamer](https://danijar.com/project/dreamer4/) line of work
+
+## Install
+
+```bash
+$ pip install dreamer4-pytorch
+```
+
+## Usage
+
+```python
+import torch
+from dreamer4 import VideoTokenizer, DynamicsWorldModel
+
+# video tokenizer, learned through MAE + lpips
+
+tokenizer = VideoTokenizer(
+    dim = 512,
+    dim_latent = 32,
+    patch_size = 32,
+    image_height = 256,
+    image_width = 256
+)
+
+# dynamics world model
+
+dynamics = DynamicsWorldModel(
+    dim = 512,
+    dim_latent = 32,
+    video_tokenizer = tokenizer,
+    num_discrete_actions = 4,
+    num_residual_streams = 1
+)
+
+# state, action, rewards
+
+video = torch.randn(2, 3, 10, 256, 256)
+discrete_actions = torch.randint(0, 4, (2, 10, 1))
+rewards = torch.randn(2, 10)
+
+# learn dynamics / behavior cloned model
+
+loss = dynamics(
+    video = video,
+    rewards = rewards,
+    discrete_actions = discrete_actions
+)
+
+loss.backward()
+
+# do the above with much data
+
+# then generate dreams
+
+dreams = dynamics.generate(
+    10,
+    batch_size = 2,
+    return_decoded_video = True,
+    return_for_policy_optimization = True
+)
+
+# learn from the dreams
+
+actor_loss, critic_loss = dynamics.learn_from_experience(dreams)
+
+(actor_loss + critic_loss).backward()
+```
+
+## Citation
+
+```bibtex
+@misc{hafner2025trainingagentsinsidescalable,
+    title = {Training Agents Inside of Scalable World Models},
+    author = {Danijar Hafner and Wilson Yan and Timothy Lillicrap},
+    year = {2025},
+    eprint = {2509.24527},
+    archivePrefix = {arXiv},
+    primaryClass = {cs.AI},
+    url = {https://arxiv.org/abs/2509.24527},
+}
+```
+
+*the conquest of nature is to be achieved through number and measure* - angels to Descartes, in a dream, the story goes.
{dreamer4-0.0.102 → dreamer4-0.1.0}/dreamer4/dreamer4.py

@@ -2429,6 +2429,7 @@ class DynamicsWorldModel(Module):
         normalize_advantages = None,
         eps = 1e-6
     ):
+        assert isinstance(experience, Experience)

         latents = experience.latents
         actions = experience.actions
@@ -2441,7 +2442,7 @@ class DynamicsWorldModel(Module):
         step_size = experience.step_size
         agent_index = experience.agent_index

-        assert all([*map(exists, (old_log_probs, actions, old_values, rewards, step_size))]), 'the generations need to contain the log probs, values, and rewards for policy optimization'
+        assert all([*map(exists, (old_log_probs, actions, old_values, rewards, step_size))]), 'the generations need to contain the log probs, values, and rewards for policy optimization - world_model.generate(..., return_log_probs_and_values = True)'

         batch, time = latents.shape[0], latents.shape[1]

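As a usage note, here is a minimal sketch of the call pattern the strengthened error message points to, reusing the `dynamics` model from the README example; the exact flag combination is an assumption pieced together from this hunk and the flag logic added further below.

```python
# sketch only: generations fed to learn_from_experience must carry the
# log probs, values and rewards that the assert above checks for

dreams = dynamics.generate(
    10,
    batch_size = 2,
    return_log_probs_and_values = True,   # as the updated assert message suggests
    return_rewards_per_frame = True,      # rewards are also required by the assert
    return_agent_actions = True           # as are the sampled actions
)

actor_loss, critic_loss = dynamics.learn_from_experience(dreams)
(actor_loss + critic_loss).backward()
```

The `return_for_policy_optimization` shorthand introduced in the next hunk switches all three of these flags on at once.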
@@ -2694,12 +2695,22 @@ class DynamicsWorldModel(Module):
         return_rewards_per_frame = False,
         return_agent_actions = False,
         return_log_probs_and_values = False,
+        return_for_policy_optimization = False,
         return_time_kv_cache = False,
         store_agent_embed = True,
         store_old_action_unembeds = True

     ): # (b t n d) | (b c t h w)

+        # handy flag for returning generations for rl
+
+        if return_for_policy_optimization:
+            return_agent_actions |= True
+            return_log_probs_and_values |= True
+            return_rewards_per_frame |= True
+
+        # more variables
+
         has_proprio = self.has_proprio
         was_training = self.training
         self.eval()
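Given the `|=` block above, the new flag reads as a convenience shorthand. A hedged sketch of the two call sites that should be equivalent under this hunk (argument values are illustrative, and `dynamics` is the model from the README usage example):

```python
# shorthand, as used in the updated README
dreams = dynamics.generate(
    10,
    batch_size = 2,
    return_decoded_video = True,
    return_for_policy_optimization = True
)

# roughly equivalent, spelled out per the |= lines in this hunk
dreams = dynamics.generate(
    10,
    batch_size = 2,
    return_decoded_video = True,
    return_agent_actions = True,
    return_log_probs_and_values = True,
    return_rewards_per_frame = True
)
```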
@@ -2769,6 +2780,19 @@ class DynamicsWorldModel(Module):

         curr_time_steps = latents.shape[1]

+        # determine whether to take an extra step if
+        # (1) using time kv cache
+        # (2) decoding anything off agent embedding (rewards, actions, etc)
+
+        take_extra_step = (
+            use_time_kv_cache or
+            return_rewards_per_frame or
+            store_agent_embed or
+            return_agent_actions
+        )
+
+        # prepare noised latent / proprio inputs
+
         noised_latent = randn((batch_size, 1, self.num_video_views, *latent_shape), device = self.device)

         noised_proprio = None
@@ -2776,7 +2800,10 @@ class DynamicsWorldModel(Module):
         if has_proprio:
             noised_proprio = randn((batch_size, 1, self.dim_proprio), device = self.device)

-        for step in range(num_steps):
+        # denoising steps
+
+        for step in range(num_steps + int(take_extra_step)):
+
             is_last_step = (step + 1) == num_steps

             signal_levels = full((batch_size, 1), step * step_size, dtype = torch.long, device = self.device)
@@ -2819,6 +2846,11 @@ class DynamicsWorldModel(Module):
             if use_time_kv_cache and is_last_step:
                 time_kv_cache = next_time_kv_cache

+            # early break if taking an extra step for agent embedding off cleaned latents for decoding
+
+            if take_extra_step and is_last_step:
+                break
+
             # maybe proprio

             if has_proprio:
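For readability, a small hypothetical helper restating when the extra denoising iteration is scheduled, taken directly from the `take_extra_step` boolean added above; in the diff this is computed inline in `generate`, and the loop then runs `num_steps + int(take_extra_step)` iterations with the early break shown in this hunk.

```python
# hypothetical helper: restates the take_extra_step condition from the hunks above

def needs_extra_step(
    use_time_kv_cache: bool,
    return_rewards_per_frame: bool,
    store_agent_embed: bool,
    return_agent_actions: bool
) -> bool:
    # an extra pass is scheduled when the time kv cache is in use, or when anything
    # decoded off the agent embedding (rewards, actions, stored embeds) is requested
    return (
        use_time_kv_cache or
        return_rewards_per_frame or
        store_agent_embed or
        return_agent_actions
    )
```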
@@ -3021,7 +3053,7 @@ class DynamicsWorldModel(Module):
         latent_is_noised = False,
         return_all_losses = False,
         return_intermediates = False,
-        add_autoregressive_action_loss =
+        add_autoregressive_action_loss = True,
         update_loss_ema = None,
         latent_has_view_dim = False
     ):
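The autoregressive action loss is now on by default. A hedged sketch of opting out at the call site, reusing the tensors from the README usage example; whether this keyword is threaded through the public training call exactly like this is an assumption based only on the signature in this hunk.

```python
# assumes the dynamics / video / rewards / discrete_actions from the README usage example

# default now includes the autoregressive action loss
loss = dynamics(
    video = video,
    rewards = rewards,
    discrete_actions = discrete_actions
)

# explicitly opting out, per the keyword shown above (illustrative)
loss = dynamics(
    video = video,
    rewards = rewards,
    discrete_actions = discrete_actions,
    add_autoregressive_action_loss = False
)
```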
dreamer4-0.0.102/README.md
DELETED
@@ -1,21 +0,0 @@
-<img src="./dreamer4-fig2.png" width="400px"></img>
-
-## Dreamer 4 (wip)
-
-Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v1) for his [Dreamer](https://danijar.com/project/dreamer4/) line of work
-
-[Temporary Discord](https://discord.gg/MkACrrkrYR)
-
-## Citation
-
-```bibtex
-@misc{hafner2025trainingagentsinsidescalable,
-    title = {Training Agents Inside of Scalable World Models},
-    author = {Danijar Hafner and Wilson Yan and Timothy Lillicrap},
-    year = {2025},
-    eprint = {2509.24527},
-    archivePrefix = {arXiv},
-    primaryClass = {cs.AI},
-    url = {https://arxiv.org/abs/2509.24527},
-}
-```