dreamer4 0.0.101__tar.gz → 0.1.0__tar.gz
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release. This version of dreamer4 might be problematic.
- {dreamer4-0.0.101 → dreamer4-0.1.0}/PKG-INFO +69 -3
- dreamer4-0.1.0/README.md +87 -0
- {dreamer4-0.0.101 → dreamer4-0.1.0}/dreamer4/dreamer4.py +44 -7
- {dreamer4-0.0.101 → dreamer4-0.1.0}/pyproject.toml +1 -1
- dreamer4-0.0.101/README.md +0 -21
- {dreamer4-0.0.101 → dreamer4-0.1.0}/.github/workflows/python-publish.yml +0 -0
- {dreamer4-0.0.101 → dreamer4-0.1.0}/.github/workflows/test.yml +0 -0
- {dreamer4-0.0.101 → dreamer4-0.1.0}/.gitignore +0 -0
- {dreamer4-0.0.101 → dreamer4-0.1.0}/LICENSE +0 -0
- {dreamer4-0.0.101 → dreamer4-0.1.0}/dreamer4/__init__.py +0 -0
- {dreamer4-0.0.101 → dreamer4-0.1.0}/dreamer4/mocks.py +0 -0
- {dreamer4-0.0.101 → dreamer4-0.1.0}/dreamer4/trainers.py +0 -0
- {dreamer4-0.0.101 → dreamer4-0.1.0}/dreamer4-fig2.png +0 -0
- {dreamer4-0.0.101 → dreamer4-0.1.0}/tests/test_dreamer.py +0 -0
{dreamer4-0.0.101 → dreamer4-0.1.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dreamer4
-Version: 0.0.101
+Version: 0.1.0
 Summary: Dreamer 4
 Project-URL: Homepage, https://pypi.org/project/dreamer4/
 Project-URL: Repository, https://github.com/lucidrains/dreamer4
@@ -53,11 +53,75 @@ Description-Content-Type: text/markdown
 
 <img src="./dreamer4-fig2.png" width="400px"></img>
 
-## Dreamer 4 (wip)
+## Dreamer 4
 
 Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v1) for his [Dreamer](https://danijar.com/project/dreamer4/) line of work
 
-[Temporary Discord](https://discord.gg/MkACrrkrYR)
+## Install
+
+```bash
+$ pip install dreamer4-pytorch
+```
+
+## Usage
+
+```python
+import torch
+from dreamer4 import VideoTokenizer, DynamicsWorldModel
+
+# video tokenizer, learned through MAE + lpips
+
+tokenizer = VideoTokenizer(
+    dim = 512,
+    dim_latent = 32,
+    patch_size = 32,
+    image_height = 256,
+    image_width = 256
+)
+
+# dynamics world model
+
+dynamics = DynamicsWorldModel(
+    dim = 512,
+    dim_latent = 32,
+    video_tokenizer = tokenizer,
+    num_discrete_actions = 4,
+    num_residual_streams = 1
+)
+
+# state, action, rewards
+
+video = torch.randn(2, 3, 10, 256, 256)
+discrete_actions = torch.randint(0, 4, (2, 10, 1))
+rewards = torch.randn(2, 10)
+
+# learn dynamics / behavior cloned model
+
+loss = dynamics(
+    video = video,
+    rewards = rewards,
+    discrete_actions = discrete_actions
+)
+
+loss.backward()
+
+# do the above with much data
+
+# then generate dreams
+
+dreams = dynamics.generate(
+    10,
+    batch_size = 2,
+    return_decoded_video = True,
+    return_for_policy_optimization = True
+)
+
+# learn from the dreams
+
+actor_loss, critic_loss = dynamics.learn_from_experience(dreams)
+
+(actor_loss + critic_loss).backward()
+```
 
 ## Citation
 
@@ -72,3 +136,5 @@ Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v
     url           = {https://arxiv.org/abs/2509.24527},
 }
 ```
+
+*the conquest of nature is to be achieved through number and measure* - angels to Descartes, in a dream, the story goes.
dreamer4-0.1.0/README.md ADDED

@@ -0,0 +1,87 @@
+<img src="./dreamer4-fig2.png" width="400px"></img>
+
+## Dreamer 4
+
+Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v1) for his [Dreamer](https://danijar.com/project/dreamer4/) line of work
+
+## Install
+
+```bash
+$ pip install dreamer4-pytorch
+```
+
+## Usage
+
+```python
+import torch
+from dreamer4 import VideoTokenizer, DynamicsWorldModel
+
+# video tokenizer, learned through MAE + lpips
+
+tokenizer = VideoTokenizer(
+    dim = 512,
+    dim_latent = 32,
+    patch_size = 32,
+    image_height = 256,
+    image_width = 256
+)
+
+# dynamics world model
+
+dynamics = DynamicsWorldModel(
+    dim = 512,
+    dim_latent = 32,
+    video_tokenizer = tokenizer,
+    num_discrete_actions = 4,
+    num_residual_streams = 1
+)
+
+# state, action, rewards
+
+video = torch.randn(2, 3, 10, 256, 256)
+discrete_actions = torch.randint(0, 4, (2, 10, 1))
+rewards = torch.randn(2, 10)
+
+# learn dynamics / behavior cloned model
+
+loss = dynamics(
+    video = video,
+    rewards = rewards,
+    discrete_actions = discrete_actions
+)
+
+loss.backward()
+
+# do the above with much data
+
+# then generate dreams
+
+dreams = dynamics.generate(
+    10,
+    batch_size = 2,
+    return_decoded_video = True,
+    return_for_policy_optimization = True
+)
+
+# learn from the dreams
+
+actor_loss, critic_loss = dynamics.learn_from_experience(dreams)
+
+(actor_loss + critic_loss).backward()
+```
+
+## Citation
+
+```bibtex
+@misc{hafner2025trainingagentsinsidescalable,
+    title         = {Training Agents Inside of Scalable World Models},
+    author        = {Danijar Hafner and Wilson Yan and Timothy Lillicrap},
+    year          = {2025},
+    eprint        = {2509.24527},
+    archivePrefix = {arXiv},
+    primaryClass  = {cs.AI},
+    url           = {https://arxiv.org/abs/2509.24527},
+}
+```
+
+*the conquest of nature is to be achieved through number and measure* - angels to Descartes, in a dream, the story goes.
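Before the source changes below, note how the README snippet scales up: the forward pass composes with any standard optimizer loop. A minimal sketch under that assumption, using a vanilla `torch.optim.Adam` (the optimizer choice and learning rate are not from this repo; the package also ships `dreamer4/trainers.py`, unchanged in this release):

```python
import torch
from torch.optim import Adam
from dreamer4 import VideoTokenizer, DynamicsWorldModel

tokenizer = VideoTokenizer(dim = 512, dim_latent = 32, patch_size = 32, image_height = 256, image_width = 256)

dynamics = DynamicsWorldModel(
    dim = 512,
    dim_latent = 32,
    video_tokenizer = tokenizer,
    num_discrete_actions = 4,
    num_residual_streams = 1
)

optim = Adam(dynamics.parameters(), lr = 3e-4)   # optimizer and lr are assumptions

# "do the above with much data" - random stand-in batches here
for _ in range(2):
    loss = dynamics(
        video = torch.randn(2, 3, 10, 256, 256),
        rewards = torch.randn(2, 10),
        discrete_actions = torch.randint(0, 4, (2, 10, 1))
    )
    loss.backward()
    optim.step()
    optim.zero_grad()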
{dreamer4-0.0.101 → dreamer4-0.1.0}/dreamer4/dreamer4.py

@@ -1902,6 +1902,7 @@ class DynamicsWorldModel(Module):
         pmpo_pos_to_neg_weight = 0.5, # pos and neg equal weight
         pmpo_reverse_kl = True,
         pmpo_kl_div_loss_weight = .3,
+        normalize_advantages = None,
         value_clip = 0.4,
         policy_entropy_weight = .01,
         gae_use_accelerated = False
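This hunk lands among the `pmpo_*` defaults of `DynamicsWorldModel`; assuming (not visible in this context) that the signature above is the constructor, the new knob could be pinned at construction time, as in this hypothetical sketch:

```python
# hypothetical: pin the new default when building the model
# (assumes the signature above is DynamicsWorldModel.__init__)
dynamics = DynamicsWorldModel(
    dim = 512,
    dim_latent = 32,
    video_tokenizer = tokenizer,
    num_discrete_actions = 4,
    normalize_advantages = False   # None by default; resolved against use_pmpo at learn time
)
```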
@@ -2425,8 +2426,10 @@ class DynamicsWorldModel(Module):
         value_optim: Optimizer | None = None,
         only_learn_policy_value_heads = True, # in the paper, they do not finetune the entire dynamics model, they just learn the heads
         use_pmpo = True,
+        normalize_advantages = None,
         eps = 1e-6
     ):
+        assert isinstance(experience, Experience)
 
         latents = experience.latents
         actions = experience.actions
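At the call site, the stricter input check and the new keyword look roughly like this; a hypothetical sketch reusing the `dreams` object from the README example, not code taken from the repo:

```python
# `dreams` must now be an Experience, e.g. from
# dynamics.generate(..., return_for_policy_optimization = True)
actor_loss, critic_loss = dynamics.learn_from_experience(
    dreams,
    use_pmpo = True,              # PMPO objective, on by default
    normalize_advantages = None   # None resolves to `not use_pmpo` (see the advantage hunk below)
)

(actor_loss + critic_loss).backward()
```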
@@ -2439,7 +2442,7 @@ class DynamicsWorldModel(Module):
         step_size = experience.step_size
         agent_index = experience.agent_index
 
-        assert all([*map(exists, (old_log_probs, actions, old_values, rewards, step_size))]), 'the generations need to contain the log probs, values, and rewards for policy optimization'
+        assert all([*map(exists, (old_log_probs, actions, old_values, rewards, step_size))]), 'the generations need to contain the log probs, values, and rewards for policy optimization - world_model.generate(..., return_log_probs_and_values = True)'
 
         batch, time = latents.shape[0], latents.shape[1]
 
@@ -2507,16 +2510,19 @@ class DynamicsWorldModel(Module):
         else:
             advantage = returns - old_values
 
-        #
+        # if using pmpo, do not normalize advantages, but can be overridden
+
+        normalize_advantages = default(normalize_advantages, not use_pmpo)
+
+        if normalize_advantages:
+            advantage = F.layer_norm(advantage, advantage.shape, eps = eps)
+
         # https://arxiv.org/abs/2410.04166v1
 
         if use_pmpo:
             pos_advantage_mask = advantage >= 0.
             neg_advantage_mask = ~pos_advantage_mask
 
-        else:
-            advantage = F.layer_norm(advantage, advantage.shape, eps = eps)
-
         # replay for the action logits and values
         # but only do so if fine tuning the entire world model for RL
 
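Previously the layer-norm whitening of advantages lived only on the non-PMPO branch; it is now hoisted in front and gated by the resolved flag. A self-contained sketch of the resolved behavior, where `default` is assumed to be the repo's usual None-fallback helper:

```python
import torch
import torch.nn.functional as F

def default(val, d):
    # assumed helper: fall back to d when val is None
    return val if val is not None else d

advantage = torch.randn(2, 10)   # (batch, time) stand-in advantages
use_pmpo = True
normalize_advantages = None      # caller left it unset

# if using pmpo, do not normalize advantages, but can be overridden
normalize_advantages = default(normalize_advantages, not use_pmpo)

if normalize_advantages:
    # whiten over the entire tensor, as in the hunk above
    advantage = F.layer_norm(advantage, advantage.shape, eps = 1e-6)
```

The upshot: PMPO runs on raw advantages unless the caller opts back in.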
@@ -2689,12 +2695,22 @@ class DynamicsWorldModel(Module):
         return_rewards_per_frame = False,
         return_agent_actions = False,
         return_log_probs_and_values = False,
+        return_for_policy_optimization = False,
         return_time_kv_cache = False,
         store_agent_embed = True,
         store_old_action_unembeds = True
 
     ): # (b t n d) | (b c t h w)
 
+        # handy flag for returning generations for rl
+
+        if return_for_policy_optimization:
+            return_agent_actions |= True
+            return_log_probs_and_values |= True
+            return_rewards_per_frame |= True
+
+        # more variables
+
         has_proprio = self.has_proprio
         was_training = self.training
         self.eval()
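Per the OR-assignments above, the new flag is shorthand for enabling three return flags at once, so these two calls should be interchangeable (a sketch based on the README example, not repo code):

```python
# convenience flag, new in 0.1.0
dreams = dynamics.generate(10, batch_size = 2, return_for_policy_optimization = True)

# explicit equivalent, per the OR-assignments above
dreams = dynamics.generate(
    10,
    batch_size = 2,
    return_agent_actions = True,
    return_log_probs_and_values = True,
    return_rewards_per_frame = True
)
```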
@@ -2764,6 +2780,19 @@ class DynamicsWorldModel(Module):
 
         curr_time_steps = latents.shape[1]
 
+        # determine whether to take an extra step if
+        # (1) using time kv cache
+        # (2) decoding anything off agent embedding (rewards, actions, etc)
+
+        take_extra_step = (
+            use_time_kv_cache or
+            return_rewards_per_frame or
+            store_agent_embed or
+            return_agent_actions
+        )
+
+        # prepare noised latent / proprio inputs
+
         noised_latent = randn((batch_size, 1, self.num_video_views, *latent_shape), device = self.device)
 
         noised_proprio = None
@@ -2771,7 +2800,10 @@ class DynamicsWorldModel(Module):
         if has_proprio:
             noised_proprio = randn((batch_size, 1, self.dim_proprio), device = self.device)
 
-        for step in range(num_steps):
+        # denoising steps
+
+        for step in range(num_steps + int(take_extra_step)):
+
             is_last_step = (step + 1) == num_steps
 
             signal_levels = full((batch_size, 1), step * step_size, dtype = torch.long, device = self.device)
@@ -2814,6 +2846,11 @@ class DynamicsWorldModel(Module):
             if use_time_kv_cache and is_last_step:
                 time_kv_cache = next_time_kv_cache
 
+            # early break if taking an extra step for agent embedding off cleaned latents for decoding
+
+            if take_extra_step and is_last_step:
+                break
+
             # maybe proprio
 
             if has_proprio:
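Read together, the last three hunks reshape the sampling loop: a gate decides whether one extra iteration is scheduled, and a matching early break cuts the last step short once the cache and agent embedding have been refreshed. A structural sketch with the denoising body elided and stand-in values (the control flow mirrors the hunks; everything else is scaffolding):

```python
# structural sketch of the 0.1.0 sampling loop; denoising body elided
num_steps = 4
use_time_kv_cache = True
return_rewards_per_frame = store_agent_embed = return_agent_actions = False
time_kv_cache = next_time_kv_cache = None

take_extra_step = (
    use_time_kv_cache or           # (1) using the time kv cache
    return_rewards_per_frame or    # (2) decoding rewards / actions off the agent embedding
    store_agent_embed or
    return_agent_actions
)

for step in range(num_steps + int(take_extra_step)):
    is_last_step = (step + 1) == num_steps

    # ... one denoising forward pass happens here ...

    if use_time_kv_cache and is_last_step:
        time_kv_cache = next_time_kv_cache

    if take_extra_step and is_last_step:
        break   # skip the rest of the iteration, per the early-break hunk above

    # ... latent update and proprio handling continue here ...
```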
@@ -3016,7 +3053,7 @@ class DynamicsWorldModel(Module):
         latent_is_noised = False,
         return_all_losses = False,
         return_intermediates = False,
-        add_autoregressive_action_loss =
+        add_autoregressive_action_loss = True,
         update_loss_ema = None,
         latent_has_view_dim = False
     ):
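0.1.0 gives `add_autoregressive_action_loss` an explicit default of `True`. Passing it explicitly would look like this hypothetical forward call, reusing the README tensors:

```python
# explicit opt-in, matching the new 0.1.0 default
loss = dynamics(
    video = video,
    rewards = rewards,
    discrete_actions = discrete_actions,
    add_autoregressive_action_loss = True
)
loss.backward()
```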
dreamer4-0.0.101/README.md DELETED

@@ -1,21 +0,0 @@
-<img src="./dreamer4-fig2.png" width="400px"></img>
-
-## Dreamer 4 (wip)
-
-Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v1) for his [Dreamer](https://danijar.com/project/dreamer4/) line of work
-
-[Temporary Discord](https://discord.gg/MkACrrkrYR)
-
-## Citation
-
-```bibtex
-@misc{hafner2025trainingagentsinsidescalable,
-    title         = {Training Agents Inside of Scalable World Models},
-    author        = {Danijar Hafner and Wilson Yan and Timothy Lillicrap},
-    year          = {2025},
-    eprint        = {2509.24527},
-    archivePrefix = {arXiv},
-    primaryClass  = {cs.AI},
-    url           = {https://arxiv.org/abs/2509.24527},
-}
-```