dreamer4 0.0.75__tar.gz → 0.0.76__tar.gz
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release. This version of dreamer4 might be problematic.
- {dreamer4-0.0.75 → dreamer4-0.0.76}/PKG-INFO +1 -1
- {dreamer4-0.0.75 → dreamer4-0.0.76}/dreamer4/dreamer4.py +11 -2
- {dreamer4-0.0.75 → dreamer4-0.0.76}/pyproject.toml +1 -1
- {dreamer4-0.0.75 → dreamer4-0.0.76}/.github/workflows/python-publish.yml +0 -0
- {dreamer4-0.0.75 → dreamer4-0.0.76}/.github/workflows/test.yml +0 -0
- {dreamer4-0.0.75 → dreamer4-0.0.76}/.gitignore +0 -0
- {dreamer4-0.0.75 → dreamer4-0.0.76}/LICENSE +0 -0
- {dreamer4-0.0.75 → dreamer4-0.0.76}/README.md +0 -0
- {dreamer4-0.0.75 → dreamer4-0.0.76}/dreamer4/__init__.py +0 -0
- {dreamer4-0.0.75 → dreamer4-0.0.76}/dreamer4/mocks.py +0 -0
- {dreamer4-0.0.75 → dreamer4-0.0.76}/dreamer4/trainers.py +0 -0
- {dreamer4-0.0.75 → dreamer4-0.0.76}/dreamer4-fig2.png +0 -0
- {dreamer4-0.0.75 → dreamer4-0.0.76}/tests/test_dreamer.py +0 -0
@@ -284,7 +284,7 @@ def create_multi_token_prediction_targets(
     batch, seq_len, device = *t.shape[:2], t.device

     batch_arange = arange(batch, device = device)
-    seq_arange = arange(seq_len, device = device)
+    seq_arange = arange(seq_len, device = device)
     steps_arange = arange(steps_future, device = device)

     indices = add('t, steps -> t steps', seq_arange, steps_arange)
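For context, `create_multi_token_prediction_targets` builds, for every sequence position, the targets for the next several steps together with a loss mask (it returns both, as the reward hunk below shows). A minimal sketch of that indexing pattern, using plain PyTorch in place of the package's `arange` / einx-style `add` helpers; the exact clamping and masking behaviour here is an assumption, not a copy of the package's implementation:

import torch

def multi_token_targets_sketch(t, steps_future):
    # t: (batch, seq_len, ...) - for every position, gather the values at the
    # next `steps_future` offsets and mask offsets that run past the sequence end
    batch, seq_len, device = *t.shape[:2], t.device

    seq_arange = torch.arange(seq_len, device = device)
    steps_arange = torch.arange(steps_future, device = device)

    # (seq_len, steps_future) grid of absolute indices: position + lookahead offset
    indices = seq_arange[:, None] + steps_arange[None, :]

    loss_mask = indices < seq_len                      # False where the lookahead overruns
    indices = indices.clamp(max = seq_len - 1)         # clamp so gathering stays in bounds

    targets = t[:, indices]                            # (batch, seq_len, steps_future, ...)
    loss_mask = loss_mask[None].expand(batch, -1, -1)  # broadcast the mask over the batch
    return targets, loss_mask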
@@ -3100,7 +3100,7 @@ class DynamicsWorldModel(Module):

         reward_pred = rearrange(reward_pred, 'mtp b t l -> b l t mtp')

-        reward_targets, reward_loss_mask = create_multi_token_prediction_targets(two_hot_encoding, self.multi_token_pred_len)
+        reward_targets, reward_loss_mask = create_multi_token_prediction_targets(two_hot_encoding[:, :-1], self.multi_token_pred_len)

         reward_targets = rearrange(reward_targets, 'b t mtp l -> b l t mtp')
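The change trims the last timestep of the two-hot reward encoding before the multi-token targets are built, presumably so the target time axis matches the reward predictions. A hedged shape sketch of the tensors around this call; the sizes and the stand-in gather are assumptions, not the package's code:

import torch
from einops import rearrange

batch, time, bins, mtp = 2, 8, 41, 4                      # assumed sizes

# reward logits, one set per multi-token-prediction step (assumed layout)
reward_pred = torch.randn(mtp, batch, time - 1, bins)
reward_pred = rearrange(reward_pred, 'mtp b t l -> b l t mtp')

# two-hot reward targets, one per timestep; dropping the final step leaves
# time - 1 entries to pair with predictions made from the earlier states
two_hot_encoding = torch.zeros(batch, time, bins)
trimmed = two_hot_encoding[:, :-1]                        # (batch, time - 1, bins)

# stand-in for create_multi_token_prediction_targets: gather mtp future steps
idx = torch.arange(time - 1)[:, None] + torch.arange(mtp)[None, :]
idx = idx.clamp(max = time - 2)
reward_targets = trimmed[:, idx]                          # (batch, time - 1, mtp, bins)
reward_targets = rearrange(reward_targets, 'b t mtp l -> b l t mtp')

assert reward_targets.shape == reward_pred.shape          # (batch, bins, time - 1, mtp)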
@@ -3126,6 +3126,15 @@ class DynamicsWorldModel(Module):
    ):
        assert self.action_embedder.has_actions

+        # handle actions having time vs time - 1 length
+        # remove the first action if it is equal to time (as it would come from some agent token in the past)
+
+        if exists(discrete_actions) and discrete_actions.shape[1] == time:
+            discrete_actions = discrete_actions[:, 1:]
+
+        if exists(continuous_actions) and continuous_actions.shape[1] == time:
+            continuous_actions = continuous_actions[:, 1:]
+
         # only for 1 agent

         agent_tokens = rearrange(agent_tokens, 'b t 1 d -> b t d')
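The new guard normalises action length: if actions arrive with `time` entries rather than `time - 1`, the first one is dropped, since per the diff's comment it would come from an agent token in the past. A small self-contained illustration of the same check; `exists` mirrors the diff, while the tensor shapes are made up for the example:

import torch

def exists(v):
    return v is not None

time = 6
discrete_actions = torch.randint(0, 10, (2, time))        # (batch, time) - one action too many
continuous_actions = None                                  # optional, may be absent

# drop the leading action when the action sequence spans `time` steps,
# leaving the time - 1 actions that sit between the modelled timesteps
if exists(discrete_actions) and discrete_actions.shape[1] == time:
    discrete_actions = discrete_actions[:, 1:]

if exists(continuous_actions) and continuous_actions.shape[1] == time:
    continuous_actions = continuous_actions[:, 1:]

assert discrete_actions.shape[1] == time - 1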