dreamer4 0.0.75__tar.gz → 0.0.76__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dreamer4 has been flagged as potentially problematic; click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dreamer4
3
- Version: 0.0.75
3
+ Version: 0.0.76
4
4
  Summary: Dreamer 4
5
5
  Project-URL: Homepage, https://pypi.org/project/dreamer4/
6
6
  Project-URL: Repository, https://github.com/lucidrains/dreamer4
@@ -284,7 +284,7 @@ def create_multi_token_prediction_targets(
284
284
  batch, seq_len, device = *t.shape[:2], t.device
285
285
 
286
286
  batch_arange = arange(batch, device = device)
287
- seq_arange = arange(seq_len, device = device)[1:]
287
+ seq_arange = arange(seq_len, device = device)
288
288
  steps_arange = arange(steps_future, device = device)
289
289
 
290
290
  indices = add('t, steps -> t steps', seq_arange, steps_arange)
@@ -3100,7 +3100,7 @@ class DynamicsWorldModel(Module):
3100
3100
 
3101
3101
  reward_pred = rearrange(reward_pred, 'mtp b t l -> b l t mtp')
3102
3102
 
3103
- reward_targets, reward_loss_mask = create_multi_token_prediction_targets(two_hot_encoding, self.multi_token_pred_len)
3103
+ reward_targets, reward_loss_mask = create_multi_token_prediction_targets(two_hot_encoding[:, :-1], self.multi_token_pred_len)
3104
3104
 
3105
3105
  reward_targets = rearrange(reward_targets, 'b t mtp l -> b l t mtp')
3106
3106
 
@@ -3126,6 +3126,15 @@ class DynamicsWorldModel(Module):
3126
3126
  ):
3127
3127
  assert self.action_embedder.has_actions
3128
3128
 
3129
+ # handle actions having time vs time - 1 length
3130
+ # remove the first action if it is equal to time (as it would come from some agent token in the past)
3131
+
3132
+ if exists(discrete_actions) and discrete_actions.shape[1] == time:
3133
+ discrete_actions = discrete_actions[:, 1:]
3134
+
3135
+ if exists(continuous_actions) and continuous_actions.shape[1] == time:
3136
+ continuous_actions = continuous_actions[:, 1:]
3137
+
3129
3138
  # only for 1 agent
3130
3139
 
3131
3140
  agent_tokens = rearrange(agent_tokens, 'b t 1 d -> b t d')
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dreamer4"
3
- version = "0.0.75"
3
+ version = "0.0.76"
4
4
  description = "Dreamer 4"
5
5
  authors = [
6
6
  { name = "Phil Wang", email = "lucidrains@gmail.com" }
File without changes
File without changes
File without changes
File without changes
File without changes