dreamer4 0.0.100__tar.gz → 0.0.101__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those changes as they appear in the respective public registries.
Potentially problematic release.
This version of dreamer4 might be problematic. Click here for more details.
- {dreamer4-0.0.100 → dreamer4-0.0.101}/PKG-INFO +1 -1
- {dreamer4-0.0.100 → dreamer4-0.0.101}/dreamer4/dreamer4.py +10 -1
- {dreamer4-0.0.100 → dreamer4-0.0.101}/pyproject.toml +1 -1
- {dreamer4-0.0.100 → dreamer4-0.0.101}/.github/workflows/python-publish.yml +0 -0
- {dreamer4-0.0.100 → dreamer4-0.0.101}/.github/workflows/test.yml +0 -0
- {dreamer4-0.0.100 → dreamer4-0.0.101}/.gitignore +0 -0
- {dreamer4-0.0.100 → dreamer4-0.0.101}/LICENSE +0 -0
- {dreamer4-0.0.100 → dreamer4-0.0.101}/README.md +0 -0
- {dreamer4-0.0.100 → dreamer4-0.0.101}/dreamer4/__init__.py +0 -0
- {dreamer4-0.0.100 → dreamer4-0.0.101}/dreamer4/mocks.py +0 -0
- {dreamer4-0.0.100 → dreamer4-0.0.101}/dreamer4/trainers.py +0 -0
- {dreamer4-0.0.100 → dreamer4-0.0.101}/dreamer4-fig2.png +0 -0
- {dreamer4-0.0.100 → dreamer4-0.0.101}/tests/test_dreamer.py +0 -0
|
@@ -1900,6 +1900,7 @@ class DynamicsWorldModel(Module):
|
|
|
1900
1900
|
gae_lambda = 0.95,
|
|
1901
1901
|
ppo_eps_clip = 0.2,
|
|
1902
1902
|
pmpo_pos_to_neg_weight = 0.5, # pos and neg equal weight
|
|
1903
|
+
pmpo_reverse_kl = True,
|
|
1903
1904
|
pmpo_kl_div_loss_weight = .3,
|
|
1904
1905
|
value_clip = 0.4,
|
|
1905
1906
|
policy_entropy_weight = .01,
|
|
@@ -2108,6 +2109,7 @@ class DynamicsWorldModel(Module):
|
|
|
2108
2109
|
|
|
2109
2110
|
self.pmpo_pos_to_neg_weight = pmpo_pos_to_neg_weight
|
|
2110
2111
|
self.pmpo_kl_div_loss_weight = pmpo_kl_div_loss_weight
|
|
2112
|
+
self.pmpo_reverse_kl = pmpo_reverse_kl
|
|
2111
2113
|
|
|
2112
2114
|
# rewards related
|
|
2113
2115
|
|
|
@@ -2578,11 +2580,18 @@ class DynamicsWorldModel(Module):
|
|
|
2578
2580
|
# take care of kl
|
|
2579
2581
|
|
|
2580
2582
|
if self.pmpo_kl_div_loss_weight > 0.:
|
|
2583
|
+
|
|
2581
2584
|
new_unembedded_actions = self.action_embedder.unembed(policy_embed, pred_head_index = 0)
|
|
2582
2585
|
|
|
2586
|
+
kl_div_inputs, kl_div_targets = new_unembedded_actions, old_action_unembeds
|
|
2587
|
+
|
|
2583
2588
|
# mentioned that the "reverse direction for the prior KL" was used
|
|
2589
|
+
# made optional, since instability was observed in a toy task
|
|
2590
|
+
|
|
2591
|
+
if self.pmpo_reverse_kl:
|
|
2592
|
+
kl_div_inputs, kl_div_targets = kl_div_targets, kl_div_inputs
|
|
2584
2593
|
|
|
2585
|
-
discrete_kl_div, continuous_kl_div = self.action_embedder.kl_div(
|
|
2594
|
+
discrete_kl_div, continuous_kl_div = self.action_embedder.kl_div(kl_div_inputs, kl_div_targets)
|
|
2586
2595
|
|
|
2587
2596
|
# accumulate discrete and continuous kl div
|
|
2588
2597
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|