evolutionary-policy-optimization 0.0.63__py3-none-any.whl → 0.0.64__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- evolutionary_policy_optimization/epo.py +15 -9
- {evolutionary_policy_optimization-0.0.63.dist-info → evolutionary_policy_optimization-0.0.64.dist-info}/METADATA +1 -1
- {evolutionary_policy_optimization-0.0.63.dist-info → evolutionary_policy_optimization-0.0.64.dist-info}/RECORD +5 -5
- {evolutionary_policy_optimization-0.0.63.dist-info → evolutionary_policy_optimization-0.0.64.dist-info}/WHEEL +0 -0
- {evolutionary_policy_optimization-0.0.63.dist-info → evolutionary_policy_optimization-0.0.64.dist-info}/licenses/LICENSE +0 -0
evolutionary_policy_optimization/epo.py

```diff
@@ -1068,7 +1068,8 @@ class EPO(Module):
         agent: Agent,
         episodes_per_latent,
         max_episode_length,
-        action_sample_temperature = 1
+        action_sample_temperature = 1.,
+        fix_environ_across_latents = True
     ):
         super().__init__()
         self.agent = agent
@@ -1077,6 +1078,7 @@ class EPO(Module):
         self.num_latents = agent.latent_gene_pool.num_latents
         self.episodes_per_latent = episodes_per_latent
         self.max_episode_length = max_episode_length
+        self.fix_environ_across_latents = fix_environ_across_latents

         self.register_buffer('dummy', tensor(0))

```
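Taken together, the two hunks above move `fix_environ_across_latents` into the constructor, where it is stored for use as a fallback in `forward` (see the next hunk). The surrounding context also shows the `register_buffer('dummy', tensor(0))` idiom; below is a minimal, self-contained sketch of why a module commonly registers such a buffer, assuming the usual device-tracking motivation (an illustration, not this package's code):

```python
import torch
from torch import nn, tensor

class DeviceAware(nn.Module):
    def __init__(self):
        super().__init__()
        # buffers are moved together with the module by .to(...) / .cuda()
        self.register_buffer('dummy', tensor(0))

    @property
    def device(self):
        # the buffer's device therefore always reflects the module's current device
        return self.dummy.device

module = DeviceAware()
print(module.device)  # cpu, until the module is moved elsewhere
```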
```diff
@@ -1133,9 +1135,11 @@ class EPO(Module):
     def forward(
         self,
         env,
-        fix_environ_across_latents =
+        fix_environ_across_latents = None
     ) -> MemoriesAndCumulativeRewards:

+        fix_environ_across_latents = default(fix_environ_across_latents, self.fix_environ_across_latents)
+
         self.agent.eval()

         invalid_episode = tensor(-1) # will use `episode_id` value of `-1` for the `next_value`, needed for not discarding last reward for generalized advantage estimate
```
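The added `default(...)` line implements a constructor-default-with-per-call-override pattern: `forward` uses the value passed by the caller when one is given, and otherwise falls back to the value stored at construction time. A self-contained sketch of the pattern, using a stand-in `default` helper (the package's own helper is not shown in this diff):

```python
def default(value, fallback):
    # use `value` unless it is None, otherwise fall back
    return value if value is not None else fallback

class Rollout:
    def __init__(self, fix_environ_across_latents = True):
        # the constructor-time choice is remembered ...
        self.fix_environ_across_latents = fix_environ_across_latents

    def forward(self, fix_environ_across_latents = None):
        # ... and only used when the caller does not override it per call
        fix_environ_across_latents = default(fix_environ_across_latents, self.fix_environ_across_latents)
        return fix_environ_across_latents

rollout = Rollout(fix_environ_across_latents = False)
assert rollout.forward() is False      # falls back to the constructor value
assert rollout.forward(True) is True   # a per-call override still wins
```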
```diff
@@ -1202,16 +1206,18 @@
 
                     time += 1
 
-
+                if not done:
+                    # add bootstrap value if truncated
 
-
+                    next_value = temp_batch_dim(self.agent.get_critic_values)(state, latent = latent)
 
-
-
-
-
+                    memory_for_gae = memory._replace(
+                        episode_id = invalid_episode,
+                        value = next_value,
+                        done = tensor(True)
+                    )
 
-
+                    memories.append(memory_for_gae)
 
         return MemoriesAndCumulativeRewards(
             memories = memories,
```
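The added block handles episodes that hit `max_episode_length` without terminating: the critic's value of the final state is appended as a bootstrap `next_value`, tagged with the `invalid_episode` id, so that generalized advantage estimation does not treat the truncation as a true terminal state and discard the last reward. An illustrative, self-contained GAE sketch showing the role of that bootstrap value (assumed discount and lambda values; this is not the package's implementation):

```python
import torch

def gae(rewards, values, next_value, gamma = 0.99, lam = 0.95):
    # values holds V(s_0) .. V(s_{T-1}); next_value supplies V(s_T),
    # the bootstrap term for an episode cut off before a real terminal state
    values = torch.cat((values, next_value.reshape(1)))
    advantages = torch.zeros_like(rewards)
    running = torch.tensor(0.)
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * values[t + 1] - values[t]
        running = delta + gamma * lam * running
        advantages[t] = running
    return advantages

rewards = torch.tensor([1., 1., 1.])
values  = torch.tensor([0.5, 0.5, 0.5])

with_bootstrap    = gae(rewards, values, next_value = torch.tensor(0.5))
without_bootstrap = gae(rewards, values, next_value = torch.tensor(0.))  # as if the episode had truly ended
print(with_bootstrap, without_bootstrap)
```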
{evolutionary_policy_optimization-0.0.63.dist-info → evolutionary_policy_optimization-0.0.64.dist-info}/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.63
+Version: 0.0.64
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
```
{evolutionary_policy_optimization-0.0.63.dist-info → evolutionary_policy_optimization-0.0.64.dist-info}/RECORD

```diff
@@ -1,9 +1,9 @@
 evolutionary_policy_optimization/__init__.py,sha256=0q0aBuFgWi06MLMD8FiHzBYQ3_W4LYWrwmCtF3u5H2A,201
 evolutionary_policy_optimization/distributed.py,sha256=7KgZdeS_wxBHo_du9XZFB1Cu318J-Bp66Xdr6Log_20,2423
-evolutionary_policy_optimization/epo.py,sha256=
+evolutionary_policy_optimization/epo.py,sha256=0_jC9Tbl6FiscLHklvTKtuQTwZL8egqFKW-4JUxxwvw,37001
 evolutionary_policy_optimization/experimental.py,sha256=-IgqjJ_Wk_CMB1y9YYWpoYqTG9GZHAS6kbRdTluVevg,1563
 evolutionary_policy_optimization/mock_env.py,sha256=gvATGA51Ym5sf3jiR2VmlpjiCcT7KCDDY_SrR-MEwsU,941
-evolutionary_policy_optimization-0.0.
-evolutionary_policy_optimization-0.0.
-evolutionary_policy_optimization-0.0.
-evolutionary_policy_optimization-0.0.
+evolutionary_policy_optimization-0.0.64.dist-info/METADATA,sha256=vWdnTe2a86wTenEh29TNJlYEjD8A5CPtsyylxh4XsE0,6220
+evolutionary_policy_optimization-0.0.64.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.64.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.64.dist-info/RECORD,,
```
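For reference, each RECORD line above follows the wheel convention `path,sha256=<digest>,<size>`, where the digest is the urlsafe-base64-encoded SHA-256 of the file with the trailing `=` padding stripped, and the size is in bytes. A small sketch that reproduces such an entry from a file on disk (illustrative; the example path assumes an unpacked wheel):

```python
import base64
import hashlib
import pathlib

def record_entry(path):
    # read the file, hash it, and format the entry as RECORD expects
    data = pathlib.Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b'=').decode()
    return f"{path},sha256={digest},{len(data)}"

# running this against the unpacked 0.0.64 wheel should reproduce, for example,
# the epo.py line ending in ",37001" shown above:
# print(record_entry('evolutionary_policy_optimization/epo.py'))
```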
WHEEL and licenses/LICENSE: files without changes.