evolutionary-policy-optimization 0.0.63__py3-none-any.whl → 0.0.64__py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
evolutionary_policy_optimization/epo.py

@@ -1068,7 +1068,8 @@ class EPO(Module):
         agent: Agent,
         episodes_per_latent,
         max_episode_length,
-        action_sample_temperature = 1.
+        action_sample_temperature = 1.,
+        fix_environ_across_latents = True
     ):
         super().__init__()
         self.agent = agent
@@ -1077,6 +1078,7 @@ class EPO(Module):
         self.num_latents = agent.latent_gene_pool.num_latents
         self.episodes_per_latent = episodes_per_latent
         self.max_episode_length = max_episode_length
+        self.fix_environ_across_latents = fix_environ_across_latents
 
         self.register_buffer('dummy', tensor(0))
 
@@ -1133,9 +1135,11 @@ class EPO(Module):
     def forward(
         self,
         env,
-        fix_environ_across_latents = True
+        fix_environ_across_latents = None
     ) -> MemoriesAndCumulativeRewards:
 
+        fix_environ_across_latents = default(fix_environ_across_latents, self.fix_environ_across_latents)
+
         self.agent.eval()
 
         invalid_episode = tensor(-1) # will use `episode_id` value of `-1` for the `next_value`, needed for not discarding last reward for generalized advantage estimate
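The forward signature now defaults to `None` and resolves the flag against the constructor value through the repository's `default` helper. The helper's definition is not part of this diff; a common minimal form (an assumption, in the usual style of this codebase) would be:

# assumed shape of the helpers referenced by the new line above;
# the actual definitions live elsewhere in epo.py and are not shown in this diff
def exists(v):
    return v is not None

def default(v, d):
    # use the explicitly passed value if any, otherwise fall back to the default
    return v if exists(v) else d

With that convention, passing `fix_environ_across_latents` to `forward` still overrides the behavior per call, while leaving it as `None` picks up the `self.fix_environ_across_latents` set in `__init__`.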
@@ -1202,16 +1206,18 @@ class EPO(Module):
 
                     time += 1
 
-                # need the final next value for GAE, iiuc
+                if not done:
+                    # add bootstrap value if truncated
 
-                next_value = temp_batch_dim(self.agent.get_critic_values)(state, latent = latent)
+                    next_value = temp_batch_dim(self.agent.get_critic_values)(state, latent = latent)
 
-                memory_for_gae = memory._replace(
-                    episode_id = invalid_episode,
-                    value = next_value
-                )
+                    memory_for_gae = memory._replace(
+                        episode_id = invalid_episode,
+                        value = next_value,
+                        done = tensor(True)
+                    )
 
-                memories.append(memory_for_gae)
+                    memories.append(memory_for_gae)
 
         return MemoriesAndCumulativeRewards(
             memories = memories,
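The last epo.py hunk changes the post-episode bookkeeping: the extra `next_value` memory used for generalized advantage estimation is now appended only when the episode ran out of time without terminating (`not done`), and that bootstrap entry is marked with `done = tensor(True)`. The sketch below is not the package's advantage code; it is a generic GAE loop, included only to illustrate why a bootstrap value matters for truncated episodes but not for terminated ones.

# generic GAE sketch (assumption: standard formulation, not the package's own implementation)
import torch

def gae(rewards, values, dones, next_value, gamma = 0.99, lam = 0.95):
    # rewards, values, dones: 1-d tensors over one trajectory
    # next_value: critic value of the state after the last step (the bootstrap)
    advantages = torch.zeros_like(rewards)
    last_adv = 0.
    for t in reversed(range(len(rewards))):
        nonterminal = 1. - dones[t].float()
        value_next = next_value if t == len(rewards) - 1 else values[t + 1]
        # a terminated step zeroes out the bootstrap; a truncated step keeps it
        delta = rewards[t] + gamma * value_next * nonterminal - values[t]
        last_adv = delta + gamma * lam * nonterminal * last_adv
        advantages[t] = last_adv
    return advantages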
evolutionary_policy_optimization-0.0.63.dist-info/METADATA → evolutionary_policy_optimization-0.0.64.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.63
+Version: 0.0.64
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
evolutionary_policy_optimization-0.0.63.dist-info/RECORD → evolutionary_policy_optimization-0.0.64.dist-info/RECORD

@@ -1,9 +1,9 @@
 evolutionary_policy_optimization/__init__.py,sha256=0q0aBuFgWi06MLMD8FiHzBYQ3_W4LYWrwmCtF3u5H2A,201
 evolutionary_policy_optimization/distributed.py,sha256=7KgZdeS_wxBHo_du9XZFB1Cu318J-Bp66Xdr6Log_20,2423
-evolutionary_policy_optimization/epo.py,sha256=DSG2fYWLk0cyHhfoiwqmSzh2TBOWhz25sD1oWIM5p1k,36695
+evolutionary_policy_optimization/epo.py,sha256=0_jC9Tbl6FiscLHklvTKtuQTwZL8egqFKW-4JUxxwvw,37001
 evolutionary_policy_optimization/experimental.py,sha256=-IgqjJ_Wk_CMB1y9YYWpoYqTG9GZHAS6kbRdTluVevg,1563
 evolutionary_policy_optimization/mock_env.py,sha256=gvATGA51Ym5sf3jiR2VmlpjiCcT7KCDDY_SrR-MEwsU,941
-evolutionary_policy_optimization-0.0.63.dist-info/METADATA,sha256=X2FKT8WJ9T1t0ydEdtxrJsJGXY1ubfvydQSykv2G03M,6220
-evolutionary_policy_optimization-0.0.63.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.0.63.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.0.63.dist-info/RECORD,,
+evolutionary_policy_optimization-0.0.64.dist-info/METADATA,sha256=vWdnTe2a86wTenEh29TNJlYEjD8A5CPtsyylxh4XsE0,6220
+evolutionary_policy_optimization-0.0.64.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.64.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.64.dist-info/RECORD,,