evolutionary-policy-optimization 0.1.17-py3-none-any.whl → 0.1.19-py3-none-any.whl

This diff compares two publicly released versions of the package as published to their public registry, and is provided for informational purposes only.
evolutionary_policy_optimization/epo.py

@@ -392,6 +392,8 @@ class MLP(Module):
 
         self.layers = ModuleList(layers)
 
+        self.final_lime = DynamicLIMe(dim, depth + 1)
+
     def forward(
         self,
         x,
@@ -430,7 +432,7 @@ class MLP(Module):
 
         prev_layer_inputs.append(x)
 
-        return x
+        return self.final_lime(x, prev_layer_inputs)
 
 # actor, critic, and agent (actor + critic)
 # eventually, should just create a separate repo and aggregate all the MLP related architectures
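The new DynamicLIMe module is constructed over depth + 1 inputs and applied to x together with prev_layer_inputs, so the MLP now returns a learned combination of the block input and every layer's output rather than just the final hidden state. Its definition is not included in this diff; the sketch below is only one plausible implementation, assuming it predicts per-token softmax weights over the collected hidden states:

import torch
from torch import nn

class DynamicLIMe(nn.Module):
    # sketch: mix all previous layer inputs with learned, per-token softmax weights
    def __init__(self, dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        self.to_weights = nn.Sequential(
            nn.Linear(dim, num_layers),
            nn.Softmax(dim = -1)
        )

    def forward(self, x, hiddens):
        # hiddens - list of `num_layers` tensors, each shaped like x
        assert len(hiddens) == self.num_layers

        stacked = torch.stack(hiddens)              # (layers, ..., dim)
        weights = self.to_weights(x)                # (..., layers)
        return torch.einsum('l...d,...l->...d', stacked, weights)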
@@ -1066,6 +1068,12 @@ class Agent(Module):
     def unwrapped_latent_gene_pool(self):
         return self.unwrap_model(self.latent_gene_pool)
 
+    def log(self, **data_kwargs):
+        if not self.wrap_with_accelerate:
+            return
+
+        self.accelerate.log(data_kwargs, step = self.step)
+
     def save(self, path, overwrite = False):
         path = Path(path)
 
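The wrap_with_accelerate flag and the log(..., step = ...) signature point to a Hugging Face Accelerator behind self.accelerate; Accelerator.log forwards a dict of values to whatever experiment trackers were initialized, and does nothing when none are. A minimal, illustrative setup showing where the new log calls end up (the tracker choice and project name below are assumptions, not part of the package):

from accelerate import Accelerator

# illustrative setup: route Accelerator.log(...) calls to TensorBoard
accelerator = Accelerator(log_with = "tensorboard", project_dir = "./logs")
accelerator.init_trackers("epo")

# what a single Agent.log(actor_loss = ..., critic_loss = ...) call reduces to
accelerator.log(dict(actor_loss = 0.5, critic_loss = 1.2), step = 0)

accelerator.end_training()  # flush and close the trackers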
@@ -1281,6 +1289,14 @@ class Agent(Module):
         self.critic_optim.step()
         self.critic_optim.zero_grad()
 
+        # log actor critic loss
+
+        self.log(
+            actor_loss = actor_loss.item(),
+            critic_loss = critic_loss.item(),
+            fitness_scores = fitness_scores
+        )
+
         # maybe ema update critic
 
         if self.use_critic_ema:
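Note that, unlike the two losses, fitness_scores is logged without .item(), so it reaches the tracker as a tensor over the gene pool rather than as a scalar. Backends that accept only scalars would need it flattened first, for example with a hypothetical helper like the one sketched here (not in the package):

# hypothetical helper: flatten a 1-D tensor of per-gene fitness values into
# scalar entries that scalar-only trackers can chart individually
def fitness_to_scalars(fitness_scores):
    return {f'fitness_gene_{i}': score.item() for i, score in enumerate(fitness_scores)}

# illustrative usage, replacing the raw fitness_scores kwarg above:
# self.log(actor_loss = actor_loss.item(), critic_loss = critic_loss.item(), **fitness_to_scalars(fitness_scores))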
@@ -1305,6 +1321,11 @@ class Agent(Module):
         self.latent_optim.step()
         self.latent_optim.zero_grad()
 
+        if self.has_diversity_loss:
+            self.log(
+                diversity_loss = diversity_loss.item()
+            )
+
         # apply evolution
 
         if self.has_latent_genes:
METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.1.17
+Version: 0.1.19
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
RECORD

@@ -1,10 +1,10 @@
 evolutionary_policy_optimization/__init__.py,sha256=NyiYDYU7DlpmOTM7xiBQET3r1WwX0ebrgMCBLSQrW3c,288
 evolutionary_policy_optimization/distributed.py,sha256=7KgZdeS_wxBHo_du9XZFB1Cu318J-Bp66Xdr6Log_20,2423
 evolutionary_policy_optimization/env_wrappers.py,sha256=bDL06o9_b1iW6k3fw2xifnOnYlzs643tdW6Yv2gsIdw,803
-evolutionary_policy_optimization/epo.py,sha256=TW-9l9oRN8XQZxSeG5Glkk4rWuxO9JOjjRJO7hJgHZs,48433
+evolutionary_policy_optimization/epo.py,sha256=JzZdNbzerIMgPg6dlL4eLNJ9_LbmW0xNkgQrgSNoSKA,49084
 evolutionary_policy_optimization/experimental.py,sha256=-IgqjJ_Wk_CMB1y9YYWpoYqTG9GZHAS6kbRdTluVevg,1563
 evolutionary_policy_optimization/mock_env.py,sha256=TLyyRm6tOD0Kdn9QqJJQriaSnsR-YmNQHo4OohmZFG4,1410
-evolutionary_policy_optimization-0.1.17.dist-info/METADATA,sha256=-2Lt54pfQkw2_LAwB8pjcvLsc4vSMVQjAt-ZiObCRdw,7979
-evolutionary_policy_optimization-0.1.17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.1.17.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.1.17.dist-info/RECORD,,
+evolutionary_policy_optimization-0.1.19.dist-info/METADATA,sha256=Jmbgx_z8dJv1W-FQVRJCI14MW2Tv7wbc4VhP_YN3WNw,7979
+evolutionary_policy_optimization-0.1.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.1.19.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.1.19.dist-info/RECORD,,