evolutionary-policy-optimization 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolutionary_policy_optimization/epo.py +22 -1
- {evolutionary_policy_optimization-0.1.17.dist-info → evolutionary_policy_optimization-0.1.19.dist-info}/METADATA +1 -1
- {evolutionary_policy_optimization-0.1.17.dist-info → evolutionary_policy_optimization-0.1.19.dist-info}/RECORD +5 -5
- {evolutionary_policy_optimization-0.1.17.dist-info → evolutionary_policy_optimization-0.1.19.dist-info}/WHEEL +0 -0
- {evolutionary_policy_optimization-0.1.17.dist-info → evolutionary_policy_optimization-0.1.19.dist-info}/licenses/LICENSE +0 -0
@@ -392,6 +392,8 @@ class MLP(Module):
|
|
392
392
|
|
393
393
|
self.layers = ModuleList(layers)
|
394
394
|
|
395
|
+
self.final_lime = DynamicLIMe(dim, depth + 1)
|
396
|
+
|
395
397
|
def forward(
|
396
398
|
self,
|
397
399
|
x,
|
@@ -430,7 +432,7 @@ class MLP(Module):
|
|
430
432
|
|
431
433
|
prev_layer_inputs.append(x)
|
432
434
|
|
433
|
-
return x
|
435
|
+
return self.final_lime(x, prev_layer_inputs)
|
434
436
|
|
435
437
|
# actor, critic, and agent (actor + critic)
|
436
438
|
# eventually, should just create a separate repo and aggregate all the MLP related architectures
|
@@ -1066,6 +1068,12 @@ class Agent(Module):
|
|
1066
1068
|
def unwrapped_latent_gene_pool(self):
|
1067
1069
|
return self.unwrap_model(self.latent_gene_pool)
|
1068
1070
|
|
1071
|
+
def log(self, **data_kwargs):
|
1072
|
+
if not self.wrap_with_accelerate:
|
1073
|
+
return
|
1074
|
+
|
1075
|
+
self.accelerate.log(data_kwargs, step = self.step)
|
1076
|
+
|
1069
1077
|
def save(self, path, overwrite = False):
|
1070
1078
|
path = Path(path)
|
1071
1079
|
|
@@ -1281,6 +1289,14 @@ class Agent(Module):
|
|
1281
1289
|
self.critic_optim.step()
|
1282
1290
|
self.critic_optim.zero_grad()
|
1283
1291
|
|
1292
|
+
# log actor critic loss
|
1293
|
+
|
1294
|
+
self.log(
|
1295
|
+
actor_loss = actor_loss.item(),
|
1296
|
+
critic_loss = critic_loss.item(),
|
1297
|
+
fitness_scores = fitness_scores
|
1298
|
+
)
|
1299
|
+
|
1284
1300
|
# maybe ema update critic
|
1285
1301
|
|
1286
1302
|
if self.use_critic_ema:
|
@@ -1305,6 +1321,11 @@ class Agent(Module):
|
|
1305
1321
|
self.latent_optim.step()
|
1306
1322
|
self.latent_optim.zero_grad()
|
1307
1323
|
|
1324
|
+
if self.has_diversity_loss:
|
1325
|
+
self.log(
|
1326
|
+
diversity_loss = diversity_loss.item()
|
1327
|
+
)
|
1328
|
+
|
1308
1329
|
# apply evolution
|
1309
1330
|
|
1310
1331
|
if self.has_latent_genes:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: evolutionary-policy-optimization
|
3
|
-
Version: 0.1.17
|
3
|
+
Version: 0.1.19
|
4
4
|
Summary: EPO - Pytorch
|
5
5
|
Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
|
6
6
|
Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
|
@@ -1,10 +1,10 @@
|
|
1
1
|
evolutionary_policy_optimization/__init__.py,sha256=NyiYDYU7DlpmOTM7xiBQET3r1WwX0ebrgMCBLSQrW3c,288
|
2
2
|
evolutionary_policy_optimization/distributed.py,sha256=7KgZdeS_wxBHo_du9XZFB1Cu318J-Bp66Xdr6Log_20,2423
|
3
3
|
evolutionary_policy_optimization/env_wrappers.py,sha256=bDL06o9_b1iW6k3fw2xifnOnYlzs643tdW6Yv2gsIdw,803
|
4
|
-
evolutionary_policy_optimization/epo.py,sha256=
|
4
|
+
evolutionary_policy_optimization/epo.py,sha256=JzZdNbzerIMgPg6dlL4eLNJ9_LbmW0xNkgQrgSNoSKA,49084
|
5
5
|
evolutionary_policy_optimization/experimental.py,sha256=-IgqjJ_Wk_CMB1y9YYWpoYqTG9GZHAS6kbRdTluVevg,1563
|
6
6
|
evolutionary_policy_optimization/mock_env.py,sha256=TLyyRm6tOD0Kdn9QqJJQriaSnsR-YmNQHo4OohmZFG4,1410
|
7
|
-
evolutionary_policy_optimization-0.1.
|
8
|
-
evolutionary_policy_optimization-0.1.17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
9
|
-
evolutionary_policy_optimization-0.1.17.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
|
10
|
-
evolutionary_policy_optimization-0.1.17.dist-info/RECORD,,
|
7
|
+
evolutionary_policy_optimization-0.1.19.dist-info/METADATA,sha256=Jmbgx_z8dJv1W-FQVRJCI14MW2Tv7wbc4VhP_YN3WNw,7979
|
8
|
+
evolutionary_policy_optimization-0.1.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
9
|
+
evolutionary_policy_optimization-0.1.19.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
|
10
|
+
evolutionary_policy_optimization-0.1.19.dist-info/RECORD,,
|
File without changes
|