evolutionary-policy-optimization 0.1.6__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/PKG-INFO +1 -1
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/evolutionary_policy_optimization/epo.py +8 -5
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/pyproject.toml +1 -1
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/tests/test_epo.py +2 -1
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/.github/workflows/python-publish.yml +0 -0
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/.github/workflows/test.yml +0 -0
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/.gitignore +0 -0
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/LICENSE +0 -0
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/README.md +0 -0
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/evolutionary_policy_optimization/__init__.py +0 -0
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/evolutionary_policy_optimization/distributed.py +0 -0
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/evolutionary_policy_optimization/env_wrappers.py +0 -0
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/evolutionary_policy_optimization/experimental.py +0 -0
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/evolutionary_policy_optimization/mock_env.py +0 -0
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/requirements.txt +0 -0
- {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/train_gym.py +0 -0
{evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/PKG-INFO CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.1.6
+Version: 0.1.7
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
{evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/evolutionary_policy_optimization/epo.py CHANGED

@@ -369,7 +369,6 @@ class Critic(Module):
         hl_gauss_loss = self.to_pred.hl_gauss_loss

         self.maybe_bins_to_value = hl_gauss_loss if not use_regression else identity
-        self.maybe_value_to_bins = hl_gauss_loss.transform_to_logprobs if not use_regression else identity
         self.loss_fn = hl_gauss_loss if not use_regression else F.mse_loss

     def forward_for_loss(
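For context, a minimal hypothetical sketch of the dispatch this constructor sets up: when the critic is distributional, bin logits are reduced to a scalar value as an expectation over bin centers, otherwise predictions pass through unchanged. The names (bin_values, bins_to_scalar) and the cross-entropy stand-in are illustrative assumptions, not the hl-gauss-pytorch API.

import torch
import torch.nn.functional as F

def identity(t, *args, **kwargs):
    return t

use_regression = False                                 # mirrors the constructor flag
bin_values = torch.linspace(-10., 10., steps = 51)     # illustrative value support

def bins_to_scalar(logits):
    # expected value of the predicted categorical distribution over bin centers
    return (logits.softmax(dim = -1) * bin_values).sum(dim = -1)

maybe_bins_to_value = identity if use_regression else bins_to_scalar
loss_fn = F.mse_loss if use_regression else F.cross_entropy   # cross-entropy over bins as a stand-in

logits = torch.randn(4, 51)
values = maybe_bins_to_value(logits)                   # shape (4,)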
@@ -386,7 +385,7 @@ class Critic(Module):

         clipped_value = old_values + (value - old_values).clamp(1. - eps_clip, 1. + eps_clip)

-        loss = self.loss_fn(
+        loss = self.loss_fn(logits, target, reduction = 'none')
         clipped_loss = self.loss_fn(clipped_value, target, reduction = 'none')

         return torch.max(loss, clipped_loss).mean()
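As a reference point, a minimal sketch of this clipped value-loss pattern in the plain regression case: both the raw and the clipped predictions are scored with reduction = 'none', and the element-wise maximum is averaged. The function name, the symmetric clamp, and the eps_clip value are illustrative assumptions, not the library's exact implementation.

import torch
import torch.nn.functional as F

def clipped_value_loss(values, old_values, target, eps_clip = 0.4):
    # keep the new value prediction within eps_clip of the old one
    clipped_values = old_values + (values - old_values).clamp(-eps_clip, eps_clip)

    # per-element losses so the pessimistic max can be taken before reduction
    loss = F.mse_loss(values, target, reduction = 'none')
    clipped_loss = F.mse_loss(clipped_values, target, reduction = 'none')

    # take the worse of the two losses per element, then average
    return torch.max(loss, clipped_loss).mean()

values = torch.randn(8)
loss = clipped_value_loss(values, values.detach() + 0.1, torch.zeros(8))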
@@ -844,7 +843,11 @@ class Agent(Module):

         dummy = tensor(0)

+        self.clip_grad_norm_ = nn.utils.clip_grad_norm_
+
         if wrap_with_accelerate:
+            self.clip_grad_norm_ = self.accelerate.clip_grad_norm_
+
             (
                 self.actor,
                 self.critic,
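A hedged sketch of the pattern above: a single clip_grad_norm_ attribute defaults to plain PyTorch clipping and is swapped for the Accelerate-aware version when training is wrapped with Accelerate. The toy model and flag below are assumptions for illustration only.

import torch
from torch import nn

model = nn.Linear(4, 1)              # hypothetical stand-in for the actor/critic
use_accelerate = False               # flip to True when training under HF Accelerate

clip_grad_norm_ = nn.utils.clip_grad_norm_           # plain PyTorch default

if use_accelerate:
    from accelerate import Accelerator
    accelerator = Accelerator()
    model = accelerator.prepare(model)
    clip_grad_norm_ = accelerator.clip_grad_norm_    # Accelerate-aware clipping

loss = model(torch.randn(2, 4)).sum()
loss.backward()
clip_grad_norm_(model.parameters(), max_norm = 0.5)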
@@ -1071,7 +1074,7 @@ class Agent(Module):
             actor_loss.backward()

             if exists(self.has_grad_clip):
-                self.
+                self.clip_grad_norm_(self.actor.parameters(), self.max_grad_norm)

             self.actor_optim.step()
             self.actor_optim.zero_grad()
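This actor update and the critic and latent updates below share the same backward → clip → step → zero_grad sequence; a minimal generic sketch, with a toy model and optimiser standing in for the actor, critic and latent pool (names are illustrative, not the library's):

import torch
from torch import nn

model = nn.Linear(4, 1)                              # hypothetical stand-in network
optim = torch.optim.Adam(model.parameters(), lr = 1e-3)
max_grad_norm = 0.5                                  # illustrative clip threshold

loss = model(torch.randn(2, 4)).pow(2).mean()
loss.backward()                                      # accumulate gradients

nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)   # clip before stepping

optim.step()                                         # apply the update
optim.zero_grad()                                    # clear gradients for the next batch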
@@ -1089,7 +1092,7 @@ class Agent(Module):
             critic_loss.backward()

             if exists(self.has_grad_clip):
-                self.
+                self.clip_grad_norm_(self.critic.parameters(), self.max_grad_norm)

             self.critic_optim.step()
             self.critic_optim.zero_grad()
@@ -1113,7 +1116,7 @@ class Agent(Module):
             (diversity_loss * self.diversity_aux_loss_weight).backward()

             if exists(self.has_grad_clip):
-                self.
+                self.clip_grad_norm_(self.latent_gene_pool.parameters(), self.max_grad_norm)

             self.latent_optim.step()
             self.latent_optim.zero_grad()