evolutionary-policy-optimization 0.1.6__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (16)
  1. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/PKG-INFO +1 -1
  2. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/evolutionary_policy_optimization/epo.py +8 -5
  3. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/pyproject.toml +1 -1
  4. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/tests/test_epo.py +2 -1
  5. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/.github/workflows/python-publish.yml +0 -0
  6. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/.github/workflows/test.yml +0 -0
  7. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/.gitignore +0 -0
  8. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/LICENSE +0 -0
  9. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/README.md +0 -0
  10. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/evolutionary_policy_optimization/__init__.py +0 -0
  11. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/evolutionary_policy_optimization/distributed.py +0 -0
  12. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/evolutionary_policy_optimization/env_wrappers.py +0 -0
  13. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/evolutionary_policy_optimization/experimental.py +0 -0
  14. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/evolutionary_policy_optimization/mock_env.py +0 -0
  15. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/requirements.txt +0 -0
  16. {evolutionary_policy_optimization-0.1.6 → evolutionary_policy_optimization-0.1.7}/train_gym.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.1.6
+Version: 0.1.7
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
evolutionary_policy_optimization/epo.py
@@ -369,7 +369,6 @@ class Critic(Module):
         hl_gauss_loss = self.to_pred.hl_gauss_loss

         self.maybe_bins_to_value = hl_gauss_loss if not use_regression else identity
-        self.maybe_value_to_bins = hl_gauss_loss.transform_to_logprobs if not use_regression else identity
         self.loss_fn = hl_gauss_loss if not use_regression else F.mse_loss

     def forward_for_loss(
@@ -386,7 +385,7 @@ class Critic(Module):

         clipped_value = old_values + (value - old_values).clamp(1. - eps_clip, 1. + eps_clip)

-        loss = self.loss_fn(value, target, reduction = 'none')
+        loss = self.loss_fn(logits, target, reduction = 'none')
         clipped_loss = self.loss_fn(clipped_value, target, reduction = 'none')

         return torch.max(loss, clipped_loss).mean()
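
The substantive change in the Critic is the second hunk: when the critic uses the HL-Gauss (classification-over-value-bins) head rather than plain regression, self.loss_fn is the distributional loss and operates on raw bin logits, so feeding it the decoded scalar value was passing the wrong tensor; 0.1.7 passes logits instead, while the regression branch (F.mse_loss) is unaffected since it genuinely wants the scalar. The sketch below illustrates that distinction; the loss helper, bin layout, and all names are illustrative stand-ins, not the package's or any HL-Gauss library's actual API.

import torch

def hl_gauss_loss(logits, target, bin_edges, sigma = 0.1):
    # HL-Gauss style soft labels: probability mass of N(target, sigma^2) falling in each bin
    cdf = torch.distributions.Normal(target[:, None], sigma).cdf(bin_edges)
    probs = cdf[:, 1:] - cdf[:, :-1]
    probs = probs / probs.sum(dim = -1, keepdim = True).clamp(min = 1e-8)
    # per-sample cross entropy against the soft labels (reduction = 'none')
    return -(probs * logits.log_softmax(dim = -1)).sum(dim = -1)

bin_edges = torch.linspace(-2., 2., steps = 33)                    # 32 value bins
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

logits = torch.randn(4, 32, requires_grad = True)                  # raw critic head output
value = (logits.softmax(dim = -1) * bin_centers).sum(dim = -1)     # decoded scalar value

target = torch.randn(4).clamp(-1.5, 1.5)

loss = hl_gauss_loss(logits, target, bin_edges)   # correct: the distributional loss wants logits
loss.mean().backward()                            # passing the decoded value here is the mistake 0.1.7 removes
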
@@ -844,7 +843,11 @@ class Agent(Module):

         dummy = tensor(0)

+        self.clip_grad_norm_ = nn.utils.clip_grad_norm_
+
         if wrap_with_accelerate:
+            self.clip_grad_norm_ = self.accelerate.clip_grad_norm_
+
             (
                 self.actor,
                 self.critic,
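
The constructor change above sets up a small dispatch: gradient clipping defaults to torch's nn.utils.clip_grad_norm_ and is only swapped for the Accelerator's clip_grad_norm_ when wrap_with_accelerate is set, so the rest of the training code never has to assume an accelerator exists. A rough sketch of the same pattern, with an illustrative class and constructor rather than the package's Agent signature:

import torch
from torch import nn
from accelerate import Accelerator

class TrainerSketch(nn.Module):
    def __init__(self, model, wrap_with_accelerate = True, max_grad_norm = 0.5):
        super().__init__()
        self.model = model
        self.max_grad_norm = max_grad_norm

        # default: plain torch clipping, usable with or without accelerate
        self.clip_grad_norm_ = nn.utils.clip_grad_norm_

        if wrap_with_accelerate:
            self.accelerate = Accelerator()
            self.model = self.accelerate.prepare(self.model)
            # accelerate's variant additionally unscales gradients under mixed precision
            self.clip_grad_norm_ = self.accelerate.clip_grad_norm_

    def clip(self):
        # call sites stay identical regardless of which backend was chosen
        self.clip_grad_norm_(self.model.parameters(), self.max_grad_norm)
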
@@ -1071,7 +1074,7 @@ class Agent(Module):
         actor_loss.backward()

         if exists(self.has_grad_clip):
-            self.accelerate.clip_grad_norm_(self.actor.parameters(), self.max_grad_norm)
+            self.clip_grad_norm_(self.actor.parameters(), self.max_grad_norm)

         self.actor_optim.step()
         self.actor_optim.zero_grad()
@@ -1089,7 +1092,7 @@ class Agent(Module):
         critic_loss.backward()

         if exists(self.has_grad_clip):
-            self.accelerate.clip_grad_norm_(self.critic.parameters(), self.max_grad_norm)
+            self.clip_grad_norm_(self.critic.parameters(), self.max_grad_norm)

         self.critic_optim.step()
         self.critic_optim.zero_grad()
@@ -1113,7 +1116,7 @@ class Agent(Module):
         (diversity_loss * self.diversity_aux_loss_weight).backward()

         if exists(self.has_grad_clip):
-            self.accelerate.clip_grad_norm_(self.latent_gene_pool.parameters(), self.max_grad_norm)
+            self.clip_grad_norm_(self.latent_gene_pool.parameters(), self.max_grad_norm)

         self.latent_optim.step()
         self.latent_optim.zero_grad()
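
The three hunks above are the matching call-site change: the actor, critic, and latent-pool updates all clip through the pre-selected self.clip_grad_norm_ rather than reaching for self.accelerate directly, which is what lets wrap_with_accelerate = False work. The repeated backward / optional clip / step / zero_grad sequence could be factored as below; this is a sketch with made-up helper names, not code from the package:

import torch
from torch import nn

def optim_step(loss, module, optimizer, clip_grad_norm_, max_grad_norm = None):
    loss.backward()

    if max_grad_norm is not None:          # plays the role of exists(self.has_grad_clip)
        clip_grad_norm_(module.parameters(), max_grad_norm)

    optimizer.step()
    optimizer.zero_grad()

# same helper whether clip_grad_norm_ is torch's or an Accelerator's bound method
net = nn.Linear(4, 1)
opt = torch.optim.Adam(net.parameters())
optim_step(net(torch.randn(8, 4)).pow(2).mean(), net, opt, nn.utils.clip_grad_norm_, 0.5)
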
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "evolutionary-policy-optimization"
-version = "0.1.6"
+version = "0.1.7"
 description = "EPO - Pytorch"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
tests/test_epo.py
@@ -103,7 +103,8 @@ def test_e2e_with_mock_env(
             frozen_latents = frozen_latents,
             frac_natural_selected = 0.75,
             frac_tournaments = 0.9
-        )
+        ),
+        wrap_with_accelerate = False
     )

     epo = EPO(
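
The test tweak passes wrap_with_accelerate = False into the agent construction inside test_e2e_with_mock_env, so the end-to-end test now exercises the plain-torch clipping path without spinning up an Accelerator. A self-contained illustration of covering that path, using hypothetical names rather than the repo's actual test helpers:

import torch
from torch import nn

class TinyAgent(nn.Module):
    def __init__(self, wrap_with_accelerate = True):
        super().__init__()
        self.net = nn.Linear(4, 1)
        # default to torch's clipper; only reach for accelerate when asked to
        self.clip_grad_norm_ = nn.utils.clip_grad_norm_

        if wrap_with_accelerate:
            from accelerate import Accelerator
            self.accelerate = Accelerator()
            self.clip_grad_norm_ = self.accelerate.clip_grad_norm_

def test_clip_without_accelerate():
    agent = TinyAgent(wrap_with_accelerate = False)
    agent.net(torch.randn(2, 4)).sum().backward()
    # plain torch path: no Accelerator is constructed anywhere in this test
    agent.clip_grad_norm_(agent.net.parameters(), max_norm = 0.5)
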