evolutionary-policy-optimization 0.0.51__tar.gz → 0.0.53__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (13)
  1. {evolutionary_policy_optimization-0.0.51 → evolutionary_policy_optimization-0.0.53}/PKG-INFO +1 -1
  2. {evolutionary_policy_optimization-0.0.51 → evolutionary_policy_optimization-0.0.53}/evolutionary_policy_optimization/epo.py +15 -3
  3. {evolutionary_policy_optimization-0.0.51 → evolutionary_policy_optimization-0.0.53}/pyproject.toml +1 -1
  4. {evolutionary_policy_optimization-0.0.51 → evolutionary_policy_optimization-0.0.53}/tests/test_epo.py +2 -1
  5. {evolutionary_policy_optimization-0.0.51 → evolutionary_policy_optimization-0.0.53}/.github/workflows/python-publish.yml +0 -0
  6. {evolutionary_policy_optimization-0.0.51 → evolutionary_policy_optimization-0.0.53}/.github/workflows/test.yml +0 -0
  7. {evolutionary_policy_optimization-0.0.51 → evolutionary_policy_optimization-0.0.53}/.gitignore +0 -0
  8. {evolutionary_policy_optimization-0.0.51 → evolutionary_policy_optimization-0.0.53}/LICENSE +0 -0
  9. {evolutionary_policy_optimization-0.0.51 → evolutionary_policy_optimization-0.0.53}/README.md +0 -0
  10. {evolutionary_policy_optimization-0.0.51 → evolutionary_policy_optimization-0.0.53}/evolutionary_policy_optimization/__init__.py +0 -0
  11. {evolutionary_policy_optimization-0.0.51 → evolutionary_policy_optimization-0.0.53}/evolutionary_policy_optimization/experimental.py +0 -0
  12. {evolutionary_policy_optimization-0.0.51 → evolutionary_policy_optimization-0.0.53}/evolutionary_policy_optimization/mock_env.py +0 -0
  13. {evolutionary_policy_optimization-0.0.51 → evolutionary_policy_optimization-0.0.53}/requirements.txt +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.51
+Version: 0.0.53
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
evolutionary_policy_optimization/epo.py
@@ -48,6 +48,9 @@ def divisible_by(num, den):
 def l2norm(t):
     return F.normalize(t, p = 2, dim = -1)
 
+def batch_randperm(shape, device):
+    return torch.randn(shape, device = device).argsort(dim = -1)
+
 def log(t, eps = 1e-20):
     return t.clamp(min = eps).log()
 
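For illustration, the new batch_randperm helper uses the argsort-of-noise trick: argsorting i.i.d. gaussian noise along the last dimension yields an independent uniform random permutation for every leading index. A minimal standalone sketch with a sanity check (the check is illustrative only, not part of the package):

import torch

def batch_randperm(shape, device):
    # argsort of i.i.d. gaussian noise gives a random permutation of
    # range(shape[-1]) independently for each slice along the last dim
    return torch.randn(shape, device = device).argsort(dim = -1)

perms = batch_randperm((4, 8), device = 'cpu')

# every row contains each index 0..7 exactly once
assert torch.equal(perms.sort(dim = -1).values, torch.arange(8).expand(4, 8))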
@@ -357,10 +360,11 @@ class LatentGenePool(Module):
         frac_natural_selected = 0.25,  # number of least fit genes to remove from the pool
         frac_elitism = 0.1,            # frac of population to preserve from being noised
         frac_migrate = 0.1,            # frac of population, excluding elites, that migrate between islands randomly. will use a designated set migration pattern (since for some reason using random it seems to be worse for me)
-        migrate_every = 100,           # how many steps before a migration between islands
         mutation_strength = 1.,        # factor to multiply to gaussian noise as mutation to latents
         should_run_genetic_algorithm: Module | None = None, # eq (3) in paper
         default_should_run_ga_gamma = 1.5,
+        migrate_every = 100,           # how many steps before a migration between islands
+        apply_genetic_algorithm_every = 2  # how many steps before crossover + mutation happens for genes
     ):
         super().__init__()
 
@@ -393,7 +397,6 @@ class LatentGenePool(Module):
 
         latents_per_island = num_latents // num_islands
         self.num_natural_selected = int(frac_natural_selected * latents_per_island)
-
         self.num_tournament_participants = int(frac_tournaments * self.num_natural_selected)
 
         self.crossover_random = crossover_random
@@ -411,7 +414,10 @@ class LatentGenePool(Module):
         self.should_run_genetic_algorithm = should_run_genetic_algorithm
 
         self.can_migrate = num_islands > 1
+
         self.migrate_every = migrate_every
+        self.apply_genetic_algorithm_every = apply_genetic_algorithm_every
+
         self.register_buffer('step', tensor(1))
 
     def get_distance(self):
@@ -481,6 +487,10 @@ class LatentGenePool(Module):
     ):
         device = self.latents.device
 
+        if not divisible_by(self.step.item(), self.apply_genetic_algorithm_every):
+            self.advance_step_()
+            return
+
         """
         i - islands
         p - population
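The effect of the new guard is that crossover and mutation only run on steps divisible by apply_genetic_algorithm_every; on other steps the step counter is advanced and the call returns early. Below is a minimal sketch of that gating pattern, assuming advance_step_ simply increments the registered step buffer (the simplified class is hypothetical, not the library's full module):

import torch
from torch import nn, tensor

def divisible_by(num, den):
    return (num % den) == 0

class GenePoolSketch(nn.Module):
    def __init__(self, apply_genetic_algorithm_every = 2):
        super().__init__()
        self.apply_genetic_algorithm_every = apply_genetic_algorithm_every
        self.register_buffer('step', tensor(1))

    def advance_step_(self):
        self.step.add_(1)  # in-place increment of the step buffer

    def genetic_algorithm_step(self):
        # skip crossover + mutation unless the step count lines up
        if not divisible_by(self.step.item(), self.apply_genetic_algorithm_every):
            self.advance_step_()
            return False

        # ... crossover / mutation would run here ...
        self.advance_step_()
        return True

pool = GenePoolSketch()
print([pool.genetic_algorithm_step() for _ in range(4)])  # [False, True, False, True]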
@@ -530,7 +540,9 @@ class LatentGenePool(Module):
 
         # 2. for finding pairs of parents to replete gene pool, we will go with the popular tournament strategy
 
-        rand_tournament_gene_ids = torch.randn((islands, pop_size_per_island - self.num_natural_selected, tournament_participants), device = device).argsort(dim = -1)
+        tournament_shape = (islands, pop_size_per_island - self.num_natural_selected, self.num_natural_selected) # (island, num children needed, natural selected population to be bred)
+
+        rand_tournament_gene_ids = batch_randperm(tournament_shape, device)[..., :tournament_participants]
         rand_tournament_gene_ids_for_gather = rearrange(rand_tournament_gene_ids, 'i p t -> i (p t)')
 
         participant_fitness = fitness.gather(1, rand_tournament_gene_ids_for_gather)
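As I read this change, the old expression argsorted noise whose last dimension was only tournament_participants wide, so participant indices could never exceed tournament_participants - 1 and tournaments only ever drew from the first few selected genes. The new code permutes the full natural-selected pool per (island, child) slot and keeps the first tournament_participants entries, sampling participants without replacement from the whole pool. A small illustrative comparison (the shapes and numbers here are made up for the example):

import torch

islands, num_children = 2, 6
num_natural_selected = 8      # selected genes available for breeding, per island
tournament_participants = 3   # participants per tournament

def batch_randperm(shape, device):
    return torch.randn(shape, device = device).argsort(dim = -1)

# old behavior: argsort noise of width `tournament_participants`,
# so indices only span 0 .. tournament_participants - 1
old_ids = torch.randn((islands, num_children, tournament_participants)).argsort(dim = -1)
print(old_ids.max().item())  # always <= 2 with these numbers

# new behavior: permute all natural-selected ids, then keep the first k,
# so participants are drawn (without replacement per slot) from all 8 genes
tournament_shape = (islands, num_children, num_natural_selected)
new_ids = batch_randperm(tournament_shape, device = 'cpu')[..., :tournament_participants]
print(new_ids.unique())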
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "evolutionary-policy-optimization"
-version = "0.0.51"
+version = "0.0.53"
 description = "EPO - Pytorch"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
tests/test_epo.py
@@ -78,7 +78,8 @@ def test_create_agent(
 @pytest.mark.parametrize('diversity_aux_loss_weight', (0., 1e-3))
 def test_e2e_with_mock_env(
     frozen_latents,
-    use_critic_ema
+    use_critic_ema,
+    diversity_aux_loss_weight
 ):
     from evolutionary_policy_optimization import create_agent, EPO, Env
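For reference, this test change adds the diversity_aux_loss_weight argument that the existing parametrize decorator supplies; pytest errors at collection time when a parametrized name has no matching function argument. A toy illustration of the pattern (not taken from the package's test suite):

import pytest

@pytest.mark.parametrize('frozen_latents', (False, True))
@pytest.mark.parametrize('diversity_aux_loss_weight', (0., 1e-3))
def test_example(frozen_latents, diversity_aux_loss_weight):
    # pytest runs one test case per combination of parametrized values
    assert diversity_aux_loss_weight >= 0.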