PyPI - evolutionary-policy-optimization - Versions diffs - 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl - Mend

evolutionary-policy-optimization 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

evolutionary_policy_optimization/epo.py CHANGED Viewed

@@ -303,7 +303,6 @@ class LatentGenePool(Module):
         self,
         num_latents,                     # same as gene pool size
         dim_latent,                      # gene dimension
-        num_latent_sets = 1,             # allow for sets of latents / gene per individual, expression of a set controlled by the environment
         num_islands = 1,                 # add the island strategy, which has been effectively used in a few recent works
         dim_state = None,
         frozen_latents = True,
@@ -312,6 +311,7 @@ class LatentGenePool(Module):
         frac_tournaments = 0.25,         # fraction of genes to participate in tournament - the lower the value, the more chance a less fit gene could be selected
         frac_natural_selected = 0.25,    # number of least fit genes to remove from the pool
         frac_elitism = 0.1,              # frac of population to preserve from being noised
+        frac_migrate = 0.1,              # frac of population, excluding elites, that migrate between islands randomly. will use a designated set migration pattern (since for some reason using random it seems to be worse for me)
         mutation_strength = 1.,          # factor to multiply to gaussian noise as mutation to latents
         should_run_genetic_algorithm: Module | None = None, # eq (3) in paper
         default_should_run_ga_gamma = 1.5
@@ -320,29 +320,17 @@ class LatentGenePool(Module):
         maybe_l2norm = l2norm if l2norm_latent else identity
-        latents = torch.randn(num_latents, num_latent_sets, dim_latent)
+        latents = torch.randn(num_latents, dim_latent)
         if l2norm_latent:
             latents = maybe_l2norm(latents, dim = -1)
         self.num_latents = num_latents
-        self.needs_latent_gate = num_latent_sets > 1
+        self.frozen_latents = frozen_latents
         self.latents = nn.Parameter(latents, requires_grad = not frozen_latents)
         self.maybe_l2norm = maybe_l2norm
-        # gene expression as a function of environment
-        self.num_latent_sets = num_latent_sets
-        if self.needs_latent_gate:
-            assert exists(dim_state), '`dim_state` must be passed in if using gated gene expression'
-        self.to_latent_gate = nn.Sequential(
-            Linear(dim_state, num_latent_sets),
-            nn.Softmax(dim = -1)
-        ) if self.needs_latent_gate else None
         # some derived values
         assert num_islands >= 1
@@ -361,12 +349,16 @@ class LatentGenePool(Module):
         self.num_natural_selected = int(frac_natural_selected * latents_per_island)
         self.num_tournament_participants = int(frac_tournaments * self.num_natural_selected)
         self.crossover_random  = crossover_random
         self.mutation_strength = mutation_strength
         self.num_elites = int(frac_elitism * latents_per_island)
         self.has_elites = self.num_elites > 0
+        latents_without_elites = num_latents - self.num_elites
+        self.num_migrate = int(frac_migrate * latents_without_elites)
         if not exists(should_run_genetic_algorithm):
             should_run_genetic_algorithm = ShouldRunGeneticAlgorithm(gamma = default_should_run_ga_gamma)
@@ -378,7 +370,6 @@ class LatentGenePool(Module):
         beta0 = 2.,           # exploitation factor, moving fireflies of low light intensity to high
         gamma = 1.,           # controls light intensity decay over distance - setting this to zero will make firefly equivalent to vanilla PSO
         alpha = 0.1,          # exploration factor
-        alpha_decay = 0.995,  # exploration decay each step
         inplace = True,
     ):
         islands = self.num_islands
@@ -424,8 +415,11 @@ class LatentGenePool(Module):
     def genetic_algorithm_step(
         self,
         fitness, # Float['p'],
-        inplace = True
+        inplace = True,
+        migrate = False # trigger a migration in the setting of multiple islands, the loop outside will need to have some `migrate_every` hyperparameter
     ):
+        device = self.latents.device
         """
         i - islands
         p - population
@@ -460,7 +454,7 @@ class LatentGenePool(Module):
             return genes
-        genes = rearrange(genes, '(i p) n g -> i p n g', i = islands)
+        genes = rearrange(genes, '(i p) ... -> i p ...', i = islands)
         orig_genes = genes
@@ -469,13 +463,13 @@ class LatentGenePool(Module):
         sorted_indices = fitness.sort(dim = -1).indices
         natural_selected_indices = sorted_indices[..., -self.num_natural_selected:]
-        natural_select_gene_indices = repeat(natural_selected_indices, '... -> ... n g', n = genes.shape[-2], g = genes.shape[-1])
+        natural_select_gene_indices = repeat(natural_selected_indices, '... -> ... g', g = genes.shape[-1])
         genes, fitness = genes.gather(1, natural_select_gene_indices), fitness.gather(1, natural_selected_indices)
         # 2. for finding pairs of parents to replete gene pool, we will go with the popular tournament strategy
-        rand_tournament_gene_ids = torch.randn((islands, pop_size_per_island - self.num_natural_selected, tournament_participants)).argsort(dim = -1)
+        rand_tournament_gene_ids = torch.randn((islands, pop_size_per_island - self.num_natural_selected, tournament_participants), device = device).argsort(dim = -1)
         rand_tournament_gene_ids_for_gather = rearrange(rand_tournament_gene_ids, 'i p t -> i (p t)')
         participant_fitness = fitness.gather(1, rand_tournament_gene_ids_for_gather)
@@ -484,7 +478,7 @@ class LatentGenePool(Module):
         parent_indices_at_tournament = participant_fitness.topk(2, dim = -1).indices
         parent_gene_ids = rand_tournament_gene_ids.gather(-1, parent_indices_at_tournament)
-        parent_gene_ids_for_gather = repeat(parent_gene_ids, 'i p parents -> i (p parents) n g', n = genes.shape[-2], g = genes.shape[-1])
+        parent_gene_ids_for_gather = repeat(parent_gene_ids, 'i p parents -> i (p parents) g', g = genes.shape[-1])
         parents = genes.gather(1, parent_gene_ids_for_gather)
         parents = rearrange(parents, 'i (p parents) ... -> i p parents ...', parents = 2)
@@ -507,6 +501,16 @@ class LatentGenePool(Module):
         genes = mutation(genes, mutation_strength = self.mutation_strength)
+        # 6. maybe migration
+        if migrate:
+            assert self.num_islands > 1
+            randperm = torch.randn(genes.shape[:-1], device = device).argsort(dim = -1)
+            migrate_mask = randperm < self.num_migrate
+            maybe_migrated_genes = torch.roll(genes, 1, dims = 0)
+            genes = einx.where('i p, i p g, i p g', migrate_mask, maybe_migrated_genes, genes)
         # add back the elites
         if self.has_elites:
@@ -555,22 +559,6 @@ class LatentGenePool(Module):
         latent = self.latents[latent_id]
-        if self.needs_latent_gate:
-            assert exists(state), 'state must be passed in if greater than number of 1 latent set'
-            if not fetching_multiple_latents:
-                latent = repeat(latent, '... -> b ...', b = state.shape[0])
-            assert latent.shape[0] == state.shape[0]
-            gates = self.to_latent_gate(state)
-            latent = einsum(latent, gates, 'b n g, b n -> b g')
-        elif fetching_multiple_latents:
-            latent = latent[:, 0]
-        else:
-            latent = latent[0]
         latent = self.maybe_l2norm(latent)
         if not exists(net):
@@ -612,7 +600,7 @@ class Agent(Module):
         self.actor_optim = optim_klass(actor.parameters(), lr = actor_lr, **actor_optim_kwargs)
         self.critic_optim = optim_klass(critic.parameters(), lr = critic_lr, **critic_optim_kwargs)
-        self.latent_optim = optim_klass(latent_gene_pool.parameters(), lr = latent_lr, **latent_optim_kwargs) if latent_gene_pool.needs_latent_gate else None
+        self.latent_optim = optim_klass(latent_gene_pool.parameters(), lr = latent_lr, **latent_optim_kwargs) if not latent_gene_pool.frozen_latents else None
     def get_actor_actions(
         self,
@@ -687,7 +675,6 @@ def create_agent(
     actor_num_actions,
     actor_dim_hiddens: int | tuple[int, ...],
     critic_dim_hiddens: int | tuple[int, ...],
-    num_latent_sets = 1
 ) -> Agent:
     actor = Actor(
@@ -707,7 +694,6 @@ def create_agent(
         dim_state = dim_state,
         num_latents = num_latents,
         dim_latent = dim_latent,
-        num_latent_sets = num_latent_sets
     )
     return Agent(actor = actor, critic = critic, latent_gene_pool = latent_gene_pool)

{evolutionary_policy_optimization-0.0.26.dist-info → evolutionary_policy_optimization-0.0.28.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.26
+Version: 0.0.28
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization

evolutionary_policy_optimization-0.0.28.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+evolutionary_policy_optimization/__init__.py,sha256=Qavcia0n13jjaWIS_LPW7QrxSLT_BBeKujCjF9kQjbA,133
+evolutionary_policy_optimization/epo.py,sha256=GckXFGdRoZT149cOlMqLUVe9oXr1QXP-gPZTv4H_HFU,21692
+evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
+evolutionary_policy_optimization-0.0.28.dist-info/METADATA,sha256=Fn846Lxaxo_OrXFD-_8IECOJ9fZL2JosriGUKMO0CfQ,4958
+evolutionary_policy_optimization-0.0.28.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.28.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.28.dist-info/RECORD,,

evolutionary_policy_optimization-0.0.26.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-evolutionary_policy_optimization/__init__.py,sha256=Qavcia0n13jjaWIS_LPW7QrxSLT_BBeKujCjF9kQjbA,133
-evolutionary_policy_optimization/epo.py,sha256=zYKRKUkvFdxgHkc2yduN76Hph3asWX33mnpDF3isDfo,22019
-evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
-evolutionary_policy_optimization-0.0.26.dist-info/METADATA,sha256=l24aFXZu4kp1oxZeIdFTUw1mwkyzln9C64S3HNqebF4,4958
-evolutionary_policy_optimization-0.0.26.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.0.26.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.0.26.dist-info/RECORD,,

{evolutionary_policy_optimization-0.0.26.dist-info → evolutionary_policy_optimization-0.0.28.dist-info}/WHEEL RENAMED Viewed

File without changes

{evolutionary_policy_optimization-0.0.26.dist-info → evolutionary_policy_optimization-0.0.28.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

evolutionary-policy-optimization 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl

evolutionary-policy-optimization 0.0.26__py3-none-any.whl → 0.0.28__py3-none-any.whl