evolutionary-policy-optimization 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolutionary_policy_optimization/__init__.py +3 -0
- evolutionary_policy_optimization/epo.py +59 -64
- {evolutionary_policy_optimization-0.0.9.dist-info → evolutionary_policy_optimization-0.0.11.dist-info}/METADATA +15 -12
- evolutionary_policy_optimization-0.0.11.dist-info/RECORD +7 -0
- evolutionary_policy_optimization-0.0.9.dist-info/RECORD +0 -7
- {evolutionary_policy_optimization-0.0.9.dist-info → evolutionary_policy_optimization-0.0.11.dist-info}/WHEEL +0 -0
- {evolutionary_policy_optimization-0.0.9.dist-info → evolutionary_policy_optimization-0.0.11.dist-info}/licenses/LICENSE +0 -0
evolutionary_policy_optimization/epo.py

@@ -10,6 +10,7 @@ import torch.nn.functional as F
 from torch.nn import Linear, Module, ModuleList
 
 from einops import rearrange, repeat, einsum
+from einops.layers.torch import Rearrange
 
 from assoc_scan import AssocScan
 
@@ -162,7 +163,6 @@ class MLP(Module):
         self,
         dims: tuple[int, ...],
         dim_latent = 0,
-        num_latent_sets = 1
     ):
         super().__init__()
         assert len(dims) >= 2, 'must have at least two dimensions'
@@ -172,24 +172,15 @@ class MLP(Module):
         first_dim, *rest_dims = dims
         dims = (first_dim + dim_latent, *rest_dims)
 
-        assert num_latent_sets >= 1
-
         self.dim_latent = dim_latent
-        self.num_latent_sets = num_latent_sets
 
         self.needs_latent = dim_latent > 0
-        self.needs_latent_gate = num_latent_sets > 1
 
         self.encode_latent = nn.Sequential(
             Linear(dim_latent, dim_latent),
             nn.SiLU()
         ) if self.needs_latent else None
 
-        self.to_latent_gate = nn.Sequential(
-            Linear(first_dim, num_latent_sets),
-            nn.Softmax(dim = -1)
-        ) if self.needs_latent_gate else None
-
         # pairs of dimension
 
         dim_pairs = tuple(zip(dims[:-1], dims[1:]))
@@ -209,15 +200,6 @@ class MLP(Module):
 
         assert xnor(self.needs_latent, exists(latent))
 
-        if exists(latent) and self.needs_latent_gate:
-            # an improvisation where set of genes with controlled expression by environment
-
-            gates = self.to_latent_gate(x)
-            latent = einsum(latent, gates, 'n g, b n -> b g')
-        else:
-            assert latent.shape[0] == 1
-            latent = latent[0]
-
         if exists(latent):
             # start with naive concatenative conditioning
             # but will also offer some alternatives once a spark is seen (film, adaptive linear from stylegan, etc)
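The comment retained above describes the conditioning as naive concatenation of the latent onto the MLP input. A minimal standalone sketch of that idea, with made-up shapes and variable names:

```python
import torch

# hypothetical shapes: a batch of hidden features and a latent already broadcast to the batch
hiddens = torch.randn(2, 512)   # (batch, first_dim)
latent  = torch.randn(2, 32)    # (batch, dim_latent)

# concatenative conditioning: the next linear layer simply sees first_dim + dim_latent features
conditioned = torch.cat((hiddens, latent), dim = -1)   # (2, 544)
```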
@@ -247,7 +229,7 @@ class MLP(Module):
 class Actor(Module):
     def __init__(
         self,
-
+        dim_state,
         num_actions,
         dim_hiddens: tuple[int, ...],
         dim_latent = 0,
@@ -258,7 +240,7 @@ class Actor(Module):
         dim_first, *_, dim_last = dim_hiddens
 
         self.init_layer = nn.Sequential(
-            nn.Linear(
+            nn.Linear(dim_state, dim_first),
             nn.SiLU()
         )
 
@@ -277,14 +259,14 @@ class Actor(Module):
 
         hidden = self.init_layer(state)
 
-        hidden = self.mlp(
+        hidden = self.mlp(hidden, latent)
 
         return self.to_out(hidden)
 
 class Critic(Module):
     def __init__(
         self,
-
+        dim_state,
         dim_hiddens: tuple[int, ...],
         dim_latent = 0,
     ):
@@ -294,7 +276,7 @@ class Critic(Module):
         dim_first, *_, dim_last = dim_hiddens
 
         self.init_layer = nn.Sequential(
-            nn.Linear(
+            nn.Linear(dim_state, dim_first),
             nn.SiLU()
         )
 
@@ -314,27 +296,10 @@ class Critic(Module):
 
         hidden = self.init_layer(state)
 
-        hidden = self.mlp(
+        hidden = self.mlp(hidden, latent)
 
         return self.to_out(hidden)
 
-class Agent(Module):
-    def __init__(
-        self,
-        actor: Actor,
-        critic: Critic,
-    ):
-        super().__init__()
-
-        self.actor = actor
-        self.critic = critic
-
-    def forward(
-        self,
-        memories: list[Memory]
-    ):
-        raise NotImplementedError
-
 # criteria for running genetic algorithm
 
 class ShouldRunGeneticAlgorithm(Module):
@@ -362,13 +327,13 @@ class LatentGenePool(Module):
         num_latents,                  # same as gene pool size
         dim_latent,                   # gene dimension
         num_latent_sets = 1,          # allow for sets of latents / gene per individual, expression of a set controlled by the environment
+        dim_state = None,
         crossover_random = True,      # random interp from parent1 to parent2 for crossover, set to `False` for averaging (0.5 constant value)
         l2norm_latent = False,        # whether to enforce latents on hypersphere,
         frac_tournaments = 0.25,      # fraction of genes to participate in tournament - the lower the value, the more chance a less fit gene could be selected
         frac_natural_selected = 0.25, # number of least fit genes to remove from the pool
         frac_elitism = 0.1,           # frac of population to preserve from being noised
         mutation_strength = 1.,       # factor to multiply to gaussian noise as mutation to latents
-        net: MLP | Module | dict | None = None,
         should_run_genetic_algorithm: Module | None = None, # eq (3) in paper
         default_should_run_ga_gamma = 1.5
     ):
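With `num_latent_sets` and the new `dim_state` argument handled by `LatentGenePool` itself (the `net` argument is gone), constructing a pool with environment-gated gene expression would presumably look like the sketch below; the sizes are illustrative, not taken from the package.

```python
import torch
from evolutionary_policy_optimization import LatentGenePool

latent_pool = LatentGenePool(
    num_latents = 128,     # gene pool size
    dim_latent = 32,       # gene dimension
    num_latent_sets = 4,   # several latents per individual, expression gated by the environment
    dim_state = 512        # required when num_latent_sets > 1, per the assert added below
)
```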
@@ -382,11 +347,23 @@ class LatentGenePool(Module):
         latents = maybe_l2norm(latents, dim = -1)
 
         self.num_latents = num_latents
-        self.
+        self.needs_latent_gate = num_latent_sets > 1
         self.latents = nn.Parameter(latents, requires_grad = False)
 
         self.maybe_l2norm = maybe_l2norm
 
+        # gene expression as a function of environment
+
+        self.num_latent_sets = num_latent_sets
+
+        if self.needs_latent_gate:
+            assert exists(dim_state), '`dim_state` must be passed in if using gated gene expression'
+
+            self.to_latent_gate = nn.Sequential(
+                Linear(dim_state, num_latent_sets),
+                nn.Softmax(dim = -1)
+            ) if self.needs_latent_gate else None
+
         # some derived values
 
         assert 0. < frac_tournaments < 1.
@@ -405,22 +382,6 @@ class LatentGenePool(Module):
         self.num_elites = int(frac_elitism * num_latents)
         self.has_elites = self.num_elites > 0
 
-        # network for the latent / gene
-
-        if isinstance(net, dict):
-            assert 'dim_latent' not in net
-            assert 'num_latent_sets' not in net
-
-            net.update(dim_latent = dim_latent)
-            net.update(num_latent_sets = num_latent_sets)
-
-            net = MLP(**net)
-
-        assert net.dim_latent == dim_latent, f'the latent dimension set on the MLP {net.dim_latent} must be what was passed into the latent gene pool module ({dim_latent})'
-        assert net.num_latent_sets == num_latent_sets, 'number of latent sets must be equal between MLP and and latent gene pool container'
-
-        self.net = net
-
         if not exists(should_run_genetic_algorithm):
             should_run_genetic_algorithm = ShouldRunGeneticAlgorithm(gamma = default_should_run_ga_gamma)
 
@@ -467,7 +428,7 @@ class LatentGenePool(Module):
 
         tournament_winner_indices = repeat(tournament_winner_indices, '... -> ... n g', g = self.dim_latent, n = self.num_latent_sets)
 
-        parents = participants.gather(-
+        parents = participants.gather(-3, tournament_winner_indices)
 
         # 3. do a crossover of the parents - in their case they went for a simple averaging, but since we are doing tournament style and the same pair of parents may be re-selected, lets make it random interpolation
 
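The comment above frames crossover as a random interpolation between two tournament-selected parents rather than a plain average (the behavior toggled by `crossover_random`). A standalone sketch of that operation, with hypothetical tensor names and shapes:

```python
import torch

# two selected parents per child: (num_children, num_latent_sets, dim_latent)
parent1 = torch.randn(64, 1, 32)
parent2 = torch.randn(64, 1, 32)

# crossover_random = True: a uniform random mixing coefficient per element
# crossover_random = False would correspond to a constant 0.5, i.e. simple averaging
mix = torch.rand_like(parent1)
children = parent1.lerp(parent2, mix)
```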
@@ -504,12 +465,12 @@ class LatentGenePool(Module):
     def forward(
         self,
         *args,
+        state: Tensor | None = None,
         latent_id: int | None = None,
+        net: Module | None = None,
         **kwargs,
     ):
 
-        assert exists(self.net)
-
         # if only 1 latent, assume doing ablation and get lone gene
 
         if not exists(latent_id) and self.num_latents == 1:
@@ -521,12 +482,46 @@ class LatentGenePool(Module):
 
         latent = self.latents[latent_id]
 
-
+        if self.needs_latent_gate:
+            assert exists(state), 'state must be passed in if greater than number of 1 latent set'
+
+            gates = self.to_latent_gate(state)
+            latent = einsum(latent, gates, 'n g, b n -> b g')
+        else:
+            assert latent.shape[0] == 1
+            latent = latent[0]
+
+        if not exists(net):
+            return latent
+
+        return net(
             *args,
             latent = latent,
             **kwargs
         )
 
+# agent contains the actor, critic, and the latent genetic pool
+
+class Agent(Module):
+    def __init__(
+        self,
+        actor: Actor,
+        critic: Critic,
+        latent_gene_pool: LatentGenePool
+    ):
+        super().__init__()
+
+        self.actor = actor
+        self.critic = critic
+
+        self.latent_gene_pool = latent_gene_pool
+
+    def forward(
+        self,
+        memories: list[Memory]
+    ):
+        raise NotImplementedError
+
 # EPO - which is just PPO with natural selection of a population of latent variables conditioning the agent
 # the tricky part is that the latent ids for each episode / trajectory needs to be tracked
 
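To make the new gated gene expression concrete: a gene is now a set of `num_latent_sets` latents, `to_latent_gate` maps the state to a softmax over those sets, and the einsum above mixes the sets per batch element. A minimal standalone sketch of just that mixing step, with made-up sizes:

```python
import torch
import torch.nn as nn
from einops import einsum

num_latent_sets, dim_latent, dim_state, batch = 4, 32, 512, 2

latent = torch.randn(num_latent_sets, dim_latent)        # one gene: (n, g)

to_latent_gate = nn.Sequential(
    nn.Linear(dim_state, num_latent_sets),
    nn.Softmax(dim = -1)
)

gates = to_latent_gate(torch.randn(batch, dim_state))     # (b, n), rows sum to 1

mixed = einsum(latent, gates, 'n g, b n -> b g')          # (b, g), as in the forward above
```

Passing `net = actor` (or `critic`) to `LatentGenePool.forward` would then hand this mixed latent on as the `latent` keyword argument; with no `net`, the latent itself is returned, which is what the README example below relies on.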
{evolutionary_policy_optimization-0.0.9.dist-info → evolutionary_policy_optimization-0.0.11.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.9
+Version: 0.0.11
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -60,37 +60,40 @@ This paper stands out, as I have witnessed the positive effects first hand in an
 
 Besides their latent variable strategy, I'll also throw in some attempts with crossover in weight space
 
-Update: I see, mixing genetic algorithms with gradient based method is already a research field, under [Memetic algorithms](https://en.wikipedia.org/wiki/Memetic_algorithm)
+Update: I see, mixing genetic algorithms with gradient based method is already a research field, under [Memetic algorithms](https://en.wikipedia.org/wiki/Memetic_algorithm)
 
 ## Usage
 
 ```python
+
 import torch
 
 from evolutionary_policy_optimization import (
     LatentGenePool,
-
+    Actor,
+    Critic
 )
 
 latent_pool = LatentGenePool(
-    num_latents = 
+    num_latents = 128,
     dim_latent = 32,
-    net = MLP(
-        dims = (512, 256),
-        dim_latent = 32,
-    )
 )
 
 state = torch.randn(1, 512)
-
+
+actor = Actor(512, dim_hiddens = (256, 128), num_actions = 4, dim_latent = 32)
+critic = Critic(512, dim_hiddens = (256, 128, 64), dim_latent = 32)
+
+latent = latent_pool(latent_id = 2)
+
+actions = actor(state, latent)
+value = critic(state, latent)
 
 # interact with environment and receive rewards, termination etc
 
 # derive a fitness score for each gene / latent
 
-fitness = torch.randn(
-
-latent_pool.genetic_algorithm_step(fitness) # update latents using one generation of genetic algorithm
+fitness = torch.randn(128)
 
 ```
 
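The 0.0.9 README closed this example by feeding the fitness back into the pool, and the genetic-algorithm machinery is still present in `epo.py` above (tournament selection, crossover, mutation), so the snippet would presumably still be continued along these lines:

```python
# update latents using one generation of the genetic algorithm
latent_pool.genetic_algorithm_step(fitness)
```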
evolutionary_policy_optimization-0.0.11.dist-info/RECORD

@@ -0,0 +1,7 @@
+evolutionary_policy_optimization/__init__.py,sha256=A07bhbBI_p-GlSTkI15pioQ1XgtJ0V4tBN6v3vs2nuU,115
+evolutionary_policy_optimization/epo.py,sha256=JGow9ofx7IgFy7QNL0dL0K_SCL_bVkBUznMG8aSGM9Q,15591
+evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
+evolutionary_policy_optimization-0.0.11.dist-info/METADATA,sha256=fkouRBZU5nrPgHt0eT5izSHdOiYGAg67N5Gn3t039mQ,4357
+evolutionary_policy_optimization-0.0.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.11.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.11.dist-info/RECORD,,
evolutionary_policy_optimization-0.0.9.dist-info/RECORD

@@ -1,7 +0,0 @@
-evolutionary_policy_optimization/__init__.py,sha256=Nu-_OMA8abe4AiW9Gw6MvbZH0JZpMHMqjeXmkC9-7UU,81
-evolutionary_policy_optimization/epo.py,sha256=eiOJg0J14miB3ZWpcTD1dMC7M6abxtVaMD_Oxza0cYI,15880
-evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
-evolutionary_policy_optimization-0.0.9.dist-info/METADATA,sha256=MT4_JXsUQCrcBWB-0m9uJZHYtGnSFMbQzclZ32HZKnQ,4460
-evolutionary_policy_optimization-0.0.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.0.9.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.0.9.dist-info/RECORD,,
Files without changes: WHEEL, licenses/LICENSE