PyPI - evolutionary-policy-optimization - Versions diffs - 0.0.5__py3-none-any.whl → 0.0.8__py3-none-any.whl - Mend

evolutionary-policy-optimization 0.0.5__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

evolutionary_policy_optimization/epo.py CHANGED Viewed

@@ -9,7 +9,7 @@ import torch.nn.functional as F
 import torch.nn.functional as F
 from torch.nn import Linear, Module, ModuleList
-from einops import rearrange, repeat
+from einops import rearrange, repeat, einsum
 from assoc_scan import AssocScan
@@ -162,6 +162,7 @@ class MLP(Module):
         self,
         dims: tuple[int, ...],
         dim_latent = 0,
+        num_latent_sets = 1
     ):
         super().__init__()
         assert len(dims) >= 2, 'must have at least two dimensions'
@@ -169,17 +170,26 @@ class MLP(Module):
         # add the latent to the first dim
         first_dim, *rest_dims = dims
-        first_dim += dim_latent
-        dims = (first_dim, *rest_dims)
+        dims = (first_dim + dim_latent, *rest_dims)
+        assert num_latent_sets >= 1
         self.dim_latent = dim_latent
+        self.num_latent_sets = num_latent_sets
         self.needs_latent = dim_latent > 0
+        self.needs_latent_gate = num_latent_sets > 1
         self.encode_latent = nn.Sequential(
             Linear(dim_latent, dim_latent),
             nn.SiLU()
         ) if self.needs_latent else None
+        self.to_latent_gate = nn.Sequential(
+            Linear(first_dim, num_latent_sets),
+            nn.Softmax(dim = -1)
+        ) if self.needs_latent_gate else None
         # pairs of dimension
         dim_pairs = tuple(zip(dims[:-1], dims[1:]))
@@ -195,16 +205,27 @@ class MLP(Module):
         x,
         latent = None
     ):
+        batch = x.shape[0]
         assert xnor(self.needs_latent, exists(latent))
+        if exists(latent) and self.needs_latent_gate:
+            # an improvisation where set of genes with controlled expression by environment
+            gates = self.to_latent_gate(x)
+            latent = einsum(latent, gates, 'n g, b n -> b g')
+        else:
+            assert latent.shape[0] == 1
+            latent = latent[0]
         if exists(latent):
             # start with naive concatenative conditioning
             # but will also offer some alternatives once a spark is seen (film, adaptive linear from stylegan, etc)
-            batch = x.shape[0]
             latent = self.encode_latent(latent)
-            latent = repeat(latent, 'd -> b d', b = batch)
+            if latent.ndim == 1:
+                latent = repeat(latent, 'd -> b d', b = batch)
             x = cat((x, latent), dim = -1)
@@ -314,6 +335,25 @@ class Agent(Module):
     ):
         raise NotImplementedError
+# criteria for running genetic algorithm
+class ShouldRunGeneticAlgorithm(Module):
+    def __init__(
+        self,
+        gamma = 2. # not sure what the value is
+    ):
+        super().__init__()
+        self.gamma = gamma
+    def forward(self, fitnesses):
+        # equation (3)
+        # max(fitness) - min(fitness) > gamma * median(fitness)
+        # however, this equation does not make much sense to me if fitness increases unbounded
+        # just let it be customizable, and offer a variant where mean and variance is over some threshold (could account for skew too)
+        return (fitnesses.amax() - fitnesses.amin()) > (self.gamma * torch.median(fitnesses))
 # classes
 class LatentGenePool(Module):
@@ -321,6 +361,7 @@ class LatentGenePool(Module):
         self,
         num_latents,                     # same as gene pool size
         dim_latent,                      # gene dimension
+        num_latent_sets = 1,             # allow for sets of latents / gene per individual, expression of a set controlled by the environment
         crossover_random = True,         # random interp from parent1 to parent2 for crossover, set to `False` for averaging (0.5 constant value)
         l2norm_latent = False,           # whether to enforce latents on hypersphere,
         frac_tournaments = 0.25,         # fraction of genes to participate in tournament - the lower the value, the more chance a less fit gene could be selected
@@ -328,17 +369,19 @@ class LatentGenePool(Module):
         frac_elitism = 0.1,              # frac of population to preserve from being noised
         mutation_strength = 1.,          # factor to multiply to gaussian noise as mutation to latents
         net: MLP | Module | dict | None = None,
+        should_run_genetic_algorithm: Module = ShouldRunGeneticAlgorithm() # eq (3) in paper
     ):
         super().__init__()
         maybe_l2norm = l2norm if l2norm_latent else identity
-        latents = torch.randn(num_latents, dim_latent)
+        latents = torch.randn(num_latents, num_latent_sets, dim_latent)
         if l2norm_latent:
             latents = maybe_l2norm(latents, dim = -1)
         self.num_latents = num_latents
+        self.num_latent_sets = num_latent_sets
         self.latents = nn.Parameter(latents, requires_grad = False)
         self.maybe_l2norm = maybe_l2norm
@@ -364,11 +407,21 @@ class LatentGenePool(Module):
         # network for the latent / gene
         if isinstance(net, dict):
+            assert 'dim_latent' not in net
+            assert 'num_latent_sets' not in net
+            net.update(dim_latent = dim_latent)
+            net.update(num_latent_sets = num_latent_sets)
             net = MLP(**net)
         assert net.dim_latent == dim_latent, f'the latent dimension set on the MLP {net.dim_latent} must be what was passed into the latent gene pool module ({dim_latent})'
+        assert net.num_latent_sets == num_latent_sets, 'number of latent sets must be equal between MLP and and latent gene pool container'
         self.net = net
+        self.should_run_genetic_algorithm = should_run_genetic_algorithm
     @torch.no_grad()
     # non-gradient optimization, at least, not on the individual level (taken care of by rl component)
     def genetic_algorithm_step(
@@ -379,7 +432,12 @@ class LatentGenePool(Module):
         """
         p - population
         g - gene dimension
+        n - number of genes per individual
         """
+        if not self.should_run_genetic_algorithm(fitness):
+            return
         assert self.num_latents > 1
         genes = self.latents # the latents are the genes
@@ -403,7 +461,7 @@ class LatentGenePool(Module):
         tournament_winner_indices = participant_fitness.topk(2, dim = -1).indices
-        tournament_winner_indices = repeat(tournament_winner_indices, '... -> ... g', g = self.dim_latent)
+        tournament_winner_indices = repeat(tournament_winner_indices, '... -> ... n g', g = self.dim_latent, n = self.num_latent_sets)
         parents = participants.gather(-2, tournament_winner_indices)

{evolutionary_policy_optimization-0.0.5.dist-info → evolutionary_policy_optimization-0.0.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.5
+Version: 0.0.8
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -46,6 +46,8 @@ Provides-Extra: examples-gym
 Requires-Dist: box2d-py; extra == 'examples-gym'
 Requires-Dist: gymnasium[box2d]>=1.0.0; extra == 'examples-gym'
 Requires-Dist: tqdm; extra == 'examples-gym'
+Provides-Extra: test
+Requires-Dist: pytest; extra == 'test'
 Description-Content-Type: text/markdown
 <img width="450px" alt="fig1" src="https://github.com/user-attachments/assets/33bef569-e786-4f09-bdee-56bad7ea9e6d" />
@@ -56,7 +58,9 @@ Pytorch implementation of [Evolutionary Policy Optimization](https://web3.arxiv.
 This paper stands out, as I have witnessed the positive effects first hand in an [exploratory project](https://github.com/lucidrains/firefly-torch) (mixing evolution with gradient based methods). Perhaps the Alexnet moment for genetic algorithms has not come to pass yet.
-Besides their latent variable method, I'll also throw in some attempts with crossover in weight space
+Besides their latent variable strategy, I'll also throw in some attempts with crossover in weight space
+Update: I see, mixing genetic algorithms with gradient based method is already a research field, under [Memetic algorithms](https://en.wikipedia.org/wiki/Memetic_algorithm). This is also incidentally what I have concluded what Science is. I am in direct exposure to this phenomenon on a daily basis
 ## Usage

evolutionary_policy_optimization-0.0.8.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+evolutionary_policy_optimization/__init__.py,sha256=Nu-_OMA8abe4AiW9Gw6MvbZH0JZpMHMqjeXmkC9-7UU,81
+evolutionary_policy_optimization/epo.py,sha256=LA81Yi6o3EFbJZHkxx1vyBFZWvNqpZ9mGhEauLZu9Ig,15692
+evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
+evolutionary_policy_optimization-0.0.8.dist-info/METADATA,sha256=42kS9DROtA90mUCJhim940ysydx3apEerwNbNs1wj_A,4460
+evolutionary_policy_optimization-0.0.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.8.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.8.dist-info/RECORD,,

evolutionary_policy_optimization-0.0.5.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-evolutionary_policy_optimization/__init__.py,sha256=Nu-_OMA8abe4AiW9Gw6MvbZH0JZpMHMqjeXmkC9-7UU,81
-evolutionary_policy_optimization/epo.py,sha256=lDhMV535MhUw1di7D7RM-Rr_J6aiuLqV-puh4EaNCd8,13455
-evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
-evolutionary_policy_optimization-0.0.5.dist-info/METADATA,sha256=uzkB4DrpzLLxbMEeiTID4CDxDxmEX1pO9fabwryDQcY,4098
-evolutionary_policy_optimization-0.0.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.0.5.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.0.5.dist-info/RECORD,,

{evolutionary_policy_optimization-0.0.5.dist-info → evolutionary_policy_optimization-0.0.8.dist-info}/WHEEL RENAMED Viewed

File without changes

{evolutionary_policy_optimization-0.0.5.dist-info → evolutionary_policy_optimization-0.0.8.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

evolutionary-policy-optimization 0.0.5__py3-none-any.whl → 0.0.8__py3-none-any.whl

evolutionary-policy-optimization 0.0.5__py3-none-any.whl → 0.0.8__py3-none-any.whl