evolutionary-policy-optimization 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolutionary_policy_optimization/__init__.py +3 -0
- evolutionary_policy_optimization/epo.py +31 -40
- {evolutionary_policy_optimization-0.0.9.dist-info → evolutionary_policy_optimization-0.0.10.dist-info}/METADATA +15 -12
- evolutionary_policy_optimization-0.0.10.dist-info/RECORD +7 -0
- evolutionary_policy_optimization-0.0.9.dist-info/RECORD +0 -7
- {evolutionary_policy_optimization-0.0.9.dist-info → evolutionary_policy_optimization-0.0.10.dist-info}/WHEEL +0 -0
- {evolutionary_policy_optimization-0.0.9.dist-info → evolutionary_policy_optimization-0.0.10.dist-info}/licenses/LICENSE +0 -0
evolutionary_policy_optimization/epo.py

@@ -10,6 +10,7 @@ import torch.nn.functional as F
 from torch.nn import Linear, Module, ModuleList

 from einops import rearrange, repeat, einsum
+from einops.layers.torch import Rearrange

 from assoc_scan import AssocScan

@@ -277,7 +278,7 @@ class Actor(Module):

         hidden = self.init_layer(state)

-        hidden = self.mlp(
+        hidden = self.mlp(hidden, latent)

         return self.to_out(hidden)

@@ -314,27 +315,10 @@ class Critic(Module):

         hidden = self.init_layer(state)

-        hidden = self.mlp(
+        hidden = self.mlp(hidden, latent)

         return self.to_out(hidden)

-class Agent(Module):
-    def __init__(
-        self,
-        actor: Actor,
-        critic: Critic,
-    ):
-        super().__init__()
-
-        self.actor = actor
-        self.critic = critic
-
-    def forward(
-        self,
-        memories: list[Memory]
-    ):
-        raise NotImplementedError
-
 # criteria for running genetic algorithm

 class ShouldRunGeneticAlgorithm(Module):
@@ -368,7 +352,6 @@ class LatentGenePool(Module):
         frac_natural_selected = 0.25, # number of least fit genes to remove from the pool
         frac_elitism = 0.1,           # frac of population to preserve from being noised
         mutation_strength = 1.,       # factor to multiply to gaussian noise as mutation to latents
-        net: MLP | Module | dict | None = None,
         should_run_genetic_algorithm: Module | None = None, # eq (3) in paper
         default_should_run_ga_gamma = 1.5
     ):
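These constructor arguments govern the selection and mutation behaviour of the latent pool. As a rough illustration of what they control, here is a minimal, self-contained sketch of one selection-and-mutation pass over a pool of latents; it is not the package's own `genetic_algorithm_step`, and the function name and exact ordering are assumptions:

```python
import torch

def selection_and_mutation_sketch(
    latents,                      # (num_latents, dim_latent) pool of latent genes
    fitness,                      # (num_latents,) fitness score per gene
    frac_natural_selected = 0.25, # fraction of least fit genes removed from the pool
    frac_elitism = 0.1,           # fraction of fittest genes preserved from being noised
    mutation_strength = 1.,       # factor multiplied onto the gaussian mutation noise
):
    num_latents = latents.shape[0]
    num_selected = num_latents - int(frac_natural_selected * num_latents)
    num_elites = int(frac_elitism * num_latents)

    # natural selection - keep only the fittest genes, ordered fittest first
    survivors = latents[fitness.topk(num_selected).indices]

    # mutation - gaussian noise scaled by mutation_strength, elites left untouched
    noise = torch.randn_like(survivors) * mutation_strength
    noise[:num_elites] = 0.

    return survivors + noise
```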
@@ -405,22 +388,6 @@ class LatentGenePool(Module):
         self.num_elites = int(frac_elitism * num_latents)
         self.has_elites = self.num_elites > 0

-        # network for the latent / gene
-
-        if isinstance(net, dict):
-            assert 'dim_latent' not in net
-            assert 'num_latent_sets' not in net
-
-            net.update(dim_latent = dim_latent)
-            net.update(num_latent_sets = num_latent_sets)
-
-            net = MLP(**net)
-
-        assert net.dim_latent == dim_latent, f'the latent dimension set on the MLP {net.dim_latent} must be what was passed into the latent gene pool module ({dim_latent})'
-        assert net.num_latent_sets == num_latent_sets, 'number of latent sets must be equal between MLP and latent gene pool container'
-
-        self.net = net
-
         if not exists(should_run_genetic_algorithm):
             should_run_genetic_algorithm = ShouldRunGeneticAlgorithm(gamma = default_should_run_ga_gamma)

@@ -467,7 +434,7 @@ class LatentGenePool(Module):

         tournament_winner_indices = repeat(tournament_winner_indices, '... -> ... n g', g = self.dim_latent, n = self.num_latent_sets)

-        parents = participants.gather(-
+        parents = participants.gather(-3, tournament_winner_indices)

         # 3. do a crossover of the parents - in their case they went for a simple averaging, but since we are doing tournament style and the same pair of parents may be re-selected, lets make it random interpolation

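The comment above contrasts the paper's simple averaging with the random interpolation used in this implementation. A small illustration of the difference on a single pair of parent latents (tensor sizes chosen arbitrarily for the example):

```python
import torch

parent_a = torch.randn(32)  # two parent latents, dim_latent = 32
parent_b = torch.randn(32)

# simple averaging, as attributed to the paper
child_avg = (parent_a + parent_b) / 2

# random interpolation - every crossover draws its own mixing coefficient,
# so a re-selected pair of parents can still produce distinct children
alpha = torch.rand(1)
child_lerp = parent_a.lerp(parent_b, alpha)
```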
@@ -505,11 +472,10 @@ class LatentGenePool(Module):
         self,
         *args,
         latent_id: int | None = None,
+        net: Module | None = None,
         **kwargs,
     ):

-        assert exists(self.net)
-
         # if only 1 latent, assume doing ablation and get lone gene

         if not exists(latent_id) and self.num_latents == 1:
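With this change the pool no longer owns a network; the caller supplies one per forward call, or omits it to receive the raw latent, as the next hunk shows. A sketch of the two call patterns, using a stand-in module (the class below is purely illustrative; only the `latent` keyword in its forward matches what the unchanged `latent = latent` line forwards):

```python
import torch
from torch.nn import Linear, Module

from evolutionary_policy_optimization import LatentGenePool

# a stand-in network whose forward accepts a `latent` keyword
class TinyLatentConditionedNet(Module):
    def __init__(self, dim_state = 512, dim_latent = 32):
        super().__init__()
        self.to_out = Linear(dim_state + dim_latent, 1)

    def forward(self, state, latent):
        latent = latent.reshape(1, -1).expand(state.shape[0], -1)  # broadcast latent over the batch
        return self.to_out(torch.cat((state, latent), dim = -1))

latent_pool = LatentGenePool(num_latents = 128, dim_latent = 32)

# without a network, the pool now simply returns the selected latent gene
latent = latent_pool(latent_id = 2)

# with a network passed at call time, the pool forwards *args / **kwargs plus the latent into it
net = TinyLatentConditionedNet()
out = latent_pool(torch.randn(1, 512), latent_id = 2, net = net)
```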
@@ -521,12 +487,37 @@ class LatentGenePool(Module):

         latent = self.latents[latent_id]

-        return self.net(
+        if not exists(net):
+            return latent
+
+        return net(
             *args,
             latent = latent,
             **kwargs
         )

+# agent contains the actor, critic, and the latent genetic pool
+
+class Agent(Module):
+    def __init__(
+        self,
+        actor: Actor,
+        critic: Critic,
+        latent_gene_pool: LatentGenePool
+    ):
+        super().__init__()
+
+        self.actor = actor
+        self.critic = critic
+
+        self.latent_gene_pool = latent_gene_pool
+
+    def forward(
+        self,
+        memories: list[Memory]
+    ):
+        raise NotImplementedError
+
 # EPO - which is just PPO with natural selection of a population of latent variables conditioning the agent
 # the tricky part is that the latent ids for each episode / trajectory needs to be tracked

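The trailing comments flag the bookkeeping this refactor sets up: `Agent.forward` accepts `memories: list[Memory]` but still raises `NotImplementedError`, and each trajectory must remember which latent conditioned it so that fitness can later be credited to the right gene. The `Memory` fields below are an assumption for illustration only; the diff never defines them:

```python
from collections import namedtuple, defaultdict

import torch

# hypothetical memory layout - only `latent_id` matters for the point being made here
Memory = namedtuple('Memory', ['latent_id', 'state', 'action', 'log_prob', 'reward', 'done'])

def fitness_per_latent(memories, num_latents):
    # credit each transition's reward to the latent that conditioned its trajectory,
    # yielding one fitness score per gene for the genetic algorithm step
    total_rewards = defaultdict(float)

    for memory in memories:
        total_rewards[memory.latent_id] += float(memory.reward)

    return torch.tensor([total_rewards[i] for i in range(num_latents)])
```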
{evolutionary_policy_optimization-0.0.9.dist-info → evolutionary_policy_optimization-0.0.10.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.9
+Version: 0.0.10
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -60,37 +60,40 @@ This paper stands out, as I have witnessed the positive effects first hand in an

 Besides their latent variable strategy, I'll also throw in some attempts with crossover in weight space

-Update: I see, mixing genetic algorithms with gradient based method is already a research field, under [Memetic algorithms](https://en.wikipedia.org/wiki/Memetic_algorithm)
+Update: I see, mixing genetic algorithms with gradient based method is already a research field, under [Memetic algorithms](https://en.wikipedia.org/wiki/Memetic_algorithm)

 ## Usage

 ```python
+
 import torch

 from evolutionary_policy_optimization import (
     LatentGenePool,
-
+    Actor,
+    Critic
 )

 latent_pool = LatentGenePool(
-    num_latents =
+    num_latents = 128,
     dim_latent = 32,
-    net = MLP(
-        dims = (512, 256),
-        dim_latent = 32,
-    )
 )

 state = torch.randn(1, 512)
-
+
+actor = Actor(512, dim_hiddens = (256, 128), num_actions = 4, dim_latent = 32)
+critic = Critic(512, dim_hiddens = (256, 128, 64), dim_latent = 32)
+
+latent = latent_pool(latent_id = 2)
+
+actions = actor(state, latent)
+value = critic(state, latent)

 # interact with environment and receive rewards, termination etc

 # derive a fitness score for each gene / latent

-fitness = torch.randn(
-
-latent_pool.genetic_algorithm_step(fitness) # update latents using one generation of genetic algorithm
+fitness = torch.randn(128)

 ```
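The updated README stops at the fitness tensor. The 0.0.9 README completed the loop with a genetic-algorithm update on the pool; assuming that method is unchanged in 0.0.10 (this diff does not touch it), the example would continue roughly as:

```python
# continuing the README example above; `genetic_algorithm_step` is taken from the 0.0.9 README
latent_pool.genetic_algorithm_step(fitness)  # update latents using one generation of the genetic algorithm

# the evolved latent can then be fetched again to condition the actor and critic
latent = latent_pool(latent_id = 2)
actions = actor(state, latent)
value = critic(state, latent)
```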
evolutionary_policy_optimization-0.0.10.dist-info/RECORD

@@ -0,0 +1,7 @@
+evolutionary_policy_optimization/__init__.py,sha256=A07bhbBI_p-GlSTkI15pioQ1XgtJ0V4tBN6v3vs2nuU,115
+evolutionary_policy_optimization/epo.py,sha256=66GOQq8_s5kmQI7G-2Z0J_0g4E5QarjQPJfWEP7mmKg,15442
+evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
+evolutionary_policy_optimization-0.0.10.dist-info/METADATA,sha256=bD3fw2Zw1IxhfkCvzjsRhODyL_XIC5ZsvNQqFbZXNc4,4357
+evolutionary_policy_optimization-0.0.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.10.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.10.dist-info/RECORD,,
evolutionary_policy_optimization-0.0.9.dist-info/RECORD

@@ -1,7 +0,0 @@
-evolutionary_policy_optimization/__init__.py,sha256=Nu-_OMA8abe4AiW9Gw6MvbZH0JZpMHMqjeXmkC9-7UU,81
-evolutionary_policy_optimization/epo.py,sha256=eiOJg0J14miB3ZWpcTD1dMC7M6abxtVaMD_Oxza0cYI,15880
-evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
-evolutionary_policy_optimization-0.0.9.dist-info/METADATA,sha256=MT4_JXsUQCrcBWB-0m9uJZHYtGnSFMbQzclZ32HZKnQ,4460
-evolutionary_policy_optimization-0.0.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.0.9.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.0.9.dist-info/RECORD,,
{evolutionary_policy_optimization-0.0.9.dist-info → evolutionary_policy_optimization-0.0.10.dist-info}/WHEEL and licenses/LICENSE: files without changes