evolutionary-policy-optimization 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolutionary_policy_optimization/epo.py +171 -11
- {evolutionary_policy_optimization-0.0.4.dist-info → evolutionary_policy_optimization-0.0.6.dist-info}/METADATA +6 -2
- evolutionary_policy_optimization-0.0.6.dist-info/RECORD +7 -0
- evolutionary_policy_optimization-0.0.4.dist-info/RECORD +0 -7
- {evolutionary_policy_optimization-0.0.4.dist-info → evolutionary_policy_optimization-0.0.6.dist-info}/WHEEL +0 -0
- {evolutionary_policy_optimization-0.0.4.dist-info → evolutionary_policy_optimization-0.0.6.dist-info}/licenses/LICENSE +0 -0
evolutionary_policy_optimization/epo.py

@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from collections import namedtuple
+
 import torch
 from torch import nn, cat
 import torch.nn.functional as F
@@ -7,7 +9,7 @@ import torch.nn.functional as F
 import torch.nn.functional as F
 from torch.nn import Linear, Module, ModuleList
 
-from einops import rearrange, repeat
+from einops import rearrange, repeat, einsum
 
 from assoc_scan import AssocScan
 
@@ -85,9 +87,9 @@ def critic_loss(
 # generalized advantage estimate
 
 def calc_generalized_advantage_estimate(
-    rewards,
-    values,
-    masks,
+    rewards, # Float[g n]
+    values,  # Float[g n+1]
+    masks,   # Bool[n]
     gamma = 0.99,
     lam = 0.95,
     use_accelerated = None
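The new shape comments suggest rewards and values now carry a leading group dimension g (one row per latent / gene in the population), with masks shared per timestep. As a reading aid only, here is a minimal plain-loop reference for generalized advantage estimation under those assumed shapes; the package computes this with an associative scan (AssocScan), so this is a sketch, not its implementation.

import torch

def gae_reference(rewards, values, masks, gamma = 0.99, lam = 0.95):
    # rewards: (g, n), values: (g, n + 1), masks: (n,) with True where the episode continues
    g, n = rewards.shape
    masks = masks.float()

    advantages = torch.zeros_like(rewards)
    running = torch.zeros(g)

    # standard GAE recursion, applied to every group row at once
    for t in reversed(range(n)):
        delta = rewards[:, t] + gamma * values[:, t + 1] * masks[t] - values[:, t]
        running = delta + gamma * lam * masks[t] * running
        advantages[:, t] = running

    return advantages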
@@ -160,6 +162,7 @@ class MLP(Module):
         self,
         dims: tuple[int, ...],
         dim_latent = 0,
+        num_latent_sets = 1
     ):
         super().__init__()
         assert len(dims) >= 2, 'must have at least two dimensions'
@@ -167,17 +170,26 @@ class MLP(Module):
         # add the latent to the first dim
 
         first_dim, *rest_dims = dims
-        first_dim
-
+        dims = (first_dim + dim_latent, *rest_dims)
+
+        assert num_latent_sets >= 1
 
         self.dim_latent = dim_latent
+        self.num_latent_sets = num_latent_sets
+
         self.needs_latent = dim_latent > 0
+        self.needs_latent_gate = num_latent_sets > 1
 
         self.encode_latent = nn.Sequential(
             Linear(dim_latent, dim_latent),
             nn.SiLU()
         ) if self.needs_latent else None
 
+        self.to_latent_gate = nn.Sequential(
+            Linear(first_dim, num_latent_sets),
+            nn.Softmax(dim = -1)
+        ) if self.needs_latent_gate else None
+
         # pairs of dimension
 
         dim_pairs = tuple(zip(dims[:-1], dims[1:]))
@@ -193,16 +205,27 @@ class MLP(Module):
         x,
         latent = None
     ):
+        batch = x.shape[0]
+
         assert xnor(self.needs_latent, exists(latent))
 
+        if exists(latent) and self.needs_latent_gate:
+            # an improvisation where set of genes with controlled expression by environment
+
+            gates = self.to_latent_gate(x)
+            latent = einsum(latent, gates, 'n g, b n -> b g')
+        else:
+            assert latent.shape[0] == 1
+            latent = latent[0]
+
         if exists(latent):
             # start with naive concatenative conditioning
             # but will also offer some alternatives once a spark is seen (film, adaptive linear from stylegan, etc)
 
-            batch = x.shape[0]
-
             latent = self.encode_latent(latent)
-
+
+            if latent.ndim == 1:
+                latent = repeat(latent, 'd -> b d', b = batch)
 
             x = cat((x, latent), dim = -1)
 
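To make the new gating path concrete: with num_latent_sets > 1, an individual carries several latent vectors, and a softmax gate computed from the input decides how strongly each set is expressed per sample. A small self-contained sketch of just that step, with made-up sizes (the surrounding MLP machinery is omitted):

import torch
from torch import nn
from einops import einsum

batch, dim_in, num_latent_sets, dim_latent = 4, 32, 3, 16

x = torch.randn(batch, dim_in)
latent = torch.randn(num_latent_sets, dim_latent)    # one individual's n latent sets

to_latent_gate = nn.Sequential(
    nn.Linear(dim_in, num_latent_sets),
    nn.Softmax(dim = -1)
)

gates = to_latent_gate(x)                            # (batch, num_latent_sets), sums to 1 per sample
mixed = einsum(latent, gates, 'n g, b n -> b g')     # (batch, dim_latent), state-dependent gene expression

assert mixed.shape == (batch, dim_latent)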
@@ -218,6 +241,100 @@ class MLP(Module):
 
         return x
 
+# actor, critic, and agent (actor + critic)
+# eventually, should just create a separate repo and aggregate all the MLP related architectures
+
+class Actor(Module):
+    def __init__(
+        self,
+        dim_in,
+        num_actions,
+        dim_hiddens: tuple[int, ...],
+        dim_latent = 0,
+    ):
+        super().__init__()
+
+        assert len(dim_hiddens) >= 2
+        dim_first, *_, dim_last = dim_hiddens
+
+        self.init_layer = nn.Sequential(
+            nn.Linear(dim_in, dim_first),
+            nn.SiLU()
+        )
+
+        self.mlp = MLP(dims = dim_hiddens, dim_latent = dim_latent)
+
+        self.to_out = nn.Sequential(
+            nn.SiLU(),
+            nn.Linear(dim_last, num_actions),
+        )
+
+    def forward(
+        self,
+        state,
+        latent
+    ):
+
+        hidden = self.init_layer(state)
+
+        hidden = self.mlp(state, latent)
+
+        return self.to_out(hidden)
+
+class Critic(Module):
+    def __init__(
+        self,
+        dim_in,
+        dim_hiddens: tuple[int, ...],
+        dim_latent = 0,
+    ):
+        super().__init__()
+
+        assert len(dim_hiddens) >= 2
+        dim_first, *_, dim_last = dim_hiddens
+
+        self.init_layer = nn.Sequential(
+            nn.Linear(dim_in, dim_first),
+            nn.SiLU()
+        )
+
+        self.mlp = MLP(dims = dim_hiddens, dim_latent = dim_latent)
+
+        self.to_out = nn.Sequential(
+            nn.SiLU(),
+            nn.Linear(dim_last, 1),
+            Rearrange('... 1 -> ...')
+        )
+
+    def forward(
+        self,
+        state,
+        latent
+    ):
+
+        hidden = self.init_layer(state)
+
+        hidden = self.mlp(state, latent)
+
+        return self.to_out(hidden)
+
+class Agent(Module):
+    def __init__(
+        self,
+        actor: Actor,
+        critic: Critic,
+    ):
+        super().__init__()
+
+        self.actor = actor
+        self.critic = critic
+
+    def forward(
+        self,
+        memories: list[Memory]
+    ):
+        raise NotImplementedError
+
 # classes
 
 class LatentGenePool(Module):
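The Actor and Critic added above share one pattern: encode the raw state, run a latent-conditioned trunk, then project to action logits or a scalar value. A condensed, self-contained sketch of that path with hypothetical dimensions, under the assumption that the encoded state is what feeds the latent-conditioned trunk (a single Linear stands in for the MLP here):

import torch
from torch import nn, cat

dim_in, dim_hidden, dim_latent, num_actions, batch = 8, 64, 16, 4, 2

init_layer = nn.Sequential(nn.Linear(dim_in, dim_hidden), nn.SiLU())
encode_latent = nn.Sequential(nn.Linear(dim_latent, dim_latent), nn.SiLU())
hidden_layer = nn.Linear(dim_hidden + dim_latent, dim_hidden)    # stands in for the latent-conditioned MLP
to_out = nn.Sequential(nn.SiLU(), nn.Linear(dim_hidden, num_actions))

state = torch.randn(batch, dim_in)
latent = torch.randn(dim_latent)                                 # one gene from the pool

hidden = init_layer(state)
latent = encode_latent(latent).expand(batch, -1)                 # broadcast the gene across the batch
action_logits = to_out(hidden_layer(cat((hidden, latent), dim = -1)))

assert action_logits.shape == (batch, num_actions)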
@@ -225,6 +342,7 @@ class LatentGenePool(Module):
         self,
         num_latents,                # same as gene pool size
         dim_latent,                 # gene dimension
+        num_latent_sets = 1,        # allow for sets of latents / gene per individual, expression of a set controlled by the environment
         crossover_random = True,    # random interp from parent1 to parent2 for crossover, set to `False` for averaging (0.5 constant value)
         l2norm_latent = False,      # whether to enforce latents on hypersphere,
         frac_tournaments = 0.25,    # fraction of genes to participate in tournament - the lower the value, the more chance a less fit gene could be selected
@@ -237,12 +355,13 @@ class LatentGenePool(Module):
 
         maybe_l2norm = l2norm if l2norm_latent else identity
 
-        latents = torch.randn(num_latents, dim_latent)
+        latents = torch.randn(num_latents, num_latent_sets, dim_latent)
 
         if l2norm_latent:
             latents = maybe_l2norm(latents, dim = -1)
 
         self.num_latents = num_latents
+        self.num_latent_sets = num_latent_sets
         self.latents = nn.Parameter(latents, requires_grad = False)
 
         self.maybe_l2norm = maybe_l2norm
@@ -268,9 +387,17 @@ class LatentGenePool(Module):
         # network for the latent / gene
 
         if isinstance(net, dict):
+            assert 'dim_latent' not in net
+            assert 'num_latent_sets' not in net
+
+            net.update(dim_latent = dim_latent)
+            net.update(num_latent_sets = num_latent_sets)
+
             net = MLP(**net)
 
         assert net.dim_latent == dim_latent, f'the latent dimension set on the MLP {net.dim_latent} must be what was passed into the latent gene pool module ({dim_latent})'
+        assert net.num_latent_sets == num_latent_sets, 'number of latent sets must be equal between MLP and and latent gene pool container'
+
         self.net = net
 
     @torch.no_grad()
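The dict path above lets the pool own the latent hyperparameters: the caller supplies only the MLP config, and dim_latent / num_latent_sets are injected before the MLP is built. A tiny illustration with assumed values:

net = dict(dims = (512, 256))      # caller-provided MLP config, no latent settings

assert 'dim_latent' not in net
assert 'num_latent_sets' not in net

net.update(dim_latent = 32)        # injected from the gene pool's own settings
net.update(num_latent_sets = 4)

# the dict is then expanded into the conditioned network:
# MLP(dims = (512, 256), dim_latent = 32, num_latent_sets = 4)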
@@ -283,6 +410,7 @@ class LatentGenePool(Module):
         """
         p - population
         g - gene dimension
+        n - number of genes per individual
         """
         assert self.num_latents > 1
 
@@ -307,7 +435,7 @@ class LatentGenePool(Module):
 
         tournament_winner_indices = participant_fitness.topk(2, dim = -1).indices
 
-        tournament_winner_indices = repeat(tournament_winner_indices, '... -> ... g', g = self.dim_latent)
+        tournament_winner_indices = repeat(tournament_winner_indices, '... -> ... n g', g = self.dim_latent, n = self.num_latent_sets)
 
         parents = participants.gather(-2, tournament_winner_indices)
 
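The change above threads the new latent-set axis n through parent selection: the winner indices of each tournament are broadcast over both n and the gene dimension g before gathering. A toy sketch of that selection step, with made-up sizes and gathering explicitly along the participant axis (the layout here is illustrative, not the pool's exact internals):

import torch
from einops import repeat

tournaments, participants_per, n_sets, dim_gene = 3, 4, 2, 16

participants = torch.randn(tournaments, participants_per, n_sets, dim_gene)
participant_fitness = torch.randn(tournaments, participants_per)

# the two fittest participants of each tournament become the parents
winner_indices = participant_fitness.topk(2, dim = -1).indices                        # (tournaments, 2)
winner_indices = repeat(winner_indices, '... -> ... n g', n = n_sets, g = dim_gene)   # (tournaments, 2, n, g)

parents = participants.gather(1, winner_indices)                                      # (tournaments, 2, n, g)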
@@ -368,3 +496,35 @@ class LatentGenePool(Module):
             latent = latent,
             **kwargs
         )
+
+# EPO - which is just PPO with natural selection of a population of latent variables conditioning the agent
+# the tricky part is that the latent ids for each episode / trajectory needs to be tracked
+
+Memory = namedtuple('Memory', [
+    'state',
+    'latent_gene_id',
+    'action',
+    'log_prob',
+    'reward',
+    'values',
+    'done'
+])
+
+class EPO(Module):
+
+    def __init__(
+        self,
+        agent: Agent,
+        latent_gene_pool: LatentGenePool
+    ):
+        super().__init__()
+
+        self.agent = agent
+        self.latent_gene_pool = latent_gene_pool
+
+    def forward(
+        self,
+        env
+    ) -> list[Memory]:
+
+        raise NotImplementedError
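Taken together, the diff now sketches the full loop: a LatentGenePool of genes, an Actor / Critic pair conditioned on a sampled gene, an Agent wrapper, and an EPO orchestrator whose forward (like Agent.forward) is still a stub. A rough construction-only sketch with made-up sizes, under the assumption (not visible in this diff) that LatentGenePool still accepts its conditioned network as a net dict:

from evolutionary_policy_optimization.epo import (
    Actor, Critic, Agent, EPO, LatentGenePool, Memory
)

dim_state, dim_latent, num_actions = 8, 16, 4

# population of latent "genes", each possibly holding several latent sets
latent_pool = LatentGenePool(
    num_latents = 32,
    dim_latent = dim_latent,
    num_latent_sets = 1,
    net = dict(dims = (64, 64))    # assumed kwarg: built into the latent-conditioned MLP
)

actor = Actor(dim_state, num_actions, dim_hiddens = (64, 64), dim_latent = dim_latent)
critic = Critic(dim_state, dim_hiddens = (64, 64), dim_latent = dim_latent)

epo = EPO(Agent(actor, critic), latent_pool)

# rollouts would be stored as Memory tuples, keyed by the sampled gene's id,
# so each trajectory can later be credited to the latent that conditioned it
# memory = Memory(state, latent_gene_id, action, log_prob, reward, values, done)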
{evolutionary_policy_optimization-0.0.4.dist-info → evolutionary_policy_optimization-0.0.6.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.4
+Version: 0.0.6
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -46,6 +46,8 @@ Provides-Extra: examples-gym
 Requires-Dist: box2d-py; extra == 'examples-gym'
 Requires-Dist: gymnasium[box2d]>=1.0.0; extra == 'examples-gym'
 Requires-Dist: tqdm; extra == 'examples-gym'
+Provides-Extra: test
+Requires-Dist: pytest; extra == 'test'
 Description-Content-Type: text/markdown
 
 <img width="450px" alt="fig1" src="https://github.com/user-attachments/assets/33bef569-e786-4f09-bdee-56bad7ea9e6d" />
@@ -56,7 +58,9 @@ Pytorch implementation of [Evolutionary Policy Optimization](https://web3.arxiv.
 
 This paper stands out, as I have witnessed the positive effects first hand in an [exploratory project](https://github.com/lucidrains/firefly-torch) (mixing evolution with gradient based methods). Perhaps the Alexnet moment for genetic algorithms has not come to pass yet.
 
-Besides their latent variable
+Besides their latent variable strategy, I'll also throw in some attempts with crossover in weight space
+
+Update: I see, mixing genetic algorithms with gradient based method is already a research field, under [Memetic algorithms](https://en.wikipedia.org/wiki/Memetic_algorithm). This is also incidentally what I have concluded what Science is. I am in direct exposure to this phenomenon on a daily basis
 
 ## Usage
 
evolutionary_policy_optimization-0.0.6.dist-info/RECORD

@@ -0,0 +1,7 @@
+evolutionary_policy_optimization/__init__.py,sha256=Nu-_OMA8abe4AiW9Gw6MvbZH0JZpMHMqjeXmkC9-7UU,81
+evolutionary_policy_optimization/epo.py,sha256=vXkwsQE0CNEUPpguZP-XXsuDyIBN-bS3xDJDXpYlTHM,14772
+evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
+evolutionary_policy_optimization-0.0.6.dist-info/METADATA,sha256=M_0SbTqdifHQ_R9LWIe7ZfHMXgCiFDJ0sDpD29ctiNk,4460
+evolutionary_policy_optimization-0.0.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.6.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.6.dist-info/RECORD,,
evolutionary_policy_optimization-0.0.4.dist-info/RECORD

@@ -1,7 +0,0 @@
-evolutionary_policy_optimization/__init__.py,sha256=Nu-_OMA8abe4AiW9Gw6MvbZH0JZpMHMqjeXmkC9-7UU,81
-evolutionary_policy_optimization/epo.py,sha256=jW6wZ_IbTdO05agc9AghDHawLb0rStfOzHKpSh-vEe0,10783
-evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
-evolutionary_policy_optimization-0.0.4.dist-info/METADATA,sha256=ZmVUGRQkqOYs1fAyPXjyvIeyc_mShKVTfRVZsIE_Z1Q,4098
-evolutionary_policy_optimization-0.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.0.4.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.0.4.dist-info/RECORD,,

File without changes