evolutionary-policy-optimization 0.0.28__tar.gz → 0.0.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (13)
  1. {evolutionary_policy_optimization-0.0.28 → evolutionary_policy_optimization-0.0.31}/PKG-INFO +1 -1
  2. {evolutionary_policy_optimization-0.0.28 → evolutionary_policy_optimization-0.0.31}/evolutionary_policy_optimization/epo.py +68 -10
  3. evolutionary_policy_optimization-0.0.31/evolutionary_policy_optimization/mock_env.py +36 -0
  4. {evolutionary_policy_optimization-0.0.28 → evolutionary_policy_optimization-0.0.31}/pyproject.toml +1 -1
  5. {evolutionary_policy_optimization-0.0.28 → evolutionary_policy_optimization-0.0.31}/tests/test_epo.py +5 -0
  6. {evolutionary_policy_optimization-0.0.28 → evolutionary_policy_optimization-0.0.31}/.github/workflows/python-publish.yml +0 -0
  7. {evolutionary_policy_optimization-0.0.28 → evolutionary_policy_optimization-0.0.31}/.github/workflows/test.yml +0 -0
  8. {evolutionary_policy_optimization-0.0.28 → evolutionary_policy_optimization-0.0.31}/.gitignore +0 -0
  9. {evolutionary_policy_optimization-0.0.28 → evolutionary_policy_optimization-0.0.31}/LICENSE +0 -0
  10. {evolutionary_policy_optimization-0.0.28 → evolutionary_policy_optimization-0.0.31}/README.md +0 -0
  11. {evolutionary_policy_optimization-0.0.28 → evolutionary_policy_optimization-0.0.31}/evolutionary_policy_optimization/__init__.py +0 -0
  12. {evolutionary_policy_optimization-0.0.28 → evolutionary_policy_optimization-0.0.31}/evolutionary_policy_optimization/experimental.py +0 -0
  13. {evolutionary_policy_optimization-0.0.28 → evolutionary_policy_optimization-0.0.31}/requirements.txt +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.28
+Version: 0.0.31
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -1,5 +1,6 @@
 from __future__ import annotations

+from pathlib import Path
 from collections import namedtuple

 import torch
@@ -244,6 +245,13 @@ class Critic(Module):
         dim_state,
         dim_hiddens: tuple[int, ...],
         dim_latent = 0,
+        use_regression = False,
+        hl_gauss_loss_kwargs: dict = dict(
+            min_value = -10.,
+            max_value = 10.,
+            num_bins = 25,
+            sigma = 0.5
+        )
     ):
         super().__init__()

@@ -259,23 +267,28 @@ class Critic(Module):

         self.mlp = MLP(dims = dim_hiddens, dim_latent = dim_latent)

-        self.to_out = nn.Sequential(
-            nn.SiLU(),
-            nn.Linear(dim_last, 1),
-            Rearrange('... 1 -> ...')
+        self.final_act = nn.SiLU()
+
+        self.to_pred = HLGaussLayer(
+            dim = dim_last,
+            use_regression = False,
+            hl_gauss_loss = hl_gauss_loss_kwargs
         )

     def forward(
         self,
         state,
-        latent
+        latent,
+        target = None
     ):

         hidden = self.init_layer(state)

         hidden = self.mlp(hidden, latent)

-        return self.to_out(hidden)
+        hidden = self.final_act(hidden)
+
+        return self.to_pred(hidden, target = target)

 # criteria for running genetic algorithm

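The critic's scalar regression head is replaced by an HLGaussLayer, so value prediction becomes a classification over binned values (the HL-Gauss approach), and the forward pass now also accepts a target. A minimal usage sketch, assuming the layer's usual behavior of returning a loss when a target is supplied and a scalar prediction otherwise; the dimensions and batch size below are arbitrary choices, not from the package:

import torch
from evolutionary_policy_optimization.epo import Critic

critic = Critic(
    dim_state = 512,
    dim_hiddens = (256, 128),
    dim_latent = 32
)

state   = torch.randn(2, 512)
latent  = torch.randn(2, 32)
returns = torch.randn(2).clamp(-10., 10.)         # targets should lie inside [min_value, max_value]

value = critic(state, latent)                     # no target -> expected value per sample
loss  = critic(state, latent, target = returns)   # target given -> HL-Gauss classification loss
loss.backward()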
@@ -508,8 +521,12 @@ class LatentGenePool(Module):
         randperm = torch.randn(genes.shape[:-1], device = device).argsort(dim = -1)

         migrate_mask = randperm < self.num_migrate
-        maybe_migrated_genes = torch.roll(genes, 1, dims = 0)
-        genes = einx.where('i p, i p g, i p g', migrate_mask, maybe_migrated_genes, genes)
+
+        nonmigrants = rearrange(genes[~migrate_mask], '(i p) g -> i p g', i = islands)
+        migrants = rearrange(genes[migrate_mask], '(i p) g -> i p g', i = islands)
+        migrants = torch.roll(migrants, 1, dims = 0)
+
+        genes = cat((nonmigrants, migrants), dim = 1)

         # add back the elites

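Island migration no longer rolls whole populations through einx.where; instead exactly num_migrate genes per island are gathered, rolled one island forward, and concatenated back onto the non-migrants. A self-contained sketch of that step with made-up sizes (every name and shape below is illustrative):

import torch
from torch import cat
from einops import rearrange

islands, pop, gene_dim, num_migrate = 4, 8, 16, 2

genes = torch.randn(islands, pop, gene_dim)

# choose exactly num_migrate members per island
randperm = torch.randn(genes.shape[:-1]).argsort(dim = -1)
migrate_mask = randperm < num_migrate

# split migrants from stay-at-home genes, island by island
nonmigrants = rearrange(genes[~migrate_mask], '(i p) g -> i p g', i = islands)
migrants    = rearrange(genes[migrate_mask], '(i p) g -> i p g', i = islands)

# ring topology: island i receives the migrants of island i - 1
migrants = torch.roll(migrants, 1, dims = 0)

genes = cat((nonmigrants, migrants), dim = 1)
assert genes.shape == (islands, pop, gene_dim)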
@@ -591,6 +608,7 @@ class Agent(Module):
         self.actor = actor
         self.critic = critic

+        self.num_latents = latent_gene_pool.num_latents
         self.latent_gene_pool = latent_gene_pool

         assert actor.dim_latent == critic.dim_latent == latent_gene_pool.dim_latent
@@ -602,6 +620,39 @@ class Agent(Module):

         self.latent_optim = optim_klass(latent_gene_pool.parameters(), lr = latent_lr, **latent_optim_kwargs) if not latent_gene_pool.frozen_latents else None

+    def save(self, path, overwrite = False):
+        path = Path(path)
+
+        assert not path.exists() or overwrite
+
+        pkg = dict(
+            actor = self.actor.state_dict(),
+            critic = self.critic.state_dict(),
+            latents = self.latent_gene_pool.state_dict(),
+            actor_optim = self.actor_optim.state_dict(),
+            critic_optim = self.critic_optim.state_dict(),
+            latent_optim = self.latent_optim.state_dict() if exists(self.latent_optim) else None
+        )
+
+        torch.save(pkg, str(path))
+
+    def load(self, path):
+        path = Path(path)
+
+        assert path.exists()
+
+        pkg = torch.load(str(path), weights_only = True)
+
+        self.actor.load_state_dict(pkg['actor'])
+        self.critic.load_state_dict(pkg['critic'])
+        self.latent_gene_pool.load_state_dict(pkg['latents'])
+
+        self.actor_optim.load_state_dict(pkg['actor_optim'])
+        self.critic_optim.load_state_dict(pkg['critic_optim'])
+
+        if exists(pkg.get('latent_optim', None)):
+            self.latent_optim.load_state_dict(pkg['latent_optim'])
+
     def get_actor_actions(
         self,
         state,
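Agent gains save and load: a single torch.save package holding actor, critic, latent pool, and optimizer states, written only if the path is new or overwrite = True, and read back with weights_only = True. A brief hedged usage sketch; the file name and the two agent instances below are assumptions, not from this diff:

# `agent` and `fresh_agent` are assumed to be two Agent instances built with the same configuration
agent.save('./agent.pt', overwrite = True)   # one torch.save package: actor, critic, latents, optimizer states
fresh_agent.load('./agent.pt')               # asserts the file exists, then restores modules and optimizers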
@@ -626,8 +677,10 @@ class Agent(Module):

     def forward(
         self,
-        memories: list[Memory]
+        memories_and_next_value: MemoriesAndNextValue
     ):
+        memories, next_value = memories_and_next_value
+
         raise NotImplementedError

 # reinforcement learning related - ppo
@@ -711,6 +764,11 @@ Memory = namedtuple('Memory', [
     'done'
 ])

+MemoriesAndNextValue = namedtuple('MemoriesAndNextValue', [
+    'memories',
+    'next_value'
+])
+
 class EPO(Module):

     def __init__(
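The rollout contract changes accordingly: EPO.forward now hands back a MemoriesAndNextValue bundle rather than a bare list of Memory, pairing the collected memories with a bootstrap value for the final state, and Agent.forward unpacks it first thing. A toy construction of the bundle, using only the namedtuple above plus a placeholder next value:

import torch
from collections import namedtuple

MemoriesAndNextValue = namedtuple('MemoriesAndNextValue', ['memories', 'next_value'])

memories = []                    # would hold the Memory tuples gathered during a rollout
next_value = torch.zeros(())     # placeholder bootstrap value for the state after the final step

rollout = MemoriesAndNextValue(memories, next_value)
memories, next_value = rollout   # Agent.forward now unpacks the bundle the same way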
@@ -723,6 +781,6 @@ class EPO(Module):
     def forward(
         self,
         env
-    ) -> list[Memory]:
+    ) -> MemoriesAndNextValue:

         raise NotImplementedError
@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+import torch
+from torch import tensor, randn, randint, zeros
+from torch.nn import Module
+
+# mock env
+
+class Env(Module):
+    def __init__(
+        self,
+        state_shape: tuple[int, ...]
+    ):
+        super().__init__()
+        self.state_shape = state_shape
+        self.register_buffer('dummy', tensor(0))
+
+    @property
+    def device(self):
+        return self.dummy.device
+
+    def reset(
+        self
+    ):
+        state = randn(self.state_shape, device = self.device)
+        return state
+
+    def forward(
+        self,
+        actions,
+    ):
+        state = randn(self.state_shape, device = self.device)
+        reward = randint(0, 5, (), device = self.device).float()
+        done = zeros((), device = self.device, dtype = torch.bool)
+
+        return state, reward, done
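The new mock environment lets the rollout loop run without a real simulator: reset returns a random state of the configured shape, and stepping ignores the given actions, returning a fresh random state, an integer reward in [0, 5), and done = False. A usage sketch (the state shape and the action tensor are arbitrary choices):

import torch
from evolutionary_policy_optimization.mock_env import Env

env = Env((512,))                       # states will have shape (512,)

state = env.reset()

for _ in range(4):
    action = torch.randint(0, 5, ())    # the mock env ignores the action, any tensor works
    state, reward, done = env(action)   # fresh random state, reward in [0, 5), done always False
    if done.item():
        break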
@@ -1,6 +1,6 @@
 [project]
 name = "evolutionary-policy-optimization"
-version = "0.0.28"
+version = "0.0.31"
 description = "EPO - Pytorch"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -68,3 +68,8 @@ def test_create_agent(
    fitness = torch.randn(128)

    agent.update_latent_gene_pool_(fitness) # update once
+
+    # saving and loading
+
+    agent.save('./agent.pt', overwrite = True)
+    agent.load('./agent.pt')