evolutionary-policy-optimization 0.0.29-py3-none-any.whl → 0.0.32-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
--- evolutionary_policy_optimization/epo.py
+++ evolutionary_policy_optimization/epo.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from pathlib import Path
 from collections import namedtuple
 
 import torch
@@ -244,6 +245,13 @@ class Critic(Module):
         dim_state,
         dim_hiddens: tuple[int, ...],
         dim_latent = 0,
+        use_regression = False,
+        hl_gauss_loss_kwargs: dict = dict(
+            min_value = -10.,
+            max_value = 10.,
+            num_bins = 25,
+            sigma = 0.5
+        )
     ):
         super().__init__()
 
@@ -259,23 +267,28 @@ class Critic(Module):
 
         self.mlp = MLP(dims = dim_hiddens, dim_latent = dim_latent)
 
-        self.to_out = nn.Sequential(
-            nn.SiLU(),
-            nn.Linear(dim_last, 1),
-            Rearrange('... 1 -> ...')
+        self.final_act = nn.SiLU()
+
+        self.to_pred = HLGaussLayer(
+            dim = dim_last,
+            use_regression = use_regression,
+            hl_gauss_loss = hl_gauss_loss_kwargs
         )
 
     def forward(
         self,
         state,
-        latent
+        latent,
+        target = None
     ):
 
         hidden = self.init_layer(state)
 
         hidden = self.mlp(hidden, latent)
 
-        return self.to_out(hidden)
+        hidden = self.final_act(hidden)
+
+        return self.to_pred(hidden, target = target)
 
 # criteria for running genetic algorithm
 
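The critic's scalar value head (SiLU → Linear → squeeze) is replaced by an HLGaussLayer, so the value can be learned either as plain regression (use_regression = True) or as an HL-Gauss histogram classification over num_bins bins spanning [min_value, max_value] with smoothing sigma, and passing target into forward routes loss computation through that layer. A minimal usage sketch follows; the shapes, any constructor arguments not visible in this hunk, and the exact return convention of HLGaussLayer when a target is given are assumptions, not something this diff shows:

    # hedged sketch: only the constructor arguments visible in the hunk are used;
    # shapes and return values are illustrative assumptions

    critic = Critic(
        dim_state = 32,
        dim_hiddens = (32, 64, 64),
        dim_latent = 16,
        use_regression = False    # keep the HL-Gauss classification head
    )

    state = torch.randn(8, 32)
    latent = torch.randn(8, 16)

    value = critic(state, latent)                            # no target: value prediction
    loss = critic(state, latent, target = torch.randn(8))    # target given: loss from HLGaussLayer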
@@ -595,6 +608,7 @@ class Agent(Module):
         self.actor = actor
         self.critic = critic
 
+        self.num_latents = latent_gene_pool.num_latents
         self.latent_gene_pool = latent_gene_pool
 
         assert actor.dim_latent == critic.dim_latent == latent_gene_pool.dim_latent
@@ -606,6 +620,39 @@ class Agent(Module):
 
         self.latent_optim = optim_klass(latent_gene_pool.parameters(), lr = latent_lr, **latent_optim_kwargs) if not latent_gene_pool.frozen_latents else None
 
+    def save(self, path, overwrite = False):
+        path = Path(path)
+
+        assert not path.exists() or overwrite
+
+        pkg = dict(
+            actor = self.actor.state_dict(),
+            critic = self.critic.state_dict(),
+            latents = self.latent_gene_pool.state_dict(),
+            actor_optim = self.actor_optim.state_dict(),
+            critic_optim = self.critic_optim.state_dict(),
+            latent_optim = self.latent_optim.state_dict() if exists(self.latent_optim) else None
+        )
+
+        torch.save(pkg, str(path))
+
+    def load(self, path):
+        path = Path(path)
+
+        assert path.exists()
+
+        pkg = torch.load(str(path), weights_only = True)
+
+        self.actor.load_state_dict(pkg['actor'])
+        self.critic.load_state_dict(pkg['critic'])
+        self.latent_gene_pool.load_state_dict(pkg['latents'])
+
+        self.actor_optim.load_state_dict(pkg['actor_optim'])
+        self.critic_optim.load_state_dict(pkg['critic_optim'])
+
+        if exists(pkg.get('latent_optim', None)):
+            self.latent_optim.load_state_dict(pkg['latent_optim'])
+
     def get_actor_actions(
         self,
         state,
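Agent gains simple checkpointing: save bundles the actor, critic, and latent gene pool state dicts plus the optimizer states into one file via torch.save (refusing to overwrite an existing path unless overwrite = True), and load restores them with torch.load(..., weights_only = True). A minimal usage sketch, assuming agent is an already constructed Agent and the path is arbitrary:

    # hedged sketch: `agent` is assumed to be an existing Agent instance
    agent.save('./agent.pt', overwrite = True)

    # later, on an Agent constructed with the same hyperparameters
    agent.load('./agent.pt')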
@@ -630,8 +677,10 @@ class Agent(Module):
 
     def forward(
         self,
-        memories: list[Memory]
+        memories_and_next_value: MemoriesAndNextValue
     ):
+        memories, next_value = memories_and_next_value
+
         raise NotImplementedError
 
 # reinforcement learning related - ppo
@@ -715,6 +764,11 @@ Memory = namedtuple('Memory', [
    'done'
 ])
 
+MemoriesAndNextValue = namedtuple('MemoriesAndNextValue', [
+    'memories',
+    'next_value'
+])
+
 class EPO(Module):
 
     def __init__(
@@ -727,6 +781,6 @@ class EPO(Module):
     def forward(
         self,
         env
-    ) -> list[Memory]:
+    ) -> MemoriesAndNextValue:
 
         raise NotImplementedError
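Rollouts now return the collected memories together with a bootstrap value for the final next state, bundled in the new MemoriesAndNextValue namedtuple; Agent.forward unpacks it before the (still unimplemented) learning step, and EPO.forward is annotated to return it. A minimal sketch of constructing and unpacking the tuple; the Memory fields other than 'done' and the actual rollout logic are not shown in this diff, so rollout_memories and bootstrap_value below are stand-ins:

    # rollout_memories: list[Memory] gathered from the environment (stand-in)
    # bootstrap_value: critic value estimate for the state after the last step (stand-in)

    mem_and_next = MemoriesAndNextValue(
        memories = rollout_memories,
        next_value = bootstrap_value
    )

    memories, next_value = mem_and_next   # same unpacking Agent.forward performs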
--- /dev/null
+++ evolutionary_policy_optimization/mock_env.py
@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+import torch
+from torch import tensor, randn, randint, zeros
+from torch.nn import Module
+
+# mock env
+
+class Env(Module):
+    def __init__(
+        self,
+        state_shape: tuple[int, ...]
+    ):
+        super().__init__()
+        self.state_shape = state_shape
+        self.register_buffer('dummy', tensor(0))
+
+    @property
+    def device(self):
+        return self.dummy.device
+
+    def reset(
+        self
+    ):
+        state = randn(self.state_shape, device = self.device)
+        return state
+
+    def forward(
+        self,
+        actions,
+    ):
+        state = randn(self.state_shape, device = self.device)
+        reward = randint(0, 5, (), device = self.device).float()
+        done = zeros((), device = self.device, dtype = torch.bool)
+
+        return state, reward, done
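The new mock environment produces random states and rewards and never signals done, which is enough to smoke-test the rollout plumbing without a real environment. A minimal usage sketch (the module path matches the RECORD entry below; the action passed in is arbitrary, since the mock ignores it):

    from evolutionary_policy_optimization.mock_env import Env

    env = Env((5,))                          # state_shape = (5,)

    state = env.reset()                      # random initial state of shape (5,)
    next_state, reward, done = env(None)     # action is ignored by the mock

    assert next_state.shape == (5,)
    assert not done.item()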
--- evolutionary_policy_optimization-0.0.29.dist-info/METADATA
+++ evolutionary_policy_optimization-0.0.32.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.29
+Version: 0.0.32
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
--- /dev/null
+++ evolutionary_policy_optimization-0.0.32.dist-info/RECORD
@@ -0,0 +1,8 @@
+evolutionary_policy_optimization/__init__.py,sha256=Qavcia0n13jjaWIS_LPW7QrxSLT_BBeKujCjF9kQjbA,133
+evolutionary_policy_optimization/epo.py,sha256=MUcCJLE9cNZS84m5Dhl9qD2ygptvJSuDe6ElwardtgA,23525
+evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
+evolutionary_policy_optimization/mock_env.py,sha256=3xrd-gwjZeVd_sEvxIyX0lppnMWcfQGOapO-XjKmExI,816
+evolutionary_policy_optimization-0.0.32.dist-info/METADATA,sha256=NfF4ogDZA7ea4vLWHO_rl1ixapXuKIBeuy7tKzEFCTY,4958
+evolutionary_policy_optimization-0.0.32.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.32.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.32.dist-info/RECORD,,

--- evolutionary_policy_optimization-0.0.29.dist-info/RECORD
+++ /dev/null
@@ -1,7 +0,0 @@
-evolutionary_policy_optimization/__init__.py,sha256=Qavcia0n13jjaWIS_LPW7QrxSLT_BBeKujCjF9kQjbA,133
-evolutionary_policy_optimization/epo.py,sha256=4iuro11yTpRNzFfSoRZARnOiTDIJYndWmVaUAqk3--E,21826
-evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
-evolutionary_policy_optimization-0.0.29.dist-info/METADATA,sha256=C4gxOaspzHqA7TN5iQ8cDIFkg8llS8kg4y5Xg_ke2Qc,4958
-evolutionary_policy_optimization-0.0.29.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.0.29.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.0.29.dist-info/RECORD,,