evolutionary-policy-optimization 0.1.10__tar.gz → 0.1.14__tar.gz
This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/PKG-INFO +12 -1
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/README.md +11 -0
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/evolutionary_policy_optimization/epo.py +81 -54
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/pyproject.toml +1 -1
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/tests/test_epo.py +5 -2
- evolutionary_policy_optimization-0.1.14/train_gym.py +63 -0
- evolutionary_policy_optimization-0.1.10/train_gym.py +0 -44
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/.github/workflows/python-publish.yml +0 -0
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/.github/workflows/test.yml +0 -0
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/.gitignore +0 -0
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/LICENSE +0 -0
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/evolutionary_policy_optimization/__init__.py +0 -0
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/evolutionary_policy_optimization/distributed.py +0 -0
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/evolutionary_policy_optimization/env_wrappers.py +0 -0
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/evolutionary_policy_optimization/experimental.py +0 -0
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/evolutionary_policy_optimization/mock_env.py +0 -0
- {evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/requirements.txt +0 -0
{evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.1.10
+Version: 0.1.14
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization

@@ -215,4 +215,15 @@ agent.load('./agent.pt')
 }
 ```

+```bibtex
+@article{Ash2019OnTD,
+    title   = {On the Difficulty of Warm-Starting Neural Network Training},
+    author  = {Jordan T. Ash and Ryan P. Adams},
+    journal = {ArXiv},
+    year    = {2019},
+    volume  = {abs/1910.08475},
+    url     = {https://api.semanticscholar.org/CorpusID:204788802}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
{evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/README.md
RENAMED

@@ -162,4 +162,15 @@ agent.load('./agent.pt')
 }
 ```

+```bibtex
+@article{Ash2019OnTD,
+    title   = {On the Difficulty of Warm-Starting Neural Network Training},
+    author  = {Jordan T. Ash and Ryan P. Adams},
+    journal = {ArXiv},
+    year    = {2019},
+    volume  = {abs/1910.08475},
+    url     = {https://api.semanticscholar.org/CorpusID:204788802}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
{evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/evolutionary_policy_optimization/epo.py
RENAMED

@@ -76,6 +76,14 @@ def maybe(fn):
 def interface_torch_numpy(fn, device):
     # for a given function, move all inputs from torch tensor to numpy, and all outputs from numpy to torch tensor

+    def to_torch_tensor(t):
+        if isinstance(t, (np.ndarray, np.float64)):
+            t = from_numpy(np.array(t))
+        elif isinstance(t, (float, int, bool)):
+            t = tensor(t)
+
+        return t.to(device)
+
     @wraps(fn)
     def decorated_fn(*args, **kwargs):

@@ -83,7 +91,7 @@ def interface_torch_numpy(fn, device):

         out = fn(*args, **kwargs)

-        out = tree_map(
+        out = tree_map(to_torch_tensor, out)
         return out

     return decorated_fn
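The new nested helper `to_torch_tensor` is what `tree_map` now applies to the wrapped function's output, so nested environment returns (arrays, floats, ints, bools) come back as tensors on the configured device. Below is a minimal self-contained sketch of that output-conversion pattern; the wrapped `fake_step` function is hypothetical and only stands in for an environment call:

```python
from functools import wraps

import numpy as np
import torch
from torch import tensor, from_numpy
from torch.utils._pytree import tree_map

def interface_torch_numpy_sketch(fn, device):
    # sketch of the output half: convert numpy / python scalar leaves to torch tensors on `device`

    def to_torch_tensor(t):
        if isinstance(t, (np.ndarray, np.float64)):
            t = from_numpy(np.array(t))
        elif isinstance(t, (float, int, bool)):
            t = tensor(t)
        return t.to(device)

    @wraps(fn)
    def decorated_fn(*args, **kwargs):
        out = fn(*args, **kwargs)
        return tree_map(to_torch_tensor, out)

    return decorated_fn

# hypothetical numpy-based step function standing in for an environment
def fake_step(action):
    return np.random.randn(4), 1.0, False

step = interface_torch_numpy_sketch(fake_step, device = torch.device('cpu'))
obs, reward, done = step(0)   # each output is now a torch tensor on cpu
```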
@@ -146,6 +154,24 @@ def temp_batch_dim(fn):

     return inner

+# plasticity related
+
+def shrink_and_perturb_(
+    module,
+    shrink_factor = 0.5,
+    perturb_factor = 0.01
+):
+    # Shrink & Perturb
+    # Ash et al. https://arxiv.org/abs/1910.08475
+
+    assert 0. <= shrink_factor <= 1.
+
+    for p in module.parameters():
+        noise = torch.randn_like(p.data)
+        p.data.mul_(1. - shrink_factor).add_(noise * perturb_factor)
+
+    return module
+
 # fitness related

 def get_fitness_scores(
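The new `shrink_and_perturb_` implements Shrink & Perturb from Ash & Adams (the paper added to the citations above): every parameter is scaled by `1 - shrink_factor` and perturbed in place with Gaussian noise scaled by `perturb_factor`, a common way to restore plasticity when continuing training from a warm start. A small usage sketch; the `nn.Linear` module and the numbers are purely illustrative:

```python
import torch
from torch import nn

from evolutionary_policy_optimization.epo import shrink_and_perturb_

net = nn.Linear(4, 2)
before = net.weight.detach().clone()

shrink_and_perturb_(net, shrink_factor = 0.5, perturb_factor = 0.01)

# each weight is now 0.5 * old value plus ~N(0, 0.01^2) noise
print((net.weight.detach() - 0.5 * before).abs().mean())
```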
@@ -267,37 +293,42 @@ class PowerLawDist(Module):
 class MLP(Module):
     def __init__(
         self,
-
+        dim,
+        depth,
         dim_latent = 0,
+        expansion_factor = 2.
     ):
         super().__init__()
         dim_latent = default(dim_latent, 0)

-        assert len(dims) >= 2, 'must have at least two dimensions'
-
-        # add the latent to the first dim
-
-        first_dim, *rest_dims = dims
-        dims = (first_dim + dim_latent, *rest_dims)
-
         self.dim_latent = dim_latent

         self.needs_latent = dim_latent > 0

         self.encode_latent = nn.Sequential(
-            Linear(dim_latent,
+            Linear(dim_latent, dim),
             nn.SiLU()
         ) if self.needs_latent else None

-
+        dim_hidden = int(dim * expansion_factor)

-
+        # layers

-
+        layers = []
+
+        for _ in range(depth):
+            layer = nn.Sequential(
+                nn.LayerNorm(dim, bias = False),
+                nn.Linear(dim, dim_hidden),
+                nn.SiLU(),
+                nn.Linear(dim_hidden, dim),
+            )

-
+            layers.append(layer)

-
+        # modules across layers
+
+        self.layers = ModuleList(layers)

     def forward(
         self,

@@ -319,17 +350,14 @@ class MLP(Module):

         assert latent.shape[0] == x.shape[0], f'received state with batch size {x.shape[0]} but latent ids received had batch size {latent_id.shape[0]}'

-        x =
+        x = x * latent

         # layers

         for ind, layer in enumerate(self.layers, start = 1):
             is_last = ind == len(self.layers)

-            x = layer(x)
-
-            if not is_last:
-                x = F.silu(x)
+            x = layer(x) + x

         return x

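Taken together, the two MLP hunks replace the old `dims` tuple interface with a single width `dim` and a `depth`, build pre-norm feedforward blocks (`LayerNorm → Linear → SiLU → Linear`) with residual connections, and fold the latent in by elementwise multiplication instead of concatenating it onto the first layer. A standalone sketch of the new block structure, with latent conditioning omitted and all names illustrative:

```python
import torch
from torch import nn

class ResidualMLPSketch(nn.Module):
    # pre-norm residual feedforward stack, mirroring the refactored MLP
    def __init__(self, dim, depth, expansion_factor = 2.):
        super().__init__()
        dim_hidden = int(dim * expansion_factor)

        self.layers = nn.ModuleList([
            nn.Sequential(
                nn.LayerNorm(dim, bias = False),
                nn.Linear(dim, dim_hidden),
                nn.SiLU(),
                nn.Linear(dim_hidden, dim),
            )
            for _ in range(depth)
        ])

    def forward(self, x):
        for layer in self.layers:
            x = layer(x) + x   # residual; no inter-layer activation needed
        return x

mlp = ResidualMLPSketch(dim = 128, depth = 4)
out = mlp(torch.randn(2, 128))   # shape (2, 128)
```

With residual blocks, depth can be varied freely without re-tuning a tuple of hidden sizes.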
@@ -341,26 +369,24 @@ class Actor(Module):
         self,
         dim_state,
         num_actions,
-
+        dim,
+        mlp_depth,
         dim_latent = 0,
     ):
         super().__init__()

-        assert len(dim_hiddens) >= 2
-        dim_first, *_, dim_last = dim_hiddens
-
         self.dim_latent = dim_latent

         self.init_layer = nn.Sequential(
-            nn.Linear(dim_state,
+            nn.Linear(dim_state, dim),
             nn.SiLU()
         )

-        self.mlp = MLP(
+        self.mlp = MLP(dim = dim, depth = mlp_depth, dim_latent = dim_latent)

         self.to_out = nn.Sequential(
-            nn.
-            nn.Linear(
+            nn.LayerNorm(dim, bias = False),
+            nn.Linear(dim, num_actions, bias = False),
         )

     def forward(

@@ -379,34 +405,31 @@ class Critic(Module):
     def __init__(
         self,
         dim_state,
-
+        dim,
+        mlp_depth,
         dim_latent = 0,
         use_regression = False,
         hl_gauss_loss_kwargs: dict = dict(
-            min_value = -
-            max_value =
-            num_bins =
-            sigma = 0.5
+            min_value = -100.,
+            max_value = 100.,
+            num_bins = 200
         )
     ):
         super().__init__()

-        assert len(dim_hiddens) >= 2
-        dim_first, *_, dim_last = dim_hiddens
-
         self.dim_latent = dim_latent

         self.init_layer = nn.Sequential(
-            nn.Linear(dim_state,
+            nn.Linear(dim_state, dim),
             nn.SiLU()
         )

-        self.mlp = MLP(
+        self.mlp = MLP(dim = dim, depth = mlp_depth, dim_latent = dim_latent)

-        self.
+        self.final_norm = nn.LayerNorm(dim, bias = False)

         self.to_pred = HLGaussLayer(
-            dim =
+            dim = dim,
             use_regression = use_regression,
             hl_gauss_loss = hl_gauss_loss_kwargs
         )

@@ -470,7 +493,7 @@ class Critic(Module):

         hidden = self.mlp(hidden, latent)

-        hidden = self.
+        hidden = self.final_norm(hidden)

         pred_kwargs = dict(return_logits = return_logits) if not self.use_regression else dict()
         return self.to_pred(hidden, **pred_kwargs)
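Actor and Critic follow the same convention: each now takes its state dimension plus a single `dim` and `mlp_depth`, ends with a bias-free `LayerNorm`, and the critic keeps its HL-Gauss head (now defaulting to 200 bins over [-100, 100]). A construction-only sketch against the new signatures, with arbitrary sizes:

```python
from evolutionary_policy_optimization.epo import Actor, Critic

actor = Actor(
    dim_state = 8,
    num_actions = 4,
    dim = 128,
    mlp_depth = 2,
    dim_latent = 32
)

critic = Critic(
    dim_state = 8,
    dim = 256,
    mlp_depth = 4,
    dim_latent = 32
)
```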
@@ -825,16 +848,16 @@ class Agent(Module):
         critic: Critic,
         latent_gene_pool: LatentGenePool | None,
         optim_klass = AdoptAtan2,
-        actor_lr =
-        critic_lr =
+        actor_lr = 8e-4,
+        critic_lr = 8e-4,
         latent_lr = 1e-5,
-        actor_weight_decay =
-        critic_weight_decay =
+        actor_weight_decay = 5e-4,
+        critic_weight_decay = 5e-4,
         diversity_aux_loss_weight = 0.,
         use_critic_ema = True,
-        critic_ema_beta = 0.
-        max_grad_norm = 0
-        batch_size =
+        critic_ema_beta = 0.95,
+        max_grad_norm = 1.0,
+        batch_size = 32,
         calc_gae_kwargs: dict = dict(
             use_accelerated = False,
             gamma = 0.99,

@@ -1251,8 +1274,10 @@ def create_agent(
     num_latents,
     dim_latent,
     actor_num_actions,
-
-
+    actor_dim,
+    actor_mlp_depth,
+    critic_dim,
+    critic_mlp_depth,
     use_critic_ema = True,
     latent_gene_pool_kwargs: dict = dict(),
     actor_kwargs: dict = dict(),

@@ -1275,14 +1300,16 @@ def create_agent(
         num_actions = actor_num_actions,
         dim_state = dim_state,
         dim_latent = dim_latent,
-
+        dim = actor_dim,
+        mlp_depth = actor_mlp_depth,
         **actor_kwargs
     )

     critic = Critic(
         dim_state = dim_state,
         dim_latent = dim_latent,
-
+        dim = critic_dim,
+        mlp_depth = critic_mlp_depth,
         **critic_kwargs
     )

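`create_agent` is updated accordingly: the former `actor_dim_hiddens` / `critic_dim_hiddens` tuples (still visible in the deleted train_gym.py below) give way to per-network `dim` and `mlp_depth` arguments. A call sketch with illustrative values, assuming the factory is imported from the package root as in the project README:

```python
from evolutionary_policy_optimization import create_agent

agent = create_agent(
    dim_state = 512,
    num_latents = 16,
    dim_latent = 32,
    actor_num_actions = 5,
    actor_dim = 256,
    actor_mlp_depth = 2,
    critic_dim = 256,
    critic_mlp_depth = 4,
    use_critic_ema = True
)
```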
@@ -1457,7 +1484,7 @@ class EPO(Module):
                     log_prob,
                     reward,
                     value,
-
+                    terminated
                 )

                 memory = Memory(*tuple(t.cpu() for t in memory))

@@ -1469,7 +1496,7 @@ class EPO(Module):
                 if not terminated:
                     # add bootstrap value if truncated

-                    next_value = temp_batch_dim(self.agent.get_critic_values)(state, latent = latent)
+                    next_value = temp_batch_dim(self.agent.get_critic_values)(state, latent = latent, use_ema_if_available = True, use_unwrapped_model = True)

                 memory_for_gae = memory._replace(
                     episode_id = invalid_episode,
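The last change feeds the end-of-rollout state through the EMA critic (on the unwrapped model) to get the bootstrap value used when an episode is truncated rather than terminated: that value stands in for the rewards that were cut off, entering advantage estimation as the value of the state after the last stored step. A minimal, hypothetical GAE sketch (not the library's internal `calc_gae`) showing where the bootstrap value enters:

```python
import torch

def gae_sketch(rewards, values, next_value, gamma = 0.99, lam = 0.95):
    # rewards: (T,), values: (T,); next_value is the critic's estimate for the
    # truncated final state, appended so the last delta can look one step ahead
    values = torch.cat((values, next_value.reshape(1)))

    advantages = torch.zeros_like(rewards)
    gae = 0.

    for t in reversed(range(rewards.shape[0])):
        delta = rewards[t] + gamma * values[t + 1] - values[t]
        gae = delta + gamma * lam * gae
        advantages[t] = gae

    return advantages

rewards = torch.tensor([1., 1., 1.])
values  = torch.tensor([0.5, 0.6, 0.7])
advantages = gae_sketch(rewards, values, next_value = torch.tensor(0.8))
```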
{evolutionary_policy_optimization-0.1.10 → evolutionary_policy_optimization-0.1.14}/tests/test_epo.py
RENAMED

@@ -1,10 +1,11 @@
 import pytest

 import torch
-from evolutionary_policy_optimization import (
+from evolutionary_policy_optimization.epo import (
     LatentGenePool,
     Actor,
-    Critic
+    Critic,
+    shrink_and_perturb_
 )

 @pytest.mark.parametrize('latent_ids', (2, (2, 4)))

@@ -128,3 +129,5 @@ def test_e2e_with_mock_env(

     agent.save('./agent.pt', overwrite = True)
     agent.load('./agent.pt')
+
+    shrink_and_perturb_(agent)
evolutionary_policy_optimization-0.1.14/train_gym.py
ADDED

@@ -0,0 +1,63 @@
+import torch
+
+from evolutionary_policy_optimization import (
+    EPO,
+    GymnasiumEnvWrapper
+)
+
+# gymnasium
+
+from shutil import rmtree
+import gymnasium as gym
+
+env = gym.make(
+    'LunarLander-v3',
+    render_mode = 'rgb_array'
+)
+
+rmtree('./recordings', ignore_errors = True)
+
+env = gym.wrappers.RecordVideo(
+    env = env,
+    video_folder = './recordings',
+    name_prefix = 'lunar-video',
+    episode_trigger = lambda eps_num: (eps_num % 250) == 0,
+    disable_logger = True
+)
+
+env = GymnasiumEnvWrapper(env)
+
+# epo
+
+agent = env.to_epo_agent(
+    num_latents = 1,
+    dim_latent = 32,
+    actor_dim = 128,
+    actor_mlp_depth = 2,
+    critic_dim = 256,
+    critic_mlp_depth = 4,
+    latent_gene_pool_kwargs = dict(
+        frac_natural_selected = 0.5,
+        frac_tournaments = 0.5
+    ),
+    accelerate_kwargs = dict(
+        cpu = False
+    ),
+    actor_optim_kwargs = dict(
+        cautious_factor = 0.1,
+    ),
+    critic_optim_kwargs = dict(
+        cautious_factor = 0.1,
+    ),
+)
+
+epo = EPO(
+    agent,
+    episodes_per_latent = 50,
+    max_episode_length = 500,
+    action_sample_temperature = 1.,
+)
+
+epo(agent, env, num_learning_cycles = 100)
+
+agent.save('./agent.pt', overwrite = True)
evolutionary_policy_optimization-0.1.10/train_gym.py
DELETED

@@ -1,44 +0,0 @@
-import torch
-
-from evolutionary_policy_optimization import (
-    EPO,
-    GymnasiumEnvWrapper
-)
-
-# gymnasium
-
-import gymnasium as gym
-
-env = gym.make(
-    'LunarLander-v3',
-    render_mode = 'rgb_array'
-)
-
-env = GymnasiumEnvWrapper(env)
-
-# epo
-
-agent = env.to_epo_agent(
-    num_latents = 8,
-    dim_latent = 32,
-    actor_dim_hiddens = (256, 128),
-    critic_dim_hiddens = (256, 128, 64),
-    latent_gene_pool_kwargs = dict(
-        frac_natural_selected = 0.5,
-        frac_tournaments = 0.5
-    ),
-    accelerate_kwargs = dict(
-        cpu = False
-    )
-)
-
-epo = EPO(
-    agent,
-    episodes_per_latent = 5,
-    max_episode_length = 10,
-    action_sample_temperature = 1.,
-)
-
-epo(agent, env, num_learning_cycles = 5)
-
-agent.save('./agent.pt', overwrite = True)
The remaining files (.github/workflows/python-publish.yml, .github/workflows/test.yml, .gitignore, LICENSE, evolutionary_policy_optimization/__init__.py, evolutionary_policy_optimization/distributed.py, evolutionary_policy_optimization/env_wrappers.py, evolutionary_policy_optimization/experimental.py, evolutionary_policy_optimization/mock_env.py, requirements.txt) are renamed under the new version prefix with no content changes.