evolutionary-policy-optimization 0.0.17__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- evolutionary_policy_optimization/epo.py
+++ evolutionary_policy_optimization/epo.py
@@ -3,7 +3,7 @@ from __future__ import annotations
 from collections import namedtuple
 
 import torch
-from torch import nn, cat
+from torch import nn, cat, is_tensor, tensor
 import torch.nn.functional as F
 from torch.nn import Linear, Module, ModuleList
 from torch.utils.data import TensorDataset, DataLoader
@@ -176,6 +176,8 @@ class MLP(Module):
         if latent.ndim == 1:
             latent = repeat(latent, 'd -> b d', b = batch)
 
+        assert latent.shape[0] == x.shape[0], f'received state with batch size {x.shape[0]} but latent ids received had batch size {latent_id.shape[0]}'
+
         x = cat((x, latent), dim = -1)
 
         # layers
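A quick, self-contained sketch of the shapes the new assert guards, with made-up dimensions (illustrative only, not code from the package): a single latent gene is broadcast across the batch, and the assert then requires the latent batch to match the state batch before concatenation.

```python
# Minimal sketch of the shape handling the new assert protects (dimensions are made up).
import torch
from einops import repeat

batch, dim_state, dim_latent = 4, 16, 8

x = torch.randn(batch, dim_state)    # per-sample states
latent = torch.randn(dim_latent)     # a single latent gene, no batch dim

if latent.ndim == 1:
    latent = repeat(latent, 'd -> b d', b = batch)  # broadcast latent across the batch

# the added check: latent batch must line up with the state batch
assert latent.shape[0] == x.shape[0]

x = torch.cat((x, latent), dim = -1)  # concat state and latent before the MLP layers
print(x.shape)                        # torch.Size([4, 24])
```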
@@ -206,6 +208,8 @@ class Actor(Module):
         assert len(dim_hiddens) >= 2
         dim_first, *_, dim_last = dim_hiddens
 
+        self.dim_latent = dim_latent
+
         self.init_layer = nn.Sequential(
             nn.Linear(dim_state, dim_first),
             nn.SiLU()
@@ -242,6 +246,8 @@ class Critic(Module):
         assert len(dim_hiddens) >= 2
         dim_first, *_, dim_last = dim_hiddens
 
+        self.dim_latent = dim_latent
+
         self.init_layer = nn.Sequential(
             nn.Linear(dim_state, dim_first),
             nn.SiLU()
@@ -437,25 +443,38 @@ class LatentGenePool(Module):
         net: Module | None = None,
         **kwargs,
     ):
+        device = self.latents.device
 
         # if only 1 latent, assume doing ablation and get lone gene
 
         if not exists(latent_id) and self.num_latents == 1:
             latent_id = 0
 
-        assert 0 <= latent_id < self.num_latents
+        if not is_tensor(latent_id):
+            latent_id = tensor(latent_id, device = device)
+
+        assert (0 <= latent_id).all() and (latent_id < self.num_latents).all()
 
         # fetch latent
 
+        fetching_multiple_latents = latent_id.numel() > 1
+
         latent = self.latents[latent_id]
 
         if self.needs_latent_gate:
             assert exists(state), 'state must be passed in if greater than number of 1 latent set'
 
+            if not fetching_multiple_latents:
+                latent = repeat(latent, '... -> b ...', b = state.shape[0])
+
+            assert latent.shape[0] == state.shape[0]
+
             gates = self.to_latent_gate(state)
-            latent = einsum(latent, gates, 'n g, b n -> b g')
+            latent = einsum(latent, gates, 'b n g, b n -> b g')
+
+        elif fetching_multiple_latents:
+            latent = latent[:, 0]
         else:
-            assert latent.shape[0] == 1
             latent = latent[0]
 
         if not exists(net):
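The upshot of this hunk is that `latent_id` may now be a plain int or a tensor of per-sample ids, with the bounds check vectorized accordingly. A rough standalone sketch of that normalization, using made-up sizes and omitting the `to_latent_gate` gating branch:

```python
# Rough sketch of the new latent_id normalization (sizes are made up;
# the gating branch via to_latent_gate is omitted).
import torch
from torch import is_tensor, tensor

num_latents, dim_latent, batch = 8, 32, 4
latents = torch.randn(num_latents, dim_latent)   # stand-in for self.latents

def fetch(latent_id):
    device = latents.device
    if not is_tensor(latent_id):
        latent_id = tensor(latent_id, device = device)   # ints become 0-dim tensors

    # vectorized bounds check, works for a single id or a batch of ids
    assert (0 <= latent_id).all() and (latent_id < num_latents).all()

    return latents[latent_id]

print(fetch(3).shape)                                          # torch.Size([32])   - single id
print(fetch(torch.randint(0, num_latents, (batch,))).shape)    # torch.Size([4, 32]) - per-sample ids
```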
@@ -490,6 +509,8 @@ class Agent(Module):
 
         self.latent_gene_pool = latent_gene_pool
 
+        assert actor.dim_latent == critic.dim_latent == latent_gene_pool.dim_latent
+
         # optimizers
 
         self.actor_optim = optim_klass(actor.parameters(), lr = actor_lr, **actor_optim_kwargs)
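This guard consumes the `self.dim_latent` attributes added to `Actor` and `Critic` above, so mismatched latent dimensions now fail at `Agent` construction rather than deep inside a forward pass. A tiny illustration with lightweight stand-ins (it does not construct the package's real classes):

```python
# Illustration of the new consistency guard using stand-in objects;
# the package's real Actor / Critic / LatentGenePool are not constructed here.
from types import SimpleNamespace

actor = SimpleNamespace(dim_latent = 32)
critic = SimpleNamespace(dim_latent = 32)
latent_gene_pool = SimpleNamespace(dim_latent = 64)   # deliberately mismatched

try:
    assert actor.dim_latent == critic.dim_latent == latent_gene_pool.dim_latent
except AssertionError:
    print('latent dims disagree - Agent construction would now fail fast')
```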
--- evolutionary_policy_optimization-0.0.17.dist-info/METADATA
+++ evolutionary_policy_optimization-0.0.20.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.17
+Version: 0.0.20
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -33,7 +33,7 @@ Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: >=3.8
+Requires-Python: >=3.9
 Requires-Dist: adam-atan2-pytorch
 Requires-Dist: assoc-scan
 Requires-Dist: einops>=0.8.0
@@ -44,10 +44,6 @@ Provides-Extra: examples
 Requires-Dist: numpy; extra == 'examples'
 Requires-Dist: pufferlib>=2.0.6; extra == 'examples'
 Requires-Dist: tqdm; extra == 'examples'
-Provides-Extra: examples-gym
-Requires-Dist: box2d-py; extra == 'examples-gym'
-Requires-Dist: gymnasium[box2d]>=1.0.0; extra == 'examples-gym'
-Requires-Dist: tqdm; extra == 'examples-gym'
 Provides-Extra: test
 Requires-Dist: pytest; extra == 'test'
 Description-Content-Type: text/markdown
@@ -64,10 +60,15 @@ Besides their latent variable strategy, I'll also throw in some attempts with cr
 
 Update: I see, mixing genetic algorithms with gradient based method is already a research field, under [Memetic algorithms](https://en.wikipedia.org/wiki/Memetic_algorithm)
 
+## Install
+
+```bash
+$ pip install evolutionary-policy-optimization
+```
+
 ## Usage
 
 ```python
-
 import torch
 
 from evolutionary_policy_optimization import (
--- /dev/null
+++ evolutionary_policy_optimization-0.0.20.dist-info/RECORD
@@ -0,0 +1,7 @@
+evolutionary_policy_optimization/__init__.py,sha256=Qavcia0n13jjaWIS_LPW7QrxSLT_BBeKujCjF9kQjbA,133
+evolutionary_policy_optimization/epo.py,sha256=BTBqkgDq-x4dUMlKdSojvV2Yjzf9pDUZGMik32WjdHQ,18361
+evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
+evolutionary_policy_optimization-0.0.20.dist-info/METADATA,sha256=0QNTGATtchVuxVplbrfXAtupcrMKEQD-uisM7CFm7qE,4931
+evolutionary_policy_optimization-0.0.20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.20.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.20.dist-info/RECORD,,
--- evolutionary_policy_optimization-0.0.17.dist-info/RECORD
+++ /dev/null
@@ -1,7 +0,0 @@
-evolutionary_policy_optimization/__init__.py,sha256=Qavcia0n13jjaWIS_LPW7QrxSLT_BBeKujCjF9kQjbA,133
-evolutionary_policy_optimization/epo.py,sha256=U1iROmPdJjU_tqd50XtBUibfOHtYUE7MzfPu-6bU2Pw,17586
-evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
-evolutionary_policy_optimization-0.0.17.dist-info/METADATA,sha256=okvM0b28MQBex5XUXVWwflYcf7hqG3I5dAh8PxWGhrM,5047
-evolutionary_policy_optimization-0.0.17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.0.17.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.0.17.dist-info/RECORD,,