evolutionary-policy-optimization 0.1.2.tar.gz → 0.1.5.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/PKG-INFO +2 -2
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/README.md +1 -1
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/evolutionary_policy_optimization/epo.py +22 -4
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/pyproject.toml +1 -1
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/.github/workflows/python-publish.yml +0 -0
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/.github/workflows/test.yml +0 -0
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/.gitignore +0 -0
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/LICENSE +0 -0
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/evolutionary_policy_optimization/__init__.py +0 -0
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/evolutionary_policy_optimization/distributed.py +0 -0
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/evolutionary_policy_optimization/env_wrappers.py +0 -0
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/evolutionary_policy_optimization/experimental.py +0 -0
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/evolutionary_policy_optimization/mock_env.py +0 -0
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/requirements.txt +0 -0
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/tests/test_epo.py +0 -0
- {evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/train_gym.py +0 -0
{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.1.2
+Version: 0.1.5
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
```
```diff
@@ -53,7 +53,7 @@ Description-Content-Type: text/markdown
 
 <img width="450px" alt="fig1" src="https://github.com/user-attachments/assets/33bef569-e786-4f09-bdee-56bad7ea9e6d" />
 
-## Evolutionary Policy Optimization
+## Evolutionary Policy Optimization
 
 Pytorch implementation of [Evolutionary Policy Optimization](https://web3.arxiv.org/abs/2503.19037), from Wang et al. of the Robotics Institute at Carnegie Mellon University
 
```
{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/README.md

```diff
@@ -1,6 +1,6 @@
 <img width="450px" alt="fig1" src="https://github.com/user-attachments/assets/33bef569-e786-4f09-bdee-56bad7ea9e6d" />
 
-## Evolutionary Policy Optimization
+## Evolutionary Policy Optimization
 
 Pytorch implementation of [Evolutionary Policy Optimization](https://web3.arxiv.org/abs/2503.19037), from Wang et al. of the Robotics Institute at Carnegie Mellon University
 
```
{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/evolutionary_policy_optimization/epo.py

```diff
@@ -79,7 +79,7 @@ def interface_torch_numpy(fn, device):
     @wraps(fn)
     def decorated_fn(*args, **kwargs):
 
-        args, kwargs = tree_map(lambda t: t.cpu().numpy() if
+        args, kwargs = tree_map(lambda t: t.cpu().numpy() if is_tensor(t) else t, (args, kwargs))
 
         out = fn(*args, **kwargs)
 
```
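The patched line gates the numpy conversion on `is_tensor`, so `tree_map` converts only tensor leaves and passes every other argument through untouched. A minimal runnable sketch of that boundary — `to_numpy_inputs` and `step` are hypothetical names, not part of the package, and `torch.utils._pytree` is used here while epo.py's own import may differ:

```python
from functools import wraps

import torch
from torch import is_tensor
from torch.utils._pytree import tree_map

def to_numpy_inputs(fn):
    # mirrors the patched line: tensors become numpy arrays on the way in,
    # non-tensor leaves (ints, strs, None) survive tree_map unchanged
    @wraps(fn)
    def decorated_fn(*args, **kwargs):
        args, kwargs = tree_map(lambda t: t.cpu().numpy() if is_tensor(t) else t, (args, kwargs))
        return fn(*args, **kwargs)
    return decorated_fn

@to_numpy_inputs
def step(state, repeat = 1):
    return state.repeat(repeat)  # numpy ndarray method, proving the conversion happened

print(step(torch.ones(2), repeat = 2))  # [1. 1. 1. 1.]
```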
```diff
@@ -88,6 +88,16 @@ def interface_torch_numpy(fn, device):
 
     return decorated_fn
 
+def move_input_tensors_to_device(fn):
+
+    @wraps(fn)
+    def decorated_fn(self, *args, **kwargs):
+        args, kwargs = tree_map(lambda t: t.to(self.device) if is_tensor(t) else t, (args, kwargs))
+
+        return fn(self, *args, **kwargs)
+
+    return decorated_fn
+
 # tensor helpers
 
 def l2norm(t):
```
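The new `move_input_tensors_to_device` decorator treats `(args, kwargs)` as one pytree and relocates only the tensor leaves to `self.device`, so a decorated method can freely mix tensors with plain Python arguments. A self-contained sketch of the mechanics — `Holder` and `score` are hypothetical stand-ins:

```python
from functools import wraps

import torch
from torch import is_tensor
from torch.utils._pytree import tree_map

def move_input_tensors_to_device(fn):  # same shape as the epo.py addition
    @wraps(fn)
    def decorated_fn(self, *args, **kwargs):
        # tensor leaves move to self.device; floats, ints, None pass through
        args, kwargs = tree_map(lambda t: t.to(self.device) if is_tensor(t) else t, (args, kwargs))
        return fn(self, *args, **kwargs)
    return decorated_fn

class Holder:  # hypothetical: anything exposing a .device works
    device = torch.device('cpu')

    @move_input_tensors_to_device
    def score(self, state, temperature = 1.0):
        return state.mean() * temperature

print(Holder().score(torch.randn(4), temperature = 0.5))
```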
```diff
@@ -797,6 +807,8 @@ class Agent(Module):
 
         self.unwrap_model = identity if not wrap_with_accelerate else self.accelerate.unwrap_model
 
+        dummy = tensor(0)
+
         if wrap_with_accelerate:
             (
                 self.actor,
```
```diff
@@ -816,11 +828,14 @@ class Agent(Module):
                 )
             )
 
-
+            if exists(self.critic_ema):
+                self.critic_ema.to(self.accelerate.device)
 
-
+            dummy = dummy.to(self.accelerate.device)
 
-
+        # device tracking
+
+        self.register_buffer('dummy', dummy)
 
     @property
     def device(self):
```
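The two hunks above set up device tracking: a zero-dim `dummy` tensor is created in `__init__`, moved to the accelerate device when accelerate wraps the models, and registered as a buffer so the existing `device` property can read the module's location off it. A standalone sketch of the idiom, without accelerate — `Tracked` is a hypothetical stand-in for `Agent`:

```python
import torch
from torch import nn, tensor

class Tracked(nn.Module):
    def __init__(self):
        super().__init__()
        # buffers follow the module through .to() / .cuda() / accelerate.prepare,
        # so no separate device attribute needs to be kept in sync by hand
        self.register_buffer('dummy', tensor(0))

    @property
    def device(self):
        return self.dummy.device

m = Tracked()
print(m.device)  # cpu; after m.to('cuda') it would report the cuda device
```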
```diff
@@ -870,6 +885,7 @@ class Agent(Module):
         if exists(pkg.get('latent_optim', None)):
             self.latent_optim.load_state_dict(pkg['latent_optim'])
 
+    @move_input_tensors_to_device
     def get_actor_actions(
         self,
         state,
```
```diff
@@ -895,6 +911,7 @@ class Agent(Module):
 
         return actions, log_probs
 
+    @move_input_tensors_to_device
     def get_critic_values(
         self,
         state,
```
```diff
@@ -903,6 +920,7 @@ class Agent(Module):
         use_ema_if_available = False,
         use_unwrapped_model = False
     ):
+
         maybe_unwrap = identity if not use_unwrapped_model else self.unwrap_model
 
         if not exists(latent) and exists(latent_id):
```
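The net effect of the decorated entrypoints: callers can hand `get_actor_actions` and `get_critic_values` tensors from any device and the decorator relocates them before the method body runs. A hedged usage sketch, assuming 0.1.5 is installed — `AgentStub` is a stand-in, since constructing a real `Agent` requires its full configuration:

```python
import torch
from evolutionary_policy_optimization.epo import move_input_tensors_to_device

class AgentStub:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    @move_input_tensors_to_device
    def get_critic_values(self, state):
        assert state.device.type == self.device.type  # already moved before the body runs
        return state.float().mean()

print(AgentStub().get_critic_values(torch.zeros(3)))
```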
{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/.github/workflows/python-publish.yml
RENAMED
File without changes

{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/.github/workflows/test.yml
RENAMED
File without changes

{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/.gitignore
RENAMED
File without changes

{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/LICENSE
RENAMED
File without changes

{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/evolutionary_policy_optimization/__init__.py
RENAMED
File without changes

{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/evolutionary_policy_optimization/distributed.py
RENAMED
File without changes

{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/evolutionary_policy_optimization/env_wrappers.py
RENAMED
File without changes

{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/evolutionary_policy_optimization/experimental.py
RENAMED
File without changes

{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/evolutionary_policy_optimization/mock_env.py
RENAMED
File without changes

{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/requirements.txt
RENAMED
File without changes

{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/tests/test_epo.py
RENAMED
File without changes

{evolutionary_policy_optimization-0.1.2 → evolutionary_policy_optimization-0.1.5}/train_gym.py
RENAMED
File without changes