evolutionary-policy-optimization 0.1.15__tar.gz → 0.1.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/PKG-INFO +5 -3
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/README.md +4 -2
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/epo.py +29 -5
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/pyproject.toml +1 -1
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/tests/test_epo.py +10 -6
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/train_gym.py +5 -5
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/.github/workflows/python-publish.yml +0 -0
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/.github/workflows/test.yml +0 -0
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/.gitignore +0 -0
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/LICENSE +0 -0
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/__init__.py +0 -0
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/distributed.py +0 -0
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/env_wrappers.py +0 -0
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/experimental.py +0 -0
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/mock_env.py +0 -0
- {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/requirements.txt +0 -0
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.1.15
+Version: 0.1.16
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -118,8 +118,10 @@ agent = create_agent(
     num_latents = 16,
     dim_latent = 32,
     actor_num_actions = 5,
-
-
+    actor_dim = 256,
+    actor_mlp_depth = 2,
+    critic_dim = 256,
+    critic_mlp_depth = 3,
     latent_gene_pool_kwargs = dict(
         frac_natural_selected = 0.5
     )
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/README.md
RENAMED
@@ -65,8 +65,10 @@ agent = create_agent(
     num_latents = 16,
     dim_latent = 32,
     actor_num_actions = 5,
-
-
+    actor_dim = 256,
+    actor_mlp_depth = 2,
+    critic_dim = 256,
+    critic_mlp_depth = 3,
     latent_gene_pool_kwargs = dict(
         frac_natural_selected = 0.5
     )
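For reference, a minimal sketch of how the full create_agent call reads with the new width/depth keywords. The import path and dim_state = 512 are assumptions (the latter taken from tests/test_epo.py elsewhere in this diff, where state = torch.randn(2, 512)); they are not part of the README hunk above.

# sketch only -- kwargs mirror the lines added in this hunk
from evolutionary_policy_optimization import create_agent   # import path assumed

agent = create_agent(
    dim_state = 512,          # assumed from the test suite
    num_latents = 16,
    dim_latent = 32,
    actor_num_actions = 5,
    actor_dim = 256,          # new in 0.1.16: actor MLP width
    actor_mlp_depth = 2,      # new in 0.1.16: actor MLP depth
    critic_dim = 256,         # new in 0.1.16: critic MLP width
    critic_mlp_depth = 3,     # new in 0.1.16: critic MLP depth
    latent_gene_pool_kwargs = dict(
        frac_natural_selected = 0.5
    )
)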
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/epo.py
RENAMED
@@ -76,6 +76,7 @@ def maybe(fn):
 def interface_torch_numpy(fn, device):
     # for a given function, move all inputs from torch tensor to numpy, and all outputs from numpy to torch tensor
 
+    @maybe
     def to_torch_tensor(t):
         if isinstance(t, (np.ndarray, np.float64)):
             t = from_numpy(np.array(t))
@@ -287,6 +288,22 @@ class PowerLawDist(Module):
 
         return self.values[sampled]
 
+# FiLM for latent to mlp conditioning
+
+class FiLM(Module):
+    def __init__(self, dim, dim_out):
+        super().__init__()
+        self.to_gamma = nn.Linear(dim, dim_out, bias = False)
+        self.to_beta = nn.Linear(dim, dim_out, bias = False)
+
+        nn.init.zeros_(self.to_gamma.weight)
+        nn.init.zeros_(self.to_beta.weight)
+
+    def forward(self, x, cond):
+        gamma, beta = self.to_gamma(cond), self.to_beta(cond)
+
+        return x * (gamma + 1.) + beta
+
 # layer integrated memory
 
 class DynamicLIMe(Module):
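Because both projections are zero-initialized, the new FiLM block starts out as an identity map, so the latent only gradually modulates the MLP features as training progresses. A self-contained check of that property (tensor sizes here are illustrative, not taken from the library):

import torch
from torch import nn

# standalone copy of the FiLM block added in this diff, for illustration
class FiLM(nn.Module):
    def __init__(self, dim, dim_out):
        super().__init__()
        self.to_gamma = nn.Linear(dim, dim_out, bias = False)
        self.to_beta = nn.Linear(dim, dim_out, bias = False)
        nn.init.zeros_(self.to_gamma.weight)
        nn.init.zeros_(self.to_beta.weight)

    def forward(self, x, cond):
        gamma, beta = self.to_gamma(cond), self.to_beta(cond)
        return x * (gamma + 1.) + beta   # identity when gamma = beta = 0

film = FiLM(dim = 64, dim_out = 32)      # illustrative sizes
x = torch.randn(2, 32)                   # features to condition
cond = torch.randn(2, 64)                # conditioning latent
assert torch.allclose(film(x, cond), x)  # zero-init => identity at init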
@@ -301,7 +318,7 @@ class DynamicLIMe(Module):
         self.to_weights = nn.Sequential(
             nn.RMSNorm(dim),
             nn.Linear(dim, num_layers),
-            nn.
+            nn.Softmax(dim = -1)
         )
 
     def forward(
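With nn.Softmax(dim = -1) as the final activation, DynamicLIMe's per-layer weights form a convex combination over all previous hidden states. A hedged sketch of that mixing step with illustrative shapes (the class's actual forward pass is not part of this hunk):

import torch

# illustrative only: convex combination of earlier layer outputs,
# the kind of mixing a softmax over num_layers enables
num_layers, batch, dim = 3, 2, 16
prev_layer_inputs = torch.randn(num_layers, batch, dim)   # stack of earlier hidden states
logits = torch.randn(batch, num_layers)                   # what nn.Linear(dim, num_layers) would emit
weights = logits.softmax(dim = -1)                        # the nn.Softmax(dim = -1) in the diff

mixed = torch.einsum('b l, l b d -> b d', weights, prev_layer_inputs)
assert mixed.shape == (batch, dim)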
@@ -338,7 +355,7 @@ class MLP(Module):
         self.needs_latent = dim_latent > 0
 
         self.encode_latent = nn.Sequential(
-            Linear(dim_latent, dim),
+            Linear(dim_latent, dim * 2),
             nn.SiLU()
         ) if self.needs_latent else None
 
@@ -351,6 +368,11 @@ class MLP(Module):
         for ind in range(depth):
             is_first = ind == 0
 
+            film = None
+
+            if self.needs_latent:
+                film = FiLM(dim * 2, dim)
+
             lime = DynamicLIMe(dim, num_layers = ind + 1) if not is_first else None
 
             layer = nn.Sequential(
@@ -362,6 +384,7 @@ class MLP(Module):
 
             layers.append(ModuleList([
                 lime,
+                film,
                 layer
             ]))
 
@@ -389,19 +412,20 @@ class MLP(Module):
 
         assert latent.shape[0] == x.shape[0], f'received state with batch size {x.shape[0]} but latent ids received had batch size {latent_id.shape[0]}'
 
-        x = x * latent
-
         # layers
 
         prev_layer_inputs = [x]
 
-        for lime, layer in self.layers:
+        for lime, film, layer in self.layers:
 
             layer_inp = x
 
             if exists(lime):
                 layer_inp = lime(x, prev_layer_inputs)
 
+            if exists(film):
+                layer_inp = film(layer_inp, latent)
+
             x = layer(layer_inp) + x
 
             prev_layer_inputs.append(x)
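Taken together, these MLP changes drop the single x = x * latent multiplication and instead let the encoded latent (now projected to dim * 2) condition every residual block through its own FiLM layer. A simplified, self-contained sketch of that forward pass, with the DynamicLIMe mixing omitted and all sizes illustrative (this is a stand-in for the idea, not the library code):

import torch
from torch import nn

def film(x, cond, to_gamma, to_beta):
    # same math as the FiLM block added in this diff
    return x * (to_gamma(cond) + 1.) + to_beta(cond)

dim, dim_latent, depth, batch = 32, 16, 3, 2

encode_latent = nn.Sequential(nn.Linear(dim_latent, dim * 2), nn.SiLU())

to_gammas = nn.ModuleList([nn.Linear(dim * 2, dim, bias = False) for _ in range(depth)])
to_betas  = nn.ModuleList([nn.Linear(dim * 2, dim, bias = False) for _ in range(depth)])
blocks    = nn.ModuleList([nn.Sequential(nn.Linear(dim, dim), nn.SiLU()) for _ in range(depth)])

x = torch.randn(batch, dim)
latent = encode_latent(torch.randn(batch, dim_latent))   # (batch, dim * 2)

for to_gamma, to_beta, block in zip(to_gammas, to_betas, blocks):
    layer_inp = film(x, latent, to_gamma, to_beta)       # per-layer latent conditioning
    x = block(layer_inp) + x                             # residual update

print(x.shape)  # torch.Size([2, 32])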
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/tests/test_epo.py
RENAMED
@@ -26,8 +26,8 @@ def test_readme(
 
     state = torch.randn(2, 512)
 
-    actor = Actor(dim_state = 512,
-    critic = Critic(dim_state = 512,
+    actor = Actor(dim_state = 512, dim = 256, mlp_depth = 2, num_actions = 4, dim_latent = 32)
+    critic = Critic(dim_state = 512, dim = 256, mlp_depth = 4, dim_latent = 32)
 
     latent = latent_pool(latent_id = latent_ids, state = state)
 
@@ -55,8 +55,10 @@ def test_create_agent(
         num_latents = 128,
         dim_latent = 32,
         actor_num_actions = 5,
-
-
+        actor_dim = 256,
+        actor_mlp_depth = 2,
+        critic_dim = 256,
+        critic_mlp_depth = 4
     )
 
     state = torch.randn(2, 512)
@@ -98,8 +100,10 @@ def test_e2e_with_mock_env(
         num_latents = num_latents,
        dim_latent = 32,
        actor_num_actions = 5,
-
-
+        actor_dim = 256,
+        actor_mlp_depth = 2,
+        critic_dim = 256,
+        critic_mlp_depth = 4,
        use_critic_ema = use_critic_ema,
        diversity_aux_loss_weight = diversity_aux_loss_weight,
        critic_kwargs = dict(
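The updated test spells out the new Actor and Critic constructor arguments directly. A short sketch reusing exactly those calls; the top-level import path is assumed, and since the forward signatures are not shown in this hunk, only construction is exercised:

import torch
from evolutionary_policy_optimization import Actor, Critic   # import path assumed

actor  = Actor(dim_state = 512, dim = 256, mlp_depth = 2, num_actions = 4, dim_latent = 32)
critic = Critic(dim_state = 512, dim = 256, mlp_depth = 4, dim_latent = 32)

# rough size check -- mlp_depth controls how many residual MLP blocks get stacked
print(sum(p.numel() for p in actor.parameters()))
print(sum(p.numel() for p in critic.parameters()))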
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/train_gym.py
RENAMED
@@ -21,7 +21,7 @@ env = gym.wrappers.RecordVideo(
     env = env,
     video_folder = './recordings',
     name_prefix = 'lunar-video',
-    episode_trigger = lambda eps_num: (eps_num % 250) == 0,
+    episode_trigger = lambda eps_num: (eps_num % (250 * 4)) == 0,
     disable_logger = True
 )
 
@@ -30,12 +30,12 @@ env = GymnasiumEnvWrapper(env)
 # epo
 
 agent = env.to_epo_agent(
-    num_latents =
+    num_latents = 8,
     dim_latent = 32,
     actor_dim = 128,
-    actor_mlp_depth =
+    actor_mlp_depth = 3,
     critic_dim = 256,
-    critic_mlp_depth =
+    critic_mlp_depth = 5,
     latent_gene_pool_kwargs = dict(
         frac_natural_selected = 0.5,
         frac_tournaments = 0.5
@@ -53,7 +53,7 @@ agent = env.to_epo_agent(
 
 epo = EPO(
     agent,
-    episodes_per_latent =
+    episodes_per_latent = 5,
     max_episode_length = 500,
     action_sample_temperature = 1.,
 )
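For readers unfamiliar with action_sample_temperature: it scales the actor's logits before sampling, so values above 1 flatten the action distribution and values below 1 sharpen it. A generic illustration of that kind of temperature-scaled categorical sampling (not the library's own code):

import torch

logits = torch.tensor([2.0, 0.5, -1.0])              # pretend actor output over 3 actions

for temperature in (0.5, 1.0, 2.0):
    probs = (logits / temperature).softmax(dim = -1)  # higher temperature -> flatter distribution
    action = torch.multinomial(probs, num_samples = 1)
    print(temperature, probs, action.item())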
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/.github/workflows/python-publish.yml
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/.github/workflows/test.yml
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/.gitignore
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/LICENSE
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/__init__.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/distributed.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/env_wrappers.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/experimental.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/mock_env.py
RENAMED
File without changes
{evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/requirements.txt
RENAMED
File without changes