evolutionary-policy-optimization 0.1.15.tar.gz → 0.1.16.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (16)
  1. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/PKG-INFO +5 -3
  2. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/README.md +4 -2
  3. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/epo.py +29 -5
  4. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/pyproject.toml +1 -1
  5. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/tests/test_epo.py +10 -6
  6. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/train_gym.py +5 -5
  7. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/.github/workflows/python-publish.yml +0 -0
  8. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/.github/workflows/test.yml +0 -0
  9. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/.gitignore +0 -0
  10. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/LICENSE +0 -0
  11. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/__init__.py +0 -0
  12. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/distributed.py +0 -0
  13. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/env_wrappers.py +0 -0
  14. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/experimental.py +0 -0
  15. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/mock_env.py +0 -0
  16. {evolutionary_policy_optimization-0.1.15 → evolutionary_policy_optimization-0.1.16}/requirements.txt +0 -0
--- evolutionary_policy_optimization-0.1.15/PKG-INFO
+++ evolutionary_policy_optimization-0.1.16/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: evolutionary-policy-optimization
- Version: 0.1.15
+ Version: 0.1.16
  Summary: EPO - Pytorch
  Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
  Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -118,8 +118,10 @@ agent = create_agent(
      num_latents = 16,
      dim_latent = 32,
      actor_num_actions = 5,
-     actor_dim_hiddens = (256, 128),
-     critic_dim_hiddens = (256, 128, 64),
+     actor_dim = 256,
+     actor_mlp_depth = 2,
+     critic_dim = 256,
+     critic_mlp_depth = 3,
      latent_gene_pool_kwargs = dict(
          frac_natural_selected = 0.5
      )
--- evolutionary_policy_optimization-0.1.15/README.md
+++ evolutionary_policy_optimization-0.1.16/README.md
@@ -65,8 +65,10 @@ agent = create_agent(
      num_latents = 16,
      dim_latent = 32,
      actor_num_actions = 5,
-     actor_dim_hiddens = (256, 128),
-     critic_dim_hiddens = (256, 128, 64),
+     actor_dim = 256,
+     actor_mlp_depth = 2,
+     critic_dim = 256,
+     critic_mlp_depth = 3,
      latent_gene_pool_kwargs = dict(
          frac_natural_selected = 0.5
      )
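
Note for users upgrading from 0.1.15: the actor and critic MLPs are now configured by a single width and depth (actor_dim / actor_mlp_depth, critic_dim / critic_mlp_depth) instead of the per-layer *_dim_hiddens tuples. A minimal sketch of the new call, assuming the usual top-level create_agent export and a dim_state argument as used elsewhere in the package (other values copied from the hunk above):

    from evolutionary_policy_optimization import create_agent

    agent = create_agent(
        dim_state = 512,          # assumed here; not shown in the hunk above
        num_latents = 16,
        dim_latent = 32,
        actor_num_actions = 5,
        actor_dim = 256,          # was actor_dim_hiddens = (256, 128)
        actor_mlp_depth = 2,
        critic_dim = 256,         # was critic_dim_hiddens = (256, 128, 64)
        critic_mlp_depth = 3,
        latent_gene_pool_kwargs = dict(
            frac_natural_selected = 0.5
        )
    )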
--- evolutionary_policy_optimization-0.1.15/evolutionary_policy_optimization/epo.py
+++ evolutionary_policy_optimization-0.1.16/evolutionary_policy_optimization/epo.py
@@ -76,6 +76,7 @@ def maybe(fn):
  def interface_torch_numpy(fn, device):
      # for a given function, move all inputs from torch tensor to numpy, and all outputs from numpy to torch tensor
 
+     @maybe
      def to_torch_tensor(t):
          if isinstance(t, (np.ndarray, np.float64)):
              t = from_numpy(np.array(t))
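
The @maybe decorator applied to to_torch_tensor lets missing values pass through unconverted. The helper itself is not shown in this diff; a minimal sketch of such a decorator (an assumption, not the package's verbatim code):

    from functools import wraps

    def maybe(fn):
        # hypothetical sketch: skip the wrapped function when the first argument is None
        @wraps(fn)
        def inner(x, *args, **kwargs):
            if x is None:
                return x
            return fn(x, *args, **kwargs)
        return inner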
@@ -287,6 +288,22 @@ class PowerLawDist(Module):
 
          return self.values[sampled]
 
+ # FiLM for latent to mlp conditioning
+
+ class FiLM(Module):
+     def __init__(self, dim, dim_out):
+         super().__init__()
+         self.to_gamma = nn.Linear(dim, dim_out, bias = False)
+         self.to_beta = nn.Linear(dim, dim_out, bias = False)
+
+         nn.init.zeros_(self.to_gamma.weight)
+         nn.init.zeros_(self.to_beta.weight)
+
+     def forward(self, x, cond):
+         gamma, beta = self.to_gamma(cond), self.to_beta(cond)
+
+         return x * (gamma + 1.) + beta
+
  # layer integrated memory
 
  class DynamicLIMe(Module):
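
The new FiLM block (feature-wise linear modulation) scales and shifts features with projections of a conditioning vector; because both projections are zero-initialized, it starts out as the identity. A self-contained sketch using the class exactly as added above, with illustrative dimensions:

    import torch
    from torch import nn
    from torch.nn import Module

    class FiLM(Module):
        def __init__(self, dim, dim_out):
            super().__init__()
            self.to_gamma = nn.Linear(dim, dim_out, bias = False)
            self.to_beta = nn.Linear(dim, dim_out, bias = False)

            # zero init -> gamma = 0 and beta = 0, so the block begins as an identity map
            nn.init.zeros_(self.to_gamma.weight)
            nn.init.zeros_(self.to_beta.weight)

        def forward(self, x, cond):
            gamma, beta = self.to_gamma(cond), self.to_beta(cond)
            return x * (gamma + 1.) + beta

    film = FiLM(64, 32)                              # condition dim 64, feature dim 32
    x, cond = torch.randn(2, 32), torch.randn(2, 64)
    assert torch.allclose(film(x, cond), x)          # identity at initialization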
@@ -301,7 +318,7 @@ class DynamicLIMe(Module):
          self.to_weights = nn.Sequential(
              nn.RMSNorm(dim),
              nn.Linear(dim, num_layers),
-             nn.ReLU()
+             nn.Softmax(dim = -1)
          )
 
      def forward(
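
Replacing nn.ReLU() with nn.Softmax(dim = -1) turns the dynamic layer-mixing weights into a convex combination over previous layers' outputs (non-negative, summing to 1) rather than unbounded gates. A quick illustration:

    import torch
    from torch import nn

    logits = torch.randn(2, 4)              # hypothetical scores over 4 previous layers
    weights = nn.Softmax(dim = -1)(logits)
    print(weights.sum(dim = -1))            # every row sums to 1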
@@ -338,7 +355,7 @@ class MLP(Module):
          self.needs_latent = dim_latent > 0
 
          self.encode_latent = nn.Sequential(
-             Linear(dim_latent, dim),
+             Linear(dim_latent, dim * 2),
              nn.SiLU()
          ) if self.needs_latent else None
 
@@ -351,6 +368,11 @@ class MLP(Module):
          for ind in range(depth):
              is_first = ind == 0
 
+             film = None
+
+             if self.needs_latent:
+                 film = FiLM(dim * 2, dim)
+
              lime = DynamicLIMe(dim, num_layers = ind + 1) if not is_first else None
 
              layer = nn.Sequential(
@@ -362,6 +384,7 @@ class MLP(Module):
 
              layers.append(ModuleList([
                  lime,
+                 film,
                  layer
              ]))
 
@@ -389,19 +412,20 @@ class MLP(Module):
 
          assert latent.shape[0] == x.shape[0], f'received state with batch size {x.shape[0]} but latent ids received had batch size {latent_id.shape[0]}'
 
-         x = x * latent
-
          # layers
 
          prev_layer_inputs = [x]
 
-         for lime, layer in self.layers:
+         for lime, film, layer in self.layers:
 
              layer_inp = x
 
              if exists(lime):
                  layer_inp = lime(x, prev_layer_inputs)
 
+             if exists(film):
+                 layer_inp = film(layer_inp, latent)
+
              x = layer(layer_inp) + x
 
              prev_layer_inputs.append(x)
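
Together with encode_latent now projecting to dim * 2 and each layer receiving a FiLM(dim * 2, dim), the latent no longer multiplies the input once up front (x = x * latent); the encoded latent instead conditions every layer's input via FiLM. A simplified sketch of this pattern, reusing the FiLM sketch above and omitting the DynamicLIMe routing (the block contents here are illustrative, not the package's exact layers):

    import torch
    from torch import nn

    dim, dim_latent, depth = 64, 32, 3

    encode_latent = nn.Sequential(nn.Linear(dim_latent, dim * 2), nn.SiLU())
    films = nn.ModuleList([FiLM(dim * 2, dim) for _ in range(depth)])
    blocks = nn.ModuleList([nn.Sequential(nn.Linear(dim, dim), nn.SiLU()) for _ in range(depth)])

    x, latent = torch.randn(2, dim), torch.randn(2, dim_latent)
    cond = encode_latent(latent)             # latent -> (batch, dim * 2)

    for film, block in zip(films, blocks):
        x = block(film(x, cond)) + x         # FiLM-conditioned input, residual add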
--- evolutionary_policy_optimization-0.1.15/pyproject.toml
+++ evolutionary_policy_optimization-0.1.16/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "evolutionary-policy-optimization"
- version = "0.1.15"
+ version = "0.1.16"
  description = "EPO - Pytorch"
  authors = [
      { name = "Phil Wang", email = "lucidrains@gmail.com" }
--- evolutionary_policy_optimization-0.1.15/tests/test_epo.py
+++ evolutionary_policy_optimization-0.1.16/tests/test_epo.py
@@ -26,8 +26,8 @@ def test_readme(
 
      state = torch.randn(2, 512)
 
-     actor = Actor(dim_state = 512, dim_hiddens = (256, 128), num_actions = 4, dim_latent = 32)
-     critic = Critic(dim_state = 512, dim_hiddens = (256, 128, 64), dim_latent = 32)
+     actor = Actor(dim_state = 512, dim = 256, mlp_depth = 2, num_actions = 4, dim_latent = 32)
+     critic = Critic(dim_state = 512, dim = 256, mlp_depth = 4, dim_latent = 32)
 
      latent = latent_pool(latent_id = latent_ids, state = state)
 
@@ -55,8 +55,10 @@ def test_create_agent(
      num_latents = 128,
      dim_latent = 32,
      actor_num_actions = 5,
-     actor_dim_hiddens = (256, 128),
-     critic_dim_hiddens = (256, 128, 64)
+     actor_dim = 256,
+     actor_mlp_depth = 2,
+     critic_dim = 256,
+     critic_mlp_depth = 4
  )
 
  state = torch.randn(2, 512)
@@ -98,8 +100,10 @@ def test_e2e_with_mock_env(
      num_latents = num_latents,
      dim_latent = 32,
      actor_num_actions = 5,
-     actor_dim_hiddens = (256, 128),
-     critic_dim_hiddens = (256, 128, 64),
+     actor_dim = 256,
+     actor_mlp_depth = 2,
+     critic_dim = 256,
+     critic_mlp_depth = 4,
      use_critic_ema = use_critic_ema,
      diversity_aux_loss_weight = diversity_aux_loss_weight,
      critic_kwargs = dict(
--- evolutionary_policy_optimization-0.1.15/train_gym.py
+++ evolutionary_policy_optimization-0.1.16/train_gym.py
@@ -21,7 +21,7 @@ env = gym.wrappers.RecordVideo(
      env = env,
      video_folder = './recordings',
      name_prefix = 'lunar-video',
-     episode_trigger = lambda eps_num: (eps_num % 250) == 0,
+     episode_trigger = lambda eps_num: (eps_num % (250 * 4)) == 0,
      disable_logger = True
  )
 
@@ -30,12 +30,12 @@ env = GymnasiumEnvWrapper(env)
  # epo
 
  agent = env.to_epo_agent(
-     num_latents = 1,
+     num_latents = 8,
      dim_latent = 32,
      actor_dim = 128,
-     actor_mlp_depth = 2,
+     actor_mlp_depth = 3,
      critic_dim = 256,
-     critic_mlp_depth = 4,
+     critic_mlp_depth = 5,
      latent_gene_pool_kwargs = dict(
          frac_natural_selected = 0.5,
          frac_tournaments = 0.5
@@ -53,7 +53,7 @@ agent = env.to_epo_agent(
 
  epo = EPO(
      agent,
-     episodes_per_latent = 50,
+     episodes_per_latent = 5,
      max_episode_length = 500,
      action_sample_temperature = 1.,
  )