evolutionary-policy-optimization 0.1.14__tar.gz → 0.1.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (16)
  1. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/PKG-INFO +14 -3
  2. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/README.md +13 -2
  3. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/epo.py +82 -11
  4. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/pyproject.toml +1 -1
  5. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/tests/test_epo.py +10 -6
  6. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/train_gym.py +5 -5
  7. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/.github/workflows/python-publish.yml +0 -0
  8. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/.github/workflows/test.yml +0 -0
  9. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/.gitignore +0 -0
  10. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/LICENSE +0 -0
  11. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/__init__.py +0 -0
  12. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/distributed.py +0 -0
  13. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/env_wrappers.py +0 -0
  14. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/experimental.py +0 -0
  15. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/evolutionary_policy_optimization/mock_env.py +0 -0
  16. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.16}/requirements.txt +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.1.14
+Version: 0.1.16
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -118,8 +118,10 @@ agent = create_agent(
     num_latents = 16,
     dim_latent = 32,
     actor_num_actions = 5,
-    actor_dim_hiddens = (256, 128),
-    critic_dim_hiddens = (256, 128, 64),
+    actor_dim = 256,
+    actor_mlp_depth = 2,
+    critic_dim = 256,
+    critic_mlp_depth = 3,
     latent_gene_pool_kwargs = dict(
         frac_natural_selected = 0.5
     )
@@ -226,4 +228,13 @@ agent.load('./agent.pt')
 }
 ```
 
+```bibtex
+@inproceedings{Gerasimov2025YouDN,
+    title = {You Do Not Fully Utilize Transformer's Representation Capacity},
+    author = {Gleb Gerasimov and Yaroslav Aksenov and Nikita Balagansky and Viacheslav Sinii and Daniil Gavrilov},
+    year = {2025},
+    url = {https://api.semanticscholar.org/CorpusID:276317819}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
README.md
@@ -65,8 +65,10 @@ agent = create_agent(
     num_latents = 16,
     dim_latent = 32,
     actor_num_actions = 5,
-    actor_dim_hiddens = (256, 128),
-    critic_dim_hiddens = (256, 128, 64),
+    actor_dim = 256,
+    actor_mlp_depth = 2,
+    critic_dim = 256,
+    critic_mlp_depth = 3,
     latent_gene_pool_kwargs = dict(
         frac_natural_selected = 0.5
     )
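The PKG-INFO and README hunks above swap the tuple-style `actor_dim_hiddens` / `critic_dim_hiddens` arguments for an explicit width and depth per network. A minimal sketch of the updated call, assuming the rest of the README's setup (the package-level `create_agent` import and `dim_state = 512`) is unchanged from 0.1.14:

```python
from evolutionary_policy_optimization import create_agent  # import path assumed from the README

agent = create_agent(
    dim_state = 512,          # assumed unchanged from the 0.1.14 README
    num_latents = 16,
    dim_latent = 32,
    actor_num_actions = 5,
    actor_dim = 256,          # replaces actor_dim_hiddens = (256, 128)
    actor_mlp_depth = 2,
    critic_dim = 256,         # replaces critic_dim_hiddens = (256, 128, 64)
    critic_mlp_depth = 3,
    latent_gene_pool_kwargs = dict(
        frac_natural_selected = 0.5
    )
)
```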
@@ -173,4 +175,13 @@ agent.load('./agent.pt')
 }
 ```
 
+```bibtex
+@inproceedings{Gerasimov2025YouDN,
+    title = {You Do Not Fully Utilize Transformer's Representation Capacity},
+    author = {Gleb Gerasimov and Yaroslav Aksenov and Nikita Balagansky and Viacheslav Sinii and Daniil Gavrilov},
+    year = {2025},
+    url = {https://api.semanticscholar.org/CorpusID:276317819}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
evolutionary_policy_optimization/epo.py
@@ -76,6 +76,7 @@ def maybe(fn):
 def interface_torch_numpy(fn, device):
     # for a given function, move all inputs from torch tensor to numpy, and all outputs from numpy to torch tensor
 
+    @maybe
     def to_torch_tensor(t):
         if isinstance(t, (np.ndarray, np.float64)):
             t = from_numpy(np.array(t))
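The only change in this hunk is the `@maybe` decorator on the inner `to_torch_tensor` helper, which lets `None` values coming back through the torch/numpy interface pass through untouched. The decorator itself is defined earlier in `epo.py` (see the hunk header); a sketch of the usual shape of such a decorator, not necessarily the exact definition used there:

```python
from functools import wraps

def exists(v):
    return v is not None

def maybe(fn):
    # call fn only when the first argument exists; otherwise return None unchanged
    @wraps(fn)
    def inner(x, *args, **kwargs):
        if not exists(x):
            return x
        return fn(x, *args, **kwargs)
    return inner

@maybe
def double(x):
    return x * 2

assert double(3) == 6
assert double(None) is None  # None passes straight through instead of raising
```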
@@ -287,6 +288,54 @@ class PowerLawDist(Module):
 
         return self.values[sampled]
 
+# FiLM for latent to mlp conditioning
+
+class FiLM(Module):
+    def __init__(self, dim, dim_out):
+        super().__init__()
+        self.to_gamma = nn.Linear(dim, dim_out, bias = False)
+        self.to_beta = nn.Linear(dim, dim_out, bias = False)
+
+        nn.init.zeros_(self.to_gamma.weight)
+        nn.init.zeros_(self.to_beta.weight)
+
+    def forward(self, x, cond):
+        gamma, beta = self.to_gamma(cond), self.to_beta(cond)
+
+        return x * (gamma + 1.) + beta
+
+# layer integrated memory
+
+class DynamicLIMe(Module):
+    def __init__(
+        self,
+        dim,
+        num_layers
+    ):
+        super().__init__()
+        self.num_layers = num_layers
+
+        self.to_weights = nn.Sequential(
+            nn.RMSNorm(dim),
+            nn.Linear(dim, num_layers),
+            nn.Softmax(dim = -1)
+        )
+
+    def forward(
+        self,
+        x,
+        hiddens
+    ):
+
+        if not is_tensor(hiddens):
+            hiddens = stack(hiddens)
+
+        assert hiddens.shape[0] == self.num_layers, f'expected hiddens to have {self.num_layers} layers but received {tuple(hiddens.shape)} instead (first dimension must be layers)'
+
+        weights = self.to_weights(x)
+
+        return einsum(hiddens, weights, 'l b d, b l -> b d')
+
 # simple MLP networks, but with latent variables
 # the latent variables are the "genes" with the rest of the network as the scaffold for "gene expression" - as suggested in the paper
 
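A small usage sketch for the two new modules, assuming they remain importable from `evolutionary_policy_optimization.epo` as defined above and that the installed PyTorch is recent enough to provide `nn.RMSNorm` (roughly 2.4+). `FiLM` maps a conditioning vector to a per-sample scale and shift and is zero-initialized, so it starts out as the identity; `DynamicLIMe` softmax-mixes the current input with all previous layer outputs:

```python
import torch
from evolutionary_policy_optimization.epo import FiLM, DynamicLIMe  # import path is an assumption

dim, batch, num_layers = 64, 2, 3

film = FiLM(dim * 2, dim)            # conditioning vector is twice the hidden width, matching the MLP below
lime = DynamicLIMe(dim, num_layers)  # mixes num_layers stacked hidden states

x = torch.randn(batch, dim)
cond = torch.randn(batch, dim * 2)
hiddens = [torch.randn(batch, dim) for _ in range(num_layers)]

mixed = lime(x, hiddens)   # (batch, dim), mixing weights predicted from x
out = film(mixed, cond)    # (batch, dim)

assert torch.allclose(out, mixed)  # gamma / beta weights start at zero, so FiLM is initially the identity
```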
@@ -306,7 +355,7 @@ class MLP(Module):
         self.needs_latent = dim_latent > 0
 
         self.encode_latent = nn.Sequential(
-            Linear(dim_latent, dim),
+            Linear(dim_latent, dim * 2),
             nn.SiLU()
         ) if self.needs_latent else None
 
@@ -316,15 +365,28 @@ class MLP(Module):
 
         layers = []
 
-        for _ in range(depth):
+        for ind in range(depth):
+            is_first = ind == 0
+
+            film = None
+
+            if self.needs_latent:
+                film = FiLM(dim * 2, dim)
+
+            lime = DynamicLIMe(dim, num_layers = ind + 1) if not is_first else None
+
             layer = nn.Sequential(
-                nn.LayerNorm(dim, bias = False),
+                nn.RMSNorm(dim),
                 nn.Linear(dim, dim_hidden),
                 nn.SiLU(),
                 nn.Linear(dim_hidden, dim),
             )
 
-            layers.append(layer)
+            layers.append(ModuleList([
+                lime,
+                film,
+                layer
+            ]))
 
         # modules across layers
 
@@ -350,14 +412,23 @@ class MLP(Module):
 
         assert latent.shape[0] == x.shape[0], f'received state with batch size {x.shape[0]} but latent ids received had batch size {latent_id.shape[0]}'
 
-        x = x * latent
-
         # layers
 
-        for ind, layer in enumerate(self.layers, start = 1):
-            is_last = ind == len(self.layers)
+        prev_layer_inputs = [x]
+
+        for lime, film, layer in self.layers:
+
+            layer_inp = x
+
+            if exists(lime):
+                layer_inp = lime(x, prev_layer_inputs)
+
+            if exists(film):
+                layer_inp = film(layer_inp, latent)
+
+            x = layer(layer_inp) + x
 
-            x = layer(x) + x
+            prev_layer_inputs.append(x)
 
         return x
 
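The rewritten forward drops the old `x = x * latent` gating. Each layer's input is now optionally mixed across all previous hidden states by `DynamicLIMe` (skipped for the first layer), FiLM-conditioned on the encoded latent, passed through the RMSNorm MLP block with a residual, and the new hidden state is appended to the history. A hedged illustration of the bookkeeping this relies on, with stand-in tensors replacing the real `lime` / `film` / `layer` computations: at layer `ind`, `prev_layer_inputs` holds exactly `ind + 1` tensors, matching the `DynamicLIMe(dim, num_layers = ind + 1)` built for that layer in `__init__`.

```python
import torch

depth, batch, dim = 4, 2, 64

x = torch.randn(batch, dim)
prev_layer_inputs = [x]  # starts with just the input, as in the diff

for ind in range(depth):
    # invariant that justifies DynamicLIMe(dim, num_layers = ind + 1) for this layer
    assert len(prev_layer_inputs) == ind + 1

    layer_inp = x                    # would be lime(x, prev_layer_inputs), then film(layer_inp, latent)
    x = torch.randn(batch, dim) + x  # stand-in for layer(layer_inp) + x

    prev_layer_inputs.append(x)
```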
@@ -385,7 +456,7 @@ class Actor(Module):
         self.mlp = MLP(dim = dim, depth = mlp_depth, dim_latent = dim_latent)
 
         self.to_out = nn.Sequential(
-            nn.LayerNorm(dim, bias = False),
+            nn.RMSNorm(dim),
            nn.Linear(dim, num_actions, bias = False),
         )
 
@@ -426,7 +497,7 @@ class Critic(Module):
 
         self.mlp = MLP(dim = dim, depth = mlp_depth, dim_latent = dim_latent)
 
-        self.final_norm = nn.LayerNorm(dim, bias = False)
+        self.final_norm = nn.RMSNorm(dim)
 
         self.to_pred = HLGaussLayer(
             dim = dim,
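The `Actor`, `Critic`, and `MLP` hunks all make the same normalization swap: `nn.LayerNorm(dim, bias = False)` becomes `nn.RMSNorm(dim)`. Both keep a learned gain and no bias, but RMSNorm skips the mean subtraction and rescales only by the root mean square of the features. A quick comparison, again assuming a PyTorch version that ships `nn.RMSNorm`:

```python
import torch
from torch import nn

x = torch.randn(2, 256)

layer_norm = nn.LayerNorm(256, bias = False)  # old: subtract mean, divide by std, learned gain
rms_norm = nn.RMSNorm(256)                    # new: divide by RMS only, learned gain

print(layer_norm(x).std(dim = -1))               # roughly 1 per row
print(rms_norm(x).pow(2).mean(dim = -1).sqrt())  # roughly 1 per row, but rows keep their mean offset
```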
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "evolutionary-policy-optimization"
-version = "0.1.14"
+version = "0.1.16"
 description = "EPO - Pytorch"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
tests/test_epo.py
@@ -26,8 +26,8 @@ def test_readme(
 
     state = torch.randn(2, 512)
 
-    actor = Actor(dim_state = 512, dim_hiddens = (256, 128), num_actions = 4, dim_latent = 32)
-    critic = Critic(dim_state = 512, dim_hiddens = (256, 128, 64), dim_latent = 32)
+    actor = Actor(dim_state = 512, dim = 256, mlp_depth = 2, num_actions = 4, dim_latent = 32)
+    critic = Critic(dim_state = 512, dim = 256, mlp_depth = 4, dim_latent = 32)
 
     latent = latent_pool(latent_id = latent_ids, state = state)
 
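The test now builds `Actor` and `Critic` with the same width/depth arguments used by `create_agent`. A standalone sketch of the updated constructors, importing directly from the `epo` module (the test file's own import path may differ):

```python
import torch
from evolutionary_policy_optimization.epo import Actor, Critic  # import path is an assumption

actor = Actor(dim_state = 512, dim = 256, mlp_depth = 2, num_actions = 4, dim_latent = 32)
critic = Critic(dim_state = 512, dim = 256, mlp_depth = 4, dim_latent = 32)

state = torch.randn(2, 512)  # as in the test; a forward pass additionally needs a latent from the gene pool
```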
@@ -55,8 +55,10 @@ def test_create_agent(
         num_latents = 128,
         dim_latent = 32,
         actor_num_actions = 5,
-        actor_dim_hiddens = (256, 128),
-        critic_dim_hiddens = (256, 128, 64)
+        actor_dim = 256,
+        actor_mlp_depth = 2,
+        critic_dim = 256,
+        critic_mlp_depth = 4
     )
 
     state = torch.randn(2, 512)
@@ -98,8 +100,10 @@ def test_e2e_with_mock_env(
         num_latents = num_latents,
         dim_latent = 32,
         actor_num_actions = 5,
-        actor_dim_hiddens = (256, 128),
-        critic_dim_hiddens = (256, 128, 64),
+        actor_dim = 256,
+        actor_mlp_depth = 2,
+        critic_dim = 256,
+        critic_mlp_depth = 4,
         use_critic_ema = use_critic_ema,
         diversity_aux_loss_weight = diversity_aux_loss_weight,
         critic_kwargs = dict(
train_gym.py
@@ -21,7 +21,7 @@ env = gym.wrappers.RecordVideo(
     env = env,
     video_folder = './recordings',
     name_prefix = 'lunar-video',
-    episode_trigger = lambda eps_num: (eps_num % 250) == 0,
+    episode_trigger = lambda eps_num: (eps_num % (250 * 4)) == 0,
     disable_logger = True
 )
 
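Recording now happens on every 1000th episode (`250 * 4`) instead of every 250th. The trigger is a plain modulo check:

```python
# the episode_trigger passed to gym.wrappers.RecordVideo above
episode_trigger = lambda eps_num: (eps_num % (250 * 4)) == 0

assert episode_trigger(0) and episode_trigger(1000)  # records episodes 0, 1000, 2000, ...
assert not episode_trigger(250)                      # previously recorded, now skipped
```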
@@ -30,12 +30,12 @@ env = GymnasiumEnvWrapper(env)
 # epo
 
 agent = env.to_epo_agent(
-    num_latents = 1,
+    num_latents = 8,
     dim_latent = 32,
     actor_dim = 128,
-    actor_mlp_depth = 2,
+    actor_mlp_depth = 3,
     critic_dim = 256,
-    critic_mlp_depth = 4,
+    critic_mlp_depth = 5,
     latent_gene_pool_kwargs = dict(
         frac_natural_selected = 0.5,
         frac_tournaments = 0.5
@@ -53,7 +53,7 @@ agent = env.to_epo_agent(
 
 epo = EPO(
     agent,
-    episodes_per_latent = 50,
+    episodes_per_latent = 5,
     max_episode_length = 500,
     action_sample_temperature = 1.,
 )
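With `num_latents = 8` above and `episodes_per_latent = 5` here, a full pass over the gene pool would presumably cost 8 × 5 = 40 episodes of at most 500 steps each (versus 50 episodes for the single latent in 0.1.14), though the exact rollout schedule is up to `EPO`. A back-of-the-envelope sketch under that assumption:

```python
# rough per-pass budget, assuming EPO rolls out every latent episodes_per_latent times
# (values taken from the to_epo_agent(...) and EPO(...) calls above)

num_latents = 8
episodes_per_latent = 5
max_episode_length = 500

episodes_per_pass = num_latents * episodes_per_latent            # 40
max_env_steps_per_pass = episodes_per_pass * max_episode_length  # 20000

print(episodes_per_pass, max_env_steps_per_pass)
```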