evolutionary-policy-optimization 0.1.14.tar.gz → 0.1.15.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (16)
  1. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/PKG-INFO +10 -1
  2. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/README.md +9 -0
  3. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/evolutionary_policy_optimization/epo.py +55 -8
  4. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/pyproject.toml +1 -1
  5. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/.github/workflows/python-publish.yml +0 -0
  6. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/.github/workflows/test.yml +0 -0
  7. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/.gitignore +0 -0
  8. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/LICENSE +0 -0
  9. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/evolutionary_policy_optimization/__init__.py +0 -0
  10. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/evolutionary_policy_optimization/distributed.py +0 -0
  11. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/evolutionary_policy_optimization/env_wrappers.py +0 -0
  12. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/evolutionary_policy_optimization/experimental.py +0 -0
  13. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/evolutionary_policy_optimization/mock_env.py +0 -0
  14. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/requirements.txt +0 -0
  15. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/tests/test_epo.py +0 -0
  16. {evolutionary_policy_optimization-0.1.14 → evolutionary_policy_optimization-0.1.15}/train_gym.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: evolutionary-policy-optimization
- Version: 0.1.14
+ Version: 0.1.15
  Summary: EPO - Pytorch
  Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
  Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -226,4 +226,13 @@ agent.load('./agent.pt')
  }
  ```

+ ```bibtex
+ @inproceedings{Gerasimov2025YouDN,
+     title   = {You Do Not Fully Utilize Transformer's Representation Capacity},
+     author  = {Gleb Gerasimov and Yaroslav Aksenov and Nikita Balagansky and Viacheslav Sinii and Daniil Gavrilov},
+     year    = {2025},
+     url     = {https://api.semanticscholar.org/CorpusID:276317819}
+ }
+ ```
+
  *Evolution is cleverer than you are.* - Leslie Orgel
README.md

@@ -173,4 +173,13 @@ agent.load('./agent.pt')
  }
  ```

+ ```bibtex
+ @inproceedings{Gerasimov2025YouDN,
+     title   = {You Do Not Fully Utilize Transformer's Representation Capacity},
+     author  = {Gleb Gerasimov and Yaroslav Aksenov and Nikita Balagansky and Viacheslav Sinii and Daniil Gavrilov},
+     year    = {2025},
+     url     = {https://api.semanticscholar.org/CorpusID:276317819}
+ }
+ ```
+
  *Evolution is cleverer than you are.* - Leslie Orgel
evolutionary_policy_optimization/epo.py

@@ -287,6 +287,38 @@ class PowerLawDist(Module):

          return self.values[sampled]

+ # layer integrated memory
+
+ class DynamicLIMe(Module):
+     def __init__(
+         self,
+         dim,
+         num_layers
+     ):
+         super().__init__()
+         self.num_layers = num_layers
+
+         self.to_weights = nn.Sequential(
+             nn.RMSNorm(dim),
+             nn.Linear(dim, num_layers),
+             nn.ReLU()
+         )
+
+     def forward(
+         self,
+         x,
+         hiddens
+     ):
+
+         if not is_tensor(hiddens):
+             hiddens = stack(hiddens)
+
+         assert hiddens.shape[0] == self.num_layers, f'expected hiddens to have {self.num_layers} layers but received {tuple(hiddens.shape)} instead (first dimension must be layers)'
+
+         weights = self.to_weights(x)
+
+         return einsum(hiddens, weights, 'l b d, b l -> b d')
+
  # simple MLP networks, but with latent variables
  # the latent variables are the "genes" with the rest of the network as the scaffold for "gene expression" - as suggested in the paper

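The new DynamicLIMe block (from the LIMe paper cited above) turns the current hidden state into per-layer ReLU weights and returns a weighted sum of all earlier hidden states. A minimal usage sketch, assuming version 0.1.15 is installed and torch >= 2.4 (needed for nn.RMSNorm); the shapes are illustrative:

```python
import torch
# assumption: DynamicLIMe is importable from the module shown in this diff
from evolutionary_policy_optimization.epo import DynamicLIMe

batch, dim, num_layers = 4, 16, 3

lime = DynamicLIMe(dim, num_layers = num_layers)

x = torch.randn(batch, dim)                                      # current hidden state
hiddens = [torch.randn(batch, dim) for _ in range(num_layers)]   # one tensor per earlier layer

mixed = lime(x, hiddens)    # stacks the list, weights each layer per sample, sums over layers
print(mixed.shape)          # torch.Size([4, 16])
```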
@@ -316,15 +348,22 @@ class MLP(Module):

          layers = []

-         for _ in range(depth):
+         for ind in range(depth):
+             is_first = ind == 0
+
+             lime = DynamicLIMe(dim, num_layers = ind + 1) if not is_first else None
+
              layer = nn.Sequential(
-                 nn.LayerNorm(dim, bias = False),
+                 nn.RMSNorm(dim),
                  nn.Linear(dim, dim_hidden),
                  nn.SiLU(),
                  nn.Linear(dim_hidden, dim),
              )

-             layers.append(layer)
+             layers.append(ModuleList([
+                 lime,
+                 layer
+             ]))

          # modules across layers

@@ -354,10 +393,18 @@ class MLP(Module):

          # layers

-         for ind, layer in enumerate(self.layers, start = 1):
-             is_last = ind == len(self.layers)
+         prev_layer_inputs = [x]
+
+         for lime, layer in self.layers:
+
+             layer_inp = x
+
+             if exists(lime):
+                 layer_inp = lime(x, prev_layer_inputs)
+
+             x = layer(layer_inp) + x

-             x = layer(x) + x
+             prev_layer_inputs.append(x)

          return x

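The reworked forward pass keeps a running list of every hidden state (starting with the input) and, for every layer after the first, lets DynamicLIMe pick that layer's input as a learned mixture of the history, while the residual connection stays on the untouched stream. A stripped-down sketch of just this routing, not the library's MLP itself (which also conditions on a latent vector); the hidden width here is an arbitrary choice and torch >= 2.4 is assumed:

```python
import torch
from torch import nn
from evolutionary_policy_optimization.epo import DynamicLIMe  # assumes 0.1.15 is installed

dim, depth, batch = 16, 3, 2

# every layer after the first is paired with a DynamicLIMe over all earlier hidden states
blocks = nn.ModuleList([
    nn.ModuleList([
        DynamicLIMe(dim, num_layers = ind + 1) if ind > 0 else None,
        nn.Sequential(nn.RMSNorm(dim), nn.Linear(dim, dim * 2), nn.SiLU(), nn.Linear(dim * 2, dim))
    ])
    for ind in range(depth)
])

x = torch.randn(batch, dim)
prev_layer_inputs = [x]                  # history of hidden states, starting with the input

for lime, layer in blocks:
    layer_inp = x if lime is None else lime(x, prev_layer_inputs)  # mix the history
    x = layer(layer_inp) + x             # residual connection is kept on the raw stream
    prev_layer_inputs.append(x)

print(x.shape)  # torch.Size([2, 16])
```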
@@ -385,7 +432,7 @@ class Actor(Module):
          self.mlp = MLP(dim = dim, depth = mlp_depth, dim_latent = dim_latent)

          self.to_out = nn.Sequential(
-             nn.LayerNorm(dim, bias = False),
+             nn.RMSNorm(dim),
              nn.Linear(dim, num_actions, bias = False),
          )

@@ -426,7 +473,7 @@ class Critic(Module):

          self.mlp = MLP(dim = dim, depth = mlp_depth, dim_latent = dim_latent)

-         self.final_norm = nn.LayerNorm(dim, bias = False)
+         self.final_norm = nn.RMSNorm(dim)

          self.to_pred = HLGaussLayer(
              dim = dim,
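The remaining epo.py changes swap nn.LayerNorm(dim, bias = False) for nn.RMSNorm(dim) in the MLP blocks and the Actor/Critic heads. A quick sketch of what the swap means, assuming torch >= 2.4 where nn.RMSNorm is available: both normalize over the last dimension with a learned scale and no bias, but RMSNorm skips the mean subtraction.

```python
import torch
from torch import nn

dim = 8
x = torch.randn(2, dim)

old_norm = nn.LayerNorm(dim, bias = False)   # zero-centers, divides by std, then learned scale
new_norm = nn.RMSNorm(dim)                   # divides by root-mean-square only, then learned scale

print(old_norm(x).shape, new_norm(x).shape)  # both stay torch.Size([2, 8])
```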
pyproject.toml

@@ -1,6 +1,6 @@
  [project]
  name = "evolutionary-policy-optimization"
- version = "0.1.14"
+ version = "0.1.15"
  description = "EPO - Pytorch"
  authors = [
      { name = "Phil Wang", email = "lucidrains@gmail.com" }