evolutionary-policy-optimization 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- evolutionary_policy_optimization/epo.py +82 -11
- {evolutionary_policy_optimization-0.1.14.dist-info → evolutionary_policy_optimization-0.1.16.dist-info}/METADATA +14 -3
- {evolutionary_policy_optimization-0.1.14.dist-info → evolutionary_policy_optimization-0.1.16.dist-info}/RECORD +5 -5
- {evolutionary_policy_optimization-0.1.14.dist-info → evolutionary_policy_optimization-0.1.16.dist-info}/WHEEL +0 -0
- {evolutionary_policy_optimization-0.1.14.dist-info → evolutionary_policy_optimization-0.1.16.dist-info}/licenses/LICENSE +0 -0
evolutionary_policy_optimization/epo.py

```diff
@@ -76,6 +76,7 @@ def maybe(fn):
 def interface_torch_numpy(fn, device):
     # for a given function, move all inputs from torch tensor to numpy, and all outputs from numpy to torch tensor
 
+    @maybe
     def to_torch_tensor(t):
         if isinstance(t, (np.ndarray, np.float64)):
             t = from_numpy(np.array(t))
```
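The only functional change in the hunk above is the `@maybe` decorator on the nested `to_torch_tensor`. The hunk header shows `epo.py` defines `maybe(fn)` just above; the snippet below is a minimal sketch of that common pattern (an assumption about its behavior, not the package's exact code): the wrapped function passes `None` through instead of raising, so optional env outputs survive the torch ↔ numpy round trip untouched.

```python
# minimal sketch of the assumed `maybe` pattern: a decorated function
# becomes a no-op on None instead of raising
from functools import wraps

def exists(v):
    return v is not None

def maybe(fn):
    @wraps(fn)
    def inner(t, *args, **kwargs):
        if not exists(t):
            return None  # None passes through unchanged
        return fn(t, *args, **kwargs)
    return inner

@maybe
def double(x):
    return x * 2

assert double(3) == 6
assert double(None) is None
```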
```diff
@@ -287,6 +288,54 @@ class PowerLawDist(Module):
 
         return self.values[sampled]
 
+# FiLM for latent to mlp conditioning
+
+class FiLM(Module):
+    def __init__(self, dim, dim_out):
+        super().__init__()
+        self.to_gamma = nn.Linear(dim, dim_out, bias = False)
+        self.to_beta = nn.Linear(dim, dim_out, bias = False)
+
+        nn.init.zeros_(self.to_gamma.weight)
+        nn.init.zeros_(self.to_beta.weight)
+
+    def forward(self, x, cond):
+        gamma, beta = self.to_gamma(cond), self.to_beta(cond)
+
+        return x * (gamma + 1.) + beta
+
+# layer integrated memory
+
+class DynamicLIMe(Module):
+    def __init__(
+        self,
+        dim,
+        num_layers
+    ):
+        super().__init__()
+        self.num_layers = num_layers
+
+        self.to_weights = nn.Sequential(
+            nn.RMSNorm(dim),
+            nn.Linear(dim, num_layers),
+            nn.Softmax(dim = -1)
+        )
+
+    def forward(
+        self,
+        x,
+        hiddens
+    ):
+
+        if not is_tensor(hiddens):
+            hiddens = stack(hiddens)
+
+        assert hiddens.shape[0] == self.num_layers, f'expected hiddens to have {self.num_layers} layers but received {tuple(hiddens.shape)} instead (first dimension must be layers)'
+
+        weights = self.to_weights(x)
+
+        return einsum(hiddens, weights, 'l b d, b l -> b d')
+
 # simple MLP networks, but with latent variables
 # the latent variables are the "genes" with the rest of the network as the scaffold for "gene expression" - as suggested in the paper
 
```
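The two new modules above are small and self-contained. `FiLM` (feature-wise linear modulation) projects a conditioning vector to a per-feature scale and shift; since both projections are zero-initialized, the layer starts out as the identity and the conditioning is learned gradually. `DynamicLIMe` computes per-sample softmax weights over the hidden states of all earlier layers and returns their weighted mixture. A usage sketch follows (importing from the internal `epo` module is an assumption; these classes may not be re-exported publicly):

```python
# exercising the two new modules standalone; the import path is an assumption
import torch
from evolutionary_policy_optimization.epo import FiLM, DynamicLIMe

batch, dim = 4, 256

# FiLM starts as the identity: x * (0 + 1.) + 0 == x
film = FiLM(dim * 2, dim)  # dim * 2 matches the widened encode_latent output below
x = torch.randn(batch, dim)
cond = torch.randn(batch, dim * 2)
assert torch.allclose(film(x, cond), x)

# DynamicLIMe mixes all previous hidden states with learned, input-dependent weights
lime = DynamicLIMe(dim, num_layers = 3)
hiddens = [torch.randn(batch, dim) for _ in range(3)]  # list is stacked internally
out = lime(x, hiddens)
assert out.shape == (batch, dim)
```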
```diff
@@ -306,7 +355,7 @@ class MLP(Module):
         self.needs_latent = dim_latent > 0
 
         self.encode_latent = nn.Sequential(
-            Linear(dim_latent, dim),
+            Linear(dim_latent, dim * 2),
             nn.SiLU()
         ) if self.needs_latent else None
 
@@ -316,15 +365,28 @@ class MLP(Module):
 
         layers = []
 
-        for
+        for ind in range(depth):
+            is_first = ind == 0
+
+            film = None
+
+            if self.needs_latent:
+                film = FiLM(dim * 2, dim)
+
+            lime = DynamicLIMe(dim, num_layers = ind + 1) if not is_first else None
+
             layer = nn.Sequential(
-                nn.
+                nn.RMSNorm(dim),
                 nn.Linear(dim, dim_hidden),
                 nn.SiLU(),
                 nn.Linear(dim_hidden, dim),
             )
 
-            layers.append(
+            layers.append(ModuleList([
+                lime,
+                film,
+                layer
+            ]))
 
         # modules across layers
 
@@ -350,14 +412,23 @@ class MLP(Module):
 
         assert latent.shape[0] == x.shape[0], f'received state with batch size {x.shape[0]} but latent ids received had batch size {latent_id.shape[0]}'
 
-        x = x * latent
-
         # layers
 
-
-
+        prev_layer_inputs = [x]
+
+        for lime, film, layer in self.layers:
+
+            layer_inp = x
+
+            if exists(lime):
+                layer_inp = lime(x, prev_layer_inputs)
+
+            if exists(film):
+                layer_inp = film(layer_inp, latent)
+
+            x = layer(layer_inp) + x
 
-
+            prev_layer_inputs.append(x)
 
         return x
 
```
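Taken together, these hunks change the MLP from conditioning once at the input (the removed `x = x * latent`) to conditioning at every block: each block's input is a `DynamicLIMe` mixture of all previous hidden states (skipped for the first block), FiLM-modulated by the encoded latent, then passed through a residual feedforward. A quick smoke test of the rewired `MLP`; the constructor kwargs match the `Actor`/`Critic` hunks below, while the forward signature is an assumption read off this diff:

```python
# smoke test for the rewired MLP; the forward signature is an assumption
import torch
from evolutionary_policy_optimization.epo import MLP

mlp = MLP(dim = 256, depth = 3, dim_latent = 32)

state  = torch.randn(4, 256)
latent = torch.randn(4, 32)  # the "gene", encoded to dim * 2 and FiLM'd at every block

out = mlp(state, latent = latent)
assert out.shape == (4, 256)  # residual blocks preserve the feature dimension
```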
```diff
@@ -385,7 +456,7 @@ class Actor(Module):
         self.mlp = MLP(dim = dim, depth = mlp_depth, dim_latent = dim_latent)
 
         self.to_out = nn.Sequential(
-            nn.
+            nn.RMSNorm(dim),
             nn.Linear(dim, num_actions, bias = False),
         )
 
@@ -426,7 +497,7 @@ class Critic(Module):
 
         self.mlp = MLP(dim = dim, depth = mlp_depth, dim_latent = dim_latent)
 
-        self.final_norm = nn.
+        self.final_norm = nn.RMSNorm(dim)
 
         self.to_pred = HLGaussLayer(
             dim = dim,
```
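`Actor` and `Critic` receive the matching normalization change: an `nn.RMSNorm` ahead of their output heads. Unlike LayerNorm, RMSNorm only rescales by the root mean square and does not mean-center or shift; `torch.nn.RMSNorm` has been available since PyTorch 2.4. A quick sanity check of the formula:

```python
# nn.RMSNorm sanity check (requires torch >= 2.4); eps is passed explicitly
# because the default is dtype-dependent
import torch
from torch import nn

norm = nn.RMSNorm(256, eps = 1e-6)
x = torch.randn(4, 256)

# RMSNorm: x / sqrt(mean(x^2) + eps), scaled by a learned weight (ones at init)
rms = torch.sqrt(x.pow(2).mean(dim = -1, keepdim = True) + 1e-6)
assert torch.allclose(norm(x), x / rms * norm.weight, atol = 1e-6)
```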
{evolutionary_policy_optimization-0.1.14.dist-info → evolutionary_policy_optimization-0.1.16.dist-info}/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.1.14
+Version: 0.1.16
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
```
```diff
@@ -118,8 +118,10 @@ agent = create_agent(
     num_latents = 16,
     dim_latent = 32,
     actor_num_actions = 5,
-
-
+    actor_dim = 256,
+    actor_mlp_depth = 2,
+    critic_dim = 256,
+    critic_mlp_depth = 3,
     latent_gene_pool_kwargs = dict(
         frac_natural_selected = 0.5
     )
```
````diff
@@ -226,4 +228,13 @@ agent.load('./agent.pt')
 }
 ```
 
+```bibtex
+@inproceedings{Gerasimov2025YouDN,
+    title   = {You Do Not Fully Utilize Transformer's Representation Capacity},
+    author  = {Gleb Gerasimov and Yaroslav Aksenov and Nikita Balagansky and Viacheslav Sinii and Daniil Gavrilov},
+    year    = {2025},
+    url     = {https://api.semanticscholar.org/CorpusID:276317819}
+}
+```
+
 *Evolution is cleverer than you are.* - Leslie Orgel
````
{evolutionary_policy_optimization-0.1.14.dist-info → evolutionary_policy_optimization-0.1.16.dist-info}/RECORD

```diff
@@ -1,10 +1,10 @@
 evolutionary_policy_optimization/__init__.py,sha256=NyiYDYU7DlpmOTM7xiBQET3r1WwX0ebrgMCBLSQrW3c,288
 evolutionary_policy_optimization/distributed.py,sha256=7KgZdeS_wxBHo_du9XZFB1Cu318J-Bp66Xdr6Log_20,2423
 evolutionary_policy_optimization/env_wrappers.py,sha256=bDL06o9_b1iW6k3fw2xifnOnYlzs643tdW6Yv2gsIdw,803
-evolutionary_policy_optimization/epo.py,sha256=
+evolutionary_policy_optimization/epo.py,sha256=kM1jKH3Jl0jB7iF7BKToY_xcNt6j5DkooP5k_VRUDDs,47791
 evolutionary_policy_optimization/experimental.py,sha256=-IgqjJ_Wk_CMB1y9YYWpoYqTG9GZHAS6kbRdTluVevg,1563
 evolutionary_policy_optimization/mock_env.py,sha256=TLyyRm6tOD0Kdn9QqJJQriaSnsR-YmNQHo4OohmZFG4,1410
-evolutionary_policy_optimization-0.1.14.dist-info/METADATA,sha256=
-evolutionary_policy_optimization-0.1.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.1.14.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.1.14.dist-info/RECORD,,
+evolutionary_policy_optimization-0.1.16.dist-info/METADATA,sha256=JrwLijMYTjJdjoTQb-LCYyTuEibgffGUEEPRQEIsOhY,7979
+evolutionary_policy_optimization-0.1.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.1.16.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.1.16.dist-info/RECORD,,
```