evolutionary-policy-optimization 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- evolutionary_policy_optimization/__init__.py
+++ evolutionary_policy_optimization/__init__.py
@@ -1,4 +1,7 @@
 from evolutionary_policy_optimization.epo import (
     MLP,
+    Actor,
+    Critic,
+    Agent,
     LatentGenePool
 )
--- evolutionary_policy_optimization/epo.py
+++ evolutionary_policy_optimization/epo.py
@@ -10,6 +10,7 @@ import torch.nn.functional as F
 from torch.nn import Linear, Module, ModuleList

 from einops import rearrange, repeat, einsum
+from einops.layers.torch import Rearrange

 from assoc_scan import AssocScan

@@ -277,7 +278,7 @@ class Actor(Module):

         hidden = self.init_layer(state)

-        hidden = self.mlp(state, latent)
+        hidden = self.mlp(hidden, latent)

         return self.to_out(hidden)

@@ -314,33 +315,16 @@ class Critic(Module):

         hidden = self.init_layer(state)

-        hidden = self.mlp(state, latent)
+        hidden = self.mlp(hidden, latent)

         return self.to_out(hidden)

-class Agent(Module):
-    def __init__(
-        self,
-        actor: Actor,
-        critic: Critic,
-    ):
-        super().__init__()
-
-        self.actor = actor
-        self.critic = critic
-
-    def forward(
-        self,
-        memories: list[Memory]
-    ):
-        raise NotImplementedError
-
 # criteria for running genetic algorithm

 class ShouldRunGeneticAlgorithm(Module):
     def __init__(
         self,
-        gamma = 2. # not sure what the value is
+        gamma = 1.5 # not sure what the value is
     ):
         super().__init__()
         self.gamma = gamma
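The Actor and Critic hunks above fix the same bug: `init_layer` produced a hidden projection that was then thrown away, because the latent-conditioned MLP was fed the raw `state` again. A minimal, hypothetical stand-in (not the package's actual Actor, and with the latent conditioning simplified to a plain concatenation) showing the data flow the fix restores:

```python
import torch
from torch import nn

class TinyActor(nn.Module):  # hypothetical stand-in, not the package's Actor
    def __init__(self, dim_state = 512, dim_hidden = 256, dim_latent = 32, num_actions = 4):
        super().__init__()
        self.init_layer = nn.Linear(dim_state, dim_hidden)
        self.mlp = nn.Linear(dim_hidden + dim_latent, dim_hidden)  # latent conditioning simplified to a concat
        self.to_out = nn.Linear(dim_hidden, num_actions)

    def forward(self, state, latent):
        hidden = self.init_layer(state)
        # before the fix, `state` was passed here instead of `hidden`, discarding init_layer's output
        hidden = self.mlp(torch.cat((hidden, latent), dim = -1))
        return self.to_out(hidden)

logits = TinyActor()(torch.randn(1, 512), torch.randn(1, 32))  # (1, 4)
```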
@@ -368,8 +352,8 @@ class LatentGenePool(Module):
         frac_natural_selected = 0.25, # number of least fit genes to remove from the pool
         frac_elitism = 0.1, # frac of population to preserve from being noised
         mutation_strength = 1., # factor to multiply to gaussian noise as mutation to latents
-        net: MLP | Module | dict | None = None,
-        should_run_genetic_algorithm: Module = ShouldRunGeneticAlgorithm() # eq (3) in paper
+        should_run_genetic_algorithm: Module | None = None, # eq (3) in paper
+        default_should_run_ga_gamma = 1.5
     ):
         super().__init__()

@@ -404,21 +388,8 @@ class LatentGenePool(Module):
         self.num_elites = int(frac_elitism * num_latents)
         self.has_elites = self.num_elites > 0

-        # network for the latent / gene
-
-        if isinstance(net, dict):
-            assert 'dim_latent' not in net
-            assert 'num_latent_sets' not in net
-
-            net.update(dim_latent = dim_latent)
-            net.update(num_latent_sets = num_latent_sets)
-
-            net = MLP(**net)
-
-        assert net.dim_latent == dim_latent, f'the latent dimension set on the MLP {net.dim_latent} must be what was passed into the latent gene pool module ({dim_latent})'
-        assert net.num_latent_sets == num_latent_sets, 'number of latent sets must be equal between MLP and and latent gene pool container'
-
-        self.net = net
+        if not exists(should_run_genetic_algorithm):
+            should_run_genetic_algorithm = ShouldRunGeneticAlgorithm(gamma = default_should_run_ga_gamma)

         self.should_run_genetic_algorithm = should_run_genetic_algorithm

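With the `net` argument gone and the eq (3) criterion now optional, construction under the new signature looks roughly as follows. This is a sketch based on the keyword names in the diff; `ShouldRunGeneticAlgorithm` is assumed to be imported from the `epo` module, since the updated `__init__.py` above does not re-export it:

```python
from evolutionary_policy_optimization import LatentGenePool
from evolutionary_policy_optimization.epo import ShouldRunGeneticAlgorithm

# rely on the internally built criterion (gamma = default_should_run_ga_gamma = 1.5)
latent_pool = LatentGenePool(num_latents = 128, dim_latent = 32)

# or pass the criterion explicitly, e.g. with a different gamma for eq (3)
latent_pool = LatentGenePool(
    num_latents = 128,
    dim_latent = 32,
    should_run_genetic_algorithm = ShouldRunGeneticAlgorithm(gamma = 2.)
)
```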
@@ -463,7 +434,7 @@ class LatentGenePool(Module):

         tournament_winner_indices = repeat(tournament_winner_indices, '... -> ... n g', g = self.dim_latent, n = self.num_latent_sets)

-        parents = participants.gather(-2, tournament_winner_indices)
+        parents = participants.gather(-3, tournament_winner_indices)

         # 3. do a crossover of the parents - in their case they went for a simple averaging, but since we are doing tournament style and the same pair of parents may be re-selected, lets make it random interpolation

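Why the gather dimension moves from -2 to -3: after the `repeat` above, the winner indices gain trailing latent-set (`n`) and gene (`g`) dims, so the participant dimension they index into sits third from the end, not second. A small self-contained illustration with hypothetical sizes (not the package's actual shapes):

```python
import torch
from einops import repeat

t, p, k, n, g = 4, 5, 2, 3, 32   # tournaments, participants per tournament, winners kept, latent sets, gene dim

participants = torch.randn(t, p, n, g)        # latents entered into each tournament
winner_indices = torch.randint(0, p, (t, k))  # which participant won, per tournament

# broadcast each winner index across the latent-set and gene dims so gather copies whole latents
winner_indices = repeat(winner_indices, '... -> ... n g', n = n, g = g)  # (t, k, n, g)

# the participant dim of `participants` is now third from the end, hence gather along -3
parents = participants.gather(-3, winner_indices)
assert parents.shape == (t, k, n, g)
```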
@@ -501,11 +472,10 @@ class LatentGenePool(Module):
         self,
         *args,
         latent_id: int | None = None,
+        net: Module | None = None,
         **kwargs,
     ):

-        assert exists(self.net)
-
         # if only 1 latent, assume doing ablation and get lone gene

         if not exists(latent_id) and self.num_latents == 1:
@@ -517,12 +487,37 @@ class LatentGenePool(Module):

         latent = self.latents[latent_id]

-        return self.net(
+        if not exists(net):
+            return latent
+
+        return net(
             *args,
             latent = latent,
             **kwargs
         )

+# agent contains the actor, critic, and the latent genetic pool
+
+class Agent(Module):
+    def __init__(
+        self,
+        actor: Actor,
+        critic: Critic,
+        latent_gene_pool: LatentGenePool
+    ):
+        super().__init__()
+
+        self.actor = actor
+        self.critic = critic
+
+        self.latent_gene_pool = latent_gene_pool
+
+    def forward(
+        self,
+        memories: list[Memory]
+    ):
+        raise NotImplementedError
+
 # EPO - which is just PPO with natural selection of a population of latent variables conditioning the agent
 # the tricky part is that the latent ids for each episode / trajectory needs to be tracked

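Putting the relocated pieces together, a sketch of the new call pattern. Argument values mirror the updated README below; routing a network through the pool assumes, as the forward hunk above suggests, that the network accepts its latent by the `latent` keyword:

```python
import torch
from evolutionary_policy_optimization import Actor, Critic, Agent, LatentGenePool

latent_pool = LatentGenePool(num_latents = 128, dim_latent = 32)

actor = Actor(512, dim_hiddens = (256, 128), num_actions = 4, dim_latent = 32)
critic = Critic(512, dim_hiddens = (256, 128, 64), dim_latent = 32)

state = torch.randn(1, 512)

# with no `net` passed, the pool's forward simply returns the selected latent
latent = latent_pool(latent_id = 2)
actions = actor(state, latent)

# or hand a network to the pool and let it route the call, passing the latent by keyword
values = latent_pool(state, latent_id = 2, net = critic)

# Agent now bundles all three modules, though its forward is still a stub (NotImplementedError)
agent = Agent(actor, critic, latent_pool)
```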
--- evolutionary_policy_optimization-0.0.8.dist-info/METADATA
+++ evolutionary_policy_optimization-0.0.10.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.8
+Version: 0.0.10
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -60,37 +60,40 @@ This paper stands out, as I have witnessed the positive effects first hand in an

 Besides their latent variable strategy, I'll also throw in some attempts with crossover in weight space

-Update: I see, mixing genetic algorithms with gradient based method is already a research field, under [Memetic algorithms](https://en.wikipedia.org/wiki/Memetic_algorithm). This is also incidentally what I have concluded what Science is. I am in direct exposure to this phenomenon on a daily basis
+Update: I see, mixing genetic algorithms with gradient based method is already a research field, under [Memetic algorithms](https://en.wikipedia.org/wiki/Memetic_algorithm)

 ## Usage

 ```python
+
 import torch

 from evolutionary_policy_optimization import (
     LatentGenePool,
-    MLP
+    Actor,
+    Critic
 )

 latent_pool = LatentGenePool(
-    num_latents = 32,
+    num_latents = 128,
     dim_latent = 32,
-    net = MLP(
-        dims = (512, 256),
-        dim_latent = 32,
-    )
 )

 state = torch.randn(1, 512)
-action = latent_pool(state, latent_id = 3) # use latent / gene 4
+
+actor = Actor(512, dim_hiddens = (256, 128), num_actions = 4, dim_latent = 32)
+critic = Critic(512, dim_hiddens = (256, 128, 64), dim_latent = 32)
+
+latent = latent_pool(latent_id = 2)
+
+actions = actor(state, latent)
+value = critic(state, latent)

 # interact with environment and receive rewards, termination etc

 # derive a fitness score for each gene / latent

-fitness = torch.randn(32)
-
-latent_pool.genetic_algorithm_step(fitness) # update latents using one generation of genetic algorithm
+fitness = torch.randn(128)

 ```

--- /dev/null
+++ evolutionary_policy_optimization-0.0.10.dist-info/RECORD
@@ -0,0 +1,7 @@
+evolutionary_policy_optimization/__init__.py,sha256=A07bhbBI_p-GlSTkI15pioQ1XgtJ0V4tBN6v3vs2nuU,115
+evolutionary_policy_optimization/epo.py,sha256=66GOQq8_s5kmQI7G-2Z0J_0g4E5QarjQPJfWEP7mmKg,15442
+evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
+evolutionary_policy_optimization-0.0.10.dist-info/METADATA,sha256=bD3fw2Zw1IxhfkCvzjsRhODyL_XIC5ZsvNQqFbZXNc4,4357
+evolutionary_policy_optimization-0.0.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.10.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.10.dist-info/RECORD,,
--- evolutionary_policy_optimization-0.0.8.dist-info/RECORD
+++ /dev/null
@@ -1,7 +0,0 @@
-evolutionary_policy_optimization/__init__.py,sha256=Nu-_OMA8abe4AiW9Gw6MvbZH0JZpMHMqjeXmkC9-7UU,81
-evolutionary_policy_optimization/epo.py,sha256=LA81Yi6o3EFbJZHkxx1vyBFZWvNqpZ9mGhEauLZu9Ig,15692
-evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
-evolutionary_policy_optimization-0.0.8.dist-info/METADATA,sha256=42kS9DROtA90mUCJhim940ysydx3apEerwNbNs1wj_A,4460
-evolutionary_policy_optimization-0.0.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.0.8.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.0.8.dist-info/RECORD,,