evolutionary-policy-optimization 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- evolutionary_policy_optimization/epo.py +26 -0
- {evolutionary_policy_optimization-0.0.6.dist-info → evolutionary_policy_optimization-0.0.8.dist-info}/METADATA +1 -1
- evolutionary_policy_optimization-0.0.8.dist-info/RECORD +7 -0
- evolutionary_policy_optimization-0.0.6.dist-info/RECORD +0 -7
- {evolutionary_policy_optimization-0.0.6.dist-info → evolutionary_policy_optimization-0.0.8.dist-info}/WHEEL +0 -0
- {evolutionary_policy_optimization-0.0.6.dist-info → evolutionary_policy_optimization-0.0.8.dist-info}/licenses/LICENSE +0 -0
evolutionary_policy_optimization/epo.py

@@ -335,6 +335,25 @@ class Agent(Module):
     ):
         raise NotImplementedError
 
+# criteria for running genetic algorithm
+
+class ShouldRunGeneticAlgorithm(Module):
+    def __init__(
+        self,
+        gamma = 2. # not sure what the value is
+    ):
+        super().__init__()
+        self.gamma = gamma
+
+    def forward(self, fitnesses):
+        # equation (3)
+
+        # max(fitness) - min(fitness) > gamma * median(fitness)
+        # however, this equation does not make much sense to me if fitness increases unbounded
+        # just let it be customizable, and offer a variant where mean and variance is over some threshold (could account for skew too)
+
+        return (fitnesses.amax() - fitnesses.amin()) > (self.gamma * torch.median(fitnesses))
+
 # classes
 
 class LatentGenePool(Module):
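The inline comment above floats the idea of a customizable gate, including "a variant where mean and variance is over some threshold". A minimal sketch of one such variant (hypothetical, not part of the package): trigger the step once the standard deviation of the fitnesses crosses a fixed threshold, which stays meaningful even when absolute fitness grows without bound.

```python
import torch
from torch.nn import Module

class ShouldRunGeneticAlgorithmViaStd(Module):
    # hypothetical variant of the gate above: run the genetic algorithm
    # once the spread (standard deviation) of the population's fitnesses
    # crosses a fixed threshold, independent of their absolute scale
    def __init__(self, std_threshold = 1.):
        super().__init__()
        self.std_threshold = std_threshold

    def forward(self, fitnesses):
        return fitnesses.std() > self.std_threshold
```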
@@ -350,6 +369,7 @@ class LatentGenePool(Module):
         frac_elitism = 0.1,  # frac of population to preserve from being noised
         mutation_strength = 1.,  # factor to multiply to gaussian noise as mutation to latents
         net: MLP | Module | dict | None = None,
+        should_run_genetic_algorithm: Module = ShouldRunGeneticAlgorithm() # eq (3) in paper
     ):
         super().__init__()
 
@@ -400,6 +420,8 @@ class LatentGenePool(Module):
 
         self.net = net
 
+        self.should_run_genetic_algorithm = should_run_genetic_algorithm
+
     @torch.no_grad()
     # non-gradient optimization, at least, not on the individual level (taken care of by rl component)
     def genetic_algorithm_step(
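Because the criterion is injected as a plain `Module`, it can be swapped out at construction time. A hedged sketch of overriding it (the remaining `LatentGenePool` arguments are elided, since this diff does not show them all):

```python
from torch.nn import Module

class AlwaysRun(Module):
    # trivial gate that never skips the genetic algorithm step
    def forward(self, fitnesses):
        return True

# hypothetical wiring; the other constructor arguments are omitted here
# pool = LatentGenePool(..., should_run_genetic_algorithm = AlwaysRun())
```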
@@ -412,6 +434,10 @@ class LatentGenePool(Module):
         g - gene dimension
         n - number of genes per individual
         """
+
+        if not self.should_run_genetic_algorithm(fitness):
+            return
+
         assert self.num_latents > 1
 
         genes = self.latents # the latents are the genes
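With the gate wired in, `genetic_algorithm_step` returns early whenever equation (3) does not hold. A small numeric check of the default criterion (assuming `ShouldRunGeneticAlgorithm` is importable from `evolutionary_policy_optimization.epo`, where the diff defines it):

```python
import torch
from evolutionary_policy_optimization.epo import ShouldRunGeneticAlgorithm

criterion = ShouldRunGeneticAlgorithm(gamma = 2.)

fitnesses = torch.tensor([1., 2., 3., 10.])
# spread = 10 - 1 = 9; torch.median of an even-length tensor returns
# the lower middle value, here 2., so the test is 9 > 2 * 2 -> run the step
assert criterion(fitnesses)

flat = torch.tensor([5., 5., 5., 5.])
# spread = 0 is not greater than 2 * 5 -> the step would be skipped
assert not criterion(flat)
```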
{evolutionary_policy_optimization-0.0.6.dist-info → evolutionary_policy_optimization-0.0.8.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.6
+Version: 0.0.8
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
evolutionary_policy_optimization-0.0.8.dist-info/RECORD

@@ -0,0 +1,7 @@
+evolutionary_policy_optimization/__init__.py,sha256=Nu-_OMA8abe4AiW9Gw6MvbZH0JZpMHMqjeXmkC9-7UU,81
+evolutionary_policy_optimization/epo.py,sha256=LA81Yi6o3EFbJZHkxx1vyBFZWvNqpZ9mGhEauLZu9Ig,15692
+evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
+evolutionary_policy_optimization-0.0.8.dist-info/METADATA,sha256=42kS9DROtA90mUCJhim940ysydx3apEerwNbNs1wj_A,4460
+evolutionary_policy_optimization-0.0.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.8.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.8.dist-info/RECORD,,
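Each RECORD row has the form `path,sha256=<digest>,<size>`, where the digest is the file's SHA-256 hash encoded as unpadded urlsafe base64 per the wheel spec (PEP 427). A minimal sketch for recomputing one entry's digest:

```python
import base64
import hashlib
from pathlib import Path

def record_digest(path):
    # sha256 of the file contents, urlsafe-base64 encoded with the
    # trailing '=' padding stripped, matching the sha256= fields above
    raw = hashlib.sha256(Path(path).read_bytes()).digest()
    return base64.urlsafe_b64encode(raw).rstrip(b"=").decode()

# e.g. for the 0.0.8 wheel's epo.py this should reproduce
# LA81Yi6o3EFbJZHkxx1vyBFZWvNqpZ9mGhEauLZu9Ig
# print(record_digest("evolutionary_policy_optimization/epo.py"))
```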
evolutionary_policy_optimization-0.0.6.dist-info/RECORD

@@ -1,7 +0,0 @@
-evolutionary_policy_optimization/__init__.py,sha256=Nu-_OMA8abe4AiW9Gw6MvbZH0JZpMHMqjeXmkC9-7UU,81
-evolutionary_policy_optimization/epo.py,sha256=vXkwsQE0CNEUPpguZP-XXsuDyIBN-bS3xDJDXpYlTHM,14772
-evolutionary_policy_optimization/experimental.py,sha256=ktBKxRF27Qsj7WIgBpYlWXqMVxO9zOx2oD1JuDYRAwM,548
-evolutionary_policy_optimization-0.0.6.dist-info/METADATA,sha256=M_0SbTqdifHQ_R9LWIe7ZfHMXgCiFDJ0sDpD29ctiNk,4460
-evolutionary_policy_optimization-0.0.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-evolutionary_policy_optimization-0.0.6.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-evolutionary_policy_optimization-0.0.6.dist-info/RECORD,,
Files without changes: WHEEL and licenses/LICENSE are identical between 0.0.6 and 0.0.8.