evolutionary-policy-optimization 0.0.69__py3-none-any.whl → 0.0.70__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolutionary_policy_optimization/epo.py +26 -6
- {evolutionary_policy_optimization-0.0.69.dist-info → evolutionary_policy_optimization-0.0.70.dist-info}/METADATA +7 -6
- {evolutionary_policy_optimization-0.0.69.dist-info → evolutionary_policy_optimization-0.0.70.dist-info}/RECORD +5 -5
- {evolutionary_policy_optimization-0.0.69.dist-info → evolutionary_policy_optimization-0.0.70.dist-info}/WHEEL +0 -0
- {evolutionary_policy_optimization-0.0.69.dist-info → evolutionary_policy_optimization-0.0.70.dist-info}/licenses/LICENSE +0 -0
evolutionary_policy_optimization/epo.py

@@ -416,6 +416,8 @@ class LatentGenePool(Module):
         self.num_natural_selected = int(frac_natural_selected * latents_per_island)
         self.num_tournament_participants = int(frac_tournaments * self.num_natural_selected)

+        assert self.num_tournament_participants >= 2
+
         self.crossover_random = crossover_random

         self.mutation_strength = mutation_strength
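The new assert turns a quiet misconfiguration into an immediate error: `num_tournament_participants` comes from two successive `int(...)` truncations, so a small island or small selection fractions can round it down below two, leaving tournament selection with nothing to compare. A minimal arithmetic sketch with hypothetical numbers:

    # hypothetical numbers, for illustration only
    latents_per_island = 8
    frac_natural_selected = 0.25
    frac_tournaments = 0.5

    num_natural_selected = int(frac_natural_selected * latents_per_island)      # int(0.25 * 8) = 2
    num_tournament_participants = int(frac_tournaments * num_natural_selected)  # int(0.5 * 2) = 1

    # a LatentGenePool built with these fractions now fails fast at construction
    # instead of misbehaving later during the genetic algorithm step
    print(num_tournament_participants >= 2)  # False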
@@ -845,7 +847,7 @@ class Agent(Module):

         return self.latent_gene_pool.genetic_algorithm_step(fitnesses)

-    def forward(
+    def learn_from(
         self,
         memories_and_cumulative_rewards: MemoriesAndCumulativeRewards,
         epochs = 2
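With this rename the PPO update is requested explicitly rather than by calling the agent module. A hedged sketch of the new call style, assuming `agent`, `epo`, and `env` are built as in the README example further down this diff:

    # the update step is now an explicit method rather than a call on the module itself
    memories = epo.gather_experience_from(env)   # returns MemoriesAndCumulativeRewards
    agent.learn_from(memories, epochs = 2)       # epochs = 2 matches the default shown above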
@@ -853,11 +855,13 @@ class Agent(Module):
     ):
         memories_and_cumulative_rewards = to_device(memories_and_cumulative_rewards, self.device)

-        memories, rewards_per_latent_episode = memories_and_cumulative_rewards
+        memories_list, rewards_per_latent_episode = memories_and_cumulative_rewards

         # stack memories

-        memories = map(stack, zip(*memories))
+        memories = map(stack, zip(*memories_list))
+
+        memories_list.clear()

         maybe_barrier()

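The stacking line transposes a list of per-step `Memory` tuples into one tuple of stacked tensors, and the new `memories_list.clear()` empties the source buffer afterwards, which matters now that the caller can pass in a reusable list. A self-contained sketch of the transpose-and-stack pattern, using a hypothetical two-field `Memory` rather than the package's real one:

    from collections import namedtuple
    import torch
    from torch import stack

    # hypothetical two-field Memory, just to illustrate the zip/stack transpose
    Memory = namedtuple('Memory', ['state', 'reward'])

    memories_list = [
        Memory(torch.randn(4), torch.tensor(1.)),
        Memory(torch.randn(4), torch.tensor(0.)),
        Memory(torch.randn(4), torch.tensor(2.)),
    ]

    # list of Memory tuples -> one tuple of stacked tensors
    states, rewards = map(stack, zip(*memories_list))

    print(states.shape)   # torch.Size([3, 4])
    print(rewards.shape)  # torch.Size([3])

    # the new code clears the source list so the caller's buffer can be reused
    memories_list.clear()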
@@ -979,7 +983,6 @@ class Agent(Module):
         # apply evolution

         if self.has_latent_genes:
-
             self.latent_gene_pool.genetic_algorithm_step(fitness_scores)

         # reinforcement learning related - ppo
@@ -1159,9 +1162,10 @@ class EPO(Module):
             yield latent_id, episode_id, maybe_seed

     @torch.no_grad()
-    def forward(
+    def gather_experience_from(
         self,
         env,
+        memories: list[Memory] | None = None,
         fix_environ_across_latents = None
     ) -> MemoriesAndCumulativeRewards:

@@ -1171,7 +1175,8 @@ class EPO(Module):

         invalid_episode = tensor(-1) # will use `episode_id` value of `-1` for the `next_value`, needed for not discarding last reward for generalized advantage estimate

-        memories: list[Memory] = []
+        if not exists(memories):
+            memories = []

         rewards_per_latent_episode = torch.zeros((self.num_latents, self.episodes_per_latent))

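`gather_experience_from` now takes an optional `memories` list: leaving it as `None` creates a fresh buffer (the previous behaviour), while passing a list lets several rollout calls accumulate into the same buffer before learning; defaulting to `None` rather than `[]` also sidesteps Python's shared-mutable-default pitfall. A hedged sketch of both call styles, again assuming the README setup below:

    # fresh buffer on every call (previous behaviour)
    memories = epo.gather_experience_from(env)

    # hypothetical accumulation: several rollouts share one caller-owned buffer
    buffer = []
    epo.gather_experience_from(env, memories = buffer)
    memories = epo.gather_experience_from(env, memories = buffer)  # both calls appended to `buffer`
    agent.learn_from(memories)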
@@ -1254,3 +1259,18 @@ class EPO(Module):
             memories = memories,
             cumulative_rewards = rewards_per_latent_episode
         )
+
+    def forward(
+        self,
+        agent: Agent,
+        env,
+        num_learning_cycles
+    ):
+
+        for _ in tqdm(range(num_learning_cycles), desc = 'learning cycle'):
+
+            memories = self.gather_experience_from(env)
+
+            agent.learn_from(memories)
+
+        print(f'training complete')
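The new `EPO.forward` ties the two halves together, so `epo(agent, env, num_learning_cycles = 5)` (as in the updated README below) is roughly this loop written out by hand:

    for _ in range(5):
        memories = epo.gather_experience_from(env)  # rollouts for every latent and episode
        agent.learn_from(memories)                  # PPO update; evolution is applied inside when latent genes are enabled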
{evolutionary_policy_optimization-0.0.69.dist-info → evolutionary_policy_optimization-0.0.70.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.69
+Version: 0.0.70
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -114,11 +114,14 @@ from evolutionary_policy_optimization import (

 agent = create_agent(
     dim_state = 512,
-    num_latents =
+    num_latents = 16,
     dim_latent = 32,
     actor_num_actions = 5,
     actor_dim_hiddens = (256, 128),
-    critic_dim_hiddens = (256, 128, 64)
+    critic_dim_hiddens = (256, 128, 64),
+    latent_gene_pool_kwargs = dict(
+        frac_natural_selected = 0.5
+    )
 )

 epo = EPO(
@@ -130,9 +133,7 @@ epo = EPO(

 env = Env((512,))

-memories = epo(env)
-
-agent(memories)
+epo(agent, env, num_learning_cycles = 5)

 # saving and loading

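The README example now forwards `latent_gene_pool_kwargs` through `create_agent`, and `frac_natural_selected = 0.5` keeps half of each island's latents per generation. Assuming a single island (so `latents_per_island` equals `num_latents = 16`) and a hypothetical `frac_tournaments = 0.25`, the sizes checked by the new assertion work out as:

    num_latents = 16              # from the README example above
    frac_natural_selected = 0.5   # from latent_gene_pool_kwargs
    frac_tournaments = 0.25       # assumed value, for illustration only

    num_natural_selected = int(frac_natural_selected * num_latents)             # 8
    num_tournament_participants = int(frac_tournaments * num_natural_selected)  # 2

    print(num_tournament_participants >= 2)  # True, satisfies the new assertion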
{evolutionary_policy_optimization-0.0.69.dist-info → evolutionary_policy_optimization-0.0.70.dist-info}/RECORD

@@ -1,9 +1,9 @@
 evolutionary_policy_optimization/__init__.py,sha256=0q0aBuFgWi06MLMD8FiHzBYQ3_W4LYWrwmCtF3u5H2A,201
 evolutionary_policy_optimization/distributed.py,sha256=7KgZdeS_wxBHo_du9XZFB1Cu318J-Bp66Xdr6Log_20,2423
-evolutionary_policy_optimization/epo.py,sha256=
+evolutionary_policy_optimization/epo.py,sha256=MmsqMwytVqBkb1f2piUygueOfn--Icb817P4bDcfPks,38683
 evolutionary_policy_optimization/experimental.py,sha256=-IgqjJ_Wk_CMB1y9YYWpoYqTG9GZHAS6kbRdTluVevg,1563
 evolutionary_policy_optimization/mock_env.py,sha256=Bv9ONFRbma8wpjUurc9aCk19A6ceiWitRnS3nwrIR64,1339
-evolutionary_policy_optimization-0.0.69.dist-info/METADATA,sha256=
-evolutionary_policy_optimization-0.0.69.dist-info/WHEEL,sha256=
-evolutionary_policy_optimization-0.0.69.dist-info/licenses/LICENSE,sha256=
-evolutionary_policy_optimization-0.0.69.dist-info/RECORD,,
+evolutionary_policy_optimization-0.0.70.dist-info/METADATA,sha256=fX_hR3dCjKUQ3VZtT-sUQy0qT8sF-nDPyP0QDAGHf60,6304
+evolutionary_policy_optimization-0.0.70.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.70.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.70.dist-info/RECORD,,
Files without changes: WHEEL and licenses/LICENSE (+0 -0).