evolutionary-policy-optimization 0.0.69__py3-none-any.whl → 0.0.70__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolutionary_policy_optimization/epo.py +26 -6
- {evolutionary_policy_optimization-0.0.69.dist-info → evolutionary_policy_optimization-0.0.70.dist-info}/METADATA +7 -6
- {evolutionary_policy_optimization-0.0.69.dist-info → evolutionary_policy_optimization-0.0.70.dist-info}/RECORD +5 -5
- {evolutionary_policy_optimization-0.0.69.dist-info → evolutionary_policy_optimization-0.0.70.dist-info}/WHEEL +0 -0
- {evolutionary_policy_optimization-0.0.69.dist-info → evolutionary_policy_optimization-0.0.70.dist-info}/licenses/LICENSE +0 -0
evolutionary_policy_optimization/epo.py

@@ -416,6 +416,8 @@ class LatentGenePool(Module):
         self.num_natural_selected = int(frac_natural_selected * latents_per_island)
         self.num_tournament_participants = int(frac_tournaments * self.num_natural_selected)

+        assert self.num_tournament_participants >= 2
+
         self.crossover_random  = crossover_random

         self.mutation_strength = mutation_strength
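The added assertion guards tournament selection in the genetic algorithm step, which needs at least two participants per tournament to be meaningful. A minimal sketch of the arithmetic it checks, using illustrative numbers that are not the library defaults:

# illustrative numbers only, not the library defaults
latents_per_island    = 8
frac_natural_selected = 0.25
frac_tournaments      = 0.5

# mirrors the two assignments in the hunk above
num_natural_selected        = int(frac_natural_selected * latents_per_island)  # 2
num_tournament_participants = int(frac_tournaments * num_natural_selected)     # 1

# a configuration like this now fails the new assert at construction time,
# rather than running a degenerate one-entrant tournament later
print(num_tournament_participants >= 2)  # False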
@@ -845,7 +847,7 @@ class Agent(Module):

         return self.latent_gene_pool.genetic_algorithm_step(fitnesses)

-    def 
+    def learn_from(
         self,
         memories_and_cumulative_rewards: MemoriesAndCumulativeRewards,
         epochs = 2
@@ -853,11 +855,13 @@ class Agent(Module):
     ):
         memories_and_cumulative_rewards = to_device(memories_and_cumulative_rewards, self.device)

-
+        memories_list, rewards_per_latent_episode = memories_and_cumulative_rewards

         # stack memories

-        memories = map(stack, zip(*
+        memories = map(stack, zip(*memories_list))
+
+        memories_list.clear()

         maybe_barrier()

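Taken together, the two hunks above rename the Agent training entry point to `learn_from` and have it consume, then clear, the per-episode memories list in place. A usage sketch inferred from this diff (not authoritative documentation), assuming `agent` and `memories` were produced as in the README further down:

# `memories` is the MemoriesAndCumulativeRewards named tuple returned by
# EPO.gather_experience_from(env); `epochs = 2` matches the default shown above
agent.learn_from(memories, epochs = 2)

# per the hunk above, the list inside the named tuple is cleared in place after
# stacking, so the same list object can be reused for the next rollout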
@@ -979,7 +983,6 @@ class Agent(Module):
         # apply evolution

         if self.has_latent_genes:
-
             self.latent_gene_pool.genetic_algorithm_step(fitness_scores)

 # reinforcement learning related - ppo
@@ -1159,9 +1162,10 @@ class EPO(Module):
             yield latent_id, episode_id, maybe_seed

     @torch.no_grad()
-    def 
+    def gather_experience_from(
         self,
         env,
+        memories: list[Memory] | None = None,
         fix_environ_across_latents = None
     ) -> MemoriesAndCumulativeRewards:

@@ -1171,7 +1175,8 @@ class EPO(Module):

         invalid_episode = tensor(-1) # will use `episode_id` value of `-1` for the `next_value`, needed for not discarding last reward for generalized advantage estimate

-        memories:
+        if not exists(memories):
+            memories = []

         rewards_per_latent_episode = torch.zeros((self.num_latents, self.episodes_per_latent))

@@ -1254,3 +1259,18 @@ class EPO(Module):
             memories = memories,
             cumulative_rewards = rewards_per_latent_episode
         )
+
+    def forward(
+        self,
+        agent: Agent,
+        env,
+        num_learning_cycles
+    ):
+
+        for _ in tqdm(range(num_learning_cycles), desc = 'learning cycle'):
+
+            memories = self.gather_experience_from(env)
+
+            agent.learn_from(memories)
+
+        print(f'training complete')
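The new `EPO.forward` is a convenience wrapper around the two methods introduced earlier in this diff. A rough manual equivalent, sketched from this diff alone and minus the progress bar; `agent`, `epo`, and `env` are assumed to be constructed as in the README below:

for _ in range(5):  # number of learning cycles is arbitrary here
    # the new optional `memories = ...` argument can supply an existing list
    # instead of letting gather_experience_from allocate a fresh one
    rollout = epo.gather_experience_from(env)  # returns MemoriesAndCumulativeRewards
    agent.learn_from(rollout)  # stacks the memories, learns, and clears the list in place

The one-call form in the updated README, `epo(agent, env, num_learning_cycles = 5)`, dispatches to exactly this loop through `EPO.forward`.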
{evolutionary_policy_optimization-0.0.69.dist-info → evolutionary_policy_optimization-0.0.70.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.69
+Version: 0.0.70
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
@@ -114,11 +114,14 @@ from evolutionary_policy_optimization import (

 agent = create_agent(
     dim_state = 512,
-    num_latents = 
+    num_latents = 16,
     dim_latent = 32,
     actor_num_actions = 5,
     actor_dim_hiddens = (256, 128),
-    critic_dim_hiddens = (256, 128, 64)
+    critic_dim_hiddens = (256, 128, 64),
+    latent_gene_pool_kwargs = dict(
+        frac_natural_selected = 0.5
+    )
 )

 epo = EPO(
@@ -130,9 +133,7 @@ epo = EPO(

 env = Env((512,))

-
-
-agent(memories)
+epo(agent, env, num_learning_cycles = 5)

 # saving and loading

{evolutionary_policy_optimization-0.0.69.dist-info → evolutionary_policy_optimization-0.0.70.dist-info}/RECORD

@@ -1,9 +1,9 @@
 evolutionary_policy_optimization/__init__.py,sha256=0q0aBuFgWi06MLMD8FiHzBYQ3_W4LYWrwmCtF3u5H2A,201
 evolutionary_policy_optimization/distributed.py,sha256=7KgZdeS_wxBHo_du9XZFB1Cu318J-Bp66Xdr6Log_20,2423
-evolutionary_policy_optimization/epo.py,sha256=
+evolutionary_policy_optimization/epo.py,sha256=MmsqMwytVqBkb1f2piUygueOfn--Icb817P4bDcfPks,38683
 evolutionary_policy_optimization/experimental.py,sha256=-IgqjJ_Wk_CMB1y9YYWpoYqTG9GZHAS6kbRdTluVevg,1563
 evolutionary_policy_optimization/mock_env.py,sha256=Bv9ONFRbma8wpjUurc9aCk19A6ceiWitRnS3nwrIR64,1339
-evolutionary_policy_optimization-0.0.
-evolutionary_policy_optimization-0.0.
-evolutionary_policy_optimization-0.0.
-evolutionary_policy_optimization-0.0.
+evolutionary_policy_optimization-0.0.70.dist-info/METADATA,sha256=fX_hR3dCjKUQ3VZtT-sUQy0qT8sF-nDPyP0QDAGHf60,6304
+evolutionary_policy_optimization-0.0.70.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.70.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.70.dist-info/RECORD,,
{evolutionary_policy_optimization-0.0.69.dist-info → evolutionary_policy_optimization-0.0.70.dist-info}/WHEEL and /licenses/LICENSE

File without changes