evolutionary-policy-optimization 0.0.45__py3-none-any.whl → 0.0.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolutionary_policy_optimization/epo.py +22 -10
- {evolutionary_policy_optimization-0.0.45.dist-info → evolutionary_policy_optimization-0.0.46.dist-info}/METADATA +1 -1
- {evolutionary_policy_optimization-0.0.45.dist-info → evolutionary_policy_optimization-0.0.46.dist-info}/RECORD +5 -5
- {evolutionary_policy_optimization-0.0.45.dist-info → evolutionary_policy_optimization-0.0.46.dist-info}/WHEEL +0 -0
- {evolutionary_policy_optimization-0.0.45.dist-info → evolutionary_policy_optimization-0.0.46.dist-info}/licenses/LICENSE +0 -0
evolutionary_policy_optimization/epo.py

@@ -87,6 +87,11 @@ def temp_batch_dim(fn):

     return inner

+# fitness related
+
+def get_fitness_scores(cum_rewards, memories):
+    return cum_rewards
+
 # generalized advantage estimate

 def calc_generalized_advantage_estimate(
@@ -643,6 +648,7 @@ class Agent(Module):
         actor_optim_kwargs: dict = dict(),
         critic_optim_kwargs: dict = dict(),
         latent_optim_kwargs: dict = dict(),
+        get_fitness_scores: Callable[..., Tensor] = get_fitness_scores
     ):
         super().__init__()

@@ -663,6 +669,10 @@ class Agent(Module):
         self.actor_loss = partial(actor_loss, **actor_loss_kwargs)
         self.calc_gae = partial(calc_generalized_advantage_estimate, **calc_gae_kwargs)

+        # fitness score related
+
+        self.get_fitness_scores = get_fitness_scores
+
         # learning hparams

         self.batch_size = batch_size
@@ -766,10 +776,12 @@ class Agent(Module):

     def forward(
         self,
-
+        memories_and_cumulative_rewards: MemoriesAndCumulativeRewards,
         epochs = 2
     ):
-        memories,
+        memories, cumulative_rewards = memories_and_cumulative_rewards
+
+        fitness_scores = self.get_fitness_scores(cumulative_rewards, memories)

         (
             episode_ids,
@@ -952,9 +964,9 @@ Memory = namedtuple('Memory', [
     'done'
 ])

-
+MemoriesAndCumulativeRewards = namedtuple('MemoriesAndCumulativeRewards', [
     'memories',
-    '
+    'cumulative_rewards'
 ])

 class EPO(Module):
@@ -978,7 +990,7 @@ class EPO(Module):
     def forward(
         self,
         env
-    ) ->
+    ) -> MemoriesAndCumulativeRewards:

         self.agent.eval()

@@ -986,7 +998,7 @@ class EPO(Module):

         memories: list[Memory] = []

-
+        cumulative_rewards = torch.zeros((self.num_latents))

         for episode_id in tqdm(range(self.episodes_per_latent), desc = 'episode'):

@@ -1019,9 +1031,9 @@ class EPO(Module):

                     state, reward, done = env(action)

-                    # update
+                    # update cumulative rewards per latent, to be used as default fitness score

-
+                    cumulative_rewards[latent_id] += reward

                     # store memories

@@ -1051,7 +1063,7 @@ class EPO(Module):

                 memories.append(memory_for_gae)

-        return
+        return MemoriesAndCumulativeRewards(
             memories = memories,
-
+            cumulative_rewards = cumulative_rewards
         )
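Taken together, the epo.py changes introduce a pluggable fitness-score hook: EPO's rollout now accumulates per-latent rewards and returns them alongside the memories in a MemoriesAndCumulativeRewards namedtuple, and Agent accepts a get_fitness_scores callable (defaulting to the new module-level function, which passes the cumulative rewards through unchanged) that it applies inside forward before learning. Below is a minimal sketch of how the hook could be used; it relies only on names visible in this diff, while the standardization rule, the empty memories list, and the elided Agent/EPO constructor arguments are illustrative assumptions.

import torch
from torch import Tensor

from evolutionary_policy_optimization.epo import (
    get_fitness_scores,           # new default hook: returns the cumulative rewards unchanged
    MemoriesAndCumulativeRewards  # namedtuple now returned by EPO.forward(env)
)

# the default hook is the identity on the per-latent cumulative rewards

cum_rewards = torch.tensor([1., 3., 2.])  # one entry per latent (illustrative values)
assert torch.equal(get_fitness_scores(cum_rewards, memories = []), cum_rewards)

# the shape of what a rollout hands back to the learner as of 0.0.46

rollout = MemoriesAndCumulativeRewards(memories = [], cumulative_rewards = cum_rewards)

# a custom hook keeps the same (cum_rewards, memories) -> Tensor signature, e.g.
# standardizing rewards across latents before they are used as fitness scores
# (a hypothetical shaping choice, not something prescribed by the library)

def standardized_fitness_scores(cum_rewards: Tensor, memories) -> Tensor:
    return (cum_rewards - cum_rewards.mean()) / cum_rewards.std().clamp(min = 1e-5)

# per the diff, the callable is passed at construction, e.g.
#   agent = Agent(..., get_fitness_scores = standardized_fitness_scores)
# and Agent.forward then unpacks the rollout and derives the scores:
#   memories, cumulative_rewards = memories_and_cumulative_rewards
#   fitness_scores = self.get_fitness_scores(cumulative_rewards, memories)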
{evolutionary_policy_optimization-0.0.45.dist-info → evolutionary_policy_optimization-0.0.46.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: evolutionary-policy-optimization
-Version: 0.0.45
+Version: 0.0.46
 Summary: EPO - Pytorch
 Project-URL: Homepage, https://pypi.org/project/evolutionary-policy-optimization/
 Project-URL: Repository, https://github.com/lucidrains/evolutionary-policy-optimization
{evolutionary_policy_optimization-0.0.45.dist-info → evolutionary_policy_optimization-0.0.46.dist-info}/RECORD

@@ -1,8 +1,8 @@
 evolutionary_policy_optimization/__init__.py,sha256=0q0aBuFgWi06MLMD8FiHzBYQ3_W4LYWrwmCtF3u5H2A,201
-evolutionary_policy_optimization/epo.py,sha256=
+evolutionary_policy_optimization/epo.py,sha256=SAhWgRY8uPQEKFg1_nz1mvh8A6S_sHwnDykhd0F5xEI,31853
 evolutionary_policy_optimization/experimental.py,sha256=9FrJGviLESlYysHI3i83efT9g2ZB9ha4u3K9HXN98_w,1100
 evolutionary_policy_optimization/mock_env.py,sha256=6AIc4mwL_C6JkAxwESJgCLxXHMzCAu2FcffVg3HkSm0,920
-evolutionary_policy_optimization-0.0.
-evolutionary_policy_optimization-0.0.
-evolutionary_policy_optimization-0.0.
-evolutionary_policy_optimization-0.0.
+evolutionary_policy_optimization-0.0.46.dist-info/METADATA,sha256=xP2kdKo52-X4Z5XXTPpW0M_NFI0spuigeL7fvqFlsRM,6213
+evolutionary_policy_optimization-0.0.46.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+evolutionary_policy_optimization-0.0.46.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+evolutionary_policy_optimization-0.0.46.dist-info/RECORD,,
{evolutionary_policy_optimization-0.0.45.dist-info → evolutionary_policy_optimization-0.0.46.dist-info}/WHEEL

File without changes

{evolutionary_policy_optimization-0.0.45.dist-info → evolutionary_policy_optimization-0.0.46.dist-info}/licenses/LICENSE

File without changes