rlgym-learn-algos 0.2.4__cp310-cp310-musllinux_1_2_i686.whl → 0.2.5__cp310-cp310-musllinux_1_2_i686.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py +6 -3
- rlgym_learn_algos/ppo/ppo_learner.py +9 -4
- rlgym_learn_algos/rlgym_learn_algos.cpython-310-i386-linux-gnu.so +0 -0
- {rlgym_learn_algos-0.2.4.dist-info → rlgym_learn_algos-0.2.5.dist-info}/METADATA +1 -1
- {rlgym_learn_algos-0.2.4.dist-info → rlgym_learn_algos-0.2.5.dist-info}/RECORD +7 -7
- {rlgym_learn_algos-0.2.4.dist-info → rlgym_learn_algos-0.2.5.dist-info}/WHEEL +1 -1
- {rlgym_learn_algos-0.2.4.dist-info → rlgym_learn_algos-0.2.5.dist-info}/licenses/LICENSE +0 -0
rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py

```diff
@@ -117,14 +117,17 @@ class GAETrajectoryProcessorPurePython(
         else:
             avg_return = np.nan
             return_std = np.nan
-        avg_reward = reward_sum
+        avg_reward = reward_sum.item() / exp_len
+        average_episode_return = reward_sum.item() / len(trajectories)
         trajectory_processor_data = GAETrajectoryProcessorData(
-            average_undiscounted_episodic_return=
+            average_undiscounted_episodic_return=average_episode_return,
             average_return=avg_return,
             return_standard_deviation=return_std,
+            average_reward=avg_reward
         )
         return (
             (
+                agent_ids,
                 observations,
                 actions,
                 torch.stack(log_probs_list).to(device=self.device),
```
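The hunk above replaces the old `avg_reward = reward_sum` (a raw tensor sum) with two scalar statistics: average reward per step of experience, and average undiscounted return per episode. A tiny illustration of the distinction, with made-up numbers that do not come from the package:

```python
# Hypothetical illustration: 2 completed trajectories of 3 and 5 steps.
rewards_per_trajectory = [[1.0, 0.0, 2.0], [0.5, 0.5, 0.5, 0.5, 1.0]]

reward_sum = sum(sum(t) for t in rewards_per_trajectory)  # 6.0 total reward
exp_len = sum(len(t) for t in rewards_per_trajectory)     # 8 steps of experience

avg_reward = reward_sum / exp_len                                  # 0.75 per step
average_episode_return = reward_sum / len(rewards_per_trajectory)  # 3.0 per episode
```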
```diff
@@ -146,7 +149,7 @@ class GAETrajectoryProcessorPurePython(
         self.max_returns_per_stats_increment = (
             config.trajectory_processor_config.max_returns_per_stats_increment
        )
-        self.dtype = np.dtype(config.dtype)
+        self.dtype = np.dtype(str(config.dtype).replace("torch.", ""))
         self.device = config.device
         self.checkpoint_load_folder = config.checkpoint_load_folder
         if self.checkpoint_load_folder is not None:
```
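The `dtype` fix matters because `config.dtype` is evidently a `torch.dtype`, which `np.dtype` cannot interpret directly; its string form carries a `torch.` prefix that numpy does not recognize. A minimal sketch of the conversion:

```python
import numpy as np
import torch

# np.dtype(torch.float32) raises TypeError: numpy cannot interpret a
# torch.dtype object. str(torch.float32) == "torch.float32", so stripping
# the "torch." prefix leaves a name numpy does understand.
np_dtype = np.dtype(str(torch.float32).replace("torch.", ""))
assert np_dtype == np.dtype("float32")
```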
rlgym_learn_algos/ppo/ppo_learner.py

```diff
@@ -284,6 +284,15 @@ class PPOLearner(
                 batch_advantages,
             ) = batch
             batch_target_values = batch_values + batch_advantages
+            if self.config.learner_config.advantage_normalization:
+                old_device = batch_advantages.device
+                batch_advantages = batch_advantages.to(
+                    self.config.learner_config.device
+                )
+                std, mean = torch.std_mean(batch_advantages)
+                batch_advantages = (batch_advantages - mean) / (std + 1e-8)
+                batch_advantages = batch_advantages.to(old_device)
+
             self.actor_optimizer.zero_grad()
             self.critic_optimizer.zero_grad()
 
```
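The new block normalizes advantages once over the full batch, on the learner device, before the minibatch loop; the per-minibatch normalization it replaces is removed in the next hunk. Every minibatch therefore shares the same mean and standard deviation. A standalone sketch of the batch-level scheme (the function name and `eps` parameter are illustrative, not from the package):

```python
import torch

def normalize_advantages(advantages: torch.Tensor, eps: float = 1e-8) -> torch.Tensor:
    # One mean/std over the full batch; torch.std_mean returns (std, mean).
    std, mean = torch.std_mean(advantages)
    return (advantages - mean) / (std + eps)

# Minibatch slices taken afterwards all share the same normalization stats,
# unlike per-minibatch normalization, which recomputes them on each slice.
batch_advantages = normalize_advantages(torch.randn(4096))
minibatch = batch_advantages[0:512]
```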
```diff
@@ -306,10 +315,6 @@ class PPOLearner(
                 advantages = batch_advantages[start:stop].to(
                     self.config.learner_config.device
                 )
-                if self.config.learner_config.advantage_normalization:
-                    advantages = (advantages - torch.mean(advantages)) / (
-                        torch.std(advantages) + 1e-8
-                    )
                 old_probs = batch_old_probs[start:stop].to(
                     self.config.learner_config.device
                 )
```
rlgym_learn_algos/rlgym_learn_algos.cpython-310-i386-linux-gnu.so: binary file changed (no textual diff).
{rlgym_learn_algos-0.2.4.dist-info → rlgym_learn_algos-0.2.5.dist-info}/RECORD

```diff
@@ -1,6 +1,6 @@
-rlgym_learn_algos-0.2.
-rlgym_learn_algos-0.2.
-rlgym_learn_algos-0.2.
+rlgym_learn_algos-0.2.5.dist-info/METADATA,sha256=2VF5mDcwQYEzbe79AxZaPgBLM27xqPRKgwe2e032iEM,2403
+rlgym_learn_algos-0.2.5.dist-info/WHEEL,sha256=C5uhi6P1W20Czs0QuhiCmpm0acDOuRmrNZ4S6GYSzsI,105
+rlgym_learn_algos-0.2.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
 rlgym_learn_algos.libs/libgcc_s-b5472b99.so.1,sha256=wh8CpjXz9IccAyeERcB7YDEx7NH2jF-PykwOyYNeRRI,453841
 rlgym_learn_algos/__init__.py,sha256=dZeTgNro6qG1Hu0l0UBhgHOYiyeCwPWndC84dJAp__U,203
 rlgym_learn_algos/conversion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -19,15 +19,15 @@ rlgym_learn_algos/ppo/env_trajectories.py,sha256=PaO6dmpNkQ3yDLaHIRc0ipn45t5zAjE
 rlgym_learn_algos/ppo/experience_buffer.py,sha256=4wWSfq1tobXv7lmBbkM5sbTVuPJxrdAbxi5rNHc04g4,10769
 rlgym_learn_algos/ppo/experience_buffer_numpy.py,sha256=6AOGQjDn_dHLS9bmxJW_cGEjBUbe8u5VWS0LVlpIdmY,4617
 rlgym_learn_algos/ppo/gae_trajectory_processor.py,sha256=gv5kxvvPnK7SyQIAq6MbOFILIMdPlzoLZwM8TRmtNWw,5302
-rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py,sha256=
+rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py,sha256=APtUDpxo5ZNV-xupe56lzu2nwvfLwyEEB87hiISgRdw,6836
 rlgym_learn_algos/ppo/multi_discrete_actor.py,sha256=Mik0X79dUy2ZRIMol4RMTZE9qzsOk6f_6bDaOl5ghxs,3039
 rlgym_learn_algos/ppo/ppo_agent_controller.py,sha256=to807i7Nm7FMA0zT8m9VWTBZz7pxhL-W8JLBM4OFuc0,25051
-rlgym_learn_algos/ppo/ppo_learner.py,sha256=
+rlgym_learn_algos/ppo/ppo_learner.py,sha256=DTqohPeFB7XbxoB8hkvBpGbbAZG_0vGPyPuKEyM8Btw,15770
 rlgym_learn_algos/ppo/ppo_metrics_logger.py,sha256=iUyUc2GPwDIIjZeJPZWxoeRrzUWV_qLOac0vApQBkp0,2803
 rlgym_learn_algos/ppo/trajectory.py,sha256=_xyS9ueU6iVvqMUpFr-kb42wEHHZy4zCse7_r660n5E,690
 rlgym_learn_algos/ppo/trajectory_processor.py,sha256=3XRsXXexHWp6UV5nAeBLYvWqvQ9EbNHSN3Yooi4cezo,2031
 rlgym_learn_algos/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rlgym_learn_algos/rlgym_learn_algos.cpython-310-i386-linux-gnu.so,sha256=
+rlgym_learn_algos/rlgym_learn_algos.cpython-310-i386-linux-gnu.so,sha256=jjJu8_ajdMeRqcnoCii9IAqWVGoT_ijpGqUTKd9oHlE,707161
 rlgym_learn_algos/rlgym_learn_algos.pyi,sha256=B9Kt9uK8xCqASRxWvzLdV501TSCMO4vTNqvZ0MhOHyo,1164
 rlgym_learn_algos/stateful_functions/__init__.py,sha256=OAVy6cQIS85Utyp18jjHgdmascX_8nkwk3A0OpFJxT4,230
 rlgym_learn_algos/stateful_functions/batch_reward_type_numpy_converter.py,sha256=a3q2l5SIgDI36ImF_kYoa684pghnFnlV2vGYvV2zcV0,743
@@ -37,4 +37,4 @@ rlgym_learn_algos/util/__init__.py,sha256=hq7M00Q7zAfyQmIGmXOif0vI40aj_FQ5SqI5dn
 rlgym_learn_algos/util/running_stats.py,sha256=KtzdKKT75-5ZC58JRqaDXk6sBqa3ZSjQQZrRajAw3Yk,4339
 rlgym_learn_algos/util/torch_functions.py,sha256=ImgDw4I3ZixGDi17YRkW6UbaiaQTbvOCUCS7N0QVSsU,3320
 rlgym_learn_algos/util/torch_pydantic.py,sha256=khPGA6kWh4_WHoploDkl_SCIGX8SkKkFT40RE06PImc,3413
-rlgym_learn_algos-0.2.
+rlgym_learn_algos-0.2.5.dist-info/RECORD,,
```
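Each RECORD line has the form `path,sha256=<digest>,size`, where the digest is the urlsafe-base64 sha256 of the file contents with `=` padding stripped (per PEP 376 / the wheel spec). A minimal sketch for recomputing one of the values above; `record_hash` is a hypothetical helper, not part of this package:

```python
import base64
import hashlib

def record_hash(path: str) -> str:
    # urlsafe base64 of the sha256 digest, '=' padding stripped, matching
    # the sha256=... values in the RECORD lines above.
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")
```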
{rlgym_learn_algos-0.2.4.dist-info → rlgym_learn_algos-0.2.5.dist-info}/licenses/LICENSE: file without changes.