rlgym-learn-algos 0.2.4-cp39-cp39-musllinux_1_2_armv7l.whl → 0.2.5-cp39-cp39-musllinux_1_2_armv7l.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py
+++ rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py
@@ -117,14 +117,17 @@ class GAETrajectoryProcessorPurePython(
         else:
             avg_return = np.nan
             return_std = np.nan
-        avg_reward = reward_sum[0] / exp_len
+        avg_reward = reward_sum.item() / exp_len
+        average_episode_return = reward_sum.item() / len(trajectories)
         trajectory_processor_data = GAETrajectoryProcessorData(
-            average_undiscounted_episodic_return=avg_reward,
+            average_undiscounted_episodic_return=average_episode_return,
             average_return=avg_return,
             return_standard_deviation=return_std,
+            average_reward=avg_reward
         )
         return (
             (
+                agent_ids,
                 observations,
                 actions,
                 torch.stack(log_probs_list).to(device=self.device),
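
Note on this hunk: in 0.2.5 the per-step mean reward (reward_sum divided by the total experience length) is reported separately as average_reward, while average_undiscounted_episodic_return becomes reward_sum divided by the number of trajectories, and agent_ids is added to the returned tuple. A minimal sketch of the two statistics on hypothetical toy data (the variable names below are illustrative, not the library's API):

    import numpy as np

    # Hypothetical example: two trajectories with per-step rewards.
    trajectory_rewards = [np.array([1.0, 0.5, 0.5]), np.array([2.0])]

    reward_sum = sum(r.sum() for r in trajectory_rewards)  # np.float64(4.0)
    exp_len = sum(len(r) for r in trajectory_rewards)      # 4 steps in total

    avg_reward = reward_sum.item() / exp_len                              # 1.0, mean reward per step
    average_episode_return = reward_sum.item() / len(trajectory_rewards)  # 2.0, mean undiscounted return per episode
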
@@ -146,7 +149,7 @@ class GAETrajectoryProcessorPurePython(
         self.max_returns_per_stats_increment = (
             config.trajectory_processor_config.max_returns_per_stats_increment
         )
-        self.dtype = np.dtype(config.dtype)
+        self.dtype = np.dtype(str(config.dtype).replace("torch.", ""))
         self.device = config.device
         self.checkpoint_load_folder = config.checkpoint_load_folder
         if self.checkpoint_load_folder is not None:
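
Note on this hunk: np.dtype() cannot interpret a torch.dtype such as torch.float32, so 0.2.5 strips the "torch." prefix from its string form before handing it to numpy. A small sketch of the conversion, assuming the config stores a torch.dtype:

    import numpy as np
    import torch

    config_dtype = torch.float32  # assumption: the config carries a torch dtype

    # np.dtype(torch.float32) raises a TypeError, and "torch.float32" is not a name
    # numpy recognizes; dropping the "torch." prefix leaves "float32", which is.
    np_dtype = np.dtype(str(config_dtype).replace("torch.", ""))
    print(np_dtype)  # float32
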
--- rlgym_learn_algos/ppo/ppo_learner.py
+++ rlgym_learn_algos/ppo/ppo_learner.py
@@ -284,6 +284,15 @@ class PPOLearner(
                 batch_advantages,
             ) = batch
             batch_target_values = batch_values + batch_advantages
+            if self.config.learner_config.advantage_normalization:
+                old_device = batch_advantages.device
+                batch_advantages = batch_advantages.to(
+                    self.config.learner_config.device
+                )
+                std, mean = torch.std_mean(batch_advantages)
+                batch_advantages = (batch_advantages - mean) / (std + 1e-8)
+                batch_advantages = batch_advantages.to(old_device)
+
             self.actor_optimizer.zero_grad()
             self.critic_optimizer.zero_grad()
 
@@ -306,10 +315,6 @@ class PPOLearner(
                 advantages = batch_advantages[start:stop].to(
                     self.config.learner_config.device
                 )
-                if self.config.learner_config.advantage_normalization:
-                    advantages = (advantages - torch.mean(advantages)) / (
-                        torch.std(advantages) + 1e-8
-                    )
                 old_probs = batch_old_probs[start:stop].to(
                     self.config.learner_config.device
                 )
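
Note on these two ppo_learner.py hunks: together they move advantage normalization out of the minibatch loop and apply it once to the full batch (on the learner's device) before the optimizer steps, so the mean and standard deviation are estimated over the whole batch rather than over each small slice. A minimal sketch of the batch-level normalization, with an assumed helper name rather than the library's API:

    import torch

    def normalize_batch_advantages(batch_advantages: torch.Tensor, device: str = "cpu") -> torch.Tensor:
        # Sketch: normalize advantages once over the whole batch.
        old_device = batch_advantages.device
        adv = batch_advantages.to(device)
        std, mean = torch.std_mean(adv)    # returns (std, mean) in a single pass
        adv = (adv - mean) / (std + 1e-8)  # epsilon guards against zero std
        return adv.to(old_device)
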
--- rlgym_learn_algos-0.2.4.dist-info/METADATA
+++ rlgym_learn_algos-0.2.5.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rlgym-learn-algos
-Version: 0.2.4
+Version: 0.2.5
 Classifier: Programming Language :: Rust
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Requires-Dist: pydantic>=2.8.2
--- rlgym_learn_algos-0.2.4.dist-info/RECORD
+++ rlgym_learn_algos-0.2.5.dist-info/RECORD
@@ -1,6 +1,6 @@
-rlgym_learn_algos-0.2.4.dist-info/METADATA,sha256=KldIto2nUjijheVI6OpfvsBKYxgWYCbOFclqfwp13ys,2403
-rlgym_learn_algos-0.2.4.dist-info/WHEEL,sha256=ZaFOerxeFPIN7Ome868VEyCSJvdCwZINrGvZscX7-b8,105
-rlgym_learn_algos-0.2.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+rlgym_learn_algos-0.2.5.dist-info/METADATA,sha256=2VF5mDcwQYEzbe79AxZaPgBLM27xqPRKgwe2e032iEM,2403
+rlgym_learn_algos-0.2.5.dist-info/WHEEL,sha256=tJzuvXDEq7ucUZfJWsHlvQOirMibwrJVwFDSzASj-C8,105
+rlgym_learn_algos-0.2.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
 rlgym_learn_algos.libs/libgcc_s-5b5488a6.so.1,sha256=HGKUsVmTeNAxEdSy7Ua5Vh_I9FN3RCbPWzvZ7H_TrwE,2749061
 rlgym_learn_algos/__init__.py,sha256=dZeTgNro6qG1Hu0l0UBhgHOYiyeCwPWndC84dJAp__U,203
 rlgym_learn_algos/conversion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -19,15 +19,15 @@ rlgym_learn_algos/ppo/env_trajectories.py,sha256=PaO6dmpNkQ3yDLaHIRc0ipn45t5zAjE
 rlgym_learn_algos/ppo/experience_buffer.py,sha256=4wWSfq1tobXv7lmBbkM5sbTVuPJxrdAbxi5rNHc04g4,10769
 rlgym_learn_algos/ppo/experience_buffer_numpy.py,sha256=6AOGQjDn_dHLS9bmxJW_cGEjBUbe8u5VWS0LVlpIdmY,4617
 rlgym_learn_algos/ppo/gae_trajectory_processor.py,sha256=gv5kxvvPnK7SyQIAq6MbOFILIMdPlzoLZwM8TRmtNWw,5302
-rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py,sha256=cq7qbK0mcLDXRzA6-pKW0OC50X52XhT5himcOTD6Ei4,6657
+rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py,sha256=APtUDpxo5ZNV-xupe56lzu2nwvfLwyEEB87hiISgRdw,6836
 rlgym_learn_algos/ppo/multi_discrete_actor.py,sha256=Mik0X79dUy2ZRIMol4RMTZE9qzsOk6f_6bDaOl5ghxs,3039
 rlgym_learn_algos/ppo/ppo_agent_controller.py,sha256=to807i7Nm7FMA0zT8m9VWTBZz7pxhL-W8JLBM4OFuc0,25051
-rlgym_learn_algos/ppo/ppo_learner.py,sha256=utEWkikXCpC6Xc1D3asohO0HsIaq3tLyoTlb7fXLOw4,15522
+rlgym_learn_algos/ppo/ppo_learner.py,sha256=DTqohPeFB7XbxoB8hkvBpGbbAZG_0vGPyPuKEyM8Btw,15770
 rlgym_learn_algos/ppo/ppo_metrics_logger.py,sha256=iUyUc2GPwDIIjZeJPZWxoeRrzUWV_qLOac0vApQBkp0,2803
 rlgym_learn_algos/ppo/trajectory.py,sha256=_xyS9ueU6iVvqMUpFr-kb42wEHHZy4zCse7_r660n5E,690
 rlgym_learn_algos/ppo/trajectory_processor.py,sha256=3XRsXXexHWp6UV5nAeBLYvWqvQ9EbNHSN3Yooi4cezo,2031
 rlgym_learn_algos/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rlgym_learn_algos/rlgym_learn_algos.cpython-39-arm-linux-gnueabihf.so,sha256=uvZb1wCJqEBZozUM2MxXf4OcgqDLlVZiwHOQ1Z6fXSs,739981
+rlgym_learn_algos/rlgym_learn_algos.cpython-39-arm-linux-gnueabihf.so,sha256=w0fTk1SIIbgpHivl-ywxuoQZyRhKjO8GjoyWZqppzn8,739981
 rlgym_learn_algos/rlgym_learn_algos.pyi,sha256=B9Kt9uK8xCqASRxWvzLdV501TSCMO4vTNqvZ0MhOHyo,1164
 rlgym_learn_algos/stateful_functions/__init__.py,sha256=OAVy6cQIS85Utyp18jjHgdmascX_8nkwk3A0OpFJxT4,230
 rlgym_learn_algos/stateful_functions/batch_reward_type_numpy_converter.py,sha256=a3q2l5SIgDI36ImF_kYoa684pghnFnlV2vGYvV2zcV0,743
@@ -37,4 +37,4 @@ rlgym_learn_algos/util/__init__.py,sha256=hq7M00Q7zAfyQmIGmXOif0vI40aj_FQ5SqI5dn
 rlgym_learn_algos/util/running_stats.py,sha256=KtzdKKT75-5ZC58JRqaDXk6sBqa3ZSjQQZrRajAw3Yk,4339
 rlgym_learn_algos/util/torch_functions.py,sha256=ImgDw4I3ZixGDi17YRkW6UbaiaQTbvOCUCS7N0QVSsU,3320
 rlgym_learn_algos/util/torch_pydantic.py,sha256=khPGA6kWh4_WHoploDkl_SCIGX8SkKkFT40RE06PImc,3413
-rlgym_learn_algos-0.2.4.dist-info/RECORD,,
+rlgym_learn_algos-0.2.5.dist-info/RECORD,,
--- rlgym_learn_algos-0.2.4.dist-info/WHEEL
+++ rlgym_learn_algos-0.2.5.dist-info/WHEEL
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: maturin (1.9.0)
+Generator: maturin (1.9.1)
 Root-Is-Purelib: false
 Tag: cp39-cp39-musllinux_1_2_armv7l