rlgym-learn-algos 0.2.4__cp39-cp39-win_amd64.whl → 0.2.5__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -117,14 +117,17 @@ class GAETrajectoryProcessorPurePython(
117
117
  else:
118
118
  avg_return = np.nan
119
119
  return_std = np.nan
120
- avg_reward = reward_sum[0] / exp_len
120
+ avg_reward = reward_sum.item() / exp_len
121
+ average_episode_return = reward_sum.item() / len(trajectories)
121
122
  trajectory_processor_data = GAETrajectoryProcessorData(
122
- average_undiscounted_episodic_return=avg_reward,
123
+ average_undiscounted_episodic_return=average_episode_return,
123
124
  average_return=avg_return,
124
125
  return_standard_deviation=return_std,
126
+ average_reward=avg_reward
125
127
  )
126
128
  return (
127
129
  (
130
+ agent_ids,
128
131
  observations,
129
132
  actions,
130
133
  torch.stack(log_probs_list).to(device=self.device),
@@ -146,7 +149,7 @@ class GAETrajectoryProcessorPurePython(
146
149
  self.max_returns_per_stats_increment = (
147
150
  config.trajectory_processor_config.max_returns_per_stats_increment
148
151
  )
149
- self.dtype = np.dtype(config.dtype)
152
+ self.dtype = np.dtype(str(config.dtype).replace("torch.", ""))
150
153
  self.device = config.device
151
154
  self.checkpoint_load_folder = config.checkpoint_load_folder
152
155
  if self.checkpoint_load_folder is not None:
@@ -284,6 +284,15 @@ class PPOLearner(
284
284
  batch_advantages,
285
285
  ) = batch
286
286
  batch_target_values = batch_values + batch_advantages
287
+ if self.config.learner_config.advantage_normalization:
288
+ old_device = batch_advantages.device
289
+ batch_advantages = batch_advantages.to(
290
+ self.config.learner_config.device
291
+ )
292
+ std, mean = torch.std_mean(batch_advantages)
293
+ batch_advantages = (batch_advantages - mean) / (std + 1e-8)
294
+ batch_advantages = batch_advantages.to(old_device)
295
+
287
296
  self.actor_optimizer.zero_grad()
288
297
  self.critic_optimizer.zero_grad()
289
298
 
@@ -306,10 +315,6 @@ class PPOLearner(
306
315
  advantages = batch_advantages[start:stop].to(
307
316
  self.config.learner_config.device
308
317
  )
309
- if self.config.learner_config.advantage_normalization:
310
- advantages = (advantages - torch.mean(advantages)) / (
311
- torch.std(advantages) + 1e-8
312
- )
313
318
  old_probs = batch_old_probs[start:stop].to(
314
319
  self.config.learner_config.device
315
320
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rlgym-learn-algos
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Requires-Dist: pydantic>=2.8.2
@@ -1,6 +1,6 @@
1
- rlgym_learn_algos-0.2.4.dist-info/METADATA,sha256=JtoluYo3NjNSC68j3U6S7xb79gvrSC74O3TNy_r8v8k,2431
2
- rlgym_learn_algos-0.2.4.dist-info/WHEEL,sha256=SqgFUQC3rmJBHv6XKhA7wOKmU2ih3aAfz0FSmR1FvyM,94
3
- rlgym_learn_algos-0.2.4.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
1
+ rlgym_learn_algos-0.2.5.dist-info/METADATA,sha256=_Hd86nz00PS2t06xX-wHch7-50wlZr8c_FUPjiPmh30,2431
2
+ rlgym_learn_algos-0.2.5.dist-info/WHEEL,sha256=xZ8SzCfbQYp83o2fyM7cU7vfL9QFqpx9fTDOTKg2VCE,94
3
+ rlgym_learn_algos-0.2.5.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
4
4
  rlgym_learn_algos/__init__.py,sha256=C7cRdL4lZrpk3ge_4_lGAbGodqWJXM56FfgO0keRPAY,207
5
5
  rlgym_learn_algos/conversion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  rlgym_learn_algos/conversion/convert_rlgym_ppo_checkpoint.py,sha256=A9nvzjp3DQNRNL5TAt-u3xE80JDIpYEDqAGNReHvFG0,908
@@ -18,15 +18,15 @@ rlgym_learn_algos/ppo/env_trajectories.py,sha256=gzQBRkzwZhlZeSvWL50cc8AOgBfsg5z
18
18
  rlgym_learn_algos/ppo/experience_buffer.py,sha256=xDm8NIMdErpv3GyWUBcTvzkLBQa8tW1TXb7OrKRDIu4,11059
19
19
  rlgym_learn_algos/ppo/experience_buffer_numpy.py,sha256=Apk4x-pfRnitKJPW6LBZyOPIhgeJs_5EG7BbTCqMwjk,4761
20
20
  rlgym_learn_algos/ppo/gae_trajectory_processor.py,sha256=JK958vasIIiuf3ALcFNlvBgGNhFshK8MhQJjwvxhrAM,5453
21
- rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py,sha256=RpyDR6GQ1JXvwtoKkx5V3z3WvU9ElJdzfNtpPiZDaTc,6831
21
+ rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py,sha256=whnxjwrETiX_DSxjSy7ZidSqWHTTJAJ5F4pQqBVmDw4,7013
22
22
  rlgym_learn_algos/ppo/multi_discrete_actor.py,sha256=zSYeBBirjguSv_wO-peo06hioHiVhZQjnd-NYwJxmag,3127
23
23
  rlgym_learn_algos/ppo/ppo_agent_controller.py,sha256=-qAAlkv8qX2FccAT1Uvr7d40v6j4Vi1Rzl9EdmYRI50,25636
24
- rlgym_learn_algos/ppo/ppo_learner.py,sha256=Cbbuz0AMwPCmkQ1YPDdZLkbgZOdyrOLEx89Camn-nGE,15942
24
+ rlgym_learn_algos/ppo/ppo_learner.py,sha256=NXj7_CDEpMIVpZwZLO91oYUVDiUPx_E4M7Tu_Zs7jzs,16195
25
25
  rlgym_learn_algos/ppo/ppo_metrics_logger.py,sha256=niW8xgQLEBCGgTaVyiE_JqsU6RTjV6h-JzM-7c3JT38,2868
26
26
  rlgym_learn_algos/ppo/trajectory.py,sha256=IIH_IG8B_HkyxRPf-YsCyF1jQqNjDx752hgzAehG25I,719
27
27
  rlgym_learn_algos/ppo/trajectory_processor.py,sha256=5eY_mNGjqIkhqnbKeaqDvqIWPdg6wD6Ai3fXH2WoXbw,2091
28
28
  rlgym_learn_algos/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
- rlgym_learn_algos/rlgym_learn_algos.cp39-win_amd64.pyd,sha256=Cm7rNAYop9cn-cIYQLzmF8G4NQegX92VlsyELPC8ehQ,416256
29
+ rlgym_learn_algos/rlgym_learn_algos.cp39-win_amd64.pyd,sha256=kGXk-rmyRKfJ0j-DbVnbWDu5ba7eD8jJp3yzaDNPdRk,408576
30
30
  rlgym_learn_algos/rlgym_learn_algos.pyi,sha256=NwY-sDZWM06TUiKPzxpfH1Td6G6E8TdxtRPgBSh-PPE,1203
31
31
  rlgym_learn_algos/stateful_functions/__init__.py,sha256=QS0KYjuzagNkYiYllXQmjoJn14-G7KZawq1Zvwh8alY,236
32
32
  rlgym_learn_algos/stateful_functions/batch_reward_type_numpy_converter.py,sha256=1yte5qYyl9LWdClHZ_YsF7R9dJqQeYfINMdgNF_59Gs,767
@@ -36,4 +36,4 @@ rlgym_learn_algos/util/__init__.py,sha256=VPM6SN4T_625H9t30s9EiLeXiEEWgcyRVHa-LL
36
36
  rlgym_learn_algos/util/running_stats.py,sha256=0tiGFpKtHWzMa1CxM_ueBzd_ryX4bJBriC8MXcSLg8w,4479
37
37
  rlgym_learn_algos/util/torch_functions.py,sha256=_uAXhq1YYPneWI3_XXRYsSA3Hn1a8wGjUnI3m9UojdU,3411
38
38
  rlgym_learn_algos/util/torch_pydantic.py,sha256=5AbXQcfQtVgLRBSgCj0Hvi_H42WHLu4Oty4l_i22nAo,3531
39
- rlgym_learn_algos-0.2.4.dist-info/RECORD,,
39
+ rlgym_learn_algos-0.2.5.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: maturin (1.9.0)
2
+ Generator: maturin (1.9.1)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp39-cp39-win_amd64