rlgym-learn-algos 0.2.4__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/Cargo.lock +1 -1
  2. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/Cargo.toml +1 -1
  3. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/PKG-INFO +1 -1
  4. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py +6 -3
  5. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/ppo_learner.py +9 -4
  6. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/src/ppo/gae_trajectory_processor.rs +5 -1
  7. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/.github/workflows/CICD.yml +0 -0
  8. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/.gitignore +0 -0
  9. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/LICENSE +0 -0
  10. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/README.md +0 -0
  11. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/pyproject.toml +0 -0
  12. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/requirements.txt +0 -0
  13. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/__init__.py +0 -0
  14. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/conversion/__init__.py +0 -0
  15. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/conversion/convert_rlgym_ppo_checkpoint.py +0 -0
  16. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/logging/__init__.py +0 -0
  17. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/logging/dict_metrics_logger.py +0 -0
  18. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/logging/metrics_logger.py +0 -0
  19. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/logging/wandb_metrics_logger.py +0 -0
  20. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/__init__.py +0 -0
  21. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/actor.py +0 -0
  22. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/basic_critic.py +0 -0
  23. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/continuous_actor.py +0 -0
  24. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/critic.py +0 -0
  25. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/discrete_actor.py +0 -0
  26. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/env_trajectories.py +0 -0
  27. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/experience_buffer.py +0 -0
  28. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/experience_buffer_numpy.py +0 -0
  29. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/gae_trajectory_processor.py +0 -0
  30. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/multi_discrete_actor.py +0 -0
  31. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/ppo_agent_controller.py +0 -0
  32. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/ppo_metrics_logger.py +0 -0
  33. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/trajectory.py +0 -0
  34. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/ppo/trajectory_processor.py +0 -0
  35. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/py.typed +0 -0
  36. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/rlgym_learn_algos.pyi +0 -0
  37. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/stateful_functions/__init__.py +0 -0
  38. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/stateful_functions/batch_reward_type_numpy_converter.py +0 -0
  39. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/stateful_functions/numpy_obs_standardizer.py +0 -0
  40. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/stateful_functions/obs_standardizer.py +0 -0
  41. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/util/__init__.py +0 -0
  42. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/util/running_stats.py +0 -0
  43. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/util/torch_functions.py +0 -0
  44. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/rlgym_learn_algos/util/torch_pydantic.py +0 -0
  45. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/src/common/mod.rs +0 -0
  46. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/src/common/numpy_dtype.rs +0 -0
  47. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/src/lib.rs +0 -0
  48. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/src/misc.rs +0 -0
  49. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/src/ppo/mod.rs +0 -0
  50. {rlgym_learn_algos-0.2.4 → rlgym_learn_algos-0.2.5}/src/ppo/trajectory.rs +0 -0
Cargo.lock
@@ -229,7 +229,7 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
 
 [[package]]
 name = "rlgym-learn-algos"
-version = "0.2.4"
+version = "0.2.5"
 dependencies = [
  "itertools",
  "numpy",
Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "rlgym-learn-algos"
-version = "0.2.4"
+version = "0.2.5"
 edition = "2021"
 description = "Rust backend for the more expensive parts of the rlgym-learn-algos python module"
 license="Apache-2.0"
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rlgym-learn-algos
-Version: 0.2.4
+Version: 0.2.5
 Classifier: Programming Language :: Rust
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Requires-Dist: pydantic>=2.8.2
rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py
@@ -117,14 +117,17 @@ class GAETrajectoryProcessorPurePython(
         else:
             avg_return = np.nan
             return_std = np.nan
-        avg_reward = reward_sum[0] / exp_len
+        avg_reward = reward_sum.item() / exp_len
+        average_episode_return = reward_sum.item() / len(trajectories)
         trajectory_processor_data = GAETrajectoryProcessorData(
-            average_undiscounted_episodic_return=avg_reward,
+            average_undiscounted_episodic_return=average_episode_return,
             average_return=avg_return,
             return_standard_deviation=return_std,
+            average_reward=avg_reward
         )
         return (
             (
+                agent_ids,
                 observations,
                 actions,
                 torch.stack(log_probs_list).to(device=self.device),
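
Context for this hunk: 0.2.4 reported the per-step average reward as the average undiscounted episodic return. The fix computes the two statistics separately (and also adds agent_ids to the returned tuple, an interface change rather than a statistics fix). A minimal sketch of the distinction, using made-up trajectory data; the names below are illustrative, not taken from the package:

    import numpy as np

    # Two toy trajectories of different lengths (illustrative data only).
    trajectory_rewards = [np.array([1.0, 0.0, 2.0]), np.array([0.5, 0.5])]

    reward_sum = sum(r.sum() for r in trajectory_rewards)  # 4.0
    exp_len = sum(len(r) for r in trajectory_rewards)      # 5 timesteps in total

    avg_reward = reward_sum / exp_len                           # 0.8, reward per timestep
    avg_episode_return = reward_sum / len(trajectory_rewards)   # 2.0, undiscounted return per episode
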
rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py
@@ -146,7 +149,7 @@ class GAETrajectoryProcessorPurePython(
         self.max_returns_per_stats_increment = (
             config.trajectory_processor_config.max_returns_per_stats_increment
         )
-        self.dtype = np.dtype(config.dtype)
+        self.dtype = np.dtype(str(config.dtype).replace("torch.", ""))
         self.device = config.device
         self.checkpoint_load_folder = config.checkpoint_load_folder
         if self.checkpoint_load_folder is not None:
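
This hunk suggests config.dtype can arrive as a torch dtype, whose string form ("torch.float32") is not a name numpy accepts, so the prefix is stripped before building the numpy dtype. A rough illustration of the conversion, assuming a torch.float32 input:

    import numpy as np
    import torch

    torch_dtype = torch.float32

    # np.dtype() cannot interpret a torch dtype (or the string "torch.float32"),
    # but it does understand the bare name "float32".
    numpy_dtype = np.dtype(str(torch_dtype).replace("torch.", ""))
    assert numpy_dtype == np.float32
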
rlgym_learn_algos/ppo/ppo_learner.py
@@ -284,6 +284,15 @@ class PPOLearner(
                 batch_advantages,
             ) = batch
             batch_target_values = batch_values + batch_advantages
+            if self.config.learner_config.advantage_normalization:
+                old_device = batch_advantages.device
+                batch_advantages = batch_advantages.to(
+                    self.config.learner_config.device
+                )
+                std, mean = torch.std_mean(batch_advantages)
+                batch_advantages = (batch_advantages - mean) / (std + 1e-8)
+                batch_advantages = batch_advantages.to(old_device)
+
             self.actor_optimizer.zero_grad()
             self.critic_optimizer.zero_grad()
 
rlgym_learn_algos/ppo/ppo_learner.py
@@ -306,10 +315,6 @@ class PPOLearner(
                 advantages = batch_advantages[start:stop].to(
                     self.config.learner_config.device
                 )
-                if self.config.learner_config.advantage_normalization:
-                    advantages = (advantages - torch.mean(advantages)) / (
-                        torch.std(advantages) + 1e-8
-                    )
                 old_probs = batch_old_probs[start:stop].to(
                     self.config.learner_config.device
                 )
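
Taken together, these two ppo_learner.py hunks move advantage normalization out of the minibatch loop: instead of normalizing each minibatch slice, the full batch is normalized once on the learner device and then moved back. A standalone sketch of that batch-level step; the function and tensor names below are illustrative:

    import torch

    def normalize_batch_advantages(batch_advantages: torch.Tensor, device: str = "cpu") -> torch.Tensor:
        # One normalization over the full batch, so every minibatch sees the same scale.
        old_device = batch_advantages.device
        advantages = batch_advantages.to(device)
        std, mean = torch.std_mean(advantages)
        advantages = (advantages - mean) / (std + 1e-8)
        return advantages.to(old_device)

    print(normalize_batch_advantages(torch.tensor([1.0, 2.0, 3.0, 4.0])))
    # ≈ tensor([-1.16, -0.39, 0.39, 1.16])
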
src/ppo/gae_trajectory_processor.rs
@@ -73,7 +73,11 @@ macro_rules! define_process_trajectories {
         for trajectory in trajectories.into_iter() {
             let trajectory_len = trajectory.obs_list.len();
             let mut cur_return = 0 as $dtype;
-            let mut next_val_pred = trajectory.final_val_pred.extract::<$dtype>()?;
+            let mut next_val_pred = if trajectory.truncated {
+                trajectory.final_val_pred.extract::<$dtype>()?
+            } else {
+                0 as $dtype
+            };
             let mut cur_advantage = 0 as $dtype;
             let timesteps_rewards = batch_reward_type_numpy_converter
                 .call_method1(intern!(py, "as_numpy"), (&trajectory.reward_list,))?
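
This Rust hunk changes how GAE bootstraps at the end of a trajectory: the critic's final value prediction is used only when the episode was truncated, while a genuinely terminal state contributes zero future value. A rough Python sketch of the recursion this corresponds to; the function name and the gamma/lambda defaults are illustrative, not taken from the crate:

    import numpy as np

    def gae_advantages(rewards, val_preds, final_val_pred, truncated, gamma=0.99, lam=0.95):
        # Bootstrap from the critic only if the episode was cut short (truncated);
        # otherwise the value after the last step is zero.
        next_val_pred = final_val_pred if truncated else 0.0
        cur_advantage = 0.0
        advantages = np.zeros(len(rewards))
        for t in reversed(range(len(rewards))):
            delta = rewards[t] + gamma * next_val_pred - val_preds[t]
            cur_advantage = delta + gamma * lam * cur_advantage
            advantages[t] = cur_advantage
            next_val_pred = val_preds[t]
        return advantages
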