rlgym-learn-algos 0.2.0.tar.gz → 0.2.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/Cargo.lock +1 -1
  2. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/Cargo.toml +1 -1
  3. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/PKG-INFO +1 -1
  4. rlgym_learn_algos-0.2.1/rlgym_learn_algos/conversion/convert_rlgym_ppo_checkpoint.py +27 -0
  5. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/logging/wandb_metrics_logger.py +1 -1
  6. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/experience_buffer.py +2 -13
  7. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/gae_trajectory_processor.py +12 -6
  8. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/ppo_agent_controller.py +52 -27
  9. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/ppo_learner.py +32 -36
  10. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/trajectory_processor.py +1 -0
  11. rlgym_learn_algos-0.2.1/rlgym_learn_algos/py.typed +0 -0
  12. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/.github/workflows/CICD.yml +0 -0
  13. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/.gitignore +0 -0
  14. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/LICENSE +0 -0
  15. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/README.md +0 -0
  16. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/pyproject.toml +0 -0
  17. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/requirements.txt +0 -0
  18. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/__init__.py +0 -0
  19. /rlgym_learn_algos-0.2.0/rlgym_learn_algos/py.typed → /rlgym_learn_algos-0.2.1/rlgym_learn_algos/conversion/__init__.py +0 -0
  20. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/logging/__init__.py +0 -0
  21. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/logging/dict_metrics_logger.py +0 -0
  22. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/logging/metrics_logger.py +0 -0
  23. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/__init__.py +0 -0
  24. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/actor.py +0 -0
  25. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/basic_critic.py +0 -0
  26. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/continuous_actor.py +0 -0
  27. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/critic.py +0 -0
  28. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/discrete_actor.py +0 -0
  29. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/env_trajectories.py +0 -0
  30. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/experience_buffer_numpy.py +0 -0
  31. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py +0 -0
  32. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/multi_discrete_actor.py +0 -0
  33. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/ppo_metrics_logger.py +0 -0
  34. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/trajectory.py +0 -0
  35. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/rlgym_learn_algos.pyi +0 -0
  36. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/stateful_functions/__init__.py +0 -0
  37. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/stateful_functions/batch_reward_type_numpy_converter.py +0 -0
  38. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/stateful_functions/numpy_obs_standardizer.py +0 -0
  39. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/stateful_functions/obs_standardizer.py +0 -0
  40. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/util/__init__.py +0 -0
  41. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/util/running_stats.py +0 -0
  42. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/util/torch_functions.py +0 -0
  43. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/util/torch_pydantic.py +0 -0
  44. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/src/common/mod.rs +0 -0
  45. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/src/common/numpy_dtype.rs +0 -0
  46. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/src/lib.rs +0 -0
  47. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/src/misc.rs +0 -0
  48. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/src/ppo/gae_trajectory_processor.rs +0 -0
  49. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/src/ppo/mod.rs +0 -0
  50. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/src/ppo/trajectory.rs +0 -0
{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/Cargo.lock
@@ -229,7 +229,7 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"

  [[package]]
  name = "rlgym-learn-algos"
- version = "0.2.0"
+ version = "0.2.1"
  dependencies = [
   "itertools",
   "numpy",
{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/Cargo.toml
@@ -1,6 +1,6 @@
  [package]
  name = "rlgym-learn-algos"
- version = "0.2.0"
+ version = "0.2.1"
  edition = "2021"
  description = "Rust backend for the more expensive parts of the rlgym-learn-algos python module"
  license="Apache-2.0"
{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: rlgym-learn-algos
- Version: 0.2.0
+ Version: 0.2.1
  Classifier: Programming Language :: Rust
  Classifier: Programming Language :: Python :: Implementation :: CPython
  Requires-Dist: pydantic>=2.8.2
rlgym_learn_algos-0.2.1/rlgym_learn_algos/conversion/convert_rlgym_ppo_checkpoint.py
@@ -0,0 +1,27 @@
+ import json
+ import os
+ import time
+ from typing import Optional
+
+
+ def convert_rlgym_ppo_checkpoint(
+     rlgym_ppo_checkpoint_folder: str, out_folder: Optional[str]
+ ):
+
+     if out_folder is None:
+         out_folder = f"rlgym_ppo_converted_checkpoint_{time.time_ns()}"
+     print(f"Saving converted checkpoint to folder {out_folder}")
+
+     os.makedirs(out_folder, exist_ok=True)
+
+     PPO_FILES = [
+         ("PPO_POLICY_OPTIMIZER.pt", "actor_optimizer.pt"),
+         ("PPO_POLICY.pt", "actor.pt"),
+         ("PPO_VALUE_NET_OPTIMIZER.pt", "critic_optimizer.pt"),
+         ("PPO_VALUE_NET.pt", "critic.pt"),
+     ]
+     os.makedirs(f"{out_folder}/ppo_learner", exist_ok=True)
+     for file in PPO_FILES:
+         with open(f"{rlgym_ppo_checkpoint_folder}/{file[0]}", "rb") as fin:
+             with open(f"{out_folder}/ppo_learner/{file[1]}", "wb") as fout:
+                 fout.write(fin.read())
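The new conversion helper copies the four rlgym-ppo network and optimizer files into the ppo_learner checkpoint layout used by this package. A minimal usage sketch (the import path follows the new module above; the rlgym-ppo checkpoint folder path is a placeholder):

    from rlgym_learn_algos.conversion.convert_rlgym_ppo_checkpoint import (
        convert_rlgym_ppo_checkpoint,
    )

    # Copies PPO_POLICY.pt, PPO_VALUE_NET.pt and their optimizer files from an
    # existing rlgym-ppo checkpoint folder into <out_folder>/ppo_learner/.
    # Passing None as out_folder writes to rlgym_ppo_converted_checkpoint_<timestamp>.
    convert_rlgym_ppo_checkpoint("path/to/rlgym_ppo_checkpoint", None)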
{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/logging/wandb_metrics_logger.py
@@ -164,7 +164,7 @@ class WandbMetricsLogger(
                  self.run_id = None
          except FileNotFoundError:
              print(
-                 f"{self.config.agent_controller_name}: Tried to load from checkpoint, but checkpoint didn't contain a wandb run! A new run will be created based on the config values."
+                 f"{self.config.agent_controller_name}: Tried to load wandb run from checkpoint using the file at location {str(os.path.join(self.config.checkpoint_load_folder, self.checkpoint_file_name))}, but there is no such file! A new run will be created based on the config values instead."
              )
              self.run_id = None

{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/experience_buffer.py
@@ -7,7 +7,6 @@ import numpy as np
  import torch
  from pydantic import BaseModel, Field, model_validator
  from rlgym.api import ActionType, AgentID, ObsType, RewardType
-
  from rlgym_learn_algos.util.torch_functions import get_device
  from rlgym_learn_algos.util.torch_pydantic import PydanticTorchDevice

@@ -45,17 +44,6 @@ class ExperienceBufferConfigModel(BaseModel, extra="forbid"):
              data["device"] = get_device("auto")
          return data

-     # device: PydanticTorchDevice = "auto"
-
-     # @model_validator(mode="before")
-     # @classmethod
-     # def set_device(cls, data):
-     #     if isinstance(data, dict) and (
-     #         "device" not in data or data["device"] == "auto"
-     #     ):
-     #         data["device"] = get_device("auto")
-     #     return data
-

  @dataclass
  class DerivedExperienceBufferConfig:
@@ -139,6 +127,7 @@ class ExperienceBuffer(
          self.trajectory_processor.load(
              DerivedTrajectoryProcessorConfig(
                  trajectory_processor_config=trajectory_processor_config,
+                 agent_controller_name=config.agent_controller_name,
                  dtype=config.dtype,
                  device=config.learner_device,
              )
@@ -171,7 +160,7 @@ class ExperienceBuffer(
                  self.advantages = state_dict["advantages"]
          except FileNotFoundError:
              print(
-                 f"{self.config.agent_controller_name}: Tried to load from checkpoint, but checkpoint didn't contain a saved experience buffer! A blank experience buffer will be used instead."
+                 f"{self.config.agent_controller_name}: Tried to load experience buffer from checkpoint using the file at location {str(os.path.join(self.config.checkpoint_load_folder, EXPERIENCE_BUFFER_FILE))}, but there is no such file! A blank experience buffer will be used instead."
              )

      def save_checkpoint(self, folder_path):
{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/gae_trajectory_processor.py
@@ -115,6 +115,7 @@ class GAETrajectoryProcessor(
          self.max_returns_per_stats_increment = (
              config.trajectory_processor_config.max_returns_per_stats_increment
          )
+         self.agent_controller_name = config.agent_controller_name
          self.dtype = config.dtype
          self.device = config.device
          self.checkpoint_load_folder = config.checkpoint_load_folder
@@ -127,12 +128,17 @@ class GAETrajectoryProcessor(
          )

      def _load_from_checkpoint(self):
-         with open(
-             os.path.join(self.checkpoint_load_folder, TRAJECTORY_PROCESSOR_FILE),
-             "rt",
-         ) as f:
-             state = json.load(f)
-             self.return_stats.load_state_dict(state["return_running_stats"])
+         try:
+             with open(
+                 os.path.join(self.checkpoint_load_folder, TRAJECTORY_PROCESSOR_FILE),
+                 "rt",
+             ) as f:
+                 state = json.load(f)
+                 self.return_stats.load_state_dict(state["return_running_stats"])
+         except FileNotFoundError:
+             print(
+                 f"{self.agent_controller_name}: Tried to load trajectory processor from checkpoint using the trajectory processor file at location {str(os.path.join(self.checkpoint_load_folder, TRAJECTORY_PROCESSOR_FILE))}, but there is no such file! Running stats will be initialized as if this were a new run instead."
+             )

      def save_checkpoint(self, folder_path):
          state = {
{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/ppo_agent_controller.py
@@ -253,6 +253,7 @@ class PPOAgentController(
          self.learner.load(
              DerivedPPOLearnerConfig(
                  learner_config=learner_config,
+                 agent_controller_name=config.agent_controller_name,
                  obs_space=self.obs_space,
                  action_space=self.action_space,
                  checkpoint_load_folder=learner_checkpoint_load_folder,
@@ -304,33 +305,57 @@ class PPOAgentController(
          random.seed(self.config.base_config.random_seed)

      def _load_from_checkpoint(self):
-         with open(
-             os.path.join(
-                 self.config.agent_controller_config.checkpoint_load_folder,
-                 CURRENT_TRAJECTORIES_FILE,
-             ),
-             "rb",
-         ) as f:
-             current_trajectories: Dict[
-                 int,
-                 EnvTrajectories[AgentID, ActionType, ObsType, RewardType],
-             ] = pickle.load(f)
-         with open(
-             os.path.join(
-                 self.config.agent_controller_config.checkpoint_load_folder,
-                 ITERATION_SHARED_INFOS_FILE,
-             ),
-             "rb",
-         ) as f:
-             iteration_shared_infos: List[Dict[str, Any]] = pickle.load(f)
-         with open(
-             os.path.join(
-                 self.config.agent_controller_config.checkpoint_load_folder,
-                 PPO_AGENT_FILE,
-             ),
-             "rt",
-         ) as f:
-             state = json.load(f)
+         try:
+             with open(
+                 os.path.join(
+                     self.config.agent_controller_config.checkpoint_load_folder,
+                     CURRENT_TRAJECTORIES_FILE,
+                 ),
+                 "rb",
+             ) as f:
+                 current_trajectories: Dict[
+                     int,
+                     EnvTrajectories[AgentID, ActionType, ObsType, RewardType],
+                 ] = pickle.load(f)
+         except FileNotFoundError:
+             print(
+                 f"{self.config.agent_controller_name}: Tried to load current trajectories from checkpoint using the file at location {str(os.path.join(self.config.agent_controller_config.checkpoint_load_folder, CURRENT_TRAJECTORIES_FILE))}, but there is no such file! Current trajectories will be initialized as an empty dict instead."
+             )
+             current_trajectories = {}
+         try:
+             with open(
+                 os.path.join(
+                     self.config.agent_controller_config.checkpoint_load_folder,
+                     ITERATION_SHARED_INFOS_FILE,
+                 ),
+                 "rb",
+             ) as f:
+                 iteration_shared_infos: List[Dict[str, Any]] = pickle.load(f)
+         except FileNotFoundError:
+             print(
+                 f"{self.config.agent_controller_name}: Tried to load iteration shared info data from checkpoint using the file at location {str(os.path.join(self.config.agent_controller_config.checkpoint_load_folder, ITERATION_SHARED_INFOS_FILE))}, but there is no such file! Iteration shared info data will be initialized as an empty list instead."
+             )
+             current_trajectories = {}
+         try:
+             with open(
+                 os.path.join(
+                     self.config.agent_controller_config.checkpoint_load_folder,
+                     PPO_AGENT_FILE,
+                 ),
+                 "rt",
+             ) as f:
+                 state = json.load(f)
+         except FileNotFoundError:
+             print(
+                 f"{self.config.agent_controller_name}: Tried to load PPO agent miscellaneous state data from checkpoint using the file at location {str(os.path.join(self.config.agent_controller_config.checkpoint_load_folder, PPO_AGENT_FILE))}, but there is no such file! This state data will be initialized as if this were a new run instead."
+             )
+             state = {
+                 "cur_iteration": 0,
+                 "iteration_timesteps": 0,
+                 "cumulative_timesteps": 0,
+                 "iteration_start_time": time.perf_counter(),
+                 "timestep_collection_start_time": time.perf_counter(),
+             }

          self.current_trajectories = current_trajectories
          self.iteration_shared_infos = iteration_shared_infos
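The rewritten _load_from_checkpoint above applies the same pattern used throughout 0.2.1: each checkpoint artifact is read in its own try/except FileNotFoundError block, a message prefixed with the agent controller name is printed, and a freshly initialized default is used in place of the missing file. A condensed, illustrative sketch of that pattern (the helper name and signature are hypothetical, not part of the package API):

    import os
    import pickle

    def load_or_default(folder: str, file_name: str, default, agent_controller_name: str):
        # Illustrative only: mirrors the fallback behavior added in 0.2.1, where a
        # missing checkpoint file produces a warning and a default value is used.
        path = os.path.join(folder, file_name)
        try:
            with open(path, "rb") as f:
                return pickle.load(f)
        except FileNotFoundError:
            print(
                f"{agent_controller_name}: Tried to load from checkpoint using the file at "
                f"location {path}, but there is no such file! Using a default value instead."
            )
            return default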
@@ -16,12 +16,13 @@ from rlgym.api import (
16
16
  ObsType,
17
17
  RewardType,
18
18
  )
19
+ from torch import nn as nn
20
+
19
21
  from rlgym_learn_algos.util.torch_functions import get_device
20
22
  from rlgym_learn_algos.util.torch_pydantic import (
21
23
  PydanticTorchDevice,
22
24
  PydanticTorchDtype,
23
25
  )
24
- from torch import nn as nn
25
26
 
26
27
  from .actor import Actor
27
28
  from .critic import Critic
@@ -50,31 +51,10 @@ class PPOLearnerConfigModel(BaseModel, extra="forbid"):
          return data


-     # @model_validator(mode="before")
-     # @classmethod
-     # def set_agent_controllers_config(cls, data):
-     #     if isinstance(data, LearningCoordinatorConfigModel):
-     #         agent_controllers_config = {}
-     #         for k, v in data.agent_controllers_config.items():
-     #             if isinstance(v, BaseModel):
-     #                 agent_controllers_config[k] = v.model_dump()
-     #             else:
-     #                 agent_controllers_config[k] = v
-     #         data.agent_controllers_config = agent_controllers_config
-     #     elif isinstance(data, dict) and "agent_controllers_config" in data:
-     #         agent_controllers_config = {}
-     #         for k, v in data["agent_controllers_config"].items():
-     #             if isinstance(v, BaseModel):
-     #                 agent_controllers_config[k] = v.model_dump()
-     #             else:
-     #                 agent_controllers_config[k] = v
-     #         data["agent_controllers_config"] = agent_controllers_config
-     #     return data
-
-
  @dataclass
  class DerivedPPOLearnerConfig:
      learner_config: PPOLearnerConfigModel
+     agent_controller_name: str
      obs_space: ObsSpaceType
      action_space: ActionSpaceType
      checkpoint_load_folder: Optional[str] = None
@@ -155,16 +135,26 @@ class PPOLearner(
          total_parameters = actor_params_count + critic_params_count

          # Display in a structured manner
-         print("Trainable Parameters:")
-         print(f"{'Component':<10} {'Count':<10}")
+         print(f"{self.config.agent_controller_name}: Trainable Parameters:")
+         print(f"{self.config.agent_controller_name}: {'Component':<10} {'Count':<10}")
          print("-" * 20)
-         print(f"{'Policy':<10} {actor_params_count:<10}")
-         print(f"{'Critic':<10} {critic_params_count:<10}")
+         print(
+             f"{self.config.agent_controller_name}: {'Policy':<10} {actor_params_count:<10}"
+         )
+         print(
+             f"{self.config.agent_controller_name}: {'Critic':<10} {critic_params_count:<10}"
+         )
          print("-" * 20)
-         print(f"{'Total':<10} {total_parameters:<10}")
+         print(
+             f"{self.config.agent_controller_name}: {'Total':<10} {total_parameters:<10}"
+         )

-         print(f"Current Policy Learning Rate: {self.config.learner_config.actor_lr}")
-         print(f"Current Critic Learning Rate: {self.config.learner_config.critic_lr}")
+         print(
+             f"{self.config.agent_controller_name}: Current Policy Learning Rate: {self.config.learner_config.actor_lr}"
+         )
+         print(
+             f"{self.config.agent_controller_name}: Current Critic Learning Rate: {self.config.learner_config.critic_lr}"
+         )
          self.cumulative_model_updates = 0

          if self.config.checkpoint_load_folder is not None:
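With the agent controller name prefixed to every line, the startup parameter summary now reads as one block per controller. A quick illustration of the resulting output, with a made-up controller name and parameter counts:

    # Illustrative only: reproduces the layout of the 0.2.1 startup summary.
    agent_controller_name = "PPO1"
    actor_params_count = 123_456
    critic_params_count = 78_910
    total_parameters = actor_params_count + critic_params_count

    print(f"{agent_controller_name}: Trainable Parameters:")
    print(f"{agent_controller_name}: {'Component':<10} {'Count':<10}")
    print("-" * 20)
    print(f"{agent_controller_name}: {'Policy':<10} {actor_params_count:<10}")
    print(f"{agent_controller_name}: {'Critic':<10} {critic_params_count:<10}")
    print("-" * 20)
    print(f"{agent_controller_name}: {'Total':<10} {total_parameters:<10}")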
@@ -180,7 +170,7 @@ class PPOLearner(

          assert os.path.exists(
              self.config.checkpoint_load_folder
-         ), f"PPO Learner cannot find folder: {self.config.checkpoint_load_folder}"
+         ), f"{self.config.agent_controller_name}: PPO Learner cannot find folder: {self.config.checkpoint_load_folder}"

          self.actor.load_state_dict(
              torch.load(
@@ -206,11 +196,17 @@ class PPOLearner(
                      map_location=self.config.learner_config.device,
                  )
              )
-         with open(
-             os.path.join(self.config.checkpoint_load_folder, MISC_STATE), "rt"
-         ) as f:
-             misc_state = json.load(f)
-             self.cumulative_model_updates = misc_state["cumulative_model_updates"]
+         try:
+             with open(
+                 os.path.join(self.config.checkpoint_load_folder, MISC_STATE), "rt"
+             ) as f:
+                 misc_state = json.load(f)
+                 self.cumulative_model_updates = misc_state["cumulative_model_updates"]
+         except FileNotFoundError:
+             print(
+                 f"{self.config.agent_controller_name}: Tried to load the PPO learner's misc state from the file at location {str(os.path.join(self.config.checkpoint_load_folder, MISC_STATE))}, but there is no such file! Miscellaneous stats will be initialized as if this were a new run instead."
+             )
+             self.cumulative_model_updates = 0

      def save_checkpoint(self, folder_path):
          os.makedirs(folder_path, exist_ok=True)
{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/trajectory_processor.py
@@ -16,6 +16,7 @@ TRAJECTORY_PROCESSOR_FILE = "trajectory_processor.json"
  @dataclass
  class DerivedTrajectoryProcessorConfig(Generic[TrajectoryProcessorConfig]):
      trajectory_processor_config: TrajectoryProcessorConfig
+     agent_controller_name: str
      dtype: dtype
      device: device
      checkpoint_load_folder: Optional[str] = None