rlgym-learn-algos 0.1.5__cp310-cp310-win32.whl → 0.2.0__cp310-cp310-win32.whl

This diff compares the contents of two publicly released versions of the package as published to their public registry. It is provided for informational purposes only.
@@ -12,9 +12,9 @@ MetricsLoggerAdditionalDerivedConfig = TypeVar("MetricsLoggerAdditionalDerivedCo
 class DerivedMetricsLoggerConfig(
     Generic[MetricsLoggerConfig, MetricsLoggerAdditionalDerivedConfig]
 ):
+    metrics_logger_config: MetricsLoggerConfig = None
     checkpoint_load_folder: Optional[str] = None
     agent_controller_name: str = ""
-    metrics_logger_config: MetricsLoggerConfig = None
     additional_derived_config: MetricsLoggerAdditionalDerivedConfig = None
 
 
@@ -29,7 +29,7 @@ def convert_nested_dict(d):
     return new
 
 
-class WandbMetricsLoggerConfigModel(BaseModel):
+class WandbMetricsLoggerConfigModel(BaseModel, extra="forbid"):
     enable: bool = True
     project: str = "rlgym-learn"
     group: str = "unnamed-runs"
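
Several config models in this release gain `extra="forbid"` on their `BaseModel` subclass declaration. In pydantic v2 this rejects unknown keys instead of silently ignoring them, so a typo in a config file surfaces as a validation error. A minimal standalone sketch of the behavior difference (not code from the package):

```python
from pydantic import BaseModel, ValidationError


class LooseConfig(BaseModel):
    enable: bool = True


class StrictConfig(BaseModel, extra="forbid"):
    enable: bool = True


# Unknown keys are silently dropped by pydantic's default "ignore" behavior...
print(LooseConfig(enable=True, projct="typo"))  # enable=True

# ...but rejected when extra="forbid" is set on the class.
try:
    StrictConfig(enable=True, projct="typo")
except ValidationError as e:
    print(e.error_count(), "validation error")  # 1 validation error
```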
@@ -37,6 +37,7 @@ class WandbMetricsLoggerConfigModel(BaseModel):
     id: Optional[str] = None
     new_run_with_timestamp_suffix: bool = False
     additional_wandb_run_config: Dict[str, Any] = Field(default_factory=dict)
+    settings_kwargs: Dict[str, Any] = Field(default_factory=dict)
 
 
 @dataclass
@@ -76,6 +77,7 @@ class WandbMetricsLogger(
     ):
         self.inner_metrics_logger = inner_metrics_logger
         self.checkpoint_file_name = checkpoint_file_name
+        self.run_id = None
 
     def collect_env_metrics(self, data: List[Dict[str, Any]]):
         self.inner_metrics_logger.collect_env_metrics(data)
@@ -107,17 +109,11 @@ class WandbMetricsLogger(
             self.run_id = None
             return
 
-        if (
-            self.config.checkpoint_load_folder is not None
-            and self.config.metrics_logger_config.id is not None
-        ):
-            if self.run_id is not None:
-                print(
-                    f"{self.config.agent_controller_name}: Wandb run id from checkpoint ({self.run_id}) is being overridden by wandb run id from config: {self.config.metrics_logger_config.id}"
-                )
+        if self.run_id is not None and self.config.metrics_logger_config.id is not None:
+            print(
+                f"{self.config.agent_controller_name}: Wandb run id from checkpoint ({self.run_id}) is being overridden by wandb run id from config: {self.config.metrics_logger_config.id}"
+            )
             self.run_id = self.config.metrics_logger_config.id
-        else:
-            self.run_id = None
 
         wandb_config = {
             **self.config.additional_derived_config.derived_wandb_run_config,
@@ -145,22 +141,31 @@ class WandbMetricsLogger(
             id=self.run_id,
             resume="allow",
             reinit=True,
+            settings=wandb.Settings(
+                **self.config.metrics_logger_config.settings_kwargs
+            ),
         )
         self.run_id = self.wandb_run.id
         print(f"{self.config.agent_controller_name}: Created wandb run! {self.run_id}")
 
     def _load_from_checkpoint(self):
-        with open(
-            os.path.join(
-                self.config.checkpoint_load_folder,
-                self.checkpoint_file_name,
-            ),
-            "rt",
-        ) as f:
-            state = json.load(f)
-            if "run_id" in state:
-                self.run_id = state["run_id"]
-            else:
+        try:
+            with open(
+                os.path.join(
+                    self.config.checkpoint_load_folder,
+                    self.checkpoint_file_name,
+                ),
+                "rt",
+            ) as f:
+                state = json.load(f)
+                if "run_id" in state:
+                    self.run_id = state["run_id"]
+                else:
+                    self.run_id = None
+        except FileNotFoundError:
+            print(
+                f"{self.config.agent_controller_name}: Tried to load from checkpoint, but checkpoint didn't contain a wandb run! A new run will be created based on the config values."
+            )
            self.run_id = None
 
     def save_checkpoint(self, folder_path):
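
As the hunk above shows, the new `settings_kwargs` dict from `WandbMetricsLoggerConfigModel` is unpacked into `wandb.Settings(...)` when the run is created. A rough sketch of how a user config might drive this, assuming any keyword accepted by `wandb.Settings` (e.g. `console`, `silent`) and using an illustrative config-dict shape rather than the package's full schema:

```python
import wandb

# Hypothetical excerpt of a metrics logger config section; only the keys shown
# in the diff above are assumed to exist.
metrics_logger_config = {
    "project": "rlgym-learn",
    "group": "unnamed-runs",
    "settings_kwargs": {"console": "off", "silent": True},
}

# The logger forwards settings_kwargs to wandb.Settings, so wandb client settings
# can be controlled from the same config file as the rest of the run.
run = wandb.init(
    project=metrics_logger_config["project"],
    group=metrics_logger_config["group"],
    settings=wandb.Settings(**metrics_logger_config["settings_kwargs"]),
)
run.finish()
```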
@@ -8,6 +8,9 @@ import torch
 from pydantic import BaseModel, Field, model_validator
 from rlgym.api import ActionType, AgentID, ObsType, RewardType
 
+from rlgym_learn_algos.util.torch_functions import get_device
+from rlgym_learn_algos.util.torch_pydantic import PydanticTorchDevice
+
 from .trajectory import Trajectory
 from .trajectory_processor import (
     DerivedTrajectoryProcessorConfig,
@@ -19,8 +22,9 @@ from .trajectory_processor import (
 EXPERIENCE_BUFFER_FILE = "experience_buffer.pkl"
 
 
-class ExperienceBufferConfigModel(BaseModel):
+class ExperienceBufferConfigModel(BaseModel, extra="forbid"):
     max_size: int = 100000
+    device: PydanticTorchDevice = "auto"
     trajectory_processor_config: Dict[str, Any] = Field(default_factory=dict)
 
     @model_validator(mode="before")
@@ -31,21 +35,35 @@ class ExperienceBufferConfigModel(BaseModel):
             data.trajectory_processor_config = (
                 data.trajectory_processor_config.model_dump()
             )
-        elif isinstance(data, dict) and "trajectory_processor_config" in data:
-            if isinstance(data["trajectory_processor_config"], BaseModel):
-                data["trajectory_processor_config"] = data[
-                    "trajectory_processor_config"
-                ].model_dump()
+        elif isinstance(data, dict):
+            if "trajectory_processor_config" in data:
+                if isinstance(data["trajectory_processor_config"], BaseModel):
+                    data["trajectory_processor_config"] = data[
+                        "trajectory_processor_config"
+                    ].model_dump()
+            if "device" not in data or data["device"] == "auto":
+                data["device"] = get_device("auto")
         return data
 
+    # device: PydanticTorchDevice = "auto"
+
+    # @model_validator(mode="before")
+    # @classmethod
+    # def set_device(cls, data):
+    #     if isinstance(data, dict) and (
+    #         "device" not in data or data["device"] == "auto"
+    #     ):
+    #         data["device"] = get_device("auto")
+    #     return data
+
 
 @dataclass
 class DerivedExperienceBufferConfig:
-    max_size: int
+    experience_buffer_config: ExperienceBufferConfigModel
+    agent_controller_name: str
     seed: int
-    dtype: str
-    device: str
-    trajectory_processor_config: Dict[str, Any]
+    dtype: torch.dtype
+    learner_device: torch.device
     checkpoint_load_folder: Optional[str] = None
 
 
@@ -111,42 +129,50 @@ class ExperienceBuffer(
         self.agent_ids: List[AgentID] = []
         self.observations: List[ObsType] = []
         self.actions: List[ActionType] = []
-        self.log_probs = torch.FloatTensor()
-        self.values = torch.FloatTensor()
-        self.advantages = torch.FloatTensor()
 
     def load(self, config: DerivedExperienceBufferConfig):
         self.config = config
         self.rng = np.random.RandomState(config.seed)
         trajectory_processor_config = self.trajectory_processor.validate_config(
-            config.trajectory_processor_config
+            config.experience_buffer_config.trajectory_processor_config
         )
         self.trajectory_processor.load(
             DerivedTrajectoryProcessorConfig(
                 trajectory_processor_config=trajectory_processor_config,
                 dtype=config.dtype,
-                device=config.device,
+                device=config.learner_device,
             )
         )
+        self.log_probs = torch.tensor([], dtype=config.dtype)
+        self.values = torch.tensor([], dtype=config.dtype)
+        self.advantages = torch.tensor([], dtype=config.dtype)
         if self.config.checkpoint_load_folder is not None:
             self._load_from_checkpoint()
-        self.log_probs = self.log_probs.to(config.device)
-        self.values = self.values.to(config.device)
-        self.advantages = self.advantages.to(config.device)
+        self.log_probs = self.log_probs.to(config.learner_device)
+        self.values = self.values.to(config.learner_device)
+        self.advantages = self.advantages.to(config.learner_device)
 
     def _load_from_checkpoint(self):
         # lazy way
-        with open(
-            os.path.join(self.config.checkpoint_load_folder, EXPERIENCE_BUFFER_FILE),
-            "rb",
-        ) as f:
-            state_dict = pickle.load(f)
-            self.agent_ids = state_dict["agent_ids"]
-            self.observations = state_dict["observations"]
-            self.actions = state_dict["actions"]
-            self.log_probs = state_dict["log_probs"]
-            self.values = state_dict["values"]
-            self.advantages = state_dict["advantages"]
+        # TODO: don't use pickle for torch things, use torch.load because of map_location. Or maybe define a custom unpickler for this? Or maybe one already exists?
+        try:
+            with open(
+                os.path.join(
+                    self.config.checkpoint_load_folder, EXPERIENCE_BUFFER_FILE
+                ),
+                "rb",
+            ) as f:
+                state_dict = pickle.load(f)
+                self.agent_ids = state_dict["agent_ids"]
+                self.observations = state_dict["observations"]
+                self.actions = state_dict["actions"]
+                self.log_probs = state_dict["log_probs"]
+                self.values = state_dict["values"]
+                self.advantages = state_dict["advantages"]
+        except FileNotFoundError:
+            print(
+                f"{self.config.agent_controller_name}: Tried to load from checkpoint, but checkpoint didn't contain a saved experience buffer! A blank experience buffer will be used instead."
+            )
 
     def save_checkpoint(self, folder_path):
         os.makedirs(folder_path, exist_ok=True)
@@ -195,29 +221,36 @@ class ExperienceBuffer(
             exp_buffer_data
         )
 
-        self.agent_ids = _cat_list(self.agent_ids, agent_ids, self.config.max_size)
+        self.agent_ids = _cat_list(
+            self.agent_ids, agent_ids, self.config.experience_buffer_config.max_size
+        )
         self.observations = _cat_list(
-            self.observations, observations, self.config.max_size
+            self.observations,
+            observations,
+            self.config.experience_buffer_config.max_size,
+        )
+        self.actions = _cat_list(
+            self.actions, actions, self.config.experience_buffer_config.max_size
         )
-        self.actions = _cat_list(self.actions, actions, self.config.max_size)
         self.log_probs = _cat(
             self.log_probs,
             log_probs,
-            self.config.max_size,
+            self.config.experience_buffer_config.max_size,
         )
         self.values = _cat(
             self.values,
             values,
-            self.config.max_size,
+            self.config.experience_buffer_config.max_size,
        )
         self.advantages = _cat(
             self.advantages,
             advantages,
-            self.config.max_size,
+            self.config.experience_buffer_config.max_size,
         )
 
         return trajectory_processor_data
 
+    # TODO: tensordict?
     def _get_samples(self, indices) -> Tuple[
         Iterable[AgentID],
         Iterable[ObsType],
@@ -242,18 +275,14 @@ class ExperienceBuffer(
         :param batch_size: size of each batch yielded by the generator.
         :return:
         """
-        if self.config.device != "cpu":
+        if self.config.learner_device.type != "cpu":
             torch.cuda.current_stream().synchronize()
         total_samples = self.values.shape[0]
         indices = self.rng.permutation(total_samples)
         start_idx = 0
-        batches = []
         while start_idx + batch_size <= total_samples:
-            batches.append(
-                self._get_samples(indices[start_idx : start_idx + batch_size])
-            )
+            yield self._get_samples(indices[start_idx : start_idx + batch_size])
             start_idx += batch_size
-        return batches
 
     def clear(self):
         """
@@ -265,4 +294,4 @@ class ExperienceBuffer(
         del self.log_probs
         del self.values
         del self.advantages
-        self.__init__(self.max_size, self.seed, self.device)
+        self.__init__(self.trajectory_processor)
@@ -76,25 +76,31 @@ class NumpyExperienceBuffer(
             exp_buffer_data
         )
 
-        self.agent_ids = _cat_list(self.agent_ids, agent_ids, self.config.max_size)
+        self.agent_ids = _cat_list(
+            self.agent_ids, agent_ids, self.config.experience_buffer_config.max_size
+        )
         self.observations = _cat_numpy(
-            self.observations, observations, self.config.max_size
+            self.observations,
+            observations,
+            self.config.experience_buffer_config.max_size,
+        )
+        self.actions = _cat_numpy(
+            self.actions, actions, self.config.experience_buffer_config.max_size
         )
-        self.actions = _cat_numpy(self.actions, actions, self.config.max_size)
         self.log_probs = _cat(
             self.log_probs,
             log_probs,
-            self.config.max_size,
+            self.config.experience_buffer_config.max_size,
         )
         self.values = _cat(
             self.values,
             values,
-            self.config.max_size,
+            self.config.experience_buffer_config.max_size,
         )
         self.advantages = _cat(
             self.advantages,
             advantages,
-            self.config.max_size,
+            self.config.experience_buffer_config.max_size,
         )
 
         return trajectory_processor_data
@@ -116,18 +122,14 @@ class NumpyExperienceBuffer(
         :param batch_size: size of each batch yielded by the generator.
         :return:
         """
-        if self.config.device != "cpu":
+        if self.config.experience_buffer_config.device.type != "cpu":
             torch.cuda.current_stream().synchronize()
         total_samples = self.values.shape[0]
         indices = self.rng.permutation(total_samples)
         start_idx = 0
-        batches = []
         while start_idx + batch_size <= total_samples:
-            batches.append(
-                self._get_samples(indices[start_idx : start_idx + batch_size])
-            )
+            yield self._get_samples(indices[start_idx : start_idx + batch_size])
             start_idx += batch_size
-        return batches
 
     def clear(self):
         """
@@ -20,7 +20,7 @@ from ..ppo import RustDerivedGAETrajectoryProcessorConfig, RustGAETrajectoryProc
 from .trajectory_processor import TRAJECTORY_PROCESSOR_FILE, TrajectoryProcessor
 
 
-class GAETrajectoryProcessorConfigModel(BaseModel):
+class GAETrajectoryProcessorConfigModel(BaseModel, extra="forbid"):
     gamma: float = 0.99
     lmbda: float = 0.95
     standardize_returns: bool = True
@@ -122,7 +122,7 @@ class GAETrajectoryProcessor(
             self._load_from_checkpoint()
         self.rust_gae_trajectory_processor.load(
             RustDerivedGAETrajectoryProcessorConfig(
-                self.gamma, self.lmbda, np.dtype(self.dtype)
+                self.gamma, self.lmbda, np.dtype(str(self.dtype)[6:])
            )
        )
 
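
The `[6:]` slice in the hunk above works because `self.dtype` is now a `torch.dtype`, whose string form carries a `torch.` prefix; stripping the first six characters leaves a name NumPy understands. A quick standalone illustration:

```python
import numpy as np
import torch

dtype = torch.float32
print(str(dtype))                # "torch.float32"
print(str(dtype)[6:])            # "float32"
print(np.dtype(str(dtype)[6:]))  # float32
```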
@@ -132,19 +132,10 @@ class GAETrajectoryProcessor(
             "rt",
         ) as f:
             state = json.load(f)
-        # TODO: why are these 4 getting saved/loaded?? They should just come from config
-        self.gamma = state["gamma"]
-        self.lmbda = state["lambda"]
-        self.standardize_returns = state["standardize_returns"]
-        self.max_returns_per_stats_increment = state["max_returns_per_stats_increment"]
         self.return_stats.load_state_dict(state["return_running_stats"])
 
     def save_checkpoint(self, folder_path):
         state = {
-            "gamma": self.gamma,
-            "lambda": self.lmbda,
-            "standardize_returns": self.standardize_returns,
-            "max_returns_per_stats_increment": self.max_returns_per_stats_increment,
             "return_running_stats": self.return_stats.state_dict(),
         }
         with open(
@@ -161,18 +161,10 @@ class GAETrajectoryProcessorPurePython(
             "rt",
         ) as f:
             state = json.load(f)
-        self.gamma = state["gamma"]
-        self.lmbda = state["lambda"]
-        self.standardize_returns = state["standardize_returns"]
-        self.max_returns_per_stats_increment = state["max_returns_per_stats_increment"]
         self.return_stats.load_state_dict(state["return_running_stats"])
 
     def save_checkpoint(self, folder_path):
         state = {
-            "gamma": self.gamma,
-            "lambda": self.lmbda,
-            "standardize_returns": self.standardize_returns,
-            "max_returns_per_stats_increment": self.max_returns_per_stats_increment,
             "return_running_stats": self.return_stats.state_dict(),
         }
         with open(
@@ -24,8 +24,6 @@ from rlgym.api import (
 )
 from rlgym_learn import EnvActionResponse, EnvActionResponseType, Timestep
 from rlgym_learn.api.agent_controller import AgentController
-from torch import device as _device
-
 from rlgym_learn_algos.logging import (
     DerivedMetricsLoggerConfig,
     MetricsLogger,
@@ -36,6 +34,7 @@ from rlgym_learn_algos.logging import (
 )
 from rlgym_learn_algos.stateful_functions import ObsStandardizer
 from rlgym_learn_algos.util.torch_functions import get_device
+from torch import device as _device
 
 from .actor import Actor
 from .critic import Critic
@@ -62,15 +61,13 @@ ITERATION_SHARED_INFOS_FILE = "iteration_shared_infos.pkl"
 CURRENT_TRAJECTORIES_FILE = "current_trajectories.pkl"
 
 
-class PPOAgentControllerConfigModel(BaseModel):
+class PPOAgentControllerConfigModel(BaseModel, extra="forbid"):
     timesteps_per_iteration: int = 50000
     save_every_ts: int = 1_000_000
     add_unix_timestamp: bool = True
     checkpoint_load_folder: Optional[str] = None
     n_checkpoints_to_keep: int = 5
     random_seed: int = 123
-    dtype: str = "float32"
-    device: Optional[str] = None
     learner_config: PPOLearnerConfigModel = Field(default_factory=PPOLearnerConfigModel)
     experience_buffer_config: ExperienceBufferConfigModel = Field(
         default_factory=ExperienceBufferConfigModel
@@ -190,11 +187,9 @@ class PPOAgentController(
 
     def load(self, config):
         self.config = config
-        device = config.agent_controller_config.device
-        if device is None:
-            device = config.base_config.device
-        self.device = get_device(device)
-        print(f"{self.config.agent_controller_name}: Using device {self.device}")
+        print(
+            f"{self.config.agent_controller_name}: Using device {config.agent_controller_config.learner_config.device}"
+        )
         agent_controller_config = config.agent_controller_config
         learner_config = config.agent_controller_config.learner_config
         experience_buffer_config = (
@@ -234,14 +229,14 @@ class PPOAgentController(
             # TODO: this doesn't seem to be working
             if abs_save_folder == loaded_checkpoint_runs_folder:
                 print(
-                    "Using the loaded checkpoint's run folder as the checkpoints save folder."
+                    f"{config.agent_controller_name}: Using the loaded checkpoint's run folder as the checkpoints save folder."
                 )
                 checkpoints_save_folder = os.path.abspath(
                     os.path.join(agent_controller_config.checkpoint_load_folder, "..")
                 )
             else:
                 print(
-                    "Runs folder in config does not align with loaded checkpoint's runs folder. Creating new run in the config-based runs folder."
+                    f"{config.agent_controller_name}: Runs folder in config does not align with loaded checkpoint's runs folder. Creating new run in the config-based runs folder."
                 )
                 checkpoints_save_folder = os.path.join(
                     config.save_folder, agent_controller_config.run_name + run_suffix
@@ -257,26 +252,19 @@ class PPOAgentController(
 
         self.learner.load(
             DerivedPPOLearnerConfig(
+                learner_config=learner_config,
                 obs_space=self.obs_space,
                 action_space=self.action_space,
-                n_epochs=learner_config.n_epochs,
-                batch_size=learner_config.batch_size,
-                n_minibatches=learner_config.n_minibatches,
-                ent_coef=learner_config.ent_coef,
-                clip_range=learner_config.clip_range,
-                actor_lr=learner_config.actor_lr,
-                critic_lr=learner_config.critic_lr,
-                device=self.device,
                 checkpoint_load_folder=learner_checkpoint_load_folder,
             )
         )
         self.experience_buffer.load(
             DerivedExperienceBufferConfig(
-                max_size=experience_buffer_config.max_size,
-                seed=agent_controller_config.random_seed,
-                dtype=agent_controller_config.dtype,
-                device=self.device,
-                trajectory_processor_config=experience_buffer_config.trajectory_processor_config,
+                experience_buffer_config=experience_buffer_config,
+                agent_controller_name=config.agent_controller_name,
+                seed=config.base_config.random_seed,
+                dtype=agent_controller_config.learner_config.dtype,
+                learner_device=agent_controller_config.learner_config.device,
                 checkpoint_load_folder=experience_buffer_checkpoint_load_folder,
             )
         )
@@ -301,9 +289,9 @@ class PPOAgentController(
             additional_derived_config = None
         self.metrics_logger.load(
             DerivedMetricsLoggerConfig(
+                metrics_logger_config=metrics_logger_config,
                 checkpoint_load_folder=metrics_logger_checkpoint_load_folder,
                 agent_controller_name=config.agent_controller_name,
-                metrics_logger_config=metrics_logger_config,
                 additional_derived_config=additional_derived_config,
             )
         )
@@ -465,6 +453,7 @@ class PPOAgentController(
         ):
             self.timestep_collection_end_time = time.perf_counter()
             self._learn()
+            self.cur_iteration += 1
             if self.ts_since_last_save >= self.config.agent_controller_config.save_every_ts:
                 self.save_checkpoint()
                 self.ts_since_last_save = 0
@@ -563,5 +552,5 @@ class PPOAgentController(
         for idx, (start, stop) in enumerate(traj_timestep_idx_ranges):
             self.current_trajectories[idx].val_preds = val_preds[start : stop - 1]
             self.current_trajectories[idx].final_val_pred = val_preds[stop - 1]
-        if self.device != "cpu":
+        if self.config.agent_controller_config.learner_config.device.type != "cpu":
             torch.cuda.current_stream().synchronize()
@@ -7,7 +7,7 @@ from typing import Generic, Optional
 
 import numpy as np
 import torch
-from pydantic import BaseModel
+from pydantic import BaseModel, field_serializer, model_validator
 from rlgym.api import (
     ActionSpaceType,
     ActionType,
@@ -16,6 +16,11 @@ from rlgym.api import (
     ObsType,
     RewardType,
 )
+from rlgym_learn_algos.util.torch_functions import get_device
+from rlgym_learn_algos.util.torch_pydantic import (
+    PydanticTorchDevice,
+    PydanticTorchDtype,
+)
 from torch import nn as nn
 
 from .actor import Actor
@@ -24,7 +29,8 @@ from .experience_buffer import ExperienceBuffer
 from .trajectory_processor import TrajectoryProcessorConfig, TrajectoryProcessorData
 
 
-class PPOLearnerConfigModel(BaseModel):
+class PPOLearnerConfigModel(BaseModel, extra="forbid"):
+    dtype: PydanticTorchDtype = torch.float32
     n_epochs: int = 1
     batch_size: int = 50000
     n_minibatches: int = 1
@@ -32,20 +38,45 @@ class PPOLearnerConfigModel(BaseModel):
     clip_range: float = 0.2
     actor_lr: float = 3e-4
     critic_lr: float = 3e-4
+    device: PydanticTorchDevice = "auto"
+
+    @model_validator(mode="before")
+    @classmethod
+    def set_device(cls, data):
+        if isinstance(data, dict) and (
+            "device" not in data or data["device"] == "auto"
+        ):
+            data["device"] = get_device("auto")
+        return data
+
+
+    # @model_validator(mode="before")
+    # @classmethod
+    # def set_agent_controllers_config(cls, data):
+    #     if isinstance(data, LearningCoordinatorConfigModel):
+    #         agent_controllers_config = {}
+    #         for k, v in data.agent_controllers_config.items():
+    #             if isinstance(v, BaseModel):
+    #                 agent_controllers_config[k] = v.model_dump()
+    #             else:
+    #                 agent_controllers_config[k] = v
+    #         data.agent_controllers_config = agent_controllers_config
+    #     elif isinstance(data, dict) and "agent_controllers_config" in data:
+    #         agent_controllers_config = {}
+    #         for k, v in data["agent_controllers_config"].items():
+    #             if isinstance(v, BaseModel):
+    #                 agent_controllers_config[k] = v.model_dump()
+    #             else:
+    #                 agent_controllers_config[k] = v
+    #         data["agent_controllers_config"] = agent_controllers_config
+    #     return data
 
 
 @dataclass
 class DerivedPPOLearnerConfig:
+    learner_config: PPOLearnerConfigModel
     obs_space: ObsSpaceType
     action_space: ActionSpaceType
-    n_epochs: int = 10
-    batch_size: int = 50000
-    n_minibatches: int = 1
-    ent_coef: float = 0.005
-    clip_range: float = 0.2
-    actor_lr: float = 3e-4
-    critic_lr: float = 3e-4
-    device: str = "auto"
     checkpoint_load_folder: Optional[str] = None
 
 
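
The device and dtype now live on `PPOLearnerConfigModel`, with a before-mode validator resolving `"auto"` to a concrete device before field validation runs. A trimmed, standalone sketch of the same pattern; `get_device` here is a stand-in for the package's helper, and plain torch field types are used instead of `PydanticTorchDtype`/`PydanticTorchDevice` to keep the example self-contained:

```python
import torch
from pydantic import BaseModel, model_validator


def get_device(device: str) -> torch.device:
    # Stand-in for rlgym_learn_algos.util.torch_functions.get_device:
    # resolve "auto" to CUDA when available, otherwise CPU.
    if device == "auto":
        return torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    return torch.device(device)


class LearnerConfigSketch(BaseModel, extra="forbid", arbitrary_types_allowed=True):
    dtype: torch.dtype = torch.float32
    device: torch.device = torch.device("cpu")

    @model_validator(mode="before")
    @classmethod
    def set_device(cls, data):
        # Mirror of the validator above: resolve a missing or "auto" device
        # before field validation sees it.
        if isinstance(data, dict) and ("device" not in data or data["device"] == "auto"):
            data["device"] = get_device("auto")
        return data


print(LearnerConfigSketch(device="auto").device)  # cuda:0 or cpu
```

The real model additionally accepts plain strings like `"cuda:0"` for the non-"auto" case via the `PydanticTorchDevice` annotation defined in the new `torch_pydantic` module further down.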
@@ -97,15 +128,17 @@ class PPOLearner(
         self.config = config
 
         self.actor = self.actor_factory(
-            config.obs_space, config.action_space, config.device
+            config.obs_space, config.action_space, config.learner_config.device
+        )
+        self.critic = self.critic_factory(
+            config.obs_space, config.learner_config.device
         )
-        self.critic = self.critic_factory(config.obs_space, config.device)
 
         self.actor_optimizer = torch.optim.Adam(
-            self.actor.parameters(), lr=self.config.actor_lr
+            self.actor.parameters(), lr=self.config.learner_config.actor_lr
         )
         self.critic_optimizer = torch.optim.Adam(
-            self.critic.parameters(), lr=self.config.critic_lr
+            self.critic.parameters(), lr=self.config.learner_config.critic_lr
         )
         self.critic_loss_fn = torch.nn.MSELoss()
 
@@ -130,14 +163,17 @@ class PPOLearner(
         print("-" * 20)
         print(f"{'Total':<10} {total_parameters:<10}")
 
-        print(f"Current Policy Learning Rate: {self.config.actor_lr}")
-        print(f"Current Critic Learning Rate: {self.config.critic_lr}")
+        print(f"Current Policy Learning Rate: {self.config.learner_config.actor_lr}")
+        print(f"Current Critic Learning Rate: {self.config.learner_config.critic_lr}")
         self.cumulative_model_updates = 0
 
         if self.config.checkpoint_load_folder is not None:
             self._load_from_checkpoint()
         self.minibatch_size = int(
-            np.ceil(self.config.batch_size / self.config.n_minibatches)
+            np.ceil(
+                self.config.learner_config.batch_size
+                / self.config.learner_config.n_minibatches
+            )
         )
 
  def _load_from_checkpoint(self):
@@ -147,19 +183,27 @@ class PPOLearner(
147
183
  ), f"PPO Learner cannot find folder: {self.config.checkpoint_load_folder}"
148
184
 
149
185
  self.actor.load_state_dict(
150
- torch.load(os.path.join(self.config.checkpoint_load_folder, ACTOR_FILE))
186
+ torch.load(
187
+ os.path.join(self.config.checkpoint_load_folder, ACTOR_FILE),
188
+ map_location=self.config.learner_config.device,
189
+ )
151
190
  )
152
191
  self.critic.load_state_dict(
153
- torch.load(os.path.join(self.config.checkpoint_load_folder, CRITIC_FILE))
192
+ torch.load(
193
+ os.path.join(self.config.checkpoint_load_folder, CRITIC_FILE),
194
+ map_location=self.config.learner_config.device,
195
+ )
154
196
  )
155
197
  self.actor_optimizer.load_state_dict(
156
198
  torch.load(
157
- os.path.join(self.config.checkpoint_load_folder, ACTOR_OPTIMIZER_FILE)
199
+ os.path.join(self.config.checkpoint_load_folder, ACTOR_OPTIMIZER_FILE),
200
+ map_location=self.config.learner_config.device,
158
201
  )
159
202
  )
160
203
  self.critic_optimizer.load_state_dict(
161
204
  torch.load(
162
- os.path.join(self.config.checkpoint_load_folder, CRITIC_OPTIMIZER_FILE)
205
+ os.path.join(self.config.checkpoint_load_folder, CRITIC_OPTIMIZER_FILE),
206
+ map_location=self.config.learner_config.device,
163
207
  )
164
208
  )
165
209
  with open(
@@ -215,9 +259,11 @@ class PPOLearner(
         critic_before = torch.nn.utils.parameters_to_vector(self.critic.parameters())
 
         t1 = time.time()
-        for epoch in range(self.config.n_epochs):
+        for epoch in range(self.config.learner_config.n_epochs):
             # Get all shuffled batches from the experience buffer.
-            batches = exp.get_all_batches_shuffled(self.config.batch_size)
+            batches = exp.get_all_batches_shuffled(
+                self.config.learner_config.batch_size
+            )
             for batch in batches:
                 (
                     batch_agent_ids,
@@ -232,20 +278,29 @@ class PPOLearner(
                 self.critic_optimizer.zero_grad()
 
                 for minibatch_slice in range(
-                    0, self.config.batch_size, self.minibatch_size
+                    0, self.config.learner_config.batch_size, self.minibatch_size
                 ):
                     # Send everything to the device and enforce correct shapes.
                     start = minibatch_slice
-                    stop = min(start + self.minibatch_size, self.config.batch_size)
-                    minibatch_ratio = (stop - start) / self.config.batch_size
+                    stop = min(
+                        start + self.minibatch_size,
+                        self.config.learner_config.batch_size,
+                    )
+                    minibatch_ratio = (
+                        stop - start
+                    ) / self.config.learner_config.batch_size
 
                     agent_ids = batch_agent_ids[start:stop]
                     obs = batch_obs[start:stop]
                     acts = batch_acts[start:stop]
-                    advantages = batch_advantages[start:stop].to(self.config.device)
-                    old_probs = batch_old_probs[start:stop].to(self.config.device)
+                    advantages = batch_advantages[start:stop].to(
+                        self.config.learner_config.device
+                    )
+                    old_probs = batch_old_probs[start:stop].to(
+                        self.config.learner_config.device
+                    )
                     target_values = batch_target_values[start:stop].to(
-                        self.config.device
+                        self.config.learner_config.device
                     )
 
                     # Compute value estimates.
@@ -262,8 +317,8 @@ class PPOLearner(
                     ratio = torch.exp(log_probs - old_probs)
                     clipped = torch.clamp(
                         ratio,
-                        1.0 - self.config.clip_range,
-                        1.0 + self.config.clip_range,
+                        1.0 - self.config.learner_config.clip_range,
+                        1.0 + self.config.learner_config.clip_range,
                     )
 
                     # Compute KL divergence & clip fraction using SB3 method for reporting.
@@ -274,7 +329,10 @@ class PPOLearner(
 
                     # From the stable-baselines3 implementation of PPO.
                     clip_fraction = torch.mean(
-                        (torch.abs(ratio - 1) > self.config.clip_range).float()
+                        (
+                            torch.abs(ratio - 1)
+                            > self.config.learner_config.clip_range
+                        ).float()
                     ).to(device="cpu", non_blocking=True)
                     clip_fractions.append((clip_fraction, minibatch_ratio))
 
@@ -285,7 +343,9 @@ class PPOLearner(
                     value_loss = (
                         self.critic_loss_fn(vals, target_values) * minibatch_ratio
                     )
-                    ppo_loss = actor_loss - entropy * self.config.ent_coef
+                    ppo_loss = (
+                        actor_loss - entropy * self.config.learner_config.ent_coef
+                    )
 
                     ppo_loss.backward()
                     value_loss.backward()
@@ -312,7 +372,7 @@ class PPOLearner(
         actor_update_magnitude = (actor_before - actor_after).norm().cpu().item()
         critic_update_magnitude = (critic_before - critic_after).norm().cpu().item()
 
-        if self.config.device != "cpu":
+        if self.config.learner_config.device.type != "cpu":
             torch.cuda.current_stream().synchronize()
 
         tot_clip = sum(
@@ -3,7 +3,7 @@ from dataclasses import dataclass
 from typing import Any, Dict, Generic, List, Optional, Tuple, TypeVar
 
 from rlgym.api import ActionType, AgentID, ObsType, RewardType
-from torch import Tensor
+from torch import Tensor, device, dtype
 
 from .trajectory import Trajectory
 
@@ -16,8 +16,8 @@ TRAJECTORY_PROCESSOR_FILE = "trajectory_processor.json"
 @dataclass
 class DerivedTrajectoryProcessorConfig(Generic[TrajectoryProcessorConfig]):
     trajectory_processor_config: TrajectoryProcessorConfig
-    dtype: str
-    device: str
+    dtype: dtype
+    device: device
     checkpoint_load_folder: Optional[str] = None
 
 
@@ -0,0 +1,118 @@
+from typing import Annotated, Any
+
+import torch
+from pydantic import (
+    BaseModel,
+    GetCoreSchemaHandler,
+    GetJsonSchemaHandler,
+    ValidationError,
+)
+from pydantic.json_schema import JsonSchemaValue
+from pydantic_core import core_schema
+
+dtype_str_regex = "|".join(
+    set(
+        f"({str(v)[6:]})" for v in torch.__dict__.values() if isinstance(v, torch.dtype)
+    )
+)
+device_str_regex = (
+    "("
+    + "|".join(
+        f"({v})"
+        for v in [
+            "cpu",
+            "cuda",
+            "ipu",
+            "xpu",
+            "mkldnn",
+            "opengl",
+            "opencl",
+            "ideep",
+            "hip",
+            "ve",
+            "fpga",
+            "maia",
+            "xla",
+            "lazy",
+            "vulkan",
+            "mps",
+            "meta",
+            "hpu",
+            "mtia",
+            "privateuseone",
+        ]
+    )
+    + ")(:\d+)"
+)
+
+
+# Created using the example here: https://docs.pydantic.dev/latest/concepts/types/#handling-third-party-types
+class _TorchDtypePydanticAnnotation:
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls,
+        _source_type: Any,
+        _handler: GetCoreSchemaHandler,
+    ) -> core_schema.CoreSchema:
+        from_str_schema = core_schema.chain_schema(
+            [
+                core_schema.str_schema(pattern=dtype_str_regex),
+                core_schema.no_info_plain_validator_function(
+                    lambda v: getattr(torch, v)
+                ),
+            ]
+        )
+
+        return core_schema.json_or_python_schema(
+            json_schema=from_str_schema,
+            python_schema=core_schema.union_schema(
+                [
+                    # check if it's an instance first before doing any further work
+                    core_schema.is_instance_schema(torch.dtype),
+                    from_str_schema,
+                ]
+            ),
+            serialization=core_schema.plain_serializer_function_ser_schema(
+                lambda v: str(v)[6:]
+            ),
+        )
+
+
+class _TorchDevicePydanticAnnotation:
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls,
+        _source_type: Any,
+        _handler: GetCoreSchemaHandler,
+    ) -> core_schema.CoreSchema:
+        from_str_schema = core_schema.chain_schema(
+            [
+                core_schema.str_schema(pattern=device_str_regex),
+                core_schema.no_info_plain_validator_function(lambda v: torch.device(v)),
+            ]
+        )
+        from_int_schema = core_schema.chain_schema(
+            [
+                core_schema.int_schema(ge=0),
+                core_schema.no_info_plain_validator_function(lambda v: torch.device(v)),
+            ]
+        )
+
+        return core_schema.json_or_python_schema(
+            json_schema=from_str_schema,
+            python_schema=core_schema.union_schema(
+                [
+                    # check if it's an instance first before doing any further work
+                    core_schema.is_instance_schema(torch.dtype),
+                    from_str_schema,
+                    from_int_schema,
+                ]
+            ),
+            serialization=core_schema.plain_serializer_function_ser_schema(
+                lambda v: str(v)
+            ),
+        )
+
+
+PydanticTorchDtype = Annotated[torch.dtype, _TorchDtypePydanticAnnotation]
+PydanticTorchDevice = Annotated[torch.device, _TorchDevicePydanticAnnotation]
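
The new `torch_pydantic` module above lets config models declare `torch.dtype` and `torch.device` fields that validate from plain strings and serialize back to strings. A usage sketch, assuming the installed package exposes the module as `rlgym_learn_algos.util.torch_pydantic`; note the device regex in this file expects an index suffix for string input, so `"cuda:0"` is used rather than a bare device type:

```python
import torch
from pydantic import BaseModel

from rlgym_learn_algos.util.torch_pydantic import (
    PydanticTorchDevice,
    PydanticTorchDtype,
)


class DeviceConfig(BaseModel):
    dtype: PydanticTorchDtype = torch.float32
    device: PydanticTorchDevice = torch.device("cpu")


# Strings from a JSON/YAML config are validated into real torch objects...
cfg = DeviceConfig.model_validate({"dtype": "float16", "device": "cuda:0"})
assert cfg.dtype is torch.float16
assert cfg.device == torch.device("cuda:0")

# ...and serialize back to strings ("float16", "cuda:0") for checkpointed configs.
print(cfg.model_dump())
```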
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rlgym-learn-algos
-Version: 0.1.5
+Version: 0.2.0
 Classifier: Programming Language :: Rust
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Requires-Dist: pydantic>=2.8.2
@@ -1,36 +1,37 @@
-rlgym_learn_algos-0.1.5.dist-info/METADATA,sha256=nknWtQflpjq-HMgFle-snwH3_eZuT3bgd56ssd_rSkQ,2431
-rlgym_learn_algos-0.1.5.dist-info/WHEEL,sha256=WWWNS_YivL6eU-qhhdTFNNU59V1SfDxFkjCIXFZL9K8,92
-rlgym_learn_algos-0.1.5.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
-rlgym_learn_algos/logging/dict_metrics_logger.py,sha256=qmqr0HSiHpm5rjyxfAdmXOeBSbgP_t36-e-enpOccnE,1991
-rlgym_learn_algos/logging/metrics_logger.py,sha256=45FBH49OcHl5skvG9J9MIFJtAxbFo1TxtEvLWwjttSU,4122
-rlgym_learn_algos/logging/wandb_metrics_logger.py,sha256=funcqZYUarlKND7W79TThFc5d8j-a_CIQwAnOGYD-rs,6518
+rlgym_learn_algos-0.2.0.dist-info/METADATA,sha256=lhDW1yMI9RhHxSWjTN8WFWJ6O_WlgtFsm8NI1e-GLgI,2431
+rlgym_learn_algos-0.2.0.dist-info/WHEEL,sha256=2I8FMXqttZIv4Ceqp7SSjsZUyiuHAyz-otzopz4PdFA,92
+rlgym_learn_algos-0.2.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+rlgym_learn_algos/__init__.py,sha256=C7cRdL4lZrpk3ge_4_lGAbGodqWJXM56FfgO0keRPAY,207
 rlgym_learn_algos/logging/__init__.py,sha256=ouItskWI4ItuoFdL--rt9YXCt7MasA473lYPhmJnrFA,423
+rlgym_learn_algos/logging/dict_metrics_logger.py,sha256=qmqr0HSiHpm5rjyxfAdmXOeBSbgP_t36-e-enpOccnE,1991
+rlgym_learn_algos/logging/metrics_logger.py,sha256=0l69GSSrxRcPm0xAjvF7yEIis7jGNu70unXu3hnK0XE,4122
+rlgym_learn_algos/logging/wandb_metrics_logger.py,sha256=Kxi8y-nfoh3EI_OqLm4pDS-zhUWEjkS8F4TdD01dr9U,6939
+rlgym_learn_algos/ppo/__init__.py,sha256=o6B8wCRfeyipSNEGJFyB3SHYmxUytaQelX2zsted5cg,1184
 rlgym_learn_algos/ppo/actor.py,sha256=LZevg0kqRrb4PwF05ePK9b1JIBX04YkWjsPs7swZ9JY,1767
 rlgym_learn_algos/ppo/basic_critic.py,sha256=oyyo8x9K6mi2BsbA6_tRy2Av8Pimb35WspJkPpe8XdQ,1022
 rlgym_learn_algos/ppo/continuous_actor.py,sha256=1vdBUw2mQNFNu6A6ZrAztBjd4DmwjGkIIFLboMZ02lc,4417
 rlgym_learn_algos/ppo/critic.py,sha256=RB89WtiN52BEq5QCpGAPrASUnasac-Bpg7B0lM3UXHw,689
 rlgym_learn_algos/ppo/discrete_actor.py,sha256=Nuc3EndIQud3NGrkBIQgy-Z-okhXVrj6p6okSGD1KNY,2620
 rlgym_learn_algos/ppo/env_trajectories.py,sha256=gzQBRkzwZhlZeSvWL50cc8AOgBfsg5zUys0aTJj6aZU,3775
-rlgym_learn_algos/ppo/experience_buffer.py,sha256=0TqIuWUe-La_oeXDyXztsnyr855EAvkMjtDGVrSzkAQ,9488
-rlgym_learn_algos/ppo/experience_buffer_numpy.py,sha256=5PRgkoNbdsp8a6SyxwFug2k5x6zqEVl4ZJFb4aBblCs,4594
-rlgym_learn_algos/ppo/gae_trajectory_processor.py,sha256=ESwN_CUWJapmtoYUnlFNfBByd_7arMFdMGbnjV5HEVE,5467
-rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py,sha256=kRZcM29XBarOQib84mzrcIGkwH5N1yU7aqaxRn_YWVQ,7277
+rlgym_learn_algos/ppo/experience_buffer.py,sha256=py7kwhRJFsPx5lyvcUVywLAsu5zbU_0wV_52Fb6Kb_4,11012
+rlgym_learn_algos/ppo/experience_buffer_numpy.py,sha256=Apk4x-pfRnitKJPW6LBZyOPIhgeJs_5EG7BbTCqMwjk,4761
+rlgym_learn_algos/ppo/gae_trajectory_processor.py,sha256=r-o5ajNSTNr5nZxsUc17KMuZR6c4l4NHHTIs2-WbMgE,4956
+rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py,sha256=RpyDR6GQ1JXvwtoKkx5V3z3WvU9ElJdzfNtpPiZDaTc,6831
 rlgym_learn_algos/ppo/multi_discrete_actor.py,sha256=zSYeBBirjguSv_wO-peo06hioHiVhZQjnd-NYwJxmag,3127
-rlgym_learn_algos/ppo/ppo_agent_controller.py,sha256=9qe92yTqjAeDp9lLmOFYotNdcvNpip-xeqHSYIyqI08,23610
-rlgym_learn_algos/ppo/ppo_learner.py,sha256=jMYQRsA6WIbC_UeP8YLqguyfrLuGX5s4_4p8zJ9fq6A,12807
+rlgym_learn_algos/ppo/ppo_agent_controller.py,sha256=CH-xpO2mOCwe4iu_n9wPPsqLxEPYcbQhnvcJYs46qgM,23270
+rlgym_learn_algos/ppo/ppo_learner.py,sha256=z14GaL52mx7b20mQsuOLFXlpYVlR0_9Nn5HinImWaLY,15295
 rlgym_learn_algos/ppo/ppo_metrics_logger.py,sha256=niW8xgQLEBCGgTaVyiE_JqsU6RTjV6h-JzM-7c3JT38,2868
 rlgym_learn_algos/ppo/trajectory.py,sha256=IIH_IG8B_HkyxRPf-YsCyF1jQqNjDx752hgzAehG25I,719
-rlgym_learn_algos/ppo/trajectory_processor.py,sha256=AMdZ2OVqPr9uJfgDcdi0_WwqpuPY6lnNMioiyqbAkxs,2039
-rlgym_learn_algos/ppo/__init__.py,sha256=o6B8wCRfeyipSNEGJFyB3SHYmxUytaQelX2zsted5cg,1184
+rlgym_learn_algos/ppo/trajectory_processor.py,sha256=9-JE8hJkOgVJ-R3_9JYjFTUUTlnVq2-U43VP0HiY-sM,2059
 rlgym_learn_algos/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+rlgym_learn_algos/rlgym_learn_algos.cp310-win32.pyd,sha256=GuvLbJ2bJrcgAmafhTIuvnHsi7X71dPwlbZCaPgzhBI,339968
 rlgym_learn_algos/rlgym_learn_algos.pyi,sha256=NwY-sDZWM06TUiKPzxpfH1Td6G6E8TdxtRPgBSh-PPE,1203
+rlgym_learn_algos/stateful_functions/__init__.py,sha256=QS0KYjuzagNkYiYllXQmjoJn14-G7KZawq1Zvwh8alY,236
 rlgym_learn_algos/stateful_functions/batch_reward_type_numpy_converter.py,sha256=1yte5qYyl9LWdClHZ_YsF7R9dJqQeYfINMdgNF_59Gs,767
 rlgym_learn_algos/stateful_functions/numpy_obs_standardizer.py,sha256=OgtwCaxBGTySPMnE5D5VDKpJ0dsTEz9oHc08A96xRao,1604
 rlgym_learn_algos/stateful_functions/obs_standardizer.py,sha256=qPPc3--J_3mpJJ-QHJjta6dbWWBobL7SYdK5MUP-XMw,606
-rlgym_learn_algos/stateful_functions/__init__.py,sha256=QS0KYjuzagNkYiYllXQmjoJn14-G7KZawq1Zvwh8alY,236
+rlgym_learn_algos/util/__init__.py,sha256=VPM6SN4T_625H9t30s9EiLeXiEEWgcyRVHa-LLVNrn4,47
 rlgym_learn_algos/util/running_stats.py,sha256=0tiGFpKtHWzMa1CxM_ueBzd_ryX4bJBriC8MXcSLg8w,4479
 rlgym_learn_algos/util/torch_functions.py,sha256=CTTHzTIi7u1O9HyX0cVJOrnYVbAtnlVs0g1fO9s3ano,3458
-rlgym_learn_algos/util/__init__.py,sha256=VPM6SN4T_625H9t30s9EiLeXiEEWgcyRVHa-LLVNrn4,47
-rlgym_learn_algos/__init__.py,sha256=C7cRdL4lZrpk3ge_4_lGAbGodqWJXM56FfgO0keRPAY,207
-rlgym_learn_algos/rlgym_learn_algos.cp310-win32.pyd,sha256=Z3ibd7sjCsmUOw1ENr_aWX81l86hwqt9St0qdAe18IM,339968
-rlgym_learn_algos-0.1.5.dist-info/RECORD,,
+rlgym_learn_algos/util/torch_pydantic.py,sha256=pgj3I-3q8iW9qtOCv1fgjNkZgA00G_Rdkb4qJPk5gxo,3530
+rlgym_learn_algos-0.2.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: maturin (1.8.3)
+Generator: maturin (1.8.6)
 Root-Is-Purelib: false
 Tag: cp310-cp310-win32