rlgym-learn-algos 0.2.0-cp312-cp312-win32.whl → 0.2.2-cp312-cp312-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rlgym_learn_algos/conversion/__init__.py +0 -0
- rlgym_learn_algos/conversion/convert_rlgym_ppo_checkpoint.py +27 -0
- rlgym_learn_algos/logging/wandb_metrics_logger.py +1 -1
- rlgym_learn_algos/ppo/experience_buffer.py +22 -29
- rlgym_learn_algos/ppo/gae_trajectory_processor.py +12 -6
- rlgym_learn_algos/ppo/ppo_agent_controller.py +52 -27
- rlgym_learn_algos/ppo/ppo_learner.py +53 -39
- rlgym_learn_algos/ppo/trajectory_processor.py +1 -0
- rlgym_learn_algos/rlgym_learn_algos.cp312-win32.pyd +0 -0
- rlgym_learn_algos/util/torch_functions.py +0 -1
- rlgym_learn_algos/util/torch_pydantic.py +1 -1
- {rlgym_learn_algos-0.2.0.dist-info → rlgym_learn_algos-0.2.2.dist-info}/METADATA +1 -1
- {rlgym_learn_algos-0.2.0.dist-info → rlgym_learn_algos-0.2.2.dist-info}/RECORD +15 -13
- {rlgym_learn_algos-0.2.0.dist-info → rlgym_learn_algos-0.2.2.dist-info}/WHEEL +1 -1
- {rlgym_learn_algos-0.2.0.dist-info → rlgym_learn_algos-0.2.2.dist-info}/licenses/LICENSE +0 -0
rlgym_learn_algos/conversion/__init__.py
File without changes
rlgym_learn_algos/conversion/convert_rlgym_ppo_checkpoint.py (new file)
@@ -0,0 +1,27 @@
+import json
+import os
+import time
+from typing import Optional
+
+
+def convert_rlgym_ppo_checkpoint(
+    rlgym_ppo_checkpoint_folder: str, out_folder: Optional[str]
+):
+
+    if out_folder is None:
+        out_folder = f"rlgym_ppo_converted_checkpoint_{time.time_ns()}"
+    print(f"Saving converted checkpoint to folder {out_folder}")
+
+    os.makedirs(out_folder, exist_ok=True)
+
+    PPO_FILES = [
+        ("PPO_POLICY_OPTIMIZER.pt", "actor_optimizer.pt"),
+        ("PPO_POLICY.pt", "actor.pt"),
+        ("PPO_VALUE_NET_OPTIMIZER.pt", "critic_optimizer.pt"),
+        ("PPO_VALUE_NET.pt", "critic.pt"),
+    ]
+    os.makedirs(f"{out_folder}/ppo_learner", exist_ok=True)
+    for file in PPO_FILES:
+        with open(f"{rlgym_ppo_checkpoint_folder}/{file[0]}", "rb") as fin:
+            with open(f"{out_folder}/ppo_learner/{file[1]}", "wb") as fout:
+                fout.write(fin.read())
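A minimal usage sketch of the new helper (the input path below is an illustrative placeholder, not something shipped with the package); passing None as out_folder lets the function generate a timestamped output folder name:

    from rlgym_learn_algos.conversion.convert_rlgym_ppo_checkpoint import (
        convert_rlgym_ppo_checkpoint,
    )

    # Copies the four rlgym-ppo .pt files into <out_folder>/ppo_learner/ using the
    # actor/critic file names expected by rlgym-learn-algos.
    convert_rlgym_ppo_checkpoint("path/to/rlgym_ppo_checkpoint", None)
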
rlgym_learn_algos/logging/wandb_metrics_logger.py
@@ -164,7 +164,7 @@ class WandbMetricsLogger(
                 self.run_id = None
         except FileNotFoundError:
             print(
-                f"{self.config.agent_controller_name}: Tried to load from checkpoint, but
+                f"{self.config.agent_controller_name}: Tried to load wandb run from checkpoint using the file at location {str(os.path.join(self.config.checkpoint_load_folder, self.checkpoint_file_name))}, but there is no such file! A new run will be created based on the config values instead."
             )
             self.run_id = None
 
rlgym_learn_algos/ppo/experience_buffer.py
@@ -25,6 +25,7 @@ EXPERIENCE_BUFFER_FILE = "experience_buffer.pkl"
 class ExperienceBufferConfigModel(BaseModel, extra="forbid"):
     max_size: int = 100000
     device: PydanticTorchDevice = "auto"
+    save_experience_buffer_in_checkpoint: bool = True
     trajectory_processor_config: Dict[str, Any] = Field(default_factory=dict)
 
     @model_validator(mode="before")
@@ -41,21 +42,11 @@ class ExperienceBufferConfigModel(BaseModel, extra="forbid"):
             data["trajectory_processor_config"] = data[
                 "trajectory_processor_config"
             ].model_dump()
-        if "device" not in data
-            data["device"] =
+        if "device" not in data:
+            data["device"] = "auto"
+        data["device"] = get_device(data["device"])
         return data
 
-    # device: PydanticTorchDevice = "auto"
-
-    # @model_validator(mode="before")
-    # @classmethod
-    # def set_device(cls, data):
-    #     if isinstance(data, dict) and (
-    #         "device" not in data or data["device"] == "auto"
-    #     ):
-    #         data["device"] = get_device("auto")
-    #     return data
-
 
 @dataclass
 class DerivedExperienceBufferConfig:
@@ -139,6 +130,7 @@ class ExperienceBuffer(
         self.trajectory_processor.load(
             DerivedTrajectoryProcessorConfig(
                 trajectory_processor_config=trajectory_processor_config,
+                agent_controller_name=config.agent_controller_name,
                 dtype=config.dtype,
                 device=config.learner_device,
             )
@@ -171,26 +163,27 @@ class ExperienceBuffer(
             self.advantages = state_dict["advantages"]
         except FileNotFoundError:
             print(
-                f"{self.config.agent_controller_name}: Tried to load from checkpoint, but
+                f"{self.config.agent_controller_name}: Tried to load experience buffer from checkpoint using the file at location {str(os.path.join(self.config.checkpoint_load_folder, EXPERIENCE_BUFFER_FILE))}, but there is no such file! A blank experience buffer will be used instead."
             )
 
     def save_checkpoint(self, folder_path):
         os.makedirs(folder_path, exist_ok=True)
-        [15 removed lines; their content is not shown in the source diff]
+        if self.config.experience_buffer_config.save_experience_buffer_in_checkpoint:
+            with open(
+                os.path.join(folder_path, EXPERIENCE_BUFFER_FILE),
+                "wb",
+            ) as f:
+                pickle.dump(
+                    {
+                        "agent_ids": self.agent_ids,
+                        "observations": self.observations,
+                        "actions": self.actions,
+                        "log_probs": self.log_probs,
+                        "values": self.values,
+                        "advantages": self.advantages,
+                    },
+                    f,
+                )
         self.trajectory_processor.save_checkpoint(folder_path)
 
     # TODO: update docs
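The new save_experience_buffer_in_checkpoint flag gates the pickle write shown above. A minimal configuration sketch, assuming the remaining fields keep their defaults (how this model is nested inside a full training config is outside this diff):

    from rlgym_learn_algos.ppo.experience_buffer import ExperienceBufferConfigModel

    # Skip pickling the (potentially large) buffer into each checkpoint.
    buffer_config = ExperienceBufferConfigModel(
        max_size=100000,
        device="auto",
        save_experience_buffer_in_checkpoint=False,
    )
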
rlgym_learn_algos/ppo/gae_trajectory_processor.py
@@ -115,6 +115,7 @@ class GAETrajectoryProcessor(
         self.max_returns_per_stats_increment = (
             config.trajectory_processor_config.max_returns_per_stats_increment
         )
+        self.agent_controller_name = config.agent_controller_name
         self.dtype = config.dtype
         self.device = config.device
         self.checkpoint_load_folder = config.checkpoint_load_folder
@@ -127,12 +128,17 @@ class GAETrajectoryProcessor(
         )
 
     def _load_from_checkpoint(self):
-        [6 removed lines; their content is not shown in the source diff]
+        try:
+            with open(
+                os.path.join(self.checkpoint_load_folder, TRAJECTORY_PROCESSOR_FILE),
+                "rt",
+            ) as f:
+                state = json.load(f)
+            self.return_stats.load_state_dict(state["return_running_stats"])
+        except FileNotFoundError:
+            print(
+                f"{self.agent_controller_name}: Tried to load trajectory processor from checkpoint using the trajectory processor file at location {str(os.path.join(self.checkpoint_load_folder, TRAJECTORY_PROCESSOR_FILE))}, but there is no such file! Running stats will be initialized as if this were a new run instead."
+            )
 
     def save_checkpoint(self, folder_path):
         state = {
rlgym_learn_algos/ppo/ppo_agent_controller.py
@@ -253,6 +253,7 @@ class PPOAgentController(
         self.learner.load(
             DerivedPPOLearnerConfig(
                 learner_config=learner_config,
+                agent_controller_name=config.agent_controller_name,
                 obs_space=self.obs_space,
                 action_space=self.action_space,
                 checkpoint_load_folder=learner_checkpoint_load_folder,
@@ -304,33 +305,57 @@ class PPOAgentController(
         random.seed(self.config.base_config.random_seed)
 
     def _load_from_checkpoint(self):
-        [27 removed lines; their content is not shown in the source diff]
+        try:
+            with open(
+                os.path.join(
+                    self.config.agent_controller_config.checkpoint_load_folder,
+                    CURRENT_TRAJECTORIES_FILE,
+                ),
+                "rb",
+            ) as f:
+                current_trajectories: Dict[
+                    int,
+                    EnvTrajectories[AgentID, ActionType, ObsType, RewardType],
+                ] = pickle.load(f)
+        except FileNotFoundError:
+            print(
+                f"{self.config.agent_controller_name}: Tried to load current trajectories from checkpoint using the file at location {str(os.path.join(self.config.agent_controller_config.checkpoint_load_folder, CURRENT_TRAJECTORIES_FILE))}, but there is no such file! Current trajectories will be initialized as an empty dict instead."
+            )
+            current_trajectories = {}
+        try:
+            with open(
+                os.path.join(
+                    self.config.agent_controller_config.checkpoint_load_folder,
+                    ITERATION_SHARED_INFOS_FILE,
+                ),
+                "rb",
+            ) as f:
+                iteration_shared_infos: List[Dict[str, Any]] = pickle.load(f)
+        except FileNotFoundError:
+            print(
+                f"{self.config.agent_controller_name}: Tried to load iteration shared info data from checkpoint using the file at location {str(os.path.join(self.config.agent_controller_config.checkpoint_load_folder, ITERATION_SHARED_INFOS_FILE))}, but there is no such file! Iteration shared info data will be initialized as an empty list instead."
+            )
+            current_trajectories = {}
+        try:
+            with open(
+                os.path.join(
+                    self.config.agent_controller_config.checkpoint_load_folder,
+                    PPO_AGENT_FILE,
+                ),
+                "rt",
+            ) as f:
+                state = json.load(f)
+        except FileNotFoundError:
+            print(
+                f"{self.config.agent_controller_name}: Tried to load PPO agent miscellaneous state data from checkpoint using the file at location {str(os.path.join(self.config.agent_controller_config.checkpoint_load_folder, PPO_AGENT_FILE))}, but there is no such file! This state data will be initialized as if this were a new run instead."
+            )
+            state = {
+                "cur_iteration": 0,
+                "iteration_timesteps": 0,
+                "cumulative_timesteps": 0,
+                "iteration_start_time": time.perf_counter(),
+                "timestep_collection_start_time": time.perf_counter(),
+            }
 
         self.current_trajectories = current_trajectories
         self.iteration_shared_infos = iteration_shared_infos
@@ -16,12 +16,13 @@ from rlgym.api import (
|
|
16
16
|
ObsType,
|
17
17
|
RewardType,
|
18
18
|
)
|
19
|
+
from torch import nn as nn
|
20
|
+
|
19
21
|
from rlgym_learn_algos.util.torch_functions import get_device
|
20
22
|
from rlgym_learn_algos.util.torch_pydantic import (
|
21
23
|
PydanticTorchDevice,
|
22
24
|
PydanticTorchDtype,
|
23
25
|
)
|
24
|
-
from torch import nn as nn
|
25
26
|
|
26
27
|
from .actor import Actor
|
27
28
|
from .critic import Critic
|
@@ -38,43 +39,30 @@ class PPOLearnerConfigModel(BaseModel, extra="forbid"):
     clip_range: float = 0.2
     actor_lr: float = 3e-4
     critic_lr: float = 3e-4
+    advantage_normalization: bool = True
     device: PydanticTorchDevice = "auto"
+    cudnn_benchmark_mode: bool = True
 
     @model_validator(mode="before")
     @classmethod
     def set_device(cls, data):
-        if isinstance(data, dict)
-            "device" not in data
-
-            data["device"] = get_device("
+        if isinstance(data, dict):
+            if "device" not in data:
+                data["device"] = "auto"
+            data["device"] = get_device(data["device"])
         return data
 
-
-
-
-
-
-        # agent_controllers_config = {}
-        # for k, v in data.agent_controllers_config.items():
-        #     if isinstance(v, BaseModel):
-        #         agent_controllers_config[k] = v.model_dump()
-        #     else:
-        #         agent_controllers_config[k] = v
-        # data.agent_controllers_config = agent_controllers_config
-        # elif isinstance(data, dict) and "agent_controllers_config" in data:
-        #     agent_controllers_config = {}
-        #     for k, v in data["agent_controllers_config"].items():
-        #         if isinstance(v, BaseModel):
-        #             agent_controllers_config[k] = v.model_dump()
-        #         else:
-        #             agent_controllers_config[k] = v
-        #     data["agent_controllers_config"] = agent_controllers_config
-        # return data
+    @model_validator(mode="after")
+    def validate_cudnn_benchmark(self):
+        if self.device.type != "cuda":
+            self.cudnn_benchmark_mode = False
+        return self
 
 
 @dataclass
 class DerivedPPOLearnerConfig:
     learner_config: PPOLearnerConfigModel
+    agent_controller_name: str
     obs_space: ObsSpaceType
     action_space: ActionSpaceType
     checkpoint_load_folder: Optional[str] = None
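A minimal sketch of the two new PPOLearnerConfigModel options added above, assuming the remaining fields keep their defaults; the mode="after" validator forces cudnn_benchmark_mode off whenever the resolved device is not CUDA:

    from rlgym_learn_algos.ppo.ppo_learner import PPOLearnerConfigModel

    learner_config = PPOLearnerConfigModel(
        device="auto",
        advantage_normalization=True,  # normalize advantages per minibatch
        cudnn_benchmark_mode=True,     # disabled automatically on non-CUDA devices
    )
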
@@ -127,6 +115,12 @@ class PPOLearner(
     def load(self, config: DerivedPPOLearnerConfig):
         self.config = config
 
+        if (
+            config.learner_config.cudnn_benchmark_mode
+            and config.learner_config.device.type == "cuda"
+        ):
+            torch.backends.cudnn.benchmark = True
+
         self.actor = self.actor_factory(
             config.obs_space, config.action_space, config.learner_config.device
         )
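The flag toggled here is PyTorch's standard cuDNN autotuner switch; a standalone sketch of the equivalent call, assuming a CUDA-enabled torch build:

    import torch

    # Lets cuDNN benchmark and cache the fastest kernels for fixed-size inputs,
    # which is what cudnn_benchmark_mode enables inside PPOLearner.load above.
    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
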
@@ -155,16 +149,26 @@ class PPOLearner(
         total_parameters = actor_params_count + critic_params_count
 
         # Display in a structured manner
-        print("Trainable Parameters:")
-        print(f"{'Component':<10} {'Count':<10}")
+        print(f"{self.config.agent_controller_name}: Trainable Parameters:")
+        print(f"{self.config.agent_controller_name}: {'Component':<10} {'Count':<10}")
         print("-" * 20)
-        print(
-
+        print(
+            f"{self.config.agent_controller_name}: {'Policy':<10} {actor_params_count:<10}"
+        )
+        print(
+            f"{self.config.agent_controller_name}: {'Critic':<10} {critic_params_count:<10}"
+        )
         print("-" * 20)
-        print(
+        print(
+            f"{self.config.agent_controller_name}: {'Total':<10} {total_parameters:<10}"
+        )
 
-        print(
-
+        print(
+            f"{self.config.agent_controller_name}: Current Policy Learning Rate: {self.config.learner_config.actor_lr}"
+        )
+        print(
+            f"{self.config.agent_controller_name}: Current Critic Learning Rate: {self.config.learner_config.critic_lr}"
+        )
         self.cumulative_model_updates = 0
 
         if self.config.checkpoint_load_folder is not None:
@@ -180,7 +184,7 @@ class PPOLearner(
 
         assert os.path.exists(
             self.config.checkpoint_load_folder
-        ), f"PPO Learner cannot find folder: {self.config.checkpoint_load_folder}"
+        ), f"{self.config.agent_controller_name}: PPO Learner cannot find folder: {self.config.checkpoint_load_folder}"
 
         self.actor.load_state_dict(
             torch.load(
@@ -206,11 +210,17 @@ class PPOLearner(
                 map_location=self.config.learner_config.device,
             )
         )
-        [5 removed lines; their content is not shown in the source diff]
+        try:
+            with open(
+                os.path.join(self.config.checkpoint_load_folder, MISC_STATE), "rt"
+            ) as f:
+                misc_state = json.load(f)
+            self.cumulative_model_updates = misc_state["cumulative_model_updates"]
+        except FileNotFoundError:
+            print(
+                f"{self.config.agent_controller_name}: Tried to load the PPO learner's misc state from the file at location {str(os.path.join(self.config.checkpoint_load_folder, MISC_STATE))}, but there is no such file! Miscellaneous stats will be initialized as if this were a new run instead."
+            )
+            self.cumulative_model_updates = 0
 
     def save_checkpoint(self, folder_path):
         os.makedirs(folder_path, exist_ok=True)
@@ -296,6 +306,10 @@ class PPOLearner(
             advantages = batch_advantages[start:stop].to(
                 self.config.learner_config.device
            )
+            if self.config.learner_config.advantage_normalization:
+                advantages = (advantages - torch.mean(advantages)) / (
+                    torch.std(advantages) + 1e-8
+                )
             old_probs = batch_old_probs[start:stop].to(
                 self.config.learner_config.device
             )
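In isolation, the normalization added above is plain per-minibatch standardization with a 1e-8 epsilon; a self-contained sketch:

    import torch

    def normalize_advantages(advantages: torch.Tensor) -> torch.Tensor:
        # Same mean/std normalization and epsilon as the hunk above.
        return (advantages - torch.mean(advantages)) / (torch.std(advantages) + 1e-8)

    print(normalize_advantages(torch.tensor([0.5, -1.2, 2.0, 0.1])))
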
rlgym_learn_algos/ppo/trajectory_processor.py
@@ -16,6 +16,7 @@ TRAJECTORY_PROCESSOR_FILE = "trajectory_processor.json"
 @dataclass
 class DerivedTrajectoryProcessorConfig(Generic[TrajectoryProcessorConfig]):
     trajectory_processor_config: TrajectoryProcessorConfig
+    agent_controller_name: str
     dtype: dtype
     device: device
     checkpoint_load_folder: Optional[str] = None
rlgym_learn_algos/rlgym_learn_algos.cp312-win32.pyd
Binary file
{rlgym_learn_algos-0.2.0.dist-info → rlgym_learn_algos-0.2.2.dist-info}/RECORD
@@ -1,11 +1,13 @@
-rlgym_learn_algos-0.2.
-rlgym_learn_algos-0.2.
-rlgym_learn_algos-0.2.
+rlgym_learn_algos-0.2.2.dist-info/METADATA,sha256=4wwr9xqqVWvZ7HYM4cumHiRdz79gkixfpe11b4MyvSU,2431
+rlgym_learn_algos-0.2.2.dist-info/WHEEL,sha256=_hRredGV19BUVIQG50bodYTQ5yHFvjyY42Je1tLhKbY,92
+rlgym_learn_algos-0.2.2.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
 rlgym_learn_algos/__init__.py,sha256=C7cRdL4lZrpk3ge_4_lGAbGodqWJXM56FfgO0keRPAY,207
+rlgym_learn_algos/conversion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+rlgym_learn_algos/conversion/convert_rlgym_ppo_checkpoint.py,sha256=A9nvzjp3DQNRNL5TAt-u3xE80JDIpYEDqAGNReHvFG0,908
 rlgym_learn_algos/logging/__init__.py,sha256=ouItskWI4ItuoFdL--rt9YXCt7MasA473lYPhmJnrFA,423
 rlgym_learn_algos/logging/dict_metrics_logger.py,sha256=qmqr0HSiHpm5rjyxfAdmXOeBSbgP_t36-e-enpOccnE,1991
 rlgym_learn_algos/logging/metrics_logger.py,sha256=0l69GSSrxRcPm0xAjvF7yEIis7jGNu70unXu3hnK0XE,4122
-rlgym_learn_algos/logging/wandb_metrics_logger.py,sha256=
+rlgym_learn_algos/logging/wandb_metrics_logger.py,sha256=OXyOJzGP4zz0mgy3-FAvR6LW7aZet3Ii8CsI5csw4c4,7051
 rlgym_learn_algos/ppo/__init__.py,sha256=o6B8wCRfeyipSNEGJFyB3SHYmxUytaQelX2zsted5cg,1184
 rlgym_learn_algos/ppo/actor.py,sha256=LZevg0kqRrb4PwF05ePK9b1JIBX04YkWjsPs7swZ9JY,1767
 rlgym_learn_algos/ppo/basic_critic.py,sha256=oyyo8x9K6mi2BsbA6_tRy2Av8Pimb35WspJkPpe8XdQ,1022
@@ -13,18 +15,18 @@ rlgym_learn_algos/ppo/continuous_actor.py,sha256=1vdBUw2mQNFNu6A6ZrAztBjd4DmwjGk
 rlgym_learn_algos/ppo/critic.py,sha256=RB89WtiN52BEq5QCpGAPrASUnasac-Bpg7B0lM3UXHw,689
 rlgym_learn_algos/ppo/discrete_actor.py,sha256=Nuc3EndIQud3NGrkBIQgy-Z-okhXVrj6p6okSGD1KNY,2620
 rlgym_learn_algos/ppo/env_trajectories.py,sha256=gzQBRkzwZhlZeSvWL50cc8AOgBfsg5zUys0aTJj6aZU,3775
-rlgym_learn_algos/ppo/experience_buffer.py,sha256=
+rlgym_learn_algos/ppo/experience_buffer.py,sha256=xDm8NIMdErpv3GyWUBcTvzkLBQa8tW1TXb7OrKRDIu4,11059
 rlgym_learn_algos/ppo/experience_buffer_numpy.py,sha256=Apk4x-pfRnitKJPW6LBZyOPIhgeJs_5EG7BbTCqMwjk,4761
-rlgym_learn_algos/ppo/gae_trajectory_processor.py,sha256=
+rlgym_learn_algos/ppo/gae_trajectory_processor.py,sha256=JK958vasIIiuf3ALcFNlvBgGNhFshK8MhQJjwvxhrAM,5453
 rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py,sha256=RpyDR6GQ1JXvwtoKkx5V3z3WvU9ElJdzfNtpPiZDaTc,6831
 rlgym_learn_algos/ppo/multi_discrete_actor.py,sha256=zSYeBBirjguSv_wO-peo06hioHiVhZQjnd-NYwJxmag,3127
-rlgym_learn_algos/ppo/ppo_agent_controller.py,sha256=
-rlgym_learn_algos/ppo/ppo_learner.py,sha256=
+rlgym_learn_algos/ppo/ppo_agent_controller.py,sha256=h0UR-o2k-_LyeFTzvII3HQHHWyeMJewqLlca8ThtyfA,25105
+rlgym_learn_algos/ppo/ppo_learner.py,sha256=Cbbuz0AMwPCmkQ1YPDdZLkbgZOdyrOLEx89Camn-nGE,15942
 rlgym_learn_algos/ppo/ppo_metrics_logger.py,sha256=niW8xgQLEBCGgTaVyiE_JqsU6RTjV6h-JzM-7c3JT38,2868
 rlgym_learn_algos/ppo/trajectory.py,sha256=IIH_IG8B_HkyxRPf-YsCyF1jQqNjDx752hgzAehG25I,719
-rlgym_learn_algos/ppo/trajectory_processor.py,sha256=
+rlgym_learn_algos/ppo/trajectory_processor.py,sha256=5eY_mNGjqIkhqnbKeaqDvqIWPdg6wD6Ai3fXH2WoXbw,2091
 rlgym_learn_algos/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rlgym_learn_algos/rlgym_learn_algos.cp312-win32.pyd,sha256=
+rlgym_learn_algos/rlgym_learn_algos.cp312-win32.pyd,sha256=auas_7bpNMlCT5PTCIAYgepldvq3LvXcpE4LniUUr64,342016
 rlgym_learn_algos/rlgym_learn_algos.pyi,sha256=NwY-sDZWM06TUiKPzxpfH1Td6G6E8TdxtRPgBSh-PPE,1203
 rlgym_learn_algos/stateful_functions/__init__.py,sha256=QS0KYjuzagNkYiYllXQmjoJn14-G7KZawq1Zvwh8alY,236
 rlgym_learn_algos/stateful_functions/batch_reward_type_numpy_converter.py,sha256=1yte5qYyl9LWdClHZ_YsF7R9dJqQeYfINMdgNF_59Gs,767
@@ -32,6 +34,6 @@ rlgym_learn_algos/stateful_functions/numpy_obs_standardizer.py,sha256=OgtwCaxBGT
 rlgym_learn_algos/stateful_functions/obs_standardizer.py,sha256=qPPc3--J_3mpJJ-QHJjta6dbWWBobL7SYdK5MUP-XMw,606
 rlgym_learn_algos/util/__init__.py,sha256=VPM6SN4T_625H9t30s9EiLeXiEEWgcyRVHa-LLVNrn4,47
 rlgym_learn_algos/util/running_stats.py,sha256=0tiGFpKtHWzMa1CxM_ueBzd_ryX4bJBriC8MXcSLg8w,4479
-rlgym_learn_algos/util/torch_functions.py,sha256=
-rlgym_learn_algos/util/torch_pydantic.py,sha256=
-rlgym_learn_algos-0.2.
+rlgym_learn_algos/util/torch_functions.py,sha256=_uAXhq1YYPneWI3_XXRYsSA3Hn1a8wGjUnI3m9UojdU,3411
+rlgym_learn_algos/util/torch_pydantic.py,sha256=5AbXQcfQtVgLRBSgCj0Hvi_H42WHLu4Oty4l_i22nAo,3531
+rlgym_learn_algos-0.2.2.dist-info/RECORD,,
{rlgym_learn_algos-0.2.0.dist-info → rlgym_learn_algos-0.2.2.dist-info}/licenses/LICENSE
File without changes