rlgym-learn-algos 0.1.5-cp310-cp310-win32.whl → 0.2.0-cp310-cp310-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rlgym_learn_algos/logging/metrics_logger.py +1 -1
- rlgym_learn_algos/logging/wandb_metrics_logger.py +27 -22
- rlgym_learn_algos/ppo/experience_buffer.py +71 -42
- rlgym_learn_algos/ppo/experience_buffer_numpy.py +14 -12
- rlgym_learn_algos/ppo/gae_trajectory_processor.py +2 -11
- rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py +0 -8
- rlgym_learn_algos/ppo/ppo_agent_controller.py +16 -27
- rlgym_learn_algos/ppo/ppo_learner.py +94 -34
- rlgym_learn_algos/ppo/trajectory_processor.py +3 -3
- rlgym_learn_algos/rlgym_learn_algos.cp310-win32.pyd +0 -0
- rlgym_learn_algos/util/torch_pydantic.py +118 -0
- {rlgym_learn_algos-0.1.5.dist-info → rlgym_learn_algos-0.2.0.dist-info}/METADATA +1 -1
- {rlgym_learn_algos-0.1.5.dist-info → rlgym_learn_algos-0.2.0.dist-info}/RECORD +20 -19
- {rlgym_learn_algos-0.1.5.dist-info → rlgym_learn_algos-0.2.0.dist-info}/WHEEL +1 -1
- {rlgym_learn_algos-0.1.5.dist-info → rlgym_learn_algos-0.2.0.dist-info}/licenses/LICENSE +0 -0
rlgym_learn_algos/logging/metrics_logger.py

@@ -12,9 +12,9 @@ MetricsLoggerAdditionalDerivedConfig = TypeVar("MetricsLoggerAdditionalDerivedCo
 class DerivedMetricsLoggerConfig(
     Generic[MetricsLoggerConfig, MetricsLoggerAdditionalDerivedConfig]
 ):
+    metrics_logger_config: MetricsLoggerConfig = None
     checkpoint_load_folder: Optional[str] = None
     agent_controller_name: str = ""
-    metrics_logger_config: MetricsLoggerConfig = None
     additional_derived_config: MetricsLoggerAdditionalDerivedConfig = None

rlgym_learn_algos/logging/wandb_metrics_logger.py

@@ -29,7 +29,7 @@ def convert_nested_dict(d):
     return new


-class WandbMetricsLoggerConfigModel(BaseModel):
+class WandbMetricsLoggerConfigModel(BaseModel, extra="forbid"):
     enable: bool = True
     project: str = "rlgym-learn"
     group: str = "unnamed-runs"

@@ -37,6 +37,7 @@ class WandbMetricsLoggerConfigModel(BaseModel):
     id: Optional[str] = None
     new_run_with_timestamp_suffix: bool = False
     additional_wandb_run_config: Dict[str, Any] = Field(default_factory=dict)
+    settings_kwargs: Dict[str, Any] = Field(default_factory=dict)


 @dataclass

@@ -76,6 +77,7 @@ class WandbMetricsLogger(
     ):
         self.inner_metrics_logger = inner_metrics_logger
         self.checkpoint_file_name = checkpoint_file_name
+        self.run_id = None

     def collect_env_metrics(self, data: List[Dict[str, Any]]):
         self.inner_metrics_logger.collect_env_metrics(data)

@@ -107,17 +109,11 @@ class WandbMetricsLogger(
             self.run_id = None
             return

-        if
-
-
-
-            if self.run_id is not None:
-                print(
-                    f"{self.config.agent_controller_name}: Wandb run id from checkpoint ({self.run_id}) is being overridden by wandb run id from config: {self.config.metrics_logger_config.id}"
-                )
+        if self.run_id is not None and self.config.metrics_logger_config.id is not None:
+            print(
+                f"{self.config.agent_controller_name}: Wandb run id from checkpoint ({self.run_id}) is being overridden by wandb run id from config: {self.config.metrics_logger_config.id}"
+            )
             self.run_id = self.config.metrics_logger_config.id
-        else:
-            self.run_id = None

         wandb_config = {
             **self.config.additional_derived_config.derived_wandb_run_config,

@@ -145,22 +141,31 @@ class WandbMetricsLogger(
             id=self.run_id,
             resume="allow",
             reinit=True,
+            settings=wandb.Settings(
+                **self.config.metrics_logger_config.settings_kwargs
+            ),
         )
         self.run_id = self.wandb_run.id
         print(f"{self.config.agent_controller_name}: Created wandb run! {self.run_id}")

     def _load_from_checkpoint(self):
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            with open(
+                os.path.join(
+                    self.config.checkpoint_load_folder,
+                    self.checkpoint_file_name,
+                ),
+                "rt",
+            ) as f:
+                state = json.load(f)
+            if "run_id" in state:
+                self.run_id = state["run_id"]
+            else:
+                self.run_id = None
+        except FileNotFoundError:
+            print(
+                f"{self.config.agent_controller_name}: Tried to load from checkpoint, but checkpoint didn't contain a wandb run! A new run will be created based on the config values."
+            )
             self.run_id = None

     def save_checkpoint(self, folder_path):
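The new settings_kwargs field is forwarded verbatim into wandb.Settings when the run is created. A minimal sketch of a config fragment that could use it; the init_timeout key is only an illustrative wandb setting, not something this package defines:

    metrics_logger_config = {
        "project": "rlgym-learn",
        "group": "unnamed-runs",
        # unpacked into wandb.Settings(**settings_kwargs) by WandbMetricsLogger
        "settings_kwargs": {"init_timeout": 120},
    }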
rlgym_learn_algos/ppo/experience_buffer.py

@@ -8,6 +8,9 @@ import torch
 from pydantic import BaseModel, Field, model_validator
 from rlgym.api import ActionType, AgentID, ObsType, RewardType

+from rlgym_learn_algos.util.torch_functions import get_device
+from rlgym_learn_algos.util.torch_pydantic import PydanticTorchDevice
+
 from .trajectory import Trajectory
 from .trajectory_processor import (
     DerivedTrajectoryProcessorConfig,

@@ -19,8 +22,9 @@ from .trajectory_processor import (
 EXPERIENCE_BUFFER_FILE = "experience_buffer.pkl"


-class ExperienceBufferConfigModel(BaseModel):
+class ExperienceBufferConfigModel(BaseModel, extra="forbid"):
     max_size: int = 100000
+    device: PydanticTorchDevice = "auto"
     trajectory_processor_config: Dict[str, Any] = Field(default_factory=dict)

     @model_validator(mode="before")

@@ -31,21 +35,35 @@ class ExperienceBufferConfigModel(BaseModel):
             data.trajectory_processor_config = (
                 data.trajectory_processor_config.model_dump()
             )
-        elif isinstance(data, dict)
-            if
-            data["trajectory_processor_config"]
-                "trajectory_processor_config"
-
+        elif isinstance(data, dict):
+            if "trajectory_processor_config" in data:
+                if isinstance(data["trajectory_processor_config"], BaseModel):
+                    data["trajectory_processor_config"] = data[
+                        "trajectory_processor_config"
+                    ].model_dump()
+            if "device" not in data or data["device"] == "auto":
+                data["device"] = get_device("auto")
         return data

+    # device: PydanticTorchDevice = "auto"
+
+    # @model_validator(mode="before")
+    # @classmethod
+    # def set_device(cls, data):
+    #     if isinstance(data, dict) and (
+    #         "device" not in data or data["device"] == "auto"
+    #     ):
+    #         data["device"] = get_device("auto")
+    #     return data
+

 @dataclass
 class DerivedExperienceBufferConfig:
-
+    experience_buffer_config: ExperienceBufferConfigModel
+    agent_controller_name: str
     seed: int
-    dtype:
-
-    trajectory_processor_config: Dict[str, Any]
+    dtype: torch.dtype
+    learner_device: torch.device
     checkpoint_load_folder: Optional[str] = None

@@ -111,42 +129,50 @@ class ExperienceBuffer(
         self.agent_ids: List[AgentID] = []
         self.observations: List[ObsType] = []
         self.actions: List[ActionType] = []
-        self.log_probs = torch.FloatTensor()
-        self.values = torch.FloatTensor()
-        self.advantages = torch.FloatTensor()

     def load(self, config: DerivedExperienceBufferConfig):
         self.config = config
         self.rng = np.random.RandomState(config.seed)
         trajectory_processor_config = self.trajectory_processor.validate_config(
-            config.trajectory_processor_config
+            config.experience_buffer_config.trajectory_processor_config
         )
         self.trajectory_processor.load(
             DerivedTrajectoryProcessorConfig(
                 trajectory_processor_config=trajectory_processor_config,
                 dtype=config.dtype,
-                device=config.
+                device=config.learner_device,
             )
         )
+        self.log_probs = torch.tensor([], dtype=config.dtype)
+        self.values = torch.tensor([], dtype=config.dtype)
+        self.advantages = torch.tensor([], dtype=config.dtype)
         if self.config.checkpoint_load_folder is not None:
             self._load_from_checkpoint()
-        self.log_probs = self.log_probs.to(config.
-        self.values = self.values.to(config.
-        self.advantages = self.advantages.to(config.
+        self.log_probs = self.log_probs.to(config.learner_device)
+        self.values = self.values.to(config.learner_device)
+        self.advantages = self.advantages.to(config.learner_device)

     def _load_from_checkpoint(self):
         # lazy way
-
-
-
-
-
-
-
-
-
-
-
+        # TODO: don't use pickle for torch things, use torch.load because of map_location. Or maybe define a custom unpickler for this? Or maybe one already exists?
+        try:
+            with open(
+                os.path.join(
+                    self.config.checkpoint_load_folder, EXPERIENCE_BUFFER_FILE
+                ),
+                "rb",
+            ) as f:
+                state_dict = pickle.load(f)
+            self.agent_ids = state_dict["agent_ids"]
+            self.observations = state_dict["observations"]
+            self.actions = state_dict["actions"]
+            self.log_probs = state_dict["log_probs"]
+            self.values = state_dict["values"]
+            self.advantages = state_dict["advantages"]
+        except FileNotFoundError:
+            print(
+                f"{self.config.agent_controller_name}: Tried to load from checkpoint, but checkpoint didn't contain a saved experience buffer! A blank experience buffer will be used instead."
+            )

     def save_checkpoint(self, folder_path):
         os.makedirs(folder_path, exist_ok=True)

@@ -195,29 +221,36 @@ class ExperienceBuffer(
             exp_buffer_data
         )

-        self.agent_ids = _cat_list(
+        self.agent_ids = _cat_list(
+            self.agent_ids, agent_ids, self.config.experience_buffer_config.max_size
+        )
         self.observations = _cat_list(
-            self.observations,
+            self.observations,
+            observations,
+            self.config.experience_buffer_config.max_size,
+        )
+        self.actions = _cat_list(
+            self.actions, actions, self.config.experience_buffer_config.max_size
         )
-        self.actions = _cat_list(self.actions, actions, self.config.max_size)
         self.log_probs = _cat(
             self.log_probs,
             log_probs,
-            self.config.max_size,
+            self.config.experience_buffer_config.max_size,
         )
         self.values = _cat(
             self.values,
             values,
-            self.config.max_size,
+            self.config.experience_buffer_config.max_size,
         )
         self.advantages = _cat(
             self.advantages,
             advantages,
-            self.config.max_size,
+            self.config.experience_buffer_config.max_size,
        )

         return trajectory_processor_data

+    # TODO: tensordict?
     def _get_samples(self, indices) -> Tuple[
         Iterable[AgentID],
         Iterable[ObsType],

@@ -242,18 +275,14 @@ class ExperienceBuffer(
         :param batch_size: size of each batch yielded by the generator.
         :return:
         """
-        if self.config.
+        if self.config.learner_device.type != "cpu":
             torch.cuda.current_stream().synchronize()
         total_samples = self.values.shape[0]
         indices = self.rng.permutation(total_samples)
         start_idx = 0
-        batches = []
         while start_idx + batch_size <= total_samples:
-
-                self._get_samples(indices[start_idx : start_idx + batch_size])
-            )
+            yield self._get_samples(indices[start_idx : start_idx + batch_size])
             start_idx += batch_size
-        return batches

     def clear(self):
         """
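get_all_batches_shuffled now yields batches lazily instead of building and returning a list, so callers iterate over the call directly. A self-contained stand-in that mirrors the new control flow (names here are illustrative, not part of the package):

    import numpy as np

    def batches_shuffled(total_samples, batch_size, rng):
        # Mirrors the generator above: trailing samples that do not fill a
        # whole batch are dropped because of the <= bound.
        indices = rng.permutation(total_samples)
        start_idx = 0
        while start_idx + batch_size <= total_samples:
            yield indices[start_idx : start_idx + batch_size]
            start_idx += batch_size

    for batch_indices in batches_shuffled(10, 4, np.random.RandomState(0)):
        print(batch_indices)  # two batches of 4; the last 2 samples are skipped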
@@ -265,4 +294,4 @@ class ExperienceBuffer(
         del self.log_probs
         del self.values
         del self.advantages
-        self.__init__(self.
+        self.__init__(self.trajectory_processor)
rlgym_learn_algos/ppo/experience_buffer_numpy.py

@@ -76,25 +76,31 @@ class NumpyExperienceBuffer(
             exp_buffer_data
         )

-        self.agent_ids = _cat_list(
+        self.agent_ids = _cat_list(
+            self.agent_ids, agent_ids, self.config.experience_buffer_config.max_size
+        )
         self.observations = _cat_numpy(
-            self.observations,
+            self.observations,
+            observations,
+            self.config.experience_buffer_config.max_size,
+        )
+        self.actions = _cat_numpy(
+            self.actions, actions, self.config.experience_buffer_config.max_size
         )
-        self.actions = _cat_numpy(self.actions, actions, self.config.max_size)
         self.log_probs = _cat(
             self.log_probs,
             log_probs,
-            self.config.max_size,
+            self.config.experience_buffer_config.max_size,
         )
         self.values = _cat(
             self.values,
             values,
-            self.config.max_size,
+            self.config.experience_buffer_config.max_size,
         )
         self.advantages = _cat(
             self.advantages,
             advantages,
-            self.config.max_size,
+            self.config.experience_buffer_config.max_size,
         )

         return trajectory_processor_data

@@ -116,18 +122,14 @@ class NumpyExperienceBuffer(
         :param batch_size: size of each batch yielded by the generator.
         :return:
         """
-        if self.config.device != "cpu":
+        if self.config.experience_buffer_config.device.type != "cpu":
             torch.cuda.current_stream().synchronize()
         total_samples = self.values.shape[0]
         indices = self.rng.permutation(total_samples)
         start_idx = 0
-        batches = []
         while start_idx + batch_size <= total_samples:
-
-                self._get_samples(indices[start_idx : start_idx + batch_size])
-            )
+            yield self._get_samples(indices[start_idx : start_idx + batch_size])
             start_idx += batch_size
-        return batches

     def clear(self):
         """
rlgym_learn_algos/ppo/gae_trajectory_processor.py

@@ -20,7 +20,7 @@ from ..ppo import RustDerivedGAETrajectoryProcessorConfig, RustGAETrajectoryProc
 from .trajectory_processor import TRAJECTORY_PROCESSOR_FILE, TrajectoryProcessor


-class GAETrajectoryProcessorConfigModel(BaseModel):
+class GAETrajectoryProcessorConfigModel(BaseModel, extra="forbid"):
     gamma: float = 0.99
     lmbda: float = 0.95
     standardize_returns: bool = True

@@ -122,7 +122,7 @@ class GAETrajectoryProcessor(
             self._load_from_checkpoint()
         self.rust_gae_trajectory_processor.load(
             RustDerivedGAETrajectoryProcessorConfig(
-                self.gamma, self.lmbda, np.dtype(self.dtype)
+                self.gamma, self.lmbda, np.dtype(str(self.dtype)[6:])
             )
         )

@@ -132,19 +132,10 @@ class GAETrajectoryProcessor(
             "rt",
         ) as f:
             state = json.load(f)
-        # TODO: why are these 4 getting saved/loaded?? They should just come from config
-        self.gamma = state["gamma"]
-        self.lmbda = state["lambda"]
-        self.standardize_returns = state["standardize_returns"]
-        self.max_returns_per_stats_increment = state["max_returns_per_stats_increment"]
         self.return_stats.load_state_dict(state["return_running_stats"])

     def save_checkpoint(self, folder_path):
         state = {
-            "gamma": self.gamma,
-            "lambda": self.lmbda,
-            "standardize_returns": self.standardize_returns,
-            "max_returns_per_stats_increment": self.max_returns_per_stats_increment,
             "return_running_stats": self.return_stats.state_dict(),
         }
         with open(
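The np.dtype(str(self.dtype)[6:]) change above converts a torch dtype to a numpy dtype by name: str(torch.float32) is "torch.float32", and slicing off the six-character "torch." prefix leaves a name numpy accepts. A quick runnable check:

    import numpy as np
    import torch

    name = str(torch.float32)[6:]
    print(name)            # float32
    print(np.dtype(name))  # float32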
rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py

@@ -161,18 +161,10 @@ class GAETrajectoryProcessorPurePython(
             "rt",
         ) as f:
             state = json.load(f)
-        self.gamma = state["gamma"]
-        self.lmbda = state["lambda"]
-        self.standardize_returns = state["standardize_returns"]
-        self.max_returns_per_stats_increment = state["max_returns_per_stats_increment"]
         self.return_stats.load_state_dict(state["return_running_stats"])

     def save_checkpoint(self, folder_path):
         state = {
-            "gamma": self.gamma,
-            "lambda": self.lmbda,
-            "standardize_returns": self.standardize_returns,
-            "max_returns_per_stats_increment": self.max_returns_per_stats_increment,
             "return_running_stats": self.return_stats.state_dict(),
         }
         with open(
rlgym_learn_algos/ppo/ppo_agent_controller.py

@@ -24,8 +24,6 @@ from rlgym.api import (
 )
 from rlgym_learn import EnvActionResponse, EnvActionResponseType, Timestep
 from rlgym_learn.api.agent_controller import AgentController
-from torch import device as _device
-
 from rlgym_learn_algos.logging import (
     DerivedMetricsLoggerConfig,
     MetricsLogger,

@@ -36,6 +34,7 @@ from rlgym_learn_algos.logging import (
 )
 from rlgym_learn_algos.stateful_functions import ObsStandardizer
 from rlgym_learn_algos.util.torch_functions import get_device
+from torch import device as _device

 from .actor import Actor
 from .critic import Critic

@@ -62,15 +61,13 @@ ITERATION_SHARED_INFOS_FILE = "iteration_shared_infos.pkl"
 CURRENT_TRAJECTORIES_FILE = "current_trajectories.pkl"


-class PPOAgentControllerConfigModel(BaseModel):
+class PPOAgentControllerConfigModel(BaseModel, extra="forbid"):
     timesteps_per_iteration: int = 50000
     save_every_ts: int = 1_000_000
     add_unix_timestamp: bool = True
     checkpoint_load_folder: Optional[str] = None
     n_checkpoints_to_keep: int = 5
     random_seed: int = 123
-    dtype: str = "float32"
-    device: Optional[str] = None
     learner_config: PPOLearnerConfigModel = Field(default_factory=PPOLearnerConfigModel)
     experience_buffer_config: ExperienceBufferConfigModel = Field(
         default_factory=ExperienceBufferConfigModel

@@ -190,11 +187,9 @@ class PPOAgentController(

     def load(self, config):
         self.config = config
-
-
-
-        self.device = get_device(device)
-        print(f"{self.config.agent_controller_name}: Using device {self.device}")
+        print(
+            f"{self.config.agent_controller_name}: Using device {config.agent_controller_config.learner_config.device}"
+        )
         agent_controller_config = config.agent_controller_config
         learner_config = config.agent_controller_config.learner_config
         experience_buffer_config = (

@@ -234,14 +229,14 @@ class PPOAgentController(
             # TODO: this doesn't seem to be working
             if abs_save_folder == loaded_checkpoint_runs_folder:
                 print(
-                    "Using the loaded checkpoint's run folder as the checkpoints save folder."
+                    f"{config.agent_controller_name}: Using the loaded checkpoint's run folder as the checkpoints save folder."
                 )
                 checkpoints_save_folder = os.path.abspath(
                     os.path.join(agent_controller_config.checkpoint_load_folder, "..")
                 )
             else:
                 print(
-                    "Runs folder in config does not align with loaded checkpoint's runs folder. Creating new run in the config-based runs folder."
+                    f"{config.agent_controller_name}: Runs folder in config does not align with loaded checkpoint's runs folder. Creating new run in the config-based runs folder."
                 )
                 checkpoints_save_folder = os.path.join(
                     config.save_folder, agent_controller_config.run_name + run_suffix

@@ -257,26 +252,19 @@ class PPOAgentController(

         self.learner.load(
             DerivedPPOLearnerConfig(
+                learner_config=learner_config,
                 obs_space=self.obs_space,
                 action_space=self.action_space,
-                n_epochs=learner_config.n_epochs,
-                batch_size=learner_config.batch_size,
-                n_minibatches=learner_config.n_minibatches,
-                ent_coef=learner_config.ent_coef,
-                clip_range=learner_config.clip_range,
-                actor_lr=learner_config.actor_lr,
-                critic_lr=learner_config.critic_lr,
-                device=self.device,
                 checkpoint_load_folder=learner_checkpoint_load_folder,
             )
         )
         self.experience_buffer.load(
             DerivedExperienceBufferConfig(
-
-
-
-
-
+                experience_buffer_config=experience_buffer_config,
+                agent_controller_name=config.agent_controller_name,
+                seed=config.base_config.random_seed,
+                dtype=agent_controller_config.learner_config.dtype,
+                learner_device=agent_controller_config.learner_config.device,
                 checkpoint_load_folder=experience_buffer_checkpoint_load_folder,
             )
         )

@@ -301,9 +289,9 @@ class PPOAgentController(
             additional_derived_config = None
         self.metrics_logger.load(
             DerivedMetricsLoggerConfig(
+                metrics_logger_config=metrics_logger_config,
                 checkpoint_load_folder=metrics_logger_checkpoint_load_folder,
                 agent_controller_name=config.agent_controller_name,
-                metrics_logger_config=metrics_logger_config,
                 additional_derived_config=additional_derived_config,
             )
         )

@@ -465,6 +453,7 @@ class PPOAgentController(
         ):
             self.timestep_collection_end_time = time.perf_counter()
             self._learn()
+            self.cur_iteration += 1
             if self.ts_since_last_save >= self.config.agent_controller_config.save_every_ts:
                 self.save_checkpoint()
                 self.ts_since_last_save = 0

@@ -563,5 +552,5 @@ class PPOAgentController(
         for idx, (start, stop) in enumerate(traj_timestep_idx_ranges):
             self.current_trajectories[idx].val_preds = val_preds[start : stop - 1]
             self.current_trajectories[idx].final_val_pred = val_preds[stop - 1]
-        if self.device != "cpu":
+        if self.config.agent_controller_config.learner_config.device.type != "cpu":
             torch.cuda.current_stream().synchronize()
rlgym_learn_algos/ppo/ppo_learner.py

@@ -7,7 +7,7 @@ from typing import Generic, Optional

 import numpy as np
 import torch
-from pydantic import BaseModel
+from pydantic import BaseModel, field_serializer, model_validator
 from rlgym.api import (
     ActionSpaceType,
     ActionType,

@@ -16,6 +16,11 @@ from rlgym.api import (
     ObsType,
     RewardType,
 )
+from rlgym_learn_algos.util.torch_functions import get_device
+from rlgym_learn_algos.util.torch_pydantic import (
+    PydanticTorchDevice,
+    PydanticTorchDtype,
+)
 from torch import nn as nn

 from .actor import Actor

@@ -24,7 +29,8 @@ from .experience_buffer import ExperienceBuffer
 from .trajectory_processor import TrajectoryProcessorConfig, TrajectoryProcessorData


-class PPOLearnerConfigModel(BaseModel):
+class PPOLearnerConfigModel(BaseModel, extra="forbid"):
+    dtype: PydanticTorchDtype = torch.float32
     n_epochs: int = 1
     batch_size: int = 50000
     n_minibatches: int = 1

@@ -32,20 +38,45 @@ class PPOLearnerConfigModel(BaseModel):
     clip_range: float = 0.2
     actor_lr: float = 3e-4
     critic_lr: float = 3e-4
+    device: PydanticTorchDevice = "auto"
+
+    @model_validator(mode="before")
+    @classmethod
+    def set_device(cls, data):
+        if isinstance(data, dict) and (
+            "device" not in data or data["device"] == "auto"
+        ):
+            data["device"] = get_device("auto")
+        return data
+
+
+    # @model_validator(mode="before")
+    # @classmethod
+    # def set_agent_controllers_config(cls, data):
+    #     if isinstance(data, LearningCoordinatorConfigModel):
+    #         agent_controllers_config = {}
+    #         for k, v in data.agent_controllers_config.items():
+    #             if isinstance(v, BaseModel):
+    #                 agent_controllers_config[k] = v.model_dump()
+    #             else:
+    #                 agent_controllers_config[k] = v
+    #         data.agent_controllers_config = agent_controllers_config
+    #     elif isinstance(data, dict) and "agent_controllers_config" in data:
+    #         agent_controllers_config = {}
+    #         for k, v in data["agent_controllers_config"].items():
+    #             if isinstance(v, BaseModel):
+    #                 agent_controllers_config[k] = v.model_dump()
+    #             else:
+    #                 agent_controllers_config[k] = v
+    #         data["agent_controllers_config"] = agent_controllers_config
+    #     return data


 @dataclass
 class DerivedPPOLearnerConfig:
+    learner_config: PPOLearnerConfigModel
     obs_space: ObsSpaceType
     action_space: ActionSpaceType
-    n_epochs: int = 10
-    batch_size: int = 50000
-    n_minibatches: int = 1
-    ent_coef: float = 0.005
-    clip_range: float = 0.2
-    actor_lr: float = 3e-4
-    critic_lr: float = 3e-4
-    device: str = "auto"
     checkpoint_load_folder: Optional[str] = None
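With dtype and device now fields of PPOLearnerConfigModel (and device also on ExperienceBufferConfigModel), the nested agent controller config gains those keys under learner_config and experience_buffer_config. A sketch of what such a config could look like; the values are illustrative defaults, and "auto" is resolved to a concrete device by the set_device validator:

    ppo_agent_controller_config = {
        "timesteps_per_iteration": 50_000,
        "learner_config": {
            "dtype": "float32",
            "batch_size": 50_000,
            "n_minibatches": 1,
            "device": "auto",  # replaced via get_device("auto") before validation
        },
        "experience_buffer_config": {
            "max_size": 100_000,
            "device": "auto",
        },
    }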
|
@@ -97,15 +128,17 @@ class PPOLearner(
         self.config = config

         self.actor = self.actor_factory(
-            config.obs_space, config.action_space, config.device
+            config.obs_space, config.action_space, config.learner_config.device
+        )
+        self.critic = self.critic_factory(
+            config.obs_space, config.learner_config.device
         )
-        self.critic = self.critic_factory(config.obs_space, config.device)

         self.actor_optimizer = torch.optim.Adam(
-            self.actor.parameters(), lr=self.config.actor_lr
+            self.actor.parameters(), lr=self.config.learner_config.actor_lr
         )
         self.critic_optimizer = torch.optim.Adam(
-            self.critic.parameters(), lr=self.config.critic_lr
+            self.critic.parameters(), lr=self.config.learner_config.critic_lr
         )
         self.critic_loss_fn = torch.nn.MSELoss()

@@ -130,14 +163,17 @@ class PPOLearner(
         print("-" * 20)
         print(f"{'Total':<10} {total_parameters:<10}")

-        print(f"Current Policy Learning Rate: {self.config.actor_lr}")
-        print(f"Current Critic Learning Rate: {self.config.critic_lr}")
+        print(f"Current Policy Learning Rate: {self.config.learner_config.actor_lr}")
+        print(f"Current Critic Learning Rate: {self.config.learner_config.critic_lr}")
         self.cumulative_model_updates = 0

         if self.config.checkpoint_load_folder is not None:
             self._load_from_checkpoint()
         self.minibatch_size = int(
-            np.ceil(
+            np.ceil(
+                self.config.learner_config.batch_size
+                / self.config.learner_config.n_minibatches
+            )
         )

     def _load_from_checkpoint(self):

@@ -147,19 +183,27 @@ class PPOLearner(
         ), f"PPO Learner cannot find folder: {self.config.checkpoint_load_folder}"

         self.actor.load_state_dict(
-            torch.load(
+            torch.load(
+                os.path.join(self.config.checkpoint_load_folder, ACTOR_FILE),
+                map_location=self.config.learner_config.device,
+            )
         )
         self.critic.load_state_dict(
-            torch.load(
+            torch.load(
+                os.path.join(self.config.checkpoint_load_folder, CRITIC_FILE),
+                map_location=self.config.learner_config.device,
+            )
         )
         self.actor_optimizer.load_state_dict(
             torch.load(
-                os.path.join(self.config.checkpoint_load_folder, ACTOR_OPTIMIZER_FILE)
+                os.path.join(self.config.checkpoint_load_folder, ACTOR_OPTIMIZER_FILE),
+                map_location=self.config.learner_config.device,
             )
         )
         self.critic_optimizer.load_state_dict(
             torch.load(
-                os.path.join(self.config.checkpoint_load_folder, CRITIC_OPTIMIZER_FILE)
+                os.path.join(self.config.checkpoint_load_folder, CRITIC_OPTIMIZER_FILE),
+                map_location=self.config.learner_config.device,
             )
         )
         with open(

@@ -215,9 +259,11 @@ class PPOLearner(
         critic_before = torch.nn.utils.parameters_to_vector(self.critic.parameters())

         t1 = time.time()
-        for epoch in range(self.config.n_epochs):
+        for epoch in range(self.config.learner_config.n_epochs):
             # Get all shuffled batches from the experience buffer.
-            batches = exp.get_all_batches_shuffled(
+            batches = exp.get_all_batches_shuffled(
+                self.config.learner_config.batch_size
+            )
             for batch in batches:
                 (
                     batch_agent_ids,

@@ -232,20 +278,29 @@ class PPOLearner(
                 self.critic_optimizer.zero_grad()

                 for minibatch_slice in range(
-                    0, self.config.batch_size, self.minibatch_size
+                    0, self.config.learner_config.batch_size, self.minibatch_size
                 ):
                     # Send everything to the device and enforce correct shapes.
                     start = minibatch_slice
-                    stop = min(
-
+                    stop = min(
+                        start + self.minibatch_size,
+                        self.config.learner_config.batch_size,
+                    )
+                    minibatch_ratio = (
+                        stop - start
+                    ) / self.config.learner_config.batch_size

                     agent_ids = batch_agent_ids[start:stop]
                     obs = batch_obs[start:stop]
                     acts = batch_acts[start:stop]
-                    advantages = batch_advantages[start:stop].to(
-
+                    advantages = batch_advantages[start:stop].to(
+                        self.config.learner_config.device
+                    )
+                    old_probs = batch_old_probs[start:stop].to(
+                        self.config.learner_config.device
+                    )
                     target_values = batch_target_values[start:stop].to(
-                        self.config.device
+                        self.config.learner_config.device
                     )

                     # Compute value estimates.
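The new minibatch_ratio weights each minibatch's contribution by its share of the batch, which matters when batch_size is not divisible by n_minibatches and the last slice is shorter. A small worked check of the arithmetic:

    import numpy as np

    batch_size = 50_000
    n_minibatches = 3
    minibatch_size = int(np.ceil(batch_size / n_minibatches))  # 16667

    ratios = []
    for start in range(0, batch_size, minibatch_size):
        stop = min(start + minibatch_size, batch_size)
        ratios.append((stop - start) / batch_size)

    print(ratios)  # [0.33334, 0.33334, 0.33332] -- the weights sum to 1.0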
@@ -262,8 +317,8 @@ class PPOLearner(
                     ratio = torch.exp(log_probs - old_probs)
                     clipped = torch.clamp(
                         ratio,
-                        1.0 - self.config.clip_range,
-                        1.0 + self.config.clip_range,
+                        1.0 - self.config.learner_config.clip_range,
+                        1.0 + self.config.learner_config.clip_range,
                     )

                     # Compute KL divergence & clip fraction using SB3 method for reporting.

@@ -274,7 +329,10 @@ class PPOLearner(

                     # From the stable-baselines3 implementation of PPO.
                     clip_fraction = torch.mean(
-                        (
+                        (
+                            torch.abs(ratio - 1)
+                            > self.config.learner_config.clip_range
+                        ).float()
                     ).to(device="cpu", non_blocking=True)
                     clip_fractions.append((clip_fraction, minibatch_ratio))

@@ -285,7 +343,9 @@ class PPOLearner(
                     value_loss = (
                         self.critic_loss_fn(vals, target_values) * minibatch_ratio
                     )
-                    ppo_loss =
+                    ppo_loss = (
+                        actor_loss - entropy * self.config.learner_config.ent_coef
+                    )

                     ppo_loss.backward()
                     value_loss.backward()

@@ -312,7 +372,7 @@ class PPOLearner(
         actor_update_magnitude = (actor_before - actor_after).norm().cpu().item()
         critic_update_magnitude = (critic_before - critic_after).norm().cpu().item()

-        if self.config.device != "cpu":
+        if self.config.learner_config.device.type != "cpu":
             torch.cuda.current_stream().synchronize()

         tot_clip = sum(
rlgym_learn_algos/ppo/trajectory_processor.py

@@ -3,7 +3,7 @@ from dataclasses import dataclass
 from typing import Any, Dict, Generic, List, Optional, Tuple, TypeVar

 from rlgym.api import ActionType, AgentID, ObsType, RewardType
-from torch import Tensor
+from torch import Tensor, device, dtype

 from .trajectory import Trajectory

@@ -16,8 +16,8 @@ TRAJECTORY_PROCESSOR_FILE = "trajectory_processor.json"
 @dataclass
 class DerivedTrajectoryProcessorConfig(Generic[TrajectoryProcessorConfig]):
     trajectory_processor_config: TrajectoryProcessorConfig
-    dtype:
-    device:
+    dtype: dtype
+    device: device
     checkpoint_load_folder: Optional[str] = None

rlgym_learn_algos/rlgym_learn_algos.cp310-win32.pyd

Binary file (no content diff shown).
rlgym_learn_algos/util/torch_pydantic.py (new file)

@@ -0,0 +1,118 @@
+from typing import Annotated, Any
+
+import torch
+from pydantic import (
+    BaseModel,
+    GetCoreSchemaHandler,
+    GetJsonSchemaHandler,
+    ValidationError,
+)
+from pydantic.json_schema import JsonSchemaValue
+from pydantic_core import core_schema
+
+dtype_str_regex = "|".join(
+    set(
+        f"({str(v)[6:]})" for v in torch.__dict__.values() if isinstance(v, torch.dtype)
+    )
+)
+device_str_regex = (
+    "("
+    + "|".join(
+        f"({v})"
+        for v in [
+            "cpu",
+            "cuda",
+            "ipu",
+            "xpu",
+            "mkldnn",
+            "opengl",
+            "opencl",
+            "ideep",
+            "hip",
+            "ve",
+            "fpga",
+            "maia",
+            "xla",
+            "lazy",
+            "vulkan",
+            "mps",
+            "meta",
+            "hpu",
+            "mtia",
+            "privateuseone",
+        ]
+    )
+    + ")(:\d+)"
+)
+
+
+# Created using the example here: https://docs.pydantic.dev/latest/concepts/types/#handling-third-party-types
+class _TorchDtypePydanticAnnotation:
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls,
+        _source_type: Any,
+        _handler: GetCoreSchemaHandler,
+    ) -> core_schema.CoreSchema:
+        from_str_schema = core_schema.chain_schema(
+            [
+                core_schema.str_schema(pattern=dtype_str_regex),
+                core_schema.no_info_plain_validator_function(
+                    lambda v: getattr(torch, v)
+                ),
+            ]
+        )
+
+        return core_schema.json_or_python_schema(
+            json_schema=from_str_schema,
+            python_schema=core_schema.union_schema(
+                [
+                    # check if it's an instance first before doing any further work
+                    core_schema.is_instance_schema(torch.dtype),
+                    from_str_schema,
+                ]
+            ),
+            serialization=core_schema.plain_serializer_function_ser_schema(
+                lambda v: str(v)[6:]
+            ),
+        )
+
+
+class _TorchDevicePydanticAnnotation:
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls,
+        _source_type: Any,
+        _handler: GetCoreSchemaHandler,
+    ) -> core_schema.CoreSchema:
+        from_str_schema = core_schema.chain_schema(
+            [
+                core_schema.str_schema(pattern=device_str_regex),
+                core_schema.no_info_plain_validator_function(lambda v: torch.device(v)),
+            ]
+        )
+        from_int_schema = core_schema.chain_schema(
+            [
+                core_schema.int_schema(ge=0),
+                core_schema.no_info_plain_validator_function(lambda v: torch.device(v)),
+            ]
+        )
+
+        return core_schema.json_or_python_schema(
+            json_schema=from_str_schema,
+            python_schema=core_schema.union_schema(
+                [
+                    # check if it's an instance first before doing any further work
+                    core_schema.is_instance_schema(torch.dtype),
+                    from_str_schema,
+                    from_int_schema,
+                ]
+            ),
+            serialization=core_schema.plain_serializer_function_ser_schema(
+                lambda v: str(v)
+            ),
+        )
+
+
+PydanticTorchDtype = Annotated[torch.dtype, _TorchDtypePydanticAnnotation]
+PydanticTorchDevice = Annotated[torch.device, _TorchDevicePydanticAnnotation]
{rlgym_learn_algos-0.1.5.dist-info → rlgym_learn_algos-0.2.0.dist-info}/RECORD

@@ -1,36 +1,37 @@
-rlgym_learn_algos-0.
-rlgym_learn_algos-0.
-rlgym_learn_algos-0.
-rlgym_learn_algos/
-rlgym_learn_algos/logging/metrics_logger.py,sha256=45FBH49OcHl5skvG9J9MIFJtAxbFo1TxtEvLWwjttSU,4122
-rlgym_learn_algos/logging/wandb_metrics_logger.py,sha256=funcqZYUarlKND7W79TThFc5d8j-a_CIQwAnOGYD-rs,6518
+rlgym_learn_algos-0.2.0.dist-info/METADATA,sha256=lhDW1yMI9RhHxSWjTN8WFWJ6O_WlgtFsm8NI1e-GLgI,2431
+rlgym_learn_algos-0.2.0.dist-info/WHEEL,sha256=2I8FMXqttZIv4Ceqp7SSjsZUyiuHAyz-otzopz4PdFA,92
+rlgym_learn_algos-0.2.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+rlgym_learn_algos/__init__.py,sha256=C7cRdL4lZrpk3ge_4_lGAbGodqWJXM56FfgO0keRPAY,207
 rlgym_learn_algos/logging/__init__.py,sha256=ouItskWI4ItuoFdL--rt9YXCt7MasA473lYPhmJnrFA,423
+rlgym_learn_algos/logging/dict_metrics_logger.py,sha256=qmqr0HSiHpm5rjyxfAdmXOeBSbgP_t36-e-enpOccnE,1991
+rlgym_learn_algos/logging/metrics_logger.py,sha256=0l69GSSrxRcPm0xAjvF7yEIis7jGNu70unXu3hnK0XE,4122
+rlgym_learn_algos/logging/wandb_metrics_logger.py,sha256=Kxi8y-nfoh3EI_OqLm4pDS-zhUWEjkS8F4TdD01dr9U,6939
+rlgym_learn_algos/ppo/__init__.py,sha256=o6B8wCRfeyipSNEGJFyB3SHYmxUytaQelX2zsted5cg,1184
 rlgym_learn_algos/ppo/actor.py,sha256=LZevg0kqRrb4PwF05ePK9b1JIBX04YkWjsPs7swZ9JY,1767
 rlgym_learn_algos/ppo/basic_critic.py,sha256=oyyo8x9K6mi2BsbA6_tRy2Av8Pimb35WspJkPpe8XdQ,1022
 rlgym_learn_algos/ppo/continuous_actor.py,sha256=1vdBUw2mQNFNu6A6ZrAztBjd4DmwjGkIIFLboMZ02lc,4417
 rlgym_learn_algos/ppo/critic.py,sha256=RB89WtiN52BEq5QCpGAPrASUnasac-Bpg7B0lM3UXHw,689
 rlgym_learn_algos/ppo/discrete_actor.py,sha256=Nuc3EndIQud3NGrkBIQgy-Z-okhXVrj6p6okSGD1KNY,2620
 rlgym_learn_algos/ppo/env_trajectories.py,sha256=gzQBRkzwZhlZeSvWL50cc8AOgBfsg5zUys0aTJj6aZU,3775
-rlgym_learn_algos/ppo/experience_buffer.py,sha256=
-rlgym_learn_algos/ppo/experience_buffer_numpy.py,sha256=
-rlgym_learn_algos/ppo/gae_trajectory_processor.py,sha256=
-rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py,sha256=
+rlgym_learn_algos/ppo/experience_buffer.py,sha256=py7kwhRJFsPx5lyvcUVywLAsu5zbU_0wV_52Fb6Kb_4,11012
+rlgym_learn_algos/ppo/experience_buffer_numpy.py,sha256=Apk4x-pfRnitKJPW6LBZyOPIhgeJs_5EG7BbTCqMwjk,4761
+rlgym_learn_algos/ppo/gae_trajectory_processor.py,sha256=r-o5ajNSTNr5nZxsUc17KMuZR6c4l4NHHTIs2-WbMgE,4956
+rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py,sha256=RpyDR6GQ1JXvwtoKkx5V3z3WvU9ElJdzfNtpPiZDaTc,6831
 rlgym_learn_algos/ppo/multi_discrete_actor.py,sha256=zSYeBBirjguSv_wO-peo06hioHiVhZQjnd-NYwJxmag,3127
-rlgym_learn_algos/ppo/ppo_agent_controller.py,sha256=
-rlgym_learn_algos/ppo/ppo_learner.py,sha256=
+rlgym_learn_algos/ppo/ppo_agent_controller.py,sha256=CH-xpO2mOCwe4iu_n9wPPsqLxEPYcbQhnvcJYs46qgM,23270
+rlgym_learn_algos/ppo/ppo_learner.py,sha256=z14GaL52mx7b20mQsuOLFXlpYVlR0_9Nn5HinImWaLY,15295
 rlgym_learn_algos/ppo/ppo_metrics_logger.py,sha256=niW8xgQLEBCGgTaVyiE_JqsU6RTjV6h-JzM-7c3JT38,2868
 rlgym_learn_algos/ppo/trajectory.py,sha256=IIH_IG8B_HkyxRPf-YsCyF1jQqNjDx752hgzAehG25I,719
-rlgym_learn_algos/ppo/trajectory_processor.py,sha256=
-rlgym_learn_algos/ppo/__init__.py,sha256=o6B8wCRfeyipSNEGJFyB3SHYmxUytaQelX2zsted5cg,1184
+rlgym_learn_algos/ppo/trajectory_processor.py,sha256=9-JE8hJkOgVJ-R3_9JYjFTUUTlnVq2-U43VP0HiY-sM,2059
 rlgym_learn_algos/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+rlgym_learn_algos/rlgym_learn_algos.cp310-win32.pyd,sha256=GuvLbJ2bJrcgAmafhTIuvnHsi7X71dPwlbZCaPgzhBI,339968
 rlgym_learn_algos/rlgym_learn_algos.pyi,sha256=NwY-sDZWM06TUiKPzxpfH1Td6G6E8TdxtRPgBSh-PPE,1203
+rlgym_learn_algos/stateful_functions/__init__.py,sha256=QS0KYjuzagNkYiYllXQmjoJn14-G7KZawq1Zvwh8alY,236
 rlgym_learn_algos/stateful_functions/batch_reward_type_numpy_converter.py,sha256=1yte5qYyl9LWdClHZ_YsF7R9dJqQeYfINMdgNF_59Gs,767
 rlgym_learn_algos/stateful_functions/numpy_obs_standardizer.py,sha256=OgtwCaxBGTySPMnE5D5VDKpJ0dsTEz9oHc08A96xRao,1604
 rlgym_learn_algos/stateful_functions/obs_standardizer.py,sha256=qPPc3--J_3mpJJ-QHJjta6dbWWBobL7SYdK5MUP-XMw,606
-rlgym_learn_algos/
+rlgym_learn_algos/util/__init__.py,sha256=VPM6SN4T_625H9t30s9EiLeXiEEWgcyRVHa-LLVNrn4,47
 rlgym_learn_algos/util/running_stats.py,sha256=0tiGFpKtHWzMa1CxM_ueBzd_ryX4bJBriC8MXcSLg8w,4479
 rlgym_learn_algos/util/torch_functions.py,sha256=CTTHzTIi7u1O9HyX0cVJOrnYVbAtnlVs0g1fO9s3ano,3458
-rlgym_learn_algos/util/
-rlgym_learn_algos/
-rlgym_learn_algos/rlgym_learn_algos.cp310-win32.pyd,sha256=Z3ibd7sjCsmUOw1ENr_aWX81l86hwqt9St0qdAe18IM,339968
+rlgym_learn_algos/util/torch_pydantic.py,sha256=pgj3I-3q8iW9qtOCv1fgjNkZgA00G_Rdkb4qJPk5gxo,3530
-rlgym_learn_algos-0.1.5.dist-info/RECORD,,
+rlgym_learn_algos-0.2.0.dist-info/RECORD,,
{rlgym_learn_algos-0.1.5.dist-info → rlgym_learn_algos-0.2.0.dist-info}/licenses/LICENSE

File without changes.