rlgym-learn-algos 0.2.0.tar.gz → 0.2.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/Cargo.lock +1 -1
  2. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/Cargo.toml +1 -1
  3. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/PKG-INFO +1 -1
  4. rlgym_learn_algos-0.2.1/rlgym_learn_algos/conversion/convert_rlgym_ppo_checkpoint.py +27 -0
  5. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/logging/wandb_metrics_logger.py +1 -1
  6. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/experience_buffer.py +2 -13
  7. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/gae_trajectory_processor.py +12 -6
  8. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/ppo_agent_controller.py +52 -27
  9. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/ppo_learner.py +32 -36
  10. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/trajectory_processor.py +1 -0
  11. rlgym_learn_algos-0.2.1/rlgym_learn_algos/py.typed +0 -0
  12. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/.github/workflows/CICD.yml +0 -0
  13. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/.gitignore +0 -0
  14. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/LICENSE +0 -0
  15. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/README.md +0 -0
  16. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/pyproject.toml +0 -0
  17. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/requirements.txt +0 -0
  18. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/__init__.py +0 -0
  19. /rlgym_learn_algos-0.2.0/rlgym_learn_algos/py.typed → /rlgym_learn_algos-0.2.1/rlgym_learn_algos/conversion/__init__.py +0 -0
  20. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/logging/__init__.py +0 -0
  21. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/logging/dict_metrics_logger.py +0 -0
  22. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/logging/metrics_logger.py +0 -0
  23. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/__init__.py +0 -0
  24. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/actor.py +0 -0
  25. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/basic_critic.py +0 -0
  26. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/continuous_actor.py +0 -0
  27. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/critic.py +0 -0
  28. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/discrete_actor.py +0 -0
  29. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/env_trajectories.py +0 -0
  30. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/experience_buffer_numpy.py +0 -0
  31. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/gae_trajectory_processor_pure_python.py +0 -0
  32. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/multi_discrete_actor.py +0 -0
  33. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/ppo_metrics_logger.py +0 -0
  34. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/trajectory.py +0 -0
  35. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/rlgym_learn_algos.pyi +0 -0
  36. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/stateful_functions/__init__.py +0 -0
  37. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/stateful_functions/batch_reward_type_numpy_converter.py +0 -0
  38. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/stateful_functions/numpy_obs_standardizer.py +0 -0
  39. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/stateful_functions/obs_standardizer.py +0 -0
  40. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/util/__init__.py +0 -0
  41. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/util/running_stats.py +0 -0
  42. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/util/torch_functions.py +0 -0
  43. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/util/torch_pydantic.py +0 -0
  44. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/src/common/mod.rs +0 -0
  45. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/src/common/numpy_dtype.rs +0 -0
  46. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/src/lib.rs +0 -0
  47. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/src/misc.rs +0 -0
  48. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/src/ppo/gae_trajectory_processor.rs +0 -0
  49. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/src/ppo/mod.rs +0 -0
  50. {rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/src/ppo/trajectory.rs +0 -0
{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/Cargo.lock
@@ -229,7 +229,7 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"

  [[package]]
  name = "rlgym-learn-algos"
- version = "0.2.0"
+ version = "0.2.1"
  dependencies = [
   "itertools",
   "numpy",
{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/Cargo.toml
@@ -1,6 +1,6 @@
  [package]
  name = "rlgym-learn-algos"
- version = "0.2.0"
+ version = "0.2.1"
  edition = "2021"
  description = "Rust backend for the more expensive parts of the rlgym-learn-algos python module"
  license="Apache-2.0"
{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: rlgym-learn-algos
- Version: 0.2.0
+ Version: 0.2.1
  Classifier: Programming Language :: Rust
  Classifier: Programming Language :: Python :: Implementation :: CPython
  Requires-Dist: pydantic>=2.8.2
rlgym_learn_algos-0.2.1/rlgym_learn_algos/conversion/convert_rlgym_ppo_checkpoint.py
@@ -0,0 +1,27 @@
+ import json
+ import os
+ import time
+ from typing import Optional
+
+
+ def convert_rlgym_ppo_checkpoint(
+     rlgym_ppo_checkpoint_folder: str, out_folder: Optional[str]
+ ):
+
+     if out_folder is None:
+         out_folder = f"rlgym_ppo_converted_checkpoint_{time.time_ns()}"
+     print(f"Saving converted checkpoint to folder {out_folder}")
+
+     os.makedirs(out_folder, exist_ok=True)
+
+     PPO_FILES = [
+         ("PPO_POLICY_OPTIMIZER.pt", "actor_optimizer.pt"),
+         ("PPO_POLICY.pt", "actor.pt"),
+         ("PPO_VALUE_NET_OPTIMIZER.pt", "critic_optimizer.pt"),
+         ("PPO_VALUE_NET.pt", "critic.pt"),
+     ]
+     os.makedirs(f"{out_folder}/ppo_learner", exist_ok=True)
+     for file in PPO_FILES:
+         with open(f"{rlgym_ppo_checkpoint_folder}/{file[0]}", "rb") as fin:
+             with open(f"{out_folder}/ppo_learner/{file[1]}", "wb") as fout:
+                 fout.write(fin.read())
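The new conversion helper copies the four rlgym-ppo network and optimizer files into the ppo_learner checkpoint layout used by this package. A minimal usage sketch (the import path follows the new module above; the rlgym-ppo checkpoint folder path is a placeholder):

    from rlgym_learn_algos.conversion.convert_rlgym_ppo_checkpoint import (
        convert_rlgym_ppo_checkpoint,
    )

    # Copies PPO_POLICY.pt, PPO_VALUE_NET.pt and their optimizer files from an
    # existing rlgym-ppo checkpoint folder into <out_folder>/ppo_learner/.
    # Passing None as out_folder writes to rlgym_ppo_converted_checkpoint_<timestamp>.
    convert_rlgym_ppo_checkpoint("path/to/rlgym_ppo_checkpoint", None)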
{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/logging/wandb_metrics_logger.py
@@ -164,7 +164,7 @@ class WandbMetricsLogger(
                  self.run_id = None
          except FileNotFoundError:
              print(
-                 f"{self.config.agent_controller_name}: Tried to load from checkpoint, but checkpoint didn't contain a wandb run! A new run will be created based on the config values."
+                 f"{self.config.agent_controller_name}: Tried to load wandb run from checkpoint using the file at location {str(os.path.join(self.config.checkpoint_load_folder, self.checkpoint_file_name))}, but there is no such file! A new run will be created based on the config values instead."
              )
              self.run_id = None

{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/experience_buffer.py
@@ -7,7 +7,6 @@ import numpy as np
  import torch
  from pydantic import BaseModel, Field, model_validator
  from rlgym.api import ActionType, AgentID, ObsType, RewardType
-
  from rlgym_learn_algos.util.torch_functions import get_device
  from rlgym_learn_algos.util.torch_pydantic import PydanticTorchDevice

@@ -45,17 +44,6 @@ class ExperienceBufferConfigModel(BaseModel, extra="forbid"):
              data["device"] = get_device("auto")
          return data

-     # device: PydanticTorchDevice = "auto"
-
-     # @model_validator(mode="before")
-     # @classmethod
-     # def set_device(cls, data):
-     #     if isinstance(data, dict) and (
-     #         "device" not in data or data["device"] == "auto"
-     #     ):
-     #         data["device"] = get_device("auto")
-     #     return data
-

  @dataclass
  class DerivedExperienceBufferConfig:
@@ -139,6 +127,7 @@ class ExperienceBuffer(
          self.trajectory_processor.load(
              DerivedTrajectoryProcessorConfig(
                  trajectory_processor_config=trajectory_processor_config,
+                 agent_controller_name=config.agent_controller_name,
                  dtype=config.dtype,
                  device=config.learner_device,
              )
@@ -171,7 +160,7 @@ class ExperienceBuffer(
                  self.advantages = state_dict["advantages"]
          except FileNotFoundError:
              print(
-                 f"{self.config.agent_controller_name}: Tried to load from checkpoint, but checkpoint didn't contain a saved experience buffer! A blank experience buffer will be used instead."
+                 f"{self.config.agent_controller_name}: Tried to load experience buffer from checkpoint using the file at location {str(os.path.join(self.config.checkpoint_load_folder, EXPERIENCE_BUFFER_FILE))}, but there is no such file! A blank experience buffer will be used instead."
              )

      def save_checkpoint(self, folder_path):
{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/gae_trajectory_processor.py
@@ -115,6 +115,7 @@ class GAETrajectoryProcessor(
          self.max_returns_per_stats_increment = (
              config.trajectory_processor_config.max_returns_per_stats_increment
          )
+         self.agent_controller_name = config.agent_controller_name
          self.dtype = config.dtype
          self.device = config.device
          self.checkpoint_load_folder = config.checkpoint_load_folder
@@ -127,12 +128,17 @@ class GAETrajectoryProcessor(
          )

      def _load_from_checkpoint(self):
-         with open(
-             os.path.join(self.checkpoint_load_folder, TRAJECTORY_PROCESSOR_FILE),
-             "rt",
-         ) as f:
-             state = json.load(f)
-             self.return_stats.load_state_dict(state["return_running_stats"])
+         try:
+             with open(
+                 os.path.join(self.checkpoint_load_folder, TRAJECTORY_PROCESSOR_FILE),
+                 "rt",
+             ) as f:
+                 state = json.load(f)
+                 self.return_stats.load_state_dict(state["return_running_stats"])
+         except FileNotFoundError:
+             print(
+                 f"{self.agent_controller_name}: Tried to load trajectory processor from checkpoint using the trajectory processor file at location {str(os.path.join(self.checkpoint_load_folder, TRAJECTORY_PROCESSOR_FILE))}, but there is no such file! Running stats will be initialized as if this were a new run instead."
+             )

      def save_checkpoint(self, folder_path):
          state = {
{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/ppo_agent_controller.py
@@ -253,6 +253,7 @@ class PPOAgentController(
          self.learner.load(
              DerivedPPOLearnerConfig(
                  learner_config=learner_config,
+                 agent_controller_name=config.agent_controller_name,
                  obs_space=self.obs_space,
                  action_space=self.action_space,
                  checkpoint_load_folder=learner_checkpoint_load_folder,
@@ -304,33 +305,57 @@ class PPOAgentController(
          random.seed(self.config.base_config.random_seed)

      def _load_from_checkpoint(self):
-         with open(
-             os.path.join(
-                 self.config.agent_controller_config.checkpoint_load_folder,
-                 CURRENT_TRAJECTORIES_FILE,
-             ),
-             "rb",
-         ) as f:
-             current_trajectories: Dict[
-                 int,
-                 EnvTrajectories[AgentID, ActionType, ObsType, RewardType],
-             ] = pickle.load(f)
-         with open(
-             os.path.join(
-                 self.config.agent_controller_config.checkpoint_load_folder,
-                 ITERATION_SHARED_INFOS_FILE,
-             ),
-             "rb",
-         ) as f:
-             iteration_shared_infos: List[Dict[str, Any]] = pickle.load(f)
-         with open(
-             os.path.join(
-                 self.config.agent_controller_config.checkpoint_load_folder,
-                 PPO_AGENT_FILE,
-             ),
-             "rt",
-         ) as f:
-             state = json.load(f)
+         try:
+             with open(
+                 os.path.join(
+                     self.config.agent_controller_config.checkpoint_load_folder,
+                     CURRENT_TRAJECTORIES_FILE,
+                 ),
+                 "rb",
+             ) as f:
+                 current_trajectories: Dict[
+                     int,
+                     EnvTrajectories[AgentID, ActionType, ObsType, RewardType],
+                 ] = pickle.load(f)
+         except FileNotFoundError:
+             print(
+                 f"{self.config.agent_controller_name}: Tried to load current trajectories from checkpoint using the file at location {str(os.path.join(self.config.agent_controller_config.checkpoint_load_folder, CURRENT_TRAJECTORIES_FILE))}, but there is no such file! Current trajectories will be initialized as an empty dict instead."
+             )
+             current_trajectories = {}
+         try:
+             with open(
+                 os.path.join(
+                     self.config.agent_controller_config.checkpoint_load_folder,
+                     ITERATION_SHARED_INFOS_FILE,
+                 ),
+                 "rb",
+             ) as f:
+                 iteration_shared_infos: List[Dict[str, Any]] = pickle.load(f)
+         except FileNotFoundError:
+             print(
+                 f"{self.config.agent_controller_name}: Tried to load iteration shared info data from checkpoint using the file at location {str(os.path.join(self.config.agent_controller_config.checkpoint_load_folder, ITERATION_SHARED_INFOS_FILE))}, but there is no such file! Iteration shared info data will be initialized as an empty list instead."
+             )
+             current_trajectories = {}
+         try:
+             with open(
+                 os.path.join(
+                     self.config.agent_controller_config.checkpoint_load_folder,
+                     PPO_AGENT_FILE,
+                 ),
+                 "rt",
+             ) as f:
+                 state = json.load(f)
+         except FileNotFoundError:
+             print(
+                 f"{self.config.agent_controller_name}: Tried to load PPO agent miscellaneous state data from checkpoint using the file at location {str(os.path.join(self.config.agent_controller_config.checkpoint_load_folder, PPO_AGENT_FILE))}, but there is no such file! This state data will be initialized as if this were a new run instead."
+             )
+             state = {
+                 "cur_iteration": 0,
+                 "iteration_timesteps": 0,
+                 "cumulative_timesteps": 0,
+                 "iteration_start_time": time.perf_counter(),
+                 "timestep_collection_start_time": time.perf_counter(),
+             }

          self.current_trajectories = current_trajectories
          self.iteration_shared_infos = iteration_shared_infos
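The rewritten _load_from_checkpoint above applies the same pattern used throughout 0.2.1: each checkpoint artifact is read in its own try/except FileNotFoundError block, a message prefixed with the agent controller name is printed, and a freshly initialized default is used in place of the missing file. A condensed, illustrative sketch of that pattern (the helper name and signature are hypothetical, not part of the package API):

    import os
    import pickle

    def load_or_default(folder: str, file_name: str, default, agent_controller_name: str):
        # Illustrative only: mirrors the fallback behavior added in 0.2.1, where a
        # missing checkpoint file produces a warning and a default value is used.
        path = os.path.join(folder, file_name)
        try:
            with open(path, "rb") as f:
                return pickle.load(f)
        except FileNotFoundError:
            print(
                f"{agent_controller_name}: Tried to load from checkpoint using the file at "
                f"location {path}, but there is no such file! Using a default value instead."
            )
            return default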
@@ -16,12 +16,13 @@ from rlgym.api import (
16
16
  ObsType,
17
17
  RewardType,
18
18
  )
19
+ from torch import nn as nn
20
+
19
21
  from rlgym_learn_algos.util.torch_functions import get_device
20
22
  from rlgym_learn_algos.util.torch_pydantic import (
21
23
  PydanticTorchDevice,
22
24
  PydanticTorchDtype,
23
25
  )
24
- from torch import nn as nn
25
26
 
26
27
  from .actor import Actor
27
28
  from .critic import Critic
@@ -50,31 +51,10 @@ class PPOLearnerConfigModel(BaseModel, extra="forbid"):
          return data


-     # @model_validator(mode="before")
-     # @classmethod
-     # def set_agent_controllers_config(cls, data):
-     #     if isinstance(data, LearningCoordinatorConfigModel):
-     #         agent_controllers_config = {}
-     #         for k, v in data.agent_controllers_config.items():
-     #             if isinstance(v, BaseModel):
-     #                 agent_controllers_config[k] = v.model_dump()
-     #             else:
-     #                 agent_controllers_config[k] = v
-     #         data.agent_controllers_config = agent_controllers_config
-     #     elif isinstance(data, dict) and "agent_controllers_config" in data:
-     #         agent_controllers_config = {}
-     #         for k, v in data["agent_controllers_config"].items():
-     #             if isinstance(v, BaseModel):
-     #                 agent_controllers_config[k] = v.model_dump()
-     #             else:
-     #                 agent_controllers_config[k] = v
-     #         data["agent_controllers_config"] = agent_controllers_config
-     #     return data
-
-
  @dataclass
  class DerivedPPOLearnerConfig:
      learner_config: PPOLearnerConfigModel
+     agent_controller_name: str
      obs_space: ObsSpaceType
      action_space: ActionSpaceType
      checkpoint_load_folder: Optional[str] = None
@@ -155,16 +135,26 @@ class PPOLearner(
          total_parameters = actor_params_count + critic_params_count

          # Display in a structured manner
-         print("Trainable Parameters:")
-         print(f"{'Component':<10} {'Count':<10}")
+         print(f"{self.config.agent_controller_name}: Trainable Parameters:")
+         print(f"{self.config.agent_controller_name}: {'Component':<10} {'Count':<10}")
          print("-" * 20)
-         print(f"{'Policy':<10} {actor_params_count:<10}")
-         print(f"{'Critic':<10} {critic_params_count:<10}")
+         print(
+             f"{self.config.agent_controller_name}: {'Policy':<10} {actor_params_count:<10}"
+         )
+         print(
+             f"{self.config.agent_controller_name}: {'Critic':<10} {critic_params_count:<10}"
+         )
          print("-" * 20)
-         print(f"{'Total':<10} {total_parameters:<10}")
+         print(
+             f"{self.config.agent_controller_name}: {'Total':<10} {total_parameters:<10}"
+         )

-         print(f"Current Policy Learning Rate: {self.config.learner_config.actor_lr}")
-         print(f"Current Critic Learning Rate: {self.config.learner_config.critic_lr}")
+         print(
+             f"{self.config.agent_controller_name}: Current Policy Learning Rate: {self.config.learner_config.actor_lr}"
+         )
+         print(
+             f"{self.config.agent_controller_name}: Current Critic Learning Rate: {self.config.learner_config.critic_lr}"
+         )
          self.cumulative_model_updates = 0

          if self.config.checkpoint_load_folder is not None:
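With the agent controller name prefixed to every line, the startup parameter summary now reads as one block per controller. A quick illustration of the resulting output, with a made-up controller name and parameter counts:

    # Illustrative only: reproduces the layout of the 0.2.1 startup summary.
    agent_controller_name = "PPO1"
    actor_params_count = 123_456
    critic_params_count = 78_910
    total_parameters = actor_params_count + critic_params_count

    print(f"{agent_controller_name}: Trainable Parameters:")
    print(f"{agent_controller_name}: {'Component':<10} {'Count':<10}")
    print("-" * 20)
    print(f"{agent_controller_name}: {'Policy':<10} {actor_params_count:<10}")
    print(f"{agent_controller_name}: {'Critic':<10} {critic_params_count:<10}")
    print("-" * 20)
    print(f"{agent_controller_name}: {'Total':<10} {total_parameters:<10}")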
@@ -180,7 +170,7 @@ class PPOLearner(

          assert os.path.exists(
              self.config.checkpoint_load_folder
-         ), f"PPO Learner cannot find folder: {self.config.checkpoint_load_folder}"
+         ), f"{self.config.agent_controller_name}: PPO Learner cannot find folder: {self.config.checkpoint_load_folder}"

          self.actor.load_state_dict(
              torch.load(
@@ -206,11 +196,17 @@ class PPOLearner(
                      map_location=self.config.learner_config.device,
                  )
              )
-         with open(
-             os.path.join(self.config.checkpoint_load_folder, MISC_STATE), "rt"
-         ) as f:
-             misc_state = json.load(f)
-             self.cumulative_model_updates = misc_state["cumulative_model_updates"]
+         try:
+             with open(
+                 os.path.join(self.config.checkpoint_load_folder, MISC_STATE), "rt"
+             ) as f:
+                 misc_state = json.load(f)
+                 self.cumulative_model_updates = misc_state["cumulative_model_updates"]
+         except FileNotFoundError:
+             print(
+                 f"{self.config.agent_controller_name}: Tried to load the PPO learner's misc state from the file at location {str(os.path.join(self.config.checkpoint_load_folder, MISC_STATE))}, but there is no such file! Miscellaneous stats will be initialized as if this were a new run instead."
+             )
+             self.cumulative_model_updates = 0

      def save_checkpoint(self, folder_path):
          os.makedirs(folder_path, exist_ok=True)
{rlgym_learn_algos-0.2.0 → rlgym_learn_algos-0.2.1}/rlgym_learn_algos/ppo/trajectory_processor.py
@@ -16,6 +16,7 @@ TRAJECTORY_PROCESSOR_FILE = "trajectory_processor.json"
  @dataclass
  class DerivedTrajectoryProcessorConfig(Generic[TrajectoryProcessorConfig]):
      trajectory_processor_config: TrajectoryProcessorConfig
+     agent_controller_name: str
      dtype: dtype
      device: device
      checkpoint_load_folder: Optional[str] = None