PyPI - oncosim - Versions diffs - 0.1.0__tar.gz - Mend

oncosim 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

oncosim-0.1.0/.gitignore +20 -0
oncosim-0.1.0/LICENSE +21 -0
oncosim-0.1.0/PKG-INFO +166 -0
oncosim-0.1.0/README.md +125 -0
oncosim-0.1.0/pyproject.toml +64 -0
oncosim-0.1.0/src/oncosim/__init__.py +7 -0
oncosim-0.1.0/src/oncosim/agents/__init__.py +9 -0
oncosim-0.1.0/src/oncosim/agents/heuristic_agent.py +81 -0
oncosim-0.1.0/src/oncosim/agents/ppo.py +172 -0
oncosim-0.1.0/src/oncosim/agents/random_agent.py +45 -0
oncosim-0.1.0/src/oncosim/benchmarks/__init__.py +1 -0
oncosim-0.1.0/src/oncosim/benchmarks/environments.py +33 -0
oncosim-0.1.0/src/oncosim/benchmarks/runner.py +86 -0
oncosim-0.1.0/src/oncosim/envs/__init__.py +24 -0
oncosim-0.1.0/src/oncosim/envs/adaptive_rt.py +164 -0
oncosim-0.1.0/src/oncosim/envs/beam_selection.py +158 -0
oncosim-0.1.0/src/oncosim/envs/dose_fractionation.py +171 -0
oncosim-0.1.0/src/oncosim/envs/wrappers.py +56 -0
oncosim-0.1.0/src/oncosim/physics/__init__.py +18 -0
oncosim-0.1.0/src/oncosim/physics/beam_geometry.py +122 -0
oncosim-0.1.0/src/oncosim/physics/dose_calc.py +95 -0
oncosim-0.1.0/src/oncosim/physics/tissue_models.py +119 -0
oncosim-0.1.0/src/oncosim/training/__init__.py +1 -0
oncosim-0.1.0/src/oncosim/training/evaluate.py +78 -0
oncosim-0.1.0/src/oncosim/training/train.py +132 -0
oncosim-0.1.0/tests/__init__.py +0 -0
oncosim-0.1.0/tests/test_adaptive_rt_env.py +108 -0
oncosim-0.1.0/tests/test_agents.py +101 -0
oncosim-0.1.0/tests/test_beam_selection_env.py +106 -0
oncosim-0.1.0/tests/test_benchmarks.py +63 -0
oncosim-0.1.0/tests/test_dose_fractionation_env.py +121 -0
oncosim-0.1.0/tests/test_integration.py +107 -0
oncosim-0.1.0/tests/test_physics_dose_calc.py +105 -0
oncosim-0.1.0/tests/test_physics_geometry.py +116 -0
oncosim-0.1.0/tests/test_physics_tissue.py +146 -0
oncosim-0.1.0/tests/test_wrappers.py +76 -0

oncosim-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,20 @@
+.venv/
+__pycache__/
+*.pyc
+*.pyo
+dist/
+build/
+*.egg-info/
+.claude/
+checkpoints/
+results/
+*.pt
+*.zip
+.ruff_cache/
+.mypy_cache/
+.pytest_cache/
+*.egg
+.eggs/
+*.so
+.coverage
+htmlcov/

oncosim-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Hass Dhia, Smart Technology Investments Research Institute
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

oncosim-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,166 @@
+Metadata-Version: 2.4
+Name: oncosim
+Version: 0.1.0
+Summary: Gymnasium-compatible RL environments for radiation therapy treatment planning
+Project-URL: Repository, https://github.com/HassDhia/oncosim
+Project-URL: Documentation, https://github.com/HassDhia/oncosim#readme
+Author-email: Hass Dhia <partners@smarttechinvest.com>
+License-Expression: MIT
+License-File: LICENSE
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
+Requires-Python: >=3.10
+Requires-Dist: gymnasium>=0.29
+Requires-Dist: numpy>=1.24
+Requires-Dist: scipy>=1.11
+Provides-Extra: all
+Requires-Dist: matplotlib>=3.7; extra == 'all'
+Requires-Dist: mypy; extra == 'all'
+Requires-Dist: pytest-cov; extra == 'all'
+Requires-Dist: pytest>=7.0; extra == 'all'
+Requires-Dist: ruff; extra == 'all'
+Requires-Dist: stable-baselines3>=2.0; extra == 'all'
+Requires-Dist: torch>=2.0; extra == 'all'
+Provides-Extra: dev
+Requires-Dist: mypy; extra == 'dev'
+Requires-Dist: pytest-cov; extra == 'dev'
+Requires-Dist: pytest>=7.0; extra == 'dev'
+Requires-Dist: ruff; extra == 'dev'
+Provides-Extra: train
+Requires-Dist: matplotlib>=3.7; extra == 'train'
+Requires-Dist: stable-baselines3>=2.0; extra == 'train'
+Requires-Dist: torch>=2.0; extra == 'train'
+Description-Content-Type: text/markdown
+# oncosim
+**Gymnasium environments for reinforcement learning in radiation therapy treatment planning.**
+![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+![Tests](https://img.shields.io/badge/tests-100%2B%20passing-brightgreen.svg)
+[![PyPI version](https://img.shields.io/pypi/v/oncosim.svg)](https://pypi.org/project/oncosim/)
+---
+OncoSim provides three Gymnasium-compatible reinforcement learning environments for radiation therapy treatment planning research. Each environment models a distinct clinical decision problem with physically grounded dynamics based on the linear-quadratic model of cell kill, Poisson TCP, and Lyman-Kutcher-Burman NTCP.
+The package includes analytical dose calculation, radiobiological tissue models, configurable difficulty tiers, baseline agents (random, heuristic, PPO), and a benchmark suite for reproducible evaluation.
+## Installation
+```bash
+pip install oncosim              # Core (numpy, scipy, gymnasium)
+pip install oncosim[train]       # + SB3, PyTorch for RL training
+pip install oncosim[all]         # Everything
+```
+Development install:
+```bash
+git clone https://github.com/HassDhia/oncosim.git
+cd oncosim
+pip install -e ".[all]"
+```
+## Quick Start
+```python
+import gymnasium as gym
+import oncosim
+env = gym.make("oncosim/BeamSelection-v0")
+obs, info = env.reset(seed=42)
+for _ in range(100):
+    action = env.action_space.sample()
+    obs, reward, terminated, truncated, info = env.step(action)
+    if terminated or truncated:
+        obs, info = env.reset()
+env.close()
+```
+## Environments
+| Environment | Paradigm | Observation | Action | Key Challenge |
+|---|---|---|---|---|
+| `oncosim/BeamSelection-v0` | Beam angle optimization | 64x64 dose grid, tumor/OAR masks, selected beams | Discrete(36): angle index at 10-degree steps | Maximize tumor coverage while sparing organs at risk |
+| `oncosim/DoseFractionation-v0` | Fractionation scheduling | Fraction number, tumor volume, cumulative dose, TCP, NTCP | Box(0.5, 4.0): dose per fraction in Gy | Balance tumor control probability against normal tissue toxicity |
+| `oncosim/AdaptiveRT-v0` | Adaptive replanning | Treatment progress, plan quality, tumor response, dose deviation | MultiDiscrete([2, 5]): replan decision + dose adjustment | Decide when to replan based on anatomical changes during treatment |
+All environments support configurable difficulty tiers and deterministic seeding for reproducibility.
+## Architecture
+```
+oncosim/
+  physics/          # Dose calculation, tissue models, beam geometry
+    dose_calc.py    # Pencil beam analytical model
+    tissue_models.py # LQ surviving fraction, TCP, NTCP, BED
+    beam_geometry.py # 2D beam profiles, structure masks, DVH
+  envs/             # Gymnasium environments
+    beam_selection.py
+    dose_fractionation.py
+    adaptive_rt.py
+    wrappers.py     # FlattenObs, NormalizeReward
+  agents/           # Baseline agents
+    random_agent.py
+    heuristic_agent.py
+    ppo.py          # Stable-Baselines3 PPO wrapper
+  training/         # Training and evaluation pipeline
+  benchmarks/       # Configurable benchmark suite (5 difficulty tiers)
+```
+## Benchmarks
+Run the benchmark suite:
+```bash
+oncosim-benchmark                          # All tiers
+oncosim-benchmark --tiers trivial easy     # Specific tiers
+```
+## Training
+Train PPO agents on all environments:
+```bash
+pip install oncosim[train]
+oncosim-train                              # Default: 90k timesteps each
+oncosim-train --timesteps 500000           # Custom timesteps
+```
+## Paper
+The accompanying paper is available at:
+- [PDF (GitHub)](https://github.com/HassDhia/oncosim/blob/main/paper/oncosim.pdf)
+## Citation
+If you use oncosim in your research, please cite:
+```bibtex
+@software{dhia2026oncosim,
+  author = {Dhia, Hass},
+  title = {OncoSim: Gymnasium Environments for Reinforcement Learning in Radiation Therapy Treatment Planning},
+  year = {2026},
+  publisher = {Smart Technology Investments Research Institute},
+  url = {https://github.com/HassDhia/oncosim}
+}
+```
+## License
+MIT License. See [LICENSE](LICENSE) for details.
+## Contact
+Hass Dhia -- Smart Technology Investments Research Institute
+- Email: partners@smarttechinvest.com
+- Web: [smarttechinvest.com/research](https://smarttechinvest.com/research)

oncosim-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,125 @@
+# oncosim
+**Gymnasium environments for reinforcement learning in radiation therapy treatment planning.**
+![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+![Tests](https://img.shields.io/badge/tests-100%2B%20passing-brightgreen.svg)
+[![PyPI version](https://img.shields.io/pypi/v/oncosim.svg)](https://pypi.org/project/oncosim/)
+---
+OncoSim provides three Gymnasium-compatible reinforcement learning environments for radiation therapy treatment planning research. Each environment models a distinct clinical decision problem with physically grounded dynamics based on the linear-quadratic model of cell kill, Poisson TCP, and Lyman-Kutcher-Burman NTCP.
+The package includes analytical dose calculation, radiobiological tissue models, configurable difficulty tiers, baseline agents (random, heuristic, PPO), and a benchmark suite for reproducible evaluation.
+## Installation
+```bash
+pip install oncosim              # Core (numpy, scipy, gymnasium)
+pip install oncosim[train]       # + SB3, PyTorch for RL training
+pip install oncosim[all]         # Everything
+```
+Development install:
+```bash
+git clone https://github.com/HassDhia/oncosim.git
+cd oncosim
+pip install -e ".[all]"
+```
+## Quick Start
+```python
+import gymnasium as gym
+import oncosim
+env = gym.make("oncosim/BeamSelection-v0")
+obs, info = env.reset(seed=42)
+for _ in range(100):
+    action = env.action_space.sample()
+    obs, reward, terminated, truncated, info = env.step(action)
+    if terminated or truncated:
+        obs, info = env.reset()
+env.close()
+```
+## Environments
+| Environment | Paradigm | Observation | Action | Key Challenge |
+|---|---|---|---|---|
+| `oncosim/BeamSelection-v0` | Beam angle optimization | 64x64 dose grid, tumor/OAR masks, selected beams | Discrete(36): angle index at 10-degree steps | Maximize tumor coverage while sparing organs at risk |
+| `oncosim/DoseFractionation-v0` | Fractionation scheduling | Fraction number, tumor volume, cumulative dose, TCP, NTCP | Box(0.5, 4.0): dose per fraction in Gy | Balance tumor control probability against normal tissue toxicity |
+| `oncosim/AdaptiveRT-v0` | Adaptive replanning | Treatment progress, plan quality, tumor response, dose deviation | MultiDiscrete([2, 5]): replan decision + dose adjustment | Decide when to replan based on anatomical changes during treatment |
+All environments support configurable difficulty tiers and deterministic seeding for reproducibility.
+## Architecture
+```
+oncosim/
+  physics/          # Dose calculation, tissue models, beam geometry
+    dose_calc.py    # Pencil beam analytical model
+    tissue_models.py # LQ surviving fraction, TCP, NTCP, BED
+    beam_geometry.py # 2D beam profiles, structure masks, DVH
+  envs/             # Gymnasium environments
+    beam_selection.py
+    dose_fractionation.py
+    adaptive_rt.py
+    wrappers.py     # FlattenObs, NormalizeReward
+  agents/           # Baseline agents
+    random_agent.py
+    heuristic_agent.py
+    ppo.py          # Stable-Baselines3 PPO wrapper
+  training/         # Training and evaluation pipeline
+  benchmarks/       # Configurable benchmark suite (5 difficulty tiers)
+```
+## Benchmarks
+Run the benchmark suite:
+```bash
+oncosim-benchmark                          # All tiers
+oncosim-benchmark --tiers trivial easy     # Specific tiers
+```
+## Training
+Train PPO agents on all environments:
+```bash
+pip install oncosim[train]
+oncosim-train                              # Default: 90k timesteps each
+oncosim-train --timesteps 500000           # Custom timesteps
+```
+## Paper
+The accompanying paper is available at:
+- [PDF (GitHub)](https://github.com/HassDhia/oncosim/blob/main/paper/oncosim.pdf)
+## Citation
+If you use oncosim in your research, please cite:
+```bibtex
+@software{dhia2026oncosim,
+  author = {Dhia, Hass},
+  title = {OncoSim: Gymnasium Environments for Reinforcement Learning in Radiation Therapy Treatment Planning},
+  year = {2026},
+  publisher = {Smart Technology Investments Research Institute},
+  url = {https://github.com/HassDhia/oncosim}
+}
+```
+## License
+MIT License. See [LICENSE](LICENSE) for details.
+## Contact
+Hass Dhia -- Smart Technology Investments Research Institute
+- Email: partners@smarttechinvest.com
+- Web: [smarttechinvest.com/research](https://smarttechinvest.com/research)

oncosim-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,64 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "oncosim"
+version = "0.1.0"
+description = "Gymnasium-compatible RL environments for radiation therapy treatment planning"
+readme = "README.md"
+license = "MIT"
+requires-python = ">=3.10"
+authors = [
+    { name = "Hass Dhia", email = "partners@smarttechinvest.com" },
+]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Scientific/Engineering :: Medical Science Apps.",
+]
+dependencies = [
+    "numpy>=1.24",
+    "scipy>=1.11",
+    "gymnasium>=0.29",
+]
+[project.optional-dependencies]
+dev = ["pytest>=7.0", "pytest-cov", "ruff", "mypy"]
+train = ["stable-baselines3>=2.0", "torch>=2.0", "matplotlib>=3.7"]
+all = ["oncosim[dev,train]"]
+[project.urls]
+Repository = "https://github.com/HassDhia/oncosim"
+Documentation = "https://github.com/HassDhia/oncosim#readme"
+[project.scripts]
+oncosim-train = "oncosim.agents.ppo:main"
+oncosim-benchmark = "oncosim.benchmarks.runner:main"
+[tool.hatch.build.targets.wheel]
+packages = ["src/oncosim"]
+[tool.hatch.build.targets.sdist]
+exclude = [
+    "results/",
+    "paper/",
+    ".venv/",
+    ".claude/",
+    "*.pt",
+    "*.zip",
+    "checkpoints/",
+]
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+[tool.ruff]
+target-version = "py310"
+line-length = 99

oncosim-0.1.0/src/oncosim/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""OncoSim: Gymnasium-compatible RL environments for radiation therapy treatment planning."""
+__version__ = "0.1.0"
+from oncosim.envs import register_envs
+# Called at import time so that a plain `import oncosim` is enough before
+# `gym.make("oncosim/...")`; register_envs itself lives in oncosim.envs.
+register_envs()

oncosim-0.1.0/src/oncosim/agents/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Baseline agents for OncoSim environments."""
+from oncosim.agents.random_agent import RandomAgent, evaluate_random
+from oncosim.agents.heuristic_agent import (
+    BeamSelectionHeuristic,
+    DoseFractionationHeuristic,
+    AdaptiveRTHeuristic,
+    evaluate_heuristic,
+)

oncosim-0.1.0/src/oncosim/agents/heuristic_agent.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""Clinical heuristic baseline agents for OncoSim environments."""
+from __future__ import annotations
+import gymnasium as gym
+import numpy as np
+class BeamSelectionHeuristic:
+    """Select equispaced beam angles (standard clinical approach).
+    Angles are spread evenly over 360 degrees and snapped to the nearest
+    10-degree index (0-35). ``act`` returns them one per call; once all
+    ``num_beams`` angles have been emitted it keeps returning index 0 until
+    ``reset`` is called.
+    Args:
+        env: Environment the agent acts in. Not used by this heuristic.
+        num_beams: Number of equispaced angles to generate. Must be >= 1.
+    Raises:
+        ValueError: If ``num_beams`` is smaller than 1.
+    """
+    # Discrete action grid: 36 angle indices, each 10 degrees apart.
+    _NUM_ANGLE_BINS = 36
+    _DEGREES_PER_BIN = 10.0
+    def __init__(self, env: gym.Env, num_beams: int = 7):
+        if num_beams < 1:
+            # 0 would divide by zero below; a negative count would silently
+            # produce an empty schedule that always answers with angle 0.
+            raise ValueError(f"num_beams must be >= 1, got {num_beams}")
+        self.num_beams = num_beams
+        self._step = 0
+        # Equispaced angles: e.g., for 7 beams -> 0, 51, 103, 154, 206, 257, 309 deg
+        # Map to nearest discrete angle index (0-35, each = 10 deg)
+        spacing = 360.0 / num_beams
+        self._angles = [
+            int(round((i * spacing) / self._DEGREES_PER_BIN)) % self._NUM_ANGLE_BINS
+            for i in range(num_beams)
+        ]
+    def act(self, obs: dict | np.ndarray) -> int:
+        """Return the next scheduled angle index; ``obs`` is ignored."""
+        action = self._angles[self._step] if self._step < len(self._angles) else 0
+        self._step += 1
+        return action
+    def reset(self) -> None:
+        """Restart the angle schedule from its first entry."""
+        self._step = 0
+class DoseFractionationHeuristic:
+    """Conventional-fractionation baseline: always prescribes 2 Gy per fraction."""
+    _DOSE_PER_FRACTION_GY = 2.0
+    def reset(self) -> None:
+        """No-op: the agent keeps no per-episode state."""
+        return None
+    def act(self, obs: dict | np.ndarray) -> np.ndarray:
+        """Return the fixed dose as a length-1 float32 array; ``obs`` is ignored."""
+        return np.full(1, self._DOSE_PER_FRACTION_GY, dtype=np.float32)
+class AdaptiveRTHeuristic:
+    """Conservative baseline that never replans and leaves the dose at 1.0x."""
+    _NO_REPLAN = 0
+    _UNIT_DOSE_INDEX = 2  # dose_factor=1.0
+    def reset(self) -> None:
+        """No-op: the agent keeps no per-episode state."""
+        return None
+    def act(self, obs: dict | np.ndarray) -> np.ndarray:
+        """Return ``[replan, dose_index]`` as an int64 array; ``obs`` is ignored."""
+        return np.asarray((self._NO_REPLAN, self._UNIT_DOSE_INDEX), dtype=np.int64)
+def evaluate_heuristic(
+    env: gym.Env,
+    agent: BeamSelectionHeuristic | DoseFractionationHeuristic | AdaptiveRTHeuristic,
+    n_episodes: int = 100,
+    seed: int = 42,
+) -> dict[str, float]:
+    """Roll out a heuristic agent and summarise its episode returns.
+    Episode ``ep`` starts with ``env.reset(seed=seed + ep)`` and the agent is
+    reset before every episode. The environment is not closed here.
+    Args:
+        env: Environment to evaluate in.
+        agent: Heuristic agent exposing ``act(obs)`` and ``reset()``.
+        n_episodes: Number of episodes to run. Must be >= 1.
+        seed: Base seed; each episode uses ``seed + ep``.
+    Returns:
+        Dict with mean_reward, std_reward, min_reward, max_reward.
+    Raises:
+        ValueError: If ``n_episodes`` is smaller than 1.
+    """
+    if n_episodes < 1:
+        # With no episodes the mean/std would be NaN and np.min/np.max would
+        # fail with an opaque "zero-size array" error; fail early instead.
+        raise ValueError(f"n_episodes must be >= 1, got {n_episodes}")
+    rewards: list[float] = []
+    for ep in range(n_episodes):
+        obs, _ = env.reset(seed=seed + ep)
+        agent.reset()
+        total_reward = 0.0
+        done = False
+        while not done:
+            action = agent.act(obs)
+            obs, reward, terminated, truncated, _ = env.step(action)
+            # Accumulate as a Python float so NumPy scalar rewards do not
+            # change the precision or type of the running total.
+            total_reward += float(reward)
+            done = terminated or truncated
+        rewards.append(total_reward)
+    return {
+        "mean_reward": float(np.mean(rewards)),
+        "std_reward": float(np.std(rewards)),
+        "min_reward": float(np.min(rewards)),
+        "max_reward": float(np.max(rewards)),
+    }

oncosim-0.1.0/src/oncosim/agents/ppo.py ADDED Viewed

@@ -0,0 +1,172 @@
+"""PPO training wrapper using Stable-Baselines3."""
+from __future__ import annotations
+import json
+import os
+import sys
+import gymnasium as gym
+import numpy as np
+def train_ppo(
+    env_id: str,
+    total_timesteps: int = 90000,
+    save_dir: str = "checkpoints",
+    seed: int = 42,
+    verbose: int = 0,
+) -> dict:
+    """Train a PPO agent on a given environment.
+    The environment is wrapped in ``FlattenObsWrapper``, trained on CPU with a
+    fixed set of PPO hyperparameters, and the model is saved under ``save_dir``
+    using ``env_id`` with ``/`` replaced by ``_`` as the file stem. The
+    environment is closed even if training or saving fails.
+    Args:
+        env_id: Gymnasium environment ID.
+        total_timesteps: Total training timesteps.
+        save_dir: Directory to save trained model.
+        seed: Random seed.
+        verbose: Verbosity level.
+    Returns:
+        Dict with env_id, total_timesteps, episode_rewards, episode_lengths,
+        mean_reward / std_reward over the last 50 episodes (0.0 when no
+        episode finished) and model_path. All rewards are Python floats.
+    Raises:
+        SystemExit: If stable-baselines3 is not installed.
+    """
+    try:
+        from stable_baselines3 import PPO
+        from stable_baselines3.common.callbacks import BaseCallback
+    except ImportError:
+        print("stable-baselines3 required. Install with: pip install oncosim[train]")
+        sys.exit(1)
+    from oncosim.envs.wrappers import FlattenObsWrapper
+    class RewardCallback(BaseCallback):
+        """Record the return and length of every finished episode of env 0."""
+        def __init__(self):
+            super().__init__()
+            self.episode_rewards: list[float] = []
+            self.episode_lengths: list[int] = []
+            self._current_reward = 0.0
+            self._current_length = 0
+        def _on_step(self) -> bool:
+            # The rewards are presumably NumPy scalars (float32 in SB3's
+            # vectorized envs); cast so the totals stay plain Python floats
+            # and remain JSON-serializable under NumPy 2 promotion rules.
+            self._current_reward += float(self.locals["rewards"][0])
+            self._current_length += 1
+            if self.locals["dones"][0]:
+                self.episode_rewards.append(self._current_reward)
+                self.episode_lengths.append(self._current_length)
+                self._current_reward = 0.0
+                self._current_length = 0
+            return True
+    env = gym.make(env_id)
+    try:
+        env = FlattenObsWrapper(env)
+        callback = RewardCallback()
+        model = PPO(
+            "MlpPolicy",
+            env,
+            seed=seed,
+            verbose=verbose,
+            learning_rate=3e-4,
+            n_steps=2048,
+            batch_size=64,
+            n_epochs=10,
+            gamma=0.99,
+            device="cpu",
+        )
+        model.learn(total_timesteps=total_timesteps, callback=callback)
+        os.makedirs(save_dir, exist_ok=True)
+        model_path = os.path.join(save_dir, env_id.replace("/", "_"))
+        model.save(model_path)
+    finally:
+        # Release the environment even when learn() or save() raises.
+        env.close()
+    recent = callback.episode_rewards[-50:]
+    return {
+        "env_id": env_id,
+        "total_timesteps": total_timesteps,
+        "episode_rewards": callback.episode_rewards,
+        "episode_lengths": callback.episode_lengths,
+        "mean_reward": float(np.mean(recent)) if recent else 0.0,
+        "std_reward": float(np.std(recent)) if recent else 0.0,
+        "model_path": model_path,
+    }
+def evaluate_ppo(
+    env_id: str,
+    model_path: str,
+    n_episodes: int = 100,
+    seed: int = 42,
+) -> dict[str, float]:
+    """Evaluate a trained PPO model.
+    The environment is wrapped in ``FlattenObsWrapper`` and the model acts
+    deterministically. Episode ``ep`` starts with ``env.reset(seed=seed + ep)``.
+    The environment is closed even if loading the model or a rollout fails.
+    Args:
+        env_id: Gymnasium environment ID.
+        model_path: Path passed to ``PPO.load``.
+        n_episodes: Number of evaluation episodes. Must be >= 1.
+        seed: Base seed; each episode uses ``seed + ep``.
+    Returns:
+        Dict with mean_reward, std_reward, min_reward, max_reward.
+    Raises:
+        SystemExit: If stable-baselines3 is not installed.
+        ValueError: If ``n_episodes`` is smaller than 1.
+    """
+    try:
+        from stable_baselines3 import PPO
+    except ImportError:
+        print("stable-baselines3 required.")
+        sys.exit(1)
+    from oncosim.envs.wrappers import FlattenObsWrapper
+    if n_episodes < 1:
+        # With no episodes np.min/np.max would fail with an opaque
+        # "zero-size array" error after a NaN mean; fail early instead.
+        raise ValueError(f"n_episodes must be >= 1, got {n_episodes}")
+    env = gym.make(env_id)
+    try:
+        env = FlattenObsWrapper(env)
+        model = PPO.load(model_path)
+        rewards: list[float] = []
+        for ep in range(n_episodes):
+            obs, _ = env.reset(seed=seed + ep)
+            total_reward = 0.0
+            done = False
+            while not done:
+                action, _ = model.predict(obs, deterministic=True)
+                obs, reward, terminated, truncated, _ = env.step(action)
+                # Keep the running total a Python float regardless of the
+                # scalar type the environment returns.
+                total_reward += float(reward)
+                done = terminated or truncated
+            rewards.append(total_reward)
+    finally:
+        # Release the environment even when load() or a rollout raises.
+        env.close()
+    return {
+        "mean_reward": float(np.mean(rewards)),
+        "std_reward": float(np.std(rewards)),
+        "min_reward": float(np.min(rewards)),
+        "max_reward": float(np.max(rewards)),
+    }
+def main(argv: list[str] | None = None) -> None:
+    """CLI entry point for training.
+    Trains PPO on each OncoSim environment in turn, prints the mean reward of
+    the last 50 episodes, and writes a summary to
+    ``results/training_results.json``.
+    Args:
+        argv: Command-line arguments without the program name. ``None`` (the
+            default) reads ``sys.argv[1:]``. Supports ``--timesteps N``;
+            unrecognized arguments are reported on stderr and ignored.
+    """
+    import argparse
+    import oncosim  # noqa: F401 - registers envs
+    parser = argparse.ArgumentParser(
+        description="Train PPO agents on the OncoSim environments.",
+    )
+    parser.add_argument(
+        "--timesteps",
+        type=int,
+        default=90000,
+        help="Training timesteps per environment (default: %(default)s).",
+    )
+    # parse_known_args keeps a bare main() call working even when sys.argv
+    # carries arguments meant for another program (e.g. a test runner).
+    args, unknown = parser.parse_known_args(argv)
+    if unknown:
+        print(f"Ignoring unrecognized arguments: {' '.join(unknown)}", file=sys.stderr)
+    env_ids = [
+        "oncosim/BeamSelection-v0",
+        "oncosim/DoseFractionation-v0",
+        "oncosim/AdaptiveRT-v0",
+    ]
+    results = {}
+    for env_id in env_ids:
+        print(f"Training PPO on {env_id}...")
+        result = train_ppo(env_id, total_timesteps=args.timesteps)
+        results[env_id] = result
+        print(f"  Mean reward (last 50 eps): {result['mean_reward']:.3f}")
+    # Build the payload before opening the file so a conversion error cannot
+    # leave a truncated JSON file behind; cast to built-in types because
+    # NumPy scalars such as float32 are rejected by json.dump.
+    serializable = {
+        env_id: {
+            "mean_reward": float(v["mean_reward"]),
+            "std_reward": float(v["std_reward"]),
+            "total_timesteps": int(v["total_timesteps"]),
+            "num_episodes": len(v["episode_rewards"]),
+            "episode_rewards": [float(r) for r in v["episode_rewards"]],
+        }
+        for env_id, v in results.items()
+    }
+    os.makedirs("results", exist_ok=True)
+    with open("results/training_results.json", "w", encoding="utf-8") as f:
+        json.dump(serializable, f, indent=2)
+    print("Results saved to results/training_results.json")
+# Run the training CLI when this module is executed directly as a script.
+if __name__ == "__main__":
+    main()