oncosim 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. oncosim-0.1.0/.gitignore +20 -0
  2. oncosim-0.1.0/LICENSE +21 -0
  3. oncosim-0.1.0/PKG-INFO +166 -0
  4. oncosim-0.1.0/README.md +125 -0
  5. oncosim-0.1.0/pyproject.toml +64 -0
  6. oncosim-0.1.0/src/oncosim/__init__.py +7 -0
  7. oncosim-0.1.0/src/oncosim/agents/__init__.py +9 -0
  8. oncosim-0.1.0/src/oncosim/agents/heuristic_agent.py +81 -0
  9. oncosim-0.1.0/src/oncosim/agents/ppo.py +172 -0
  10. oncosim-0.1.0/src/oncosim/agents/random_agent.py +45 -0
  11. oncosim-0.1.0/src/oncosim/benchmarks/__init__.py +1 -0
  12. oncosim-0.1.0/src/oncosim/benchmarks/environments.py +33 -0
  13. oncosim-0.1.0/src/oncosim/benchmarks/runner.py +86 -0
  14. oncosim-0.1.0/src/oncosim/envs/__init__.py +24 -0
  15. oncosim-0.1.0/src/oncosim/envs/adaptive_rt.py +164 -0
  16. oncosim-0.1.0/src/oncosim/envs/beam_selection.py +158 -0
  17. oncosim-0.1.0/src/oncosim/envs/dose_fractionation.py +171 -0
  18. oncosim-0.1.0/src/oncosim/envs/wrappers.py +56 -0
  19. oncosim-0.1.0/src/oncosim/physics/__init__.py +18 -0
  20. oncosim-0.1.0/src/oncosim/physics/beam_geometry.py +122 -0
  21. oncosim-0.1.0/src/oncosim/physics/dose_calc.py +95 -0
  22. oncosim-0.1.0/src/oncosim/physics/tissue_models.py +119 -0
  23. oncosim-0.1.0/src/oncosim/training/__init__.py +1 -0
  24. oncosim-0.1.0/src/oncosim/training/evaluate.py +78 -0
  25. oncosim-0.1.0/src/oncosim/training/train.py +132 -0
  26. oncosim-0.1.0/tests/__init__.py +0 -0
  27. oncosim-0.1.0/tests/test_adaptive_rt_env.py +108 -0
  28. oncosim-0.1.0/tests/test_agents.py +101 -0
  29. oncosim-0.1.0/tests/test_beam_selection_env.py +106 -0
  30. oncosim-0.1.0/tests/test_benchmarks.py +63 -0
  31. oncosim-0.1.0/tests/test_dose_fractionation_env.py +121 -0
  32. oncosim-0.1.0/tests/test_integration.py +107 -0
  33. oncosim-0.1.0/tests/test_physics_dose_calc.py +105 -0
  34. oncosim-0.1.0/tests/test_physics_geometry.py +116 -0
  35. oncosim-0.1.0/tests/test_physics_tissue.py +146 -0
  36. oncosim-0.1.0/tests/test_wrappers.py +76 -0
@@ -0,0 +1,20 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ dist/
6
+ build/
7
+ *.egg-info/
8
+ .claude/
9
+ checkpoints/
10
+ results/
11
+ *.pt
12
+ *.zip
13
+ .ruff_cache/
14
+ .mypy_cache/
15
+ .pytest_cache/
16
+ *.egg
17
+ .eggs/
18
+ *.so
19
+ .coverage
20
+ htmlcov/
oncosim-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Hass Dhia, Smart Technology Investments Research Institute
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
oncosim-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,166 @@
1
+ Metadata-Version: 2.4
2
+ Name: oncosim
3
+ Version: 0.1.0
4
+ Summary: Gymnasium-compatible RL environments for radiation therapy treatment planning
5
+ Project-URL: Repository, https://github.com/HassDhia/oncosim
6
+ Project-URL: Documentation, https://github.com/HassDhia/oncosim#readme
7
+ Author-email: Hass Dhia <partners@smarttechinvest.com>
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
19
+ Requires-Python: >=3.10
20
+ Requires-Dist: gymnasium>=0.29
21
+ Requires-Dist: numpy>=1.24
22
+ Requires-Dist: scipy>=1.11
23
+ Provides-Extra: all
24
+ Requires-Dist: matplotlib>=3.7; extra == 'all'
25
+ Requires-Dist: mypy; extra == 'all'
26
+ Requires-Dist: pytest-cov; extra == 'all'
27
+ Requires-Dist: pytest>=7.0; extra == 'all'
28
+ Requires-Dist: ruff; extra == 'all'
29
+ Requires-Dist: stable-baselines3>=2.0; extra == 'all'
30
+ Requires-Dist: torch>=2.0; extra == 'all'
31
+ Provides-Extra: dev
32
+ Requires-Dist: mypy; extra == 'dev'
33
+ Requires-Dist: pytest-cov; extra == 'dev'
34
+ Requires-Dist: pytest>=7.0; extra == 'dev'
35
+ Requires-Dist: ruff; extra == 'dev'
36
+ Provides-Extra: train
37
+ Requires-Dist: matplotlib>=3.7; extra == 'train'
38
+ Requires-Dist: stable-baselines3>=2.0; extra == 'train'
39
+ Requires-Dist: torch>=2.0; extra == 'train'
40
+ Description-Content-Type: text/markdown
41
+
42
+ # oncosim
43
+
44
+ **Gymnasium environments for reinforcement learning in radiation therapy treatment planning.**
45
+
46
+ ![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)
47
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
48
+ ![Tests](https://img.shields.io/badge/tests-100%2B%20passing-brightgreen.svg)
49
+ [![PyPI version](https://img.shields.io/pypi/v/oncosim.svg)](https://pypi.org/project/oncosim/)
50
+
51
+ ---
52
+
53
+ OncoSim provides three Gymnasium-compatible reinforcement learning environments for radiation therapy treatment planning research. Each environment models a distinct clinical decision problem with physically grounded dynamics based on the linear-quadratic model of cell kill, Poisson TCP, and Lyman-Kutcher-Burman NTCP.
54
+
55
+ The package includes analytical dose calculation, radiobiological tissue models, configurable difficulty tiers, baseline agents (random, heuristic, PPO), and a benchmark suite for reproducible evaluation.
56
+
57
+ ## Installation
58
+
59
+ ```bash
60
+ pip install oncosim # Core (numpy, scipy, gymnasium)
61
+ pip install oncosim[train] # + SB3, PyTorch for RL training
62
+ pip install oncosim[all] # Everything
63
+ ```
64
+
65
+ Development install:
66
+
67
+ ```bash
68
+ git clone https://github.com/HassDhia/oncosim.git
69
+ cd oncosim
70
+ pip install -e ".[all]"
71
+ ```
72
+
73
+ ## Quick Start
74
+
75
+ ```python
76
+ import gymnasium as gym
77
+ import oncosim
78
+
79
+ env = gym.make("oncosim/BeamSelection-v0")
80
+ obs, info = env.reset(seed=42)
81
+ for _ in range(100):
82
+ action = env.action_space.sample()
83
+ obs, reward, terminated, truncated, info = env.step(action)
84
+ if terminated or truncated:
85
+ obs, info = env.reset()
86
+ env.close()
87
+ ```
88
+
89
+ ## Environments
90
+
91
+ | Environment | Paradigm | Observation | Action | Key Challenge |
92
+ |---|---|---|---|---|
93
+ | `oncosim/BeamSelection-v0` | Beam angle optimization | 64x64 dose grid, tumor/OAR masks, selected beams | Discrete(36): angle index at 10-degree steps | Maximize tumor coverage while sparing organs at risk |
94
+ | `oncosim/DoseFractionation-v0` | Fractionation scheduling | Fraction number, tumor volume, cumulative dose, TCP, NTCP | Box(0.5, 4.0): dose per fraction in Gy | Balance tumor control probability against normal tissue toxicity |
95
+ | `oncosim/AdaptiveRT-v0` | Adaptive replanning | Treatment progress, plan quality, tumor response, dose deviation | MultiDiscrete([2, 5]): replan decision + dose adjustment | Decide when to replan based on anatomical changes during treatment |
96
+
97
+ All environments support configurable difficulty tiers and deterministic seeding for reproducibility.
98
+
99
+ ## Architecture
100
+
101
+ ```
102
+ oncosim/
103
+ physics/ # Dose calculation, tissue models, beam geometry
104
+ dose_calc.py # Pencil beam analytical model
105
+ tissue_models.py # LQ surviving fraction, TCP, NTCP, BED
106
+ beam_geometry.py # 2D beam profiles, structure masks, DVH
107
+ envs/ # Gymnasium environments
108
+ beam_selection.py
109
+ dose_fractionation.py
110
+ adaptive_rt.py
111
+ wrappers.py # FlattenObs, NormalizeReward
112
+ agents/ # Baseline agents
113
+ random_agent.py
114
+ heuristic_agent.py
115
+ ppo.py # Stable-Baselines3 PPO wrapper
116
+ training/ # Training and evaluation pipeline
117
+ benchmarks/ # Configurable benchmark suite (5 difficulty tiers)
118
+ ```
119
+
120
+ ## Benchmarks
121
+
122
+ Run the benchmark suite:
123
+
124
+ ```bash
125
+ oncosim-benchmark # All tiers
126
+ oncosim-benchmark --tiers trivial easy # Specific tiers
127
+ ```
128
+
129
+ ## Training
130
+
131
+ Train PPO agents on all environments:
132
+
133
+ ```bash
134
+ pip install oncosim[train]
135
+ oncosim-train # Default: 90k timesteps each
136
+ oncosim-train --timesteps 500000 # Custom timesteps
137
+ ```
138
+
139
+ ## Paper
140
+
141
+ The accompanying paper is available at:
142
+ - [PDF (GitHub)](https://github.com/HassDhia/oncosim/blob/main/paper/oncosim.pdf)
143
+
144
+ ## Citation
145
+
146
+ If you use oncosim in your research, please cite:
147
+
148
+ ```bibtex
149
+ @software{dhia2026oncosim,
150
+ author = {Dhia, Hass},
151
+ title = {OncoSim: Gymnasium Environments for Reinforcement Learning in Radiation Therapy Treatment Planning},
152
+ year = {2026},
153
+ publisher = {Smart Technology Investments Research Institute},
154
+ url = {https://github.com/HassDhia/oncosim}
155
+ }
156
+ ```
157
+
158
+ ## License
159
+
160
+ MIT License. See [LICENSE](LICENSE) for details.
161
+
162
+ ## Contact
163
+
164
+ Hass Dhia -- Smart Technology Investments Research Institute
165
+ - Email: partners@smarttechinvest.com
166
+ - Web: [smarttechinvest.com/research](https://smarttechinvest.com/research)
@@ -0,0 +1,125 @@
1
+ # oncosim
2
+
3
+ **Gymnasium environments for reinforcement learning in radiation therapy treatment planning.**
4
+
5
+ ![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7
+ ![Tests](https://img.shields.io/badge/tests-100%2B%20passing-brightgreen.svg)
8
+ [![PyPI version](https://img.shields.io/pypi/v/oncosim.svg)](https://pypi.org/project/oncosim/)
9
+
10
+ ---
11
+
12
+ OncoSim provides three Gymnasium-compatible reinforcement learning environments for radiation therapy treatment planning research. Each environment models a distinct clinical decision problem with physically grounded dynamics based on the linear-quadratic model of cell kill, Poisson TCP, and Lyman-Kutcher-Burman NTCP.
13
+
14
+ The package includes analytical dose calculation, radiobiological tissue models, configurable difficulty tiers, baseline agents (random, heuristic, PPO), and a benchmark suite for reproducible evaluation.
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pip install oncosim # Core (numpy, scipy, gymnasium)
20
+ pip install oncosim[train] # + SB3, PyTorch for RL training
21
+ pip install oncosim[all] # Everything
22
+ ```
23
+
24
+ Development install:
25
+
26
+ ```bash
27
+ git clone https://github.com/HassDhia/oncosim.git
28
+ cd oncosim
29
+ pip install -e ".[all]"
30
+ ```
31
+
32
+ ## Quick Start
33
+
34
+ ```python
35
+ import gymnasium as gym
36
+ import oncosim
37
+
38
+ env = gym.make("oncosim/BeamSelection-v0")
39
+ obs, info = env.reset(seed=42)
40
+ for _ in range(100):
41
+ action = env.action_space.sample()
42
+ obs, reward, terminated, truncated, info = env.step(action)
43
+ if terminated or truncated:
44
+ obs, info = env.reset()
45
+ env.close()
46
+ ```
47
+
48
+ ## Environments
49
+
50
+ | Environment | Paradigm | Observation | Action | Key Challenge |
51
+ |---|---|---|---|---|
52
+ | `oncosim/BeamSelection-v0` | Beam angle optimization | 64x64 dose grid, tumor/OAR masks, selected beams | Discrete(36): angle index at 10-degree steps | Maximize tumor coverage while sparing organs at risk |
53
+ | `oncosim/DoseFractionation-v0` | Fractionation scheduling | Fraction number, tumor volume, cumulative dose, TCP, NTCP | Box(0.5, 4.0): dose per fraction in Gy | Balance tumor control probability against normal tissue toxicity |
54
+ | `oncosim/AdaptiveRT-v0` | Adaptive replanning | Treatment progress, plan quality, tumor response, dose deviation | MultiDiscrete([2, 5]): replan decision + dose adjustment | Decide when to replan based on anatomical changes during treatment |
55
+
56
+ All environments support configurable difficulty tiers and deterministic seeding for reproducibility.
57
+
58
+ ## Architecture
59
+
60
+ ```
61
+ oncosim/
62
+ physics/ # Dose calculation, tissue models, beam geometry
63
+ dose_calc.py # Pencil beam analytical model
64
+ tissue_models.py # LQ surviving fraction, TCP, NTCP, BED
65
+ beam_geometry.py # 2D beam profiles, structure masks, DVH
66
+ envs/ # Gymnasium environments
67
+ beam_selection.py
68
+ dose_fractionation.py
69
+ adaptive_rt.py
70
+ wrappers.py # FlattenObs, NormalizeReward
71
+ agents/ # Baseline agents
72
+ random_agent.py
73
+ heuristic_agent.py
74
+ ppo.py # Stable-Baselines3 PPO wrapper
75
+ training/ # Training and evaluation pipeline
76
+ benchmarks/ # Configurable benchmark suite (5 difficulty tiers)
77
+ ```
78
+
79
+ ## Benchmarks
80
+
81
+ Run the benchmark suite:
82
+
83
+ ```bash
84
+ oncosim-benchmark # All tiers
85
+ oncosim-benchmark --tiers trivial easy # Specific tiers
86
+ ```
87
+
88
+ ## Training
89
+
90
+ Train PPO agents on all environments:
91
+
92
+ ```bash
93
+ pip install oncosim[train]
94
+ oncosim-train # Default: 90k timesteps each
95
+ oncosim-train --timesteps 500000 # Custom timesteps
96
+ ```
97
+
98
+ ## Paper
99
+
100
+ The accompanying paper is available at:
101
+ - [PDF (GitHub)](https://github.com/HassDhia/oncosim/blob/main/paper/oncosim.pdf)
102
+
103
+ ## Citation
104
+
105
+ If you use oncosim in your research, please cite:
106
+
107
+ ```bibtex
108
+ @software{dhia2026oncosim,
109
+ author = {Dhia, Hass},
110
+ title = {OncoSim: Gymnasium Environments for Reinforcement Learning in Radiation Therapy Treatment Planning},
111
+ year = {2026},
112
+ publisher = {Smart Technology Investments Research Institute},
113
+ url = {https://github.com/HassDhia/oncosim}
114
+ }
115
+ ```
116
+
117
+ ## License
118
+
119
+ MIT License. See [LICENSE](LICENSE) for details.
120
+
121
+ ## Contact
122
+
123
+ Hass Dhia -- Smart Technology Investments Research Institute
124
+ - Email: partners@smarttechinvest.com
125
+ - Web: [smarttechinvest.com/research](https://smarttechinvest.com/research)
@@ -0,0 +1,64 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "oncosim"
7
+ version = "0.1.0"
8
+ description = "Gymnasium-compatible RL environments for radiation therapy treatment planning"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "Hass Dhia", email = "partners@smarttechinvest.com" },
14
+ ]
15
+ classifiers = [
16
+ "Development Status :: 3 - Alpha",
17
+ "Intended Audience :: Science/Research",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
24
+ "Topic :: Scientific/Engineering :: Medical Science Apps.",
25
+ ]
26
+ dependencies = [
27
+ "numpy>=1.24",
28
+ "scipy>=1.11",
29
+ "gymnasium>=0.29",
30
+ ]
31
+
32
+ [project.optional-dependencies]
33
+ dev = ["pytest>=7.0", "pytest-cov", "ruff", "mypy"]
34
+ train = ["stable-baselines3>=2.0", "torch>=2.0", "matplotlib>=3.7"]
35
+ all = ["oncosim[dev,train]"]
36
+
37
+ [project.urls]
38
+ Repository = "https://github.com/HassDhia/oncosim"
39
+ Documentation = "https://github.com/HassDhia/oncosim#readme"
40
+
41
+ [project.scripts]
42
+ oncosim-train = "oncosim.agents.ppo:main"
43
+ oncosim-benchmark = "oncosim.benchmarks.runner:main"
44
+
45
+ [tool.hatch.build.targets.wheel]
46
+ packages = ["src/oncosim"]
47
+
48
+ [tool.hatch.build.targets.sdist]
49
+ exclude = [
50
+ "results/",
51
+ "paper/",
52
+ ".venv/",
53
+ ".claude/",
54
+ "*.pt",
55
+ "*.zip",
56
+ "checkpoints/",
57
+ ]
58
+
59
+ [tool.pytest.ini_options]
60
+ testpaths = ["tests"]
61
+
62
+ [tool.ruff]
63
+ target-version = "py310"
64
+ line-length = 99
@@ -0,0 +1,7 @@
1
+ """OncoSim: Gymnasium-compatible RL environments for radiation therapy treatment planning."""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ from oncosim.envs import register_envs
6
+
7
+ register_envs()
@@ -0,0 +1,9 @@
1
+ """Baseline agents for OncoSim environments."""
2
+
3
+ from oncosim.agents.random_agent import RandomAgent, evaluate_random
4
+ from oncosim.agents.heuristic_agent import (
5
+ BeamSelectionHeuristic,
6
+ DoseFractionationHeuristic,
7
+ AdaptiveRTHeuristic,
8
+ evaluate_heuristic,
9
+ )
@@ -0,0 +1,81 @@
1
+ """Clinical heuristic baseline agents for OncoSim environments."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import gymnasium as gym
6
+ import numpy as np
7
+
8
+
9
class BeamSelectionHeuristic:
    """Baseline agent that selects equispaced beam angles (standard clinical approach).

    The full circle is split into ``num_beams`` equal arcs and every ideal
    angle is snapped to the nearest index on a 36-step grid (10 degrees per
    index). Successive calls to :meth:`act` walk through that precomputed
    schedule; once it is exhausted the agent keeps returning index 0.
    """

    # Discrete angle grid: 36 indices, one every 10 degrees.
    _NUM_ANGLE_BINS = 36
    _DEGREES_PER_BIN = 10.0

    def __init__(self, env: gym.Env, num_beams: int = 7):
        """Precompute the schedule of angle indices.

        Args:
            env: Environment the agent will act in. Currently unused by this
                heuristic.
            num_beams: Number of equispaced beams to schedule. Must be >= 1.

        Raises:
            ValueError: If ``num_beams`` is smaller than 1. Without this check
                zero fails with an unrelated ``ZeroDivisionError`` and negative
                values silently produce an empty schedule.
        """
        if num_beams < 1:
            raise ValueError(f"num_beams must be >= 1, got {num_beams}")

        self.num_beams = num_beams
        self._step = 0
        # Equispaced angles: e.g., for 7 beams -> 0, 51, 103, 154, 206, 257, 309 deg,
        # mapped to the nearest discrete index. Note that round() rounds exact
        # halves to the even neighbour (4.5 -> 4), and the modulo wraps an
        # index of 36 (i.e. 360 deg) back to 0.
        spacing = 360.0 / num_beams
        self._angles = [
            int(round((i * spacing) / self._DEGREES_PER_BIN)) % self._NUM_ANGLE_BINS
            for i in range(num_beams)
        ]

    def act(self, obs: dict | np.ndarray) -> int:
        """Return the next scheduled angle index, or 0 once the schedule is used up.

        Args:
            obs: Current observation; ignored, the schedule is fixed up front.
        """
        if self._step < len(self._angles):
            action = self._angles[self._step]
        else:
            action = 0
        self._step += 1
        return action

    def reset(self) -> None:
        """Rewind to the first scheduled angle (call at the start of each episode)."""
        self._step = 0
30
+
31
+
32
class DoseFractionationHeuristic:
    """Conventional-fractionation baseline: prescribes 2 Gy at every fraction."""

    def reset(self) -> None:
        """Do nothing; the policy keeps no per-episode state."""
        return None

    def act(self, obs: dict | np.ndarray) -> np.ndarray:
        """Return a one-element float32 action holding 2.0, regardless of ``obs``."""
        dose_action = np.full(1, 2.0, dtype=np.float32)
        return dose_action
40
+
41
+
42
class AdaptiveRTHeuristic:
    """Conservative baseline that never replans and keeps the dose at 1.0x."""

    def reset(self) -> None:
        """Do nothing; the policy keeps no per-episode state."""
        return None

    def act(self, obs: dict | np.ndarray) -> np.ndarray:
        """Return the fixed int64 action ``[0, 2]``, regardless of ``obs``."""
        # First entry 0 = no replan; second entry 2 = dose_factor 1.0.
        replan_flag, dose_index = 0, 2
        return np.asarray((replan_flag, dose_index), dtype=np.int64)
50
+
51
+
52
def evaluate_heuristic(
    env: gym.Env,
    agent: BeamSelectionHeuristic | DoseFractionationHeuristic | AdaptiveRTHeuristic,
    n_episodes: int = 100,
    seed: int = 42,
) -> dict[str, float]:
    """Evaluate a heuristic agent over a number of seeded episodes.

    Episode ``ep`` resets the environment with ``seed + ep`` and resets the
    agent, then steps until the environment reports termination or truncation.

    Args:
        env: Environment to roll out in. It is reset for every episode and is
            not closed by this function.
        agent: Object exposing ``reset()`` and ``act(obs)``.
        n_episodes: Number of episodes to run. Must be >= 1.
        seed: Base seed; episode ``ep`` uses ``seed + ep``.

    Returns:
        Dict with mean_reward, std_reward, min_reward, max_reward computed over
        the per-episode returns.

    Raises:
        ValueError: If ``n_episodes`` is smaller than 1. Checked up front so
            the failure is explicit instead of surfacing as a NaN warning and
            an empty-array reduction error after the loop.
    """
    if n_episodes < 1:
        raise ValueError(f"n_episodes must be >= 1, got {n_episodes}")

    rewards: list[float] = []
    for ep in range(n_episodes):
        obs, _ = env.reset(seed=seed + ep)
        agent.reset()
        total_reward = 0.0
        done = False
        while not done:
            action = agent.act(obs)
            obs, reward, terminated, truncated, _ = env.step(action)
            # Coerce to a Python float so NumPy scalar rewards cannot change
            # the accumulator's type or precision.
            total_reward += float(reward)
            done = terminated or truncated
        rewards.append(total_reward)

    return {
        "mean_reward": float(np.mean(rewards)),
        "std_reward": float(np.std(rewards)),
        "min_reward": float(np.min(rewards)),
        "max_reward": float(np.max(rewards)),
    }
@@ -0,0 +1,172 @@
1
+ """PPO training wrapper using Stable-Baselines3."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import sys
8
+
9
+ import gymnasium as gym
10
+ import numpy as np
11
+
12
+
13
def train_ppo(
    env_id: str,
    total_timesteps: int = 90000,
    save_dir: str = "checkpoints",
    seed: int = 42,
    verbose: int = 0,
) -> dict:
    """Train a PPO agent on a given environment and save the model.

    The environment is created with ``gym.make`` and wrapped in
    ``FlattenObsWrapper`` before being handed to Stable-Baselines3. Training
    runs on CPU with fixed hyperparameters.

    Args:
        env_id: Gymnasium environment ID.
        total_timesteps: Total training timesteps.
        save_dir: Directory to save trained model (created if missing).
        seed: Random seed.
        verbose: Verbosity level.

    Returns:
        Dict with ``env_id``, ``total_timesteps``, the per-episode
        ``episode_rewards`` and ``episode_lengths`` seen during training,
        ``mean_reward`` / ``std_reward`` over the last 50 completed episodes
        (0.0 when no episode completed) and ``model_path``.

    Raises:
        SystemExit: With status 1 if stable-baselines3 cannot be imported.
    """
    try:
        from stable_baselines3 import PPO
        from stable_baselines3.common.callbacks import BaseCallback
    except ImportError:
        print("stable-baselines3 required. Install with: pip install oncosim[train]")
        sys.exit(1)

    from oncosim.envs.wrappers import FlattenObsWrapper

    class RewardCallback(BaseCallback):
        """Accumulate per-episode return and length for environment index 0."""

        def __init__(self):
            super().__init__()
            self.episode_rewards: list[float] = []
            self.episode_lengths: list[int] = []
            self._current_reward = 0.0
            self._current_length = 0

        def _on_step(self) -> bool:
            # The rollout rewards are NumPy scalars. Convert to a Python float
            # so the running total is not kept as a NumPy float32 (which is
            # lower precision and not JSON serializable).
            self._current_reward += float(self.locals["rewards"][0])
            self._current_length += 1
            if self.locals["dones"][0]:
                self.episode_rewards.append(self._current_reward)
                self.episode_lengths.append(self._current_length)
                self._current_reward = 0.0
                self._current_length = 0
            return True  # never request early stopping

    env = FlattenObsWrapper(gym.make(env_id))
    callback = RewardCallback()

    # Close the environment even if training or saving fails.
    try:
        model = PPO(
            "MlpPolicy",
            env,
            seed=seed,
            verbose=verbose,
            learning_rate=3e-4,
            n_steps=2048,
            batch_size=64,
            n_epochs=10,
            gamma=0.99,
            device="cpu",
        )

        model.learn(total_timesteps=total_timesteps, callback=callback)

        os.makedirs(save_dir, exist_ok=True)
        # Environment IDs contain "/", which must not become a path separator.
        model_path = os.path.join(save_dir, env_id.replace("/", "_"))
        model.save(model_path)
    finally:
        env.close()

    recent_rewards = callback.episode_rewards[-50:]

    return {
        "env_id": env_id,
        "total_timesteps": total_timesteps,
        "episode_rewards": callback.episode_rewards,
        "episode_lengths": callback.episode_lengths,
        "mean_reward": float(np.mean(recent_rewards)) if recent_rewards else 0.0,
        "std_reward": float(np.std(recent_rewards)) if recent_rewards else 0.0,
        "model_path": model_path,
    }
94
+
95
+
96
def evaluate_ppo(
    env_id: str,
    model_path: str,
    n_episodes: int = 100,
    seed: int = 42,
) -> dict[str, float]:
    """Evaluate a trained PPO model with deterministic actions.

    The environment is created with ``gym.make`` and wrapped in
    ``FlattenObsWrapper``. Episode ``ep`` is reset with ``seed + ep`` and run
    until it terminates or is truncated.

    Args:
        env_id: Gymnasium environment ID.
        model_path: Path of the saved model, passed to ``PPO.load``.
        n_episodes: Number of evaluation episodes. Must be >= 1.
        seed: Base seed; episode ``ep`` uses ``seed + ep``.

    Returns:
        Dict with mean_reward, std_reward, min_reward, max_reward computed over
        the per-episode returns.

    Raises:
        ValueError: If ``n_episodes`` is smaller than 1. Checked before the
            environment or model is loaded, so the failure is immediate and
            explicit rather than an empty-array reduction error at the end.
        SystemExit: With status 1 if stable-baselines3 cannot be imported.
    """
    if n_episodes < 1:
        raise ValueError(f"n_episodes must be >= 1, got {n_episodes}")

    try:
        from stable_baselines3 import PPO
    except ImportError:
        print("stable-baselines3 required.")
        sys.exit(1)

    from oncosim.envs.wrappers import FlattenObsWrapper

    env = gym.make(env_id)
    env = FlattenObsWrapper(env)

    rewards: list[float] = []
    # Close the environment even if loading the model or a rollout fails.
    try:
        model = PPO.load(model_path)

        for ep in range(n_episodes):
            obs, _ = env.reset(seed=seed + ep)
            total_reward = 0.0
            done = False
            while not done:
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, terminated, truncated, _ = env.step(action)
                # Keep the accumulator a Python float whatever scalar type
                # the environment returns.
                total_reward += float(reward)
                done = terminated or truncated
            rewards.append(total_reward)
    finally:
        env.close()

    return {
        "mean_reward": float(np.mean(rewards)),
        "std_reward": float(np.std(rewards)),
        "min_reward": float(np.min(rewards)),
        "max_reward": float(np.max(rewards)),
    }
135
+
136
+
137
def main(argv: list[str] | None = None) -> None:
    """CLI entry point for training.

    Trains PPO on each of the three OncoSim environments in turn and writes a
    summary to ``results/training_results.json``.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default, used by
            the console script) makes argparse read ``sys.argv[1:]``.

    Command-line options:
        --timesteps N   Training timesteps per environment (default: 90000).
    """
    import argparse

    import oncosim  # noqa: F401 - registers envs

    parser = argparse.ArgumentParser(
        prog="oncosim-train",
        description="Train PPO agents on all OncoSim environments.",
    )
    parser.add_argument(
        "--timesteps",
        type=int,
        default=90000,
        help="training timesteps per environment (default: %(default)s)",
    )
    args = parser.parse_args(argv)
    if args.timesteps < 1:
        parser.error("--timesteps must be a positive integer")

    env_ids = [
        "oncosim/BeamSelection-v0",
        "oncosim/DoseFractionation-v0",
        "oncosim/AdaptiveRT-v0",
    ]

    results = {}
    for env_id in env_ids:
        print(f"Training PPO on {env_id}...")
        result = train_ppo(env_id, total_timesteps=args.timesteps)
        results[env_id] = result
        print(f"  Mean reward (last 50 eps): {result['mean_reward']:.3f}")

    # Build the payload before opening the file so a conversion error cannot
    # leave a truncated JSON file behind. Rewards are coerced to Python floats
    # because NumPy scalar types are not JSON serializable.
    serializable = {
        env_id: {
            "mean_reward": float(metrics["mean_reward"]),
            "std_reward": float(metrics["std_reward"]),
            "total_timesteps": metrics["total_timesteps"],
            "num_episodes": len(metrics["episode_rewards"]),
            "episode_rewards": [float(r) for r in metrics["episode_rewards"]],
        }
        for env_id, metrics in results.items()
    }

    os.makedirs("results", exist_ok=True)
    with open("results/training_results.json", "w", encoding="utf-8") as f:
        json.dump(serializable, f, indent=2)

    print("Results saved to results/training_results.json")


if __name__ == "__main__":
    main()