rlwatch 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rlwatch-0.3.0/PKG-INFO +293 -0
- rlwatch-0.3.0/README.md +238 -0
- rlwatch-0.3.0/pyproject.toml +108 -0
- rlwatch-0.3.0/setup.cfg +4 -0
- rlwatch-0.3.0/src/rlwatch/__init__.py +8 -0
- rlwatch-0.3.0/src/rlwatch/alerts.py +483 -0
- rlwatch-0.3.0/src/rlwatch/cli.py +308 -0
- rlwatch-0.3.0/src/rlwatch/config.py +368 -0
- rlwatch-0.3.0/src/rlwatch/core.py +380 -0
- rlwatch-0.3.0/src/rlwatch/dashboard.py +318 -0
- rlwatch-0.3.0/src/rlwatch/detectors.py +693 -0
- rlwatch-0.3.0/src/rlwatch/py.typed +0 -0
- rlwatch-0.3.0/src/rlwatch/storage.py +257 -0
- rlwatch-0.3.0/src/rlwatch.egg-info/PKG-INFO +293 -0
- rlwatch-0.3.0/src/rlwatch.egg-info/SOURCES.txt +17 -0
- rlwatch-0.3.0/src/rlwatch.egg-info/dependency_links.txt +1 -0
- rlwatch-0.3.0/src/rlwatch.egg-info/entry_points.txt +2 -0
- rlwatch-0.3.0/src/rlwatch.egg-info/requires.txt +35 -0
- rlwatch-0.3.0/src/rlwatch.egg-info/top_level.txt +1 -0
rlwatch-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rlwatch
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Real-time GRPO/PPO training instability detection for ML teams
|
|
5
|
+
Author-email: Varun Saraf <varunsaraf1724@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/varun1724/rlwatch
|
|
8
|
+
Project-URL: Documentation, https://varun1724.github.io/rlwatch/
|
|
9
|
+
Project-URL: Issues, https://github.com/varun1724/rlwatch/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/varun1724/rlwatch/blob/main/CHANGELOG.md
|
|
11
|
+
Keywords: reinforcement-learning,GRPO,PPO,training,monitoring,debugging
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
17
|
+
Classifier: Operating System :: MacOS
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
Requires-Dist: pyyaml>=6.0
|
|
26
|
+
Requires-Dist: scipy>=1.10.0
|
|
27
|
+
Requires-Dist: rich>=13.0.0
|
|
28
|
+
Requires-Dist: slack-sdk>=3.20.0
|
|
29
|
+
Requires-Dist: click>=8.1.0
|
|
30
|
+
Requires-Dist: numpy>=1.24
|
|
31
|
+
Provides-Extra: dashboard
|
|
32
|
+
Requires-Dist: streamlit>=1.30.0; extra == "dashboard"
|
|
33
|
+
Requires-Dist: plotly>=5.18.0; extra == "dashboard"
|
|
34
|
+
Requires-Dist: pandas>=2.0.0; extra == "dashboard"
|
|
35
|
+
Provides-Extra: torch
|
|
36
|
+
Requires-Dist: torch>=2.0.0; extra == "torch"
|
|
37
|
+
Provides-Extra: trl
|
|
38
|
+
Requires-Dist: trl>=0.7.0; extra == "trl"
|
|
39
|
+
Requires-Dist: transformers>=4.35.0; extra == "trl"
|
|
40
|
+
Provides-Extra: tutorial
|
|
41
|
+
Requires-Dist: trl>=0.11.0; extra == "tutorial"
|
|
42
|
+
Requires-Dist: transformers>=4.45.0; extra == "tutorial"
|
|
43
|
+
Requires-Dist: torch>=2.1.0; extra == "tutorial"
|
|
44
|
+
Requires-Dist: datasets>=2.14.0; extra == "tutorial"
|
|
45
|
+
Provides-Extra: dev
|
|
46
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
47
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
48
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
49
|
+
Requires-Dist: pytest-benchmark>=4.0.0; extra == "dev"
|
|
50
|
+
Requires-Dist: hypothesis>=6.90.0; extra == "dev"
|
|
51
|
+
Requires-Dist: responses>=0.24.0; extra == "dev"
|
|
52
|
+
Requires-Dist: streamlit>=1.30.0; extra == "dev"
|
|
53
|
+
Requires-Dist: plotly>=5.18.0; extra == "dev"
|
|
54
|
+
Requires-Dist: pandas>=2.0.0; extra == "dev"
|
|
55
|
+
|
|
56
|
+
# rlwatch
|
|
57
|
+
|
|
58
|
+
[](https://pypi.org/project/rlwatch/)
|
|
59
|
+
[](https://pypi.org/project/rlwatch/)
|
|
60
|
+
[](https://github.com/varun1724/rlwatch/actions/workflows/test.yml)
|
|
61
|
+
[](https://github.com/varun1724/rlwatch/blob/main/LICENSE)
|
|
62
|
+
[](https://varun1724.github.io/rlwatch/)
|
|
63
|
+
|
|
64
|
+
**Catch broken RL training runs before they waste your GPU budget.**
|
|
65
|
+
|
|
66
|
+
If you train language models with GRPO or PPO, you already know the pain: you kick off a run on 8 H100s, go to sleep, and wake up to find the policy collapsed into repeating the same token 12 hours ago. Nobody saw it. Nothing paged. The run just quietly rotted.
|
|
67
|
+
|
|
68
|
+
rlwatch is a tiny Python library that watches your training metrics in real time and pings you on Slack, Discord, email, or any HTTP endpoint the moment things start going wrong — *before* the run is ruined.
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## The 30-second pitch
|
|
73
|
+
|
|
74
|
+
1. `pip install rlwatch`
|
|
75
|
+
2. Add two lines to your training script:
|
|
76
|
+
```python
|
|
77
|
+
import rlwatch
|
|
78
|
+
rlwatch.attach()
|
|
79
|
+
```
|
|
80
|
+
3. Keep training. If something breaks, you get a message like:
|
|
81
|
+
|
|
82
|
+
> 🚨 **rlwatch CRITICAL: entropy_collapse**
|
|
83
|
+
> Run: `grpo_v3_exp12` | Step: 340
|
|
84
|
+
> Policy entropy dropped from 2.8 to 0.4 over 50 steps (threshold: 1.0).
|
|
85
|
+
> **Recommended action:** reduce learning rate by 5× or increase KL penalty.
|
|
86
|
+
|
|
87
|
+
You open the dashboard, confirm the curve, kill the run, fix the config, and you've just saved ~30 GPU-hours.
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## What it watches for
|
|
92
|
+
|
|
93
|
+
These are the most common ways GRPO/PPO runs go sideways. rlwatch runs a dedicated detector for each one on every training step.
|
|
94
|
+
|
|
95
|
+
| Detector | In plain English | Default trip-wire |
|
|
96
|
+
|---|---|---|
|
|
97
|
+
| **Entropy collapse** | The model stopped exploring — it's now just repeating itself. | Entropy < 1.0 for 50 steps in a row |
|
|
98
|
+
| **KL divergence explosion** | The policy is running away from the reference model (usually the prelude to reward hacking). | KL > 3σ above the rolling mean |
|
|
99
|
+
| **Reward hacking proxy** | Rewards suddenly got weird — either way more variance than before, or split into two clusters (some samples hacked, some didn't). | Variance > 3× baseline, **or** Hartigan dip test p < 0.05 |
|
|
100
|
+
| **Advantage variance spike** | The value function estimates just became unstable. | Advantage std > 3× rolling baseline |
|
|
101
|
+
| **Loss NaN / Inf** | The optimizer has blown up; any further updates corrupt the policy. | Loss is non-finite (one step is enough) |
|
|
102
|
+
| **Gradient norm spike** | Gradients exploded — usually the precursor to a loss NaN. | Grad norm > 3σ above frozen baseline |
|
|
103
|
+
|
|
104
|
+
Every detector has two severity levels (**warning** and **critical**), a configurable warmup period so it doesn't fire at step 3, and a cooldown so you don't get spammed.
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
## Quick start
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
pip install rlwatch # core library
|
|
112
|
+
pip install "rlwatch[dashboard]" # add the Streamlit dashboard
|
|
113
|
+
pip install "rlwatch[trl]" # add HuggingFace TRL deep integration
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Option A: two-line attach (easiest)
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
import rlwatch
|
|
120
|
+
rlwatch.attach() # works for any framework — see below for the recommended TRL path
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
For HuggingFace TRL, the recommended path is to pass the trainer in directly:
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
import rlwatch
|
|
127
|
+
from trl import GRPOTrainer
|
|
128
|
+
|
|
129
|
+
trainer = GRPOTrainer(...)
|
|
130
|
+
monitor = rlwatch.attach(trainer=trainer)
|
|
131
|
+
trainer.train()
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
For veRL, OpenRLHF, or any custom loop, use Option B.
|
|
135
|
+
|
|
136
|
+
### Option B: manual metric logging
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
import rlwatch
|
|
140
|
+
|
|
141
|
+
monitor = rlwatch.attach(framework="manual", run_id="grpo_v3_exp12")
|
|
142
|
+
|
|
143
|
+
for step in range(num_steps):
|
|
144
|
+
# ... your training step ...
|
|
145
|
+
|
|
146
|
+
monitor.log_step(
|
|
147
|
+
step,
|
|
148
|
+
entropy=policy_entropy,
|
|
149
|
+
kl_divergence=kl,
|
|
150
|
+
reward_mean=rewards.mean(),
|
|
151
|
+
reward_std=rewards.std(),
|
|
152
|
+
advantage_std=advantages.std(),
|
|
153
|
+
loss=loss.item(),
|
|
154
|
+
grad_norm=grad_norm.item(),
|
|
155
|
+
)
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### See it fire
|
|
159
|
+
|
|
160
|
+
The repo ships with a simulated GRPO run that deliberately collapses entropy:
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
python examples/simulate_grpo_run.py # run the simulation
|
|
164
|
+
rlwatch diagnose # get a retrospective report
|
|
165
|
+
rlwatch dashboard # open the live dashboard at localhost:8501
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
## Setting up alerts
|
|
171
|
+
|
|
172
|
+
### Slack
|
|
173
|
+
```bash
|
|
174
|
+
export RLWATCH_SLACK_WEBHOOK_URL="https://hooks.slack.com/services/..."
|
|
175
|
+
```
|
|
176
|
+
Or put it in `rlwatch.yaml`:
|
|
177
|
+
```yaml
|
|
178
|
+
alerts:
|
|
179
|
+
slack:
|
|
180
|
+
webhook_url: "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### Email
|
|
184
|
+
```yaml
|
|
185
|
+
alerts:
|
|
186
|
+
email:
|
|
187
|
+
smtp_host: smtp.gmail.com
|
|
188
|
+
to_addrs:
|
|
189
|
+
- you@yourcompany.com
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Discord
|
|
193
|
+
```bash
|
|
194
|
+
export RLWATCH_DISCORD_WEBHOOK_URL="https://discord.com/api/webhooks/..."
|
|
195
|
+
```
|
|
196
|
+
Or in `rlwatch.yaml`:
|
|
197
|
+
```yaml
|
|
198
|
+
alerts:
|
|
199
|
+
discord:
|
|
200
|
+
webhook_url: "https://discord.com/api/webhooks/..."
|
|
201
|
+
mention_role_ids: ["123456789012345678"] # @-mentions on critical only
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Generic webhook
|
|
205
|
+
The universal escape hatch — POST a JSON body to any URL. Use this for PagerDuty's events API, an internal incident tracker, Mattermost, or anything else rlwatch doesn't have a dedicated channel for.
|
|
206
|
+
```yaml
|
|
207
|
+
alerts:
|
|
208
|
+
webhook:
|
|
209
|
+
url: "https://your-service.example.com/rlwatch"
|
|
210
|
+
headers:
|
|
211
|
+
Authorization: "Bearer your-token"
|
|
212
|
+
```
|
|
213
|
+
Custom JSON template? See [`docs/alerts/webhook.md`](https://varun1724.github.io/rlwatch/alerts/webhook/).
|
|
214
|
+
|
|
215
|
+
### Console
|
|
216
|
+
Always on. Rich-formatted panels show up in stderr regardless of other channels.
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## Configuration
|
|
221
|
+
|
|
222
|
+
Generate a starter config:
|
|
223
|
+
```bash
|
|
224
|
+
rlwatch init
|
|
225
|
+
```
|
|
226
|
+
This writes `rlwatch.yaml` with every threshold at its default. Tweak to taste.
|
|
227
|
+
|
|
228
|
+
Resolution order: **defaults → YAML file → environment variables → `attach()` kwargs**. Later values win.
|
|
229
|
+
|
|
230
|
+
---
|
|
231
|
+
|
|
232
|
+
## CLI reference
|
|
233
|
+
|
|
234
|
+
| Command | What it does |
|
|
235
|
+
|---|---|
|
|
236
|
+
| `rlwatch init` | Write a starter `rlwatch.yaml` |
|
|
237
|
+
| `rlwatch runs` | List every monitored run in the local SQLite store |
|
|
238
|
+
| `rlwatch diagnose [--run-id ID]` | Print a retrospective report on a completed run |
|
|
239
|
+
| `rlwatch dashboard` | Launch the Streamlit dashboard at `localhost:8501` |
|
|
240
|
+
|
|
241
|
+
---
|
|
242
|
+
|
|
243
|
+
## How it stores data
|
|
244
|
+
|
|
245
|
+
Everything lives in a single SQLite file at `./rlwatch_logs/metrics.db`. Three tables: `runs`, `metrics`, `alerts`. WAL mode is on so the training loop writes and the dashboard reads concurrently without locking. Copy that `.db` file and you've copied the entire history of every run.
|
|
246
|
+
|
|
247
|
+
---
|
|
248
|
+
|
|
249
|
+
## Supported frameworks
|
|
250
|
+
|
|
251
|
+
- **HuggingFace TRL** — pass `attach(trainer=trainer)` for direct callback registration. See the [end-to-end tutorial](https://varun1724.github.io/rlwatch/tutorials/trl-grpo-end-to-end/) for a real GPT-2 + GRPO example that runs on CPU in ~5 minutes.
|
|
252
|
+
- **veRL** — `framework="manual"` + `monitor.log_step()`. Deep integration on the roadmap.
|
|
253
|
+
- **OpenRLHF** — `framework="manual"` + `monitor.log_step()`. Deep integration on the roadmap.
|
|
254
|
+
- **Anything else** — same as above. Every metric in `log_step` is optional; pass whatever your framework exposes.
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## Docker
|
|
259
|
+
|
|
260
|
+
```bash
|
|
261
|
+
docker build -t rlwatch .
|
|
262
|
+
docker run -p 8501:8501 rlwatch
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
---
|
|
266
|
+
|
|
267
|
+
## Documentation
|
|
268
|
+
|
|
269
|
+
Full docs at **[varun1724.github.io/rlwatch](https://varun1724.github.io/rlwatch/)** — getting started, every detector explained in depth, alerts setup, configuration reference, the end-to-end TRL tutorial, and an FAQ.
|
|
270
|
+
|
|
271
|
+
## Project direction
|
|
272
|
+
|
|
273
|
+
rlwatch is heading toward a hosted, team-oriented product. The local-first open-source library will stay free and useful on its own. See [`ROADMAP.md`](ROADMAP.md) for the full plan.
|
|
274
|
+
|
|
275
|
+
## Contributing & testing
|
|
276
|
+
|
|
277
|
+
rlwatch is a monitoring library — if it has bugs, it costs someone a GPU
|
|
278
|
+
budget. The test harness is the most load-bearing part of the repo.
|
|
279
|
+
|
|
280
|
+
```bash
|
|
281
|
+
pip install -e ".[dev]"
|
|
282
|
+
pytest -v # all five tiers
|
|
283
|
+
pytest --cov=rlwatch --cov-fail-under=90 # coverage gate (must pass to merge)
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
The suite is organized into five tiers (unit / property / simulation /
|
|
287
|
+
integration / performance). See **[`TESTING.md`](TESTING.md)** for the
|
|
288
|
+
practical "how to run, write, and debug tests" guide and **[`CLAUDE.md`](CLAUDE.md)**
|
|
289
|
+
for the authoritative contract every PR has to meet.
|
|
290
|
+
|
|
291
|
+
## License
|
|
292
|
+
|
|
293
|
+
MIT
|
rlwatch-0.3.0/README.md
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# rlwatch
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/rlwatch/)
|
|
4
|
+
[](https://pypi.org/project/rlwatch/)
|
|
5
|
+
[](https://github.com/varun1724/rlwatch/actions/workflows/test.yml)
|
|
6
|
+
[](https://github.com/varun1724/rlwatch/blob/main/LICENSE)
|
|
7
|
+
[](https://varun1724.github.io/rlwatch/)
|
|
8
|
+
|
|
9
|
+
**Catch broken RL training runs before they waste your GPU budget.**
|
|
10
|
+
|
|
11
|
+
If you train language models with GRPO or PPO, you already know the pain: you kick off a run on 8 H100s, go to sleep, and wake up to find the policy collapsed into repeating the same token 12 hours ago. Nobody saw it. Nothing paged. The run just quietly rotted.
|
|
12
|
+
|
|
13
|
+
rlwatch is a tiny Python library that watches your training metrics in real time and pings you on Slack, Discord, email, or any HTTP endpoint the moment things start going wrong — *before* the run is ruined.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## The 30-second pitch
|
|
18
|
+
|
|
19
|
+
1. `pip install rlwatch`
|
|
20
|
+
2. Add two lines to your training script:
|
|
21
|
+
```python
|
|
22
|
+
import rlwatch
|
|
23
|
+
rlwatch.attach()
|
|
24
|
+
```
|
|
25
|
+
3. Keep training. If something breaks, you get a message like:
|
|
26
|
+
|
|
27
|
+
> 🚨 **rlwatch CRITICAL: entropy_collapse**
|
|
28
|
+
> Run: `grpo_v3_exp12` | Step: 340
|
|
29
|
+
> Policy entropy dropped from 2.8 to 0.4 over 50 steps (threshold: 1.0).
|
|
30
|
+
> **Recommended action:** reduce learning rate by 5× or increase KL penalty.
|
|
31
|
+
|
|
32
|
+
You open the dashboard, confirm the curve, kill the run, fix the config, and you've just saved ~30 GPU-hours.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## What it watches for
|
|
37
|
+
|
|
38
|
+
These are the most common ways GRPO/PPO runs go sideways. rlwatch runs a dedicated detector for each one on every training step.
|
|
39
|
+
|
|
40
|
+
| Detector | In plain English | Default trip-wire |
|
|
41
|
+
|---|---|---|
|
|
42
|
+
| **Entropy collapse** | The model stopped exploring — it's now just repeating itself. | Entropy < 1.0 for 50 steps in a row |
|
|
43
|
+
| **KL divergence explosion** | The policy is running away from the reference model (usually the prelude to reward hacking). | KL > 3σ above the rolling mean |
|
|
44
|
+
| **Reward hacking proxy** | Rewards suddenly got weird — either way more variance than before, or split into two clusters (some samples hacked, some didn't). | Variance > 3× baseline, **or** Hartigan dip test p < 0.05 |
|
|
45
|
+
| **Advantage variance spike** | The value function estimates just became unstable. | Advantage std > 3× rolling baseline |
|
|
46
|
+
| **Loss NaN / Inf** | The optimizer has blown up; any further updates corrupt the policy. | Loss is non-finite (one step is enough) |
|
|
47
|
+
| **Gradient norm spike** | Gradients exploded — usually the precursor to a loss NaN. | Grad norm > 3σ above frozen baseline |
|
|
48
|
+
|
|
49
|
+
Every detector has two severity levels (**warning** and **critical**), a configurable warmup period so it doesn't fire at step 3, and a cooldown so you don't get spammed.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Quick start
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install rlwatch # core library
|
|
57
|
+
pip install "rlwatch[dashboard]" # add the Streamlit dashboard
|
|
58
|
+
pip install "rlwatch[trl]" # add HuggingFace TRL deep integration
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Option A: two-line attach (easiest)
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
import rlwatch
|
|
65
|
+
rlwatch.attach() # works for any framework — see below for the recommended TRL path
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
For HuggingFace TRL, the recommended path is to pass the trainer in directly:
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import rlwatch
|
|
72
|
+
from trl import GRPOTrainer
|
|
73
|
+
|
|
74
|
+
trainer = GRPOTrainer(...)
|
|
75
|
+
monitor = rlwatch.attach(trainer=trainer)
|
|
76
|
+
trainer.train()
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
For veRL, OpenRLHF, or any custom loop, use Option B.
|
|
80
|
+
|
|
81
|
+
### Option B: manual metric logging
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
import rlwatch
|
|
85
|
+
|
|
86
|
+
monitor = rlwatch.attach(framework="manual", run_id="grpo_v3_exp12")
|
|
87
|
+
|
|
88
|
+
for step in range(num_steps):
|
|
89
|
+
# ... your training step ...
|
|
90
|
+
|
|
91
|
+
monitor.log_step(
|
|
92
|
+
step,
|
|
93
|
+
entropy=policy_entropy,
|
|
94
|
+
kl_divergence=kl,
|
|
95
|
+
reward_mean=rewards.mean(),
|
|
96
|
+
reward_std=rewards.std(),
|
|
97
|
+
advantage_std=advantages.std(),
|
|
98
|
+
loss=loss.item(),
|
|
99
|
+
grad_norm=grad_norm.item(),
|
|
100
|
+
)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### See it fire
|
|
104
|
+
|
|
105
|
+
The repo ships with a simulated GRPO run that deliberately collapses entropy:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
python examples/simulate_grpo_run.py # run the simulation
|
|
109
|
+
rlwatch diagnose # get a retrospective report
|
|
110
|
+
rlwatch dashboard # open the live dashboard at localhost:8501
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Setting up alerts
|
|
116
|
+
|
|
117
|
+
### Slack
|
|
118
|
+
```bash
|
|
119
|
+
export RLWATCH_SLACK_WEBHOOK_URL="https://hooks.slack.com/services/..."
|
|
120
|
+
```
|
|
121
|
+
Or put it in `rlwatch.yaml`:
|
|
122
|
+
```yaml
|
|
123
|
+
alerts:
|
|
124
|
+
slack:
|
|
125
|
+
webhook_url: "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Email
|
|
129
|
+
```yaml
|
|
130
|
+
alerts:
|
|
131
|
+
email:
|
|
132
|
+
smtp_host: smtp.gmail.com
|
|
133
|
+
to_addrs:
|
|
134
|
+
- you@yourcompany.com
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Discord
|
|
138
|
+
```bash
|
|
139
|
+
export RLWATCH_DISCORD_WEBHOOK_URL="https://discord.com/api/webhooks/..."
|
|
140
|
+
```
|
|
141
|
+
Or in `rlwatch.yaml`:
|
|
142
|
+
```yaml
|
|
143
|
+
alerts:
|
|
144
|
+
discord:
|
|
145
|
+
webhook_url: "https://discord.com/api/webhooks/..."
|
|
146
|
+
mention_role_ids: ["123456789012345678"] # @-mentions on critical only
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Generic webhook
|
|
150
|
+
The universal escape hatch — POST a JSON body to any URL. Use this for PagerDuty's events API, an internal incident tracker, Mattermost, or anything else rlwatch doesn't have a dedicated channel for.
|
|
151
|
+
```yaml
|
|
152
|
+
alerts:
|
|
153
|
+
webhook:
|
|
154
|
+
url: "https://your-service.example.com/rlwatch"
|
|
155
|
+
headers:
|
|
156
|
+
Authorization: "Bearer your-token"
|
|
157
|
+
```
|
|
158
|
+
Custom JSON template? See [`docs/alerts/webhook.md`](https://varun1724.github.io/rlwatch/alerts/webhook/).
|
|
159
|
+
|
|
160
|
+
### Console
|
|
161
|
+
Always on. Rich-formatted panels show up in stderr regardless of other channels.
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## Configuration
|
|
166
|
+
|
|
167
|
+
Generate a starter config:
|
|
168
|
+
```bash
|
|
169
|
+
rlwatch init
|
|
170
|
+
```
|
|
171
|
+
This writes `rlwatch.yaml` with every threshold at its default. Tweak to taste.
|
|
172
|
+
|
|
173
|
+
Resolution order: **defaults → YAML file → environment variables → `attach()` kwargs**. Later values win.
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
## CLI reference
|
|
178
|
+
|
|
179
|
+
| Command | What it does |
|
|
180
|
+
|---|---|
|
|
181
|
+
| `rlwatch init` | Write a starter `rlwatch.yaml` |
|
|
182
|
+
| `rlwatch runs` | List every monitored run in the local SQLite store |
|
|
183
|
+
| `rlwatch diagnose [--run-id ID]` | Print a retrospective report on a completed run |
|
|
184
|
+
| `rlwatch dashboard` | Launch the Streamlit dashboard at `localhost:8501` |
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## How it stores data
|
|
189
|
+
|
|
190
|
+
Everything lives in a single SQLite file at `./rlwatch_logs/metrics.db`. Three tables: `runs`, `metrics`, `alerts`. WAL mode is on so the training loop writes and the dashboard reads concurrently without locking. Copy that `.db` file and you've copied the entire history of every run.
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
## Supported frameworks
|
|
195
|
+
|
|
196
|
+
- **HuggingFace TRL** — pass `attach(trainer=trainer)` for direct callback registration. See the [end-to-end tutorial](https://varun1724.github.io/rlwatch/tutorials/trl-grpo-end-to-end/) for a real GPT-2 + GRPO example that runs on CPU in ~5 minutes.
|
|
197
|
+
- **veRL** — `framework="manual"` + `monitor.log_step()`. Deep integration on the roadmap.
|
|
198
|
+
- **OpenRLHF** — `framework="manual"` + `monitor.log_step()`. Deep integration on the roadmap.
|
|
199
|
+
- **Anything else** — same as above. Every metric in `log_step` is optional; pass whatever your framework exposes.
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
## Docker
|
|
204
|
+
|
|
205
|
+
```bash
|
|
206
|
+
docker build -t rlwatch .
|
|
207
|
+
docker run -p 8501:8501 rlwatch
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
## Documentation
|
|
213
|
+
|
|
214
|
+
Full docs at **[varun1724.github.io/rlwatch](https://varun1724.github.io/rlwatch/)** — getting started, every detector explained in depth, alerts setup, configuration reference, the end-to-end TRL tutorial, and an FAQ.
|
|
215
|
+
|
|
216
|
+
## Project direction
|
|
217
|
+
|
|
218
|
+
rlwatch is heading toward a hosted, team-oriented product. The local-first open-source library will stay free and useful on its own. See [`ROADMAP.md`](ROADMAP.md) for the full plan.
|
|
219
|
+
|
|
220
|
+
## Contributing & testing
|
|
221
|
+
|
|
222
|
+
rlwatch is a monitoring library — if it has bugs, it costs someone a GPU
|
|
223
|
+
budget. The test harness is the most load-bearing part of the repo.
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
pip install -e ".[dev]"
|
|
227
|
+
pytest -v # all five tiers
|
|
228
|
+
pytest --cov=rlwatch --cov-fail-under=90 # coverage gate (must pass to merge)
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
The suite is organized into five tiers (unit / property / simulation /
|
|
232
|
+
integration / performance). See **[`TESTING.md`](TESTING.md)** for the
|
|
233
|
+
practical "how to run, write, and debug tests" guide and **[`CLAUDE.md`](CLAUDE.md)**
|
|
234
|
+
for the authoritative contract every PR has to meet.
|
|
235
|
+
|
|
236
|
+
## License
|
|
237
|
+
|
|
238
|
+
MIT
|
|
rlwatch-0.3.0/pyproject.toml
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "rlwatch"
|
|
7
|
+
version = "0.3.0"
|
|
8
|
+
description = "Real-time GRPO/PPO training instability detection for ML teams"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "Varun Saraf", email = "varunsaraf1724@gmail.com"}
|
|
14
|
+
]
|
|
15
|
+
keywords = ["reinforcement-learning", "GRPO", "PPO", "training", "monitoring", "debugging"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Science/Research",
|
|
19
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
20
|
+
"License :: OSI Approved :: MIT License",
|
|
21
|
+
"Operating System :: POSIX :: Linux",
|
|
22
|
+
"Operating System :: MacOS",
|
|
23
|
+
"Programming Language :: Python :: 3",
|
|
24
|
+
"Programming Language :: Python :: 3.10",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"Programming Language :: Python :: 3.12",
|
|
27
|
+
"Typing :: Typed",
|
|
28
|
+
]
|
|
29
|
+
dependencies = [
|
|
30
|
+
"pyyaml>=6.0",
|
|
31
|
+
"scipy>=1.10.0",
|
|
32
|
+
"rich>=13.0.0",
|
|
33
|
+
"slack-sdk>=3.20.0",
|
|
34
|
+
"click>=8.1.0",
|
|
35
|
+
"numpy>=1.24",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[project.optional-dependencies]
|
|
39
|
+
# Streamlit dashboard. Not in core because the transitive deps are ~150MB
|
|
40
|
+
# and most users only need the alerting / log_step path.
|
|
41
|
+
dashboard = [
|
|
42
|
+
"streamlit>=1.30.0",
|
|
43
|
+
"plotly>=5.18.0",
|
|
44
|
+
"pandas>=2.0.0",
|
|
45
|
+
]
|
|
46
|
+
torch = ["torch>=2.0.0"]
|
|
47
|
+
trl = ["trl>=0.7.0", "transformers>=4.35.0"]
|
|
48
|
+
# The end-to-end tutorial pins exact versions known-working with
|
|
49
|
+
# examples/trl_grpo_tutorial.py. The monthly tutorial CI cron catches
|
|
50
|
+
# silent breakage on future TRL releases.
|
|
51
|
+
tutorial = [
|
|
52
|
+
"trl>=0.11.0",
|
|
53
|
+
"transformers>=4.45.0",
|
|
54
|
+
"torch>=2.1.0",
|
|
55
|
+
"datasets>=2.14.0",
|
|
56
|
+
]
|
|
57
|
+
dev = [
|
|
58
|
+
"pytest>=7.0",
|
|
59
|
+
"pytest-asyncio>=0.21.0",
|
|
60
|
+
"pytest-cov>=4.1.0",
|
|
61
|
+
"pytest-benchmark>=4.0.0",
|
|
62
|
+
"hypothesis>=6.90.0",
|
|
63
|
+
"responses>=0.24.0",
|
|
64
|
+
# dev includes the dashboard so the dashboard tests can run.
|
|
65
|
+
"streamlit>=1.30.0",
|
|
66
|
+
"plotly>=5.18.0",
|
|
67
|
+
"pandas>=2.0.0",
|
|
68
|
+
]
|
|
69
|
+
|
|
70
|
+
[project.scripts]
|
|
71
|
+
rlwatch = "rlwatch.cli:main"
|
|
72
|
+
|
|
73
|
+
[project.urls]
|
|
74
|
+
Homepage = "https://github.com/varun1724/rlwatch"
|
|
75
|
+
Documentation = "https://varun1724.github.io/rlwatch/"
|
|
76
|
+
Issues = "https://github.com/varun1724/rlwatch/issues"
|
|
77
|
+
Changelog = "https://github.com/varun1724/rlwatch/blob/main/CHANGELOG.md"
|
|
78
|
+
|
|
79
|
+
[tool.setuptools.packages.find]
|
|
80
|
+
where = ["src"]
|
|
81
|
+
|
|
82
|
+
[tool.setuptools.package-data]
|
|
83
|
+
"rlwatch" = ["py.typed"]
|
|
84
|
+
|
|
85
|
+
[tool.pytest.ini_options]
|
|
86
|
+
testpaths = ["tests"]
|
|
87
|
+
markers = [
|
|
88
|
+
"perf: performance benchmark tests (Tier 5)",
|
|
89
|
+
"integration: integration tests touching real SQLite/SMTP/HTTP (Tier 4)",
|
|
90
|
+
"property: Hypothesis property-based tests (Tier 2)",
|
|
91
|
+
"trl: requires the [trl] extra (transformers + trl)",
|
|
92
|
+
]
|
|
93
|
+
filterwarnings = [
|
|
94
|
+
"ignore::DeprecationWarning:streamlit",
|
|
95
|
+
]
|
|
96
|
+
|
|
97
|
+
[tool.coverage.run]
|
|
98
|
+
source = ["src/rlwatch"]
|
|
99
|
+
omit = [
|
|
100
|
+
# Streamlit dashboard is hard to test in-process; covered manually + by
|
|
101
|
+
# the dashboard smoke test in CI.
|
|
102
|
+
"*/dashboard.py",
|
|
103
|
+
]
|
|
104
|
+
|
|
105
|
+
[tool.coverage.report]
|
|
106
|
+
fail_under = 90
|
|
107
|
+
show_missing = true
|
|
108
|
+
skip_covered = false
|
rlwatch-0.3.0/src/rlwatch/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""rlwatch - Real-time GRPO/PPO training instability detection."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.0"
|
|
4
|
+
|
|
5
|
+
from rlwatch.core import attach, log_step, get_monitor, RLWatch
|
|
6
|
+
from rlwatch.config import RLWatchConfig, load_config
|
|
7
|
+
|
|
8
|
+
__all__ = ["attach", "log_step", "get_monitor", "RLWatch", "RLWatchConfig", "load_config"]
|