rlwatch 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rlwatch-0.3.0/PKG-INFO ADDED
@@ -0,0 +1,293 @@
1
+ Metadata-Version: 2.4
2
+ Name: rlwatch
3
+ Version: 0.3.0
4
+ Summary: Real-time GRPO/PPO training instability detection for ML teams
5
+ Author-email: Varun Saraf <varunsaraf1724@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/varun1724/rlwatch
8
+ Project-URL: Documentation, https://varun1724.github.io/rlwatch/
9
+ Project-URL: Issues, https://github.com/varun1724/rlwatch/issues
10
+ Project-URL: Changelog, https://github.com/varun1724/rlwatch/blob/main/CHANGELOG.md
11
+ Keywords: reinforcement-learning,GRPO,PPO,training,monitoring,debugging
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: POSIX :: Linux
17
+ Classifier: Operating System :: MacOS
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.10
24
+ Description-Content-Type: text/markdown
25
+ Requires-Dist: pyyaml>=6.0
26
+ Requires-Dist: scipy>=1.10.0
27
+ Requires-Dist: rich>=13.0.0
28
+ Requires-Dist: slack-sdk>=3.20.0
29
+ Requires-Dist: click>=8.1.0
30
+ Requires-Dist: numpy>=1.24
31
+ Provides-Extra: dashboard
32
+ Requires-Dist: streamlit>=1.30.0; extra == "dashboard"
33
+ Requires-Dist: plotly>=5.18.0; extra == "dashboard"
34
+ Requires-Dist: pandas>=2.0.0; extra == "dashboard"
35
+ Provides-Extra: torch
36
+ Requires-Dist: torch>=2.0.0; extra == "torch"
37
+ Provides-Extra: trl
38
+ Requires-Dist: trl>=0.7.0; extra == "trl"
39
+ Requires-Dist: transformers>=4.35.0; extra == "trl"
40
+ Provides-Extra: tutorial
41
+ Requires-Dist: trl>=0.11.0; extra == "tutorial"
42
+ Requires-Dist: transformers>=4.45.0; extra == "tutorial"
43
+ Requires-Dist: torch>=2.1.0; extra == "tutorial"
44
+ Requires-Dist: datasets>=2.14.0; extra == "tutorial"
45
+ Provides-Extra: dev
46
+ Requires-Dist: pytest>=7.0; extra == "dev"
47
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
48
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
49
+ Requires-Dist: pytest-benchmark>=4.0.0; extra == "dev"
50
+ Requires-Dist: hypothesis>=6.90.0; extra == "dev"
51
+ Requires-Dist: responses>=0.24.0; extra == "dev"
52
+ Requires-Dist: streamlit>=1.30.0; extra == "dev"
53
+ Requires-Dist: plotly>=5.18.0; extra == "dev"
54
+ Requires-Dist: pandas>=2.0.0; extra == "dev"
55
+
56
+ # rlwatch
57
+
58
+ [![PyPI version](https://img.shields.io/pypi/v/rlwatch.svg)](https://pypi.org/project/rlwatch/)
59
+ [![Python versions](https://img.shields.io/pypi/pyversions/rlwatch.svg)](https://pypi.org/project/rlwatch/)
60
+ [![CI](https://github.com/varun1724/rlwatch/actions/workflows/test.yml/badge.svg)](https://github.com/varun1724/rlwatch/actions/workflows/test.yml)
61
+ [![License: MIT](https://img.shields.io/pypi/l/rlwatch.svg)](https://github.com/varun1724/rlwatch/blob/main/LICENSE)
62
+ [![Docs](https://img.shields.io/badge/docs-mkdocs--material-blue)](https://varun1724.github.io/rlwatch/)
63
+
64
+ **Catch broken RL training runs before they waste your GPU budget.**
65
+
66
+ If you train language models with GRPO or PPO, you already know the pain: you kick off a run on 8 H100s, go to sleep, and wake up to find the policy collapsed into repeating the same token 12 hours ago. Nobody saw it. Nothing paged. The run just quietly rotted.
67
+
68
+ rlwatch is a tiny Python library that watches your training metrics in real time and pings you on Slack, Discord, email, or any HTTP endpoint the moment things start going wrong — *before* the run is ruined.
69
+
70
+ ---
71
+
72
+ ## The 30-second pitch
73
+
74
+ 1. `pip install rlwatch`
75
+ 2. Add two lines to your training script:
76
+ ```python
77
+ import rlwatch
78
+ rlwatch.attach()
79
+ ```
80
+ 3. Keep training. If something breaks, you get a message like:
81
+
82
+ > 🚨 **rlwatch CRITICAL: entropy_collapse**
83
+ > Run: `grpo_v3_exp12` | Step: 340
84
+ > Policy entropy dropped from 2.8 to 0.4 over 50 steps (threshold: 1.0).
85
+ > **Recommended action:** reduce learning rate by 5× or increase KL penalty.
86
+
87
+ You open the dashboard, confirm the curve, kill the run, fix the config, and you've just saved ~30 GPU-hours.
88
+
89
+ ---
90
+
91
+ ## What it watches for
92
+
93
+ These are the most common ways GRPO/PPO runs go sideways. rlwatch runs a dedicated detector for each one on every training step.
94
+
95
+ | Detector | In plain English | Default trip-wire |
96
+ |---|---|---|
97
+ | **Entropy collapse** | The model stopped exploring — it's now just repeating itself. | Entropy < 1.0 for 50 steps in a row |
98
+ | **KL divergence explosion** | The policy is running away from the reference model (usually the prelude to reward hacking). | KL > 3σ above the rolling mean |
99
+ | **Reward hacking proxy** | Rewards suddenly got weird — either way more variance than before, or split into two clusters (some samples hacked, some didn't). | Variance > 3× baseline, **or** Hartigan dip test p < 0.05 |
100
+ | **Advantage variance spike** | The value function estimates just became unstable. | Advantage std > 3× rolling baseline |
101
+ | **Loss NaN / Inf** | The optimizer has blown up; any further updates corrupt the policy. | Loss is non-finite (one step is enough) |
102
+ | **Gradient norm spike** | Gradients exploded — usually the precursor to a loss NaN. | Grad norm > 3σ above frozen baseline |
103
+
104
+ Every detector has two severity levels (**warning** and **critical**), a configurable warmup period so it doesn't fire at step 3, and a cooldown so you don't get spammed.
105
+
106
+ ---
107
+
108
+ ## Quick start
109
+
110
+ ```bash
111
+ pip install rlwatch # core library
112
+ pip install "rlwatch[dashboard]" # add the Streamlit dashboard
113
+ pip install "rlwatch[trl]" # add HuggingFace TRL deep integration
114
+ ```
115
+
116
+ ### Option A: two-line attach (easiest)
117
+
118
+ ```python
119
+ import rlwatch
120
+ rlwatch.attach() # works for any framework — see below for the recommended TRL path
121
+ ```
122
+
123
+ For HuggingFace TRL, the recommended path is to pass the trainer in directly:
124
+
125
+ ```python
126
+ import rlwatch
127
+ from trl import GRPOTrainer
128
+
129
+ trainer = GRPOTrainer(...)
130
+ monitor = rlwatch.attach(trainer=trainer)
131
+ trainer.train()
132
+ ```
133
+
134
+ For veRL, OpenRLHF, or any custom loop, use Option B.
135
+
136
+ ### Option B: manual metric logging
137
+
138
+ ```python
139
+ import rlwatch
140
+
141
+ monitor = rlwatch.attach(framework="manual", run_id="grpo_v3_exp12")
142
+
143
+ for step in range(num_steps):
144
+ # ... your training step ...
145
+
146
+ monitor.log_step(
147
+ step,
148
+ entropy=policy_entropy,
149
+ kl_divergence=kl,
150
+ reward_mean=rewards.mean(),
151
+ reward_std=rewards.std(),
152
+ advantage_std=advantages.std(),
153
+ loss=loss.item(),
154
+ grad_norm=grad_norm.item(),
155
+ )
156
+ ```
157
+
158
+ ### See it fire
159
+
160
+ The repo ships with a simulated GRPO run that deliberately collapses entropy:
161
+
162
+ ```bash
163
+ python examples/simulate_grpo_run.py # run the simulation
164
+ rlwatch diagnose # get a retrospective report
165
+ rlwatch dashboard # open the live dashboard at localhost:8501
166
+ ```
167
+
168
+ ---
169
+
170
+ ## Setting up alerts
171
+
172
+ ### Slack
173
+ ```bash
174
+ export RLWATCH_SLACK_WEBHOOK_URL="https://hooks.slack.com/services/..."
175
+ ```
176
+ Or put it in `rlwatch.yaml`:
177
+ ```yaml
178
+ alerts:
179
+ slack:
180
+ webhook_url: "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
181
+ ```
182
+
183
+ ### Email
184
+ ```yaml
185
+ alerts:
186
+ email:
187
+ smtp_host: smtp.gmail.com
188
+ to_addrs:
189
+ - you@yourcompany.com
190
+ ```
191
+
192
+ ### Discord
193
+ ```bash
194
+ export RLWATCH_DISCORD_WEBHOOK_URL="https://discord.com/api/webhooks/..."
195
+ ```
196
+ Or in `rlwatch.yaml`:
197
+ ```yaml
198
+ alerts:
199
+ discord:
200
+ webhook_url: "https://discord.com/api/webhooks/..."
201
+ mention_role_ids: ["123456789012345678"] # @-mentions on critical only
202
+ ```
203
+
204
+ ### Generic webhook
205
+ The universal escape hatch — POST a JSON body to any URL. Use this for PagerDuty's events API, an internal incident tracker, Mattermost, or anything else rlwatch doesn't have a dedicated channel for.
206
+ ```yaml
207
+ alerts:
208
+ webhook:
209
+ url: "https://your-service.example.com/rlwatch"
210
+ headers:
211
+ Authorization: "Bearer your-token"
212
+ ```
213
+ Custom JSON template? See [`docs/alerts/webhook.md`](https://varun1724.github.io/rlwatch/alerts/webhook/).
214
+
215
+ ### Console
216
+ Always on. Rich-formatted panels show up in stderr regardless of other channels.
217
+
218
+ ---
219
+
220
+ ## Configuration
221
+
222
+ Generate a starter config:
223
+ ```bash
224
+ rlwatch init
225
+ ```
226
+ This writes `rlwatch.yaml` with every threshold at its default. Tweak to taste.
227
+
228
+ Resolution order: **defaults → YAML file → environment variables → `attach()` kwargs**. Later values win.
229
+
230
+ ---
231
+
232
+ ## CLI reference
233
+
234
+ | Command | What it does |
235
+ |---|---|
236
+ | `rlwatch init` | Write a starter `rlwatch.yaml` |
237
+ | `rlwatch runs` | List every monitored run in the local SQLite store |
238
+ | `rlwatch diagnose [--run-id ID]` | Print a retrospective report on a completed run |
239
+ | `rlwatch dashboard` | Launch the Streamlit dashboard at `localhost:8501` |
240
+
241
+ ---
242
+
243
+ ## How it stores data
244
+
245
+ Everything lives in a single SQLite file at `./rlwatch_logs/metrics.db`. Three tables: `runs`, `metrics`, `alerts`. WAL mode is on so the training loop writes and the dashboard reads concurrently without locking. Copy that `.db` file and you've copied the entire history of every run.
246
+
247
+ ---
248
+
249
+ ## Supported frameworks
250
+
251
+ - **HuggingFace TRL** — pass `attach(trainer=trainer)` for direct callback registration. See the [end-to-end tutorial](https://varun1724.github.io/rlwatch/tutorials/trl-grpo-end-to-end/) for a real GPT-2 + GRPO example that runs on CPU in ~5 minutes.
252
+ - **veRL** — `framework="manual"` + `monitor.log_step()`. Deep integration on the roadmap.
253
+ - **OpenRLHF** — `framework="manual"` + `monitor.log_step()`. Deep integration on the roadmap.
254
+ - **Anything else** — same as above. Every metric in `log_step` is optional; pass whatever your framework exposes.
255
+
256
+ ---
257
+
258
+ ## Docker
259
+
260
+ ```bash
261
+ docker build -t rlwatch .
262
+ docker run -p 8501:8501 rlwatch
263
+ ```
264
+
265
+ ---
266
+
267
+ ## Documentation
268
+
269
+ Full docs at **[varun1724.github.io/rlwatch](https://varun1724.github.io/rlwatch/)** — getting started, every detector explained in depth, alerts setup, configuration reference, the end-to-end TRL tutorial, and an FAQ.
270
+
271
+ ## Project direction
272
+
273
+ rlwatch is heading toward a hosted, team-oriented product. The local-first open-source library will stay free and useful on its own. See [`ROADMAP.md`](ROADMAP.md) for the full plan.
274
+
275
+ ## Contributing & testing
276
+
277
+ rlwatch is a monitoring library — if it has bugs, it costs someone a GPU
278
+ budget. The test harness is the most load-bearing part of the repo.
279
+
280
+ ```bash
281
+ pip install -e ".[dev]"
282
+ pytest -v # all five tiers
283
+ pytest --cov=rlwatch --cov-fail-under=90 # coverage gate (must pass to merge)
284
+ ```
285
+
286
+ The suite is organized into five tiers (unit / property / simulation /
287
+ integration / performance). See **[`TESTING.md`](TESTING.md)** for the
288
+ practical "how to run, write, and debug tests" guide and **[`CLAUDE.md`](CLAUDE.md)**
289
+ for the authoritative contract every PR has to meet.
290
+
291
+ ## License
292
+
293
+ MIT
@@ -0,0 +1,238 @@
1
+ # rlwatch
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/rlwatch.svg)](https://pypi.org/project/rlwatch/)
4
+ [![Python versions](https://img.shields.io/pypi/pyversions/rlwatch.svg)](https://pypi.org/project/rlwatch/)
5
+ [![CI](https://github.com/varun1724/rlwatch/actions/workflows/test.yml/badge.svg)](https://github.com/varun1724/rlwatch/actions/workflows/test.yml)
6
+ [![License: MIT](https://img.shields.io/pypi/l/rlwatch.svg)](https://github.com/varun1724/rlwatch/blob/main/LICENSE)
7
+ [![Docs](https://img.shields.io/badge/docs-mkdocs--material-blue)](https://varun1724.github.io/rlwatch/)
8
+
9
+ **Catch broken RL training runs before they waste your GPU budget.**
10
+
11
+ If you train language models with GRPO or PPO, you already know the pain: you kick off a run on 8 H100s, go to sleep, and wake up to find the policy collapsed into repeating the same token 12 hours ago. Nobody saw it. Nothing paged. The run just quietly rotted.
12
+
13
+ rlwatch is a tiny Python library that watches your training metrics in real time and pings you on Slack, Discord, email, or any HTTP endpoint the moment things start going wrong — *before* the run is ruined.
14
+
15
+ ---
16
+
17
+ ## The 30-second pitch
18
+
19
+ 1. `pip install rlwatch`
20
+ 2. Add two lines to your training script:
21
+ ```python
22
+ import rlwatch
23
+ rlwatch.attach()
24
+ ```
25
+ 3. Keep training. If something breaks, you get a message like:
26
+
27
+ > 🚨 **rlwatch CRITICAL: entropy_collapse**
28
+ > Run: `grpo_v3_exp12` | Step: 340
29
+ > Policy entropy dropped from 2.8 to 0.4 over 50 steps (threshold: 1.0).
30
+ > **Recommended action:** reduce learning rate by 5× or increase KL penalty.
31
+
32
+ You open the dashboard, confirm the curve, kill the run, fix the config, and you've just saved ~30 GPU-hours.
33
+
34
+ ---
35
+
36
+ ## What it watches for
37
+
38
+ These are the most common ways GRPO/PPO runs go sideways. rlwatch runs a dedicated detector for each one on every training step.
39
+
40
+ | Detector | In plain English | Default trip-wire |
41
+ |---|---|---|
42
+ | **Entropy collapse** | The model stopped exploring — it's now just repeating itself. | Entropy < 1.0 for 50 steps in a row |
43
+ | **KL divergence explosion** | The policy is running away from the reference model (usually the prelude to reward hacking). | KL > 3σ above the rolling mean |
44
+ | **Reward hacking proxy** | Rewards suddenly got weird — either way more variance than before, or split into two clusters (some samples hacked, some didn't). | Variance > 3× baseline, **or** Hartigan dip test p < 0.05 |
45
+ | **Advantage variance spike** | The value function estimates just became unstable. | Advantage std > 3× rolling baseline |
46
+ | **Loss NaN / Inf** | The optimizer has blown up; any further updates corrupt the policy. | Loss is non-finite (one step is enough) |
47
+ | **Gradient norm spike** | Gradients exploded — usually the precursor to a loss NaN. | Grad norm > 3σ above frozen baseline |
48
+
49
+ Every detector has two severity levels (**warning** and **critical**), a configurable warmup period so it doesn't fire at step 3, and a cooldown so you don't get spammed.
50
+
51
+ ---
52
+
53
+ ## Quick start
54
+
55
+ ```bash
56
+ pip install rlwatch # core library
57
+ pip install "rlwatch[dashboard]" # add the Streamlit dashboard
58
+ pip install "rlwatch[trl]" # add HuggingFace TRL deep integration
59
+ ```
60
+
61
+ ### Option A: two-line attach (easiest)
62
+
63
+ ```python
64
+ import rlwatch
65
+ rlwatch.attach() # works for any framework — see below for the recommended TRL path
66
+ ```
67
+
68
+ For HuggingFace TRL, the recommended path is to pass the trainer in directly:
69
+
70
+ ```python
71
+ import rlwatch
72
+ from trl import GRPOTrainer
73
+
74
+ trainer = GRPOTrainer(...)
75
+ monitor = rlwatch.attach(trainer=trainer)
76
+ trainer.train()
77
+ ```
78
+
79
+ For veRL, OpenRLHF, or any custom loop, use Option B.
80
+
81
+ ### Option B: manual metric logging
82
+
83
+ ```python
84
+ import rlwatch
85
+
86
+ monitor = rlwatch.attach(framework="manual", run_id="grpo_v3_exp12")
87
+
88
+ for step in range(num_steps):
89
+ # ... your training step ...
90
+
91
+ monitor.log_step(
92
+ step,
93
+ entropy=policy_entropy,
94
+ kl_divergence=kl,
95
+ reward_mean=rewards.mean(),
96
+ reward_std=rewards.std(),
97
+ advantage_std=advantages.std(),
98
+ loss=loss.item(),
99
+ grad_norm=grad_norm.item(),
100
+ )
101
+ ```
102
+
103
+ ### See it fire
104
+
105
+ The repo ships with a simulated GRPO run that deliberately collapses entropy:
106
+
107
+ ```bash
108
+ python examples/simulate_grpo_run.py # run the simulation
109
+ rlwatch diagnose # get a retrospective report
110
+ rlwatch dashboard # open the live dashboard at localhost:8501
111
+ ```
112
+
113
+ ---
114
+
115
+ ## Setting up alerts
116
+
117
+ ### Slack
118
+ ```bash
119
+ export RLWATCH_SLACK_WEBHOOK_URL="https://hooks.slack.com/services/..."
120
+ ```
121
+ Or put it in `rlwatch.yaml`:
122
+ ```yaml
123
+ alerts:
124
+ slack:
125
+ webhook_url: "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
126
+ ```
127
+
128
+ ### Email
129
+ ```yaml
130
+ alerts:
131
+ email:
132
+ smtp_host: smtp.gmail.com
133
+ to_addrs:
134
+ - you@yourcompany.com
135
+ ```
136
+
137
+ ### Discord
138
+ ```bash
139
+ export RLWATCH_DISCORD_WEBHOOK_URL="https://discord.com/api/webhooks/..."
140
+ ```
141
+ Or in `rlwatch.yaml`:
142
+ ```yaml
143
+ alerts:
144
+ discord:
145
+ webhook_url: "https://discord.com/api/webhooks/..."
146
+ mention_role_ids: ["123456789012345678"] # @-mentions on critical only
147
+ ```
148
+
149
+ ### Generic webhook
150
+ The universal escape hatch — POST a JSON body to any URL. Use this for PagerDuty's events API, an internal incident tracker, Mattermost, or anything else rlwatch doesn't have a dedicated channel for.
151
+ ```yaml
152
+ alerts:
153
+ webhook:
154
+ url: "https://your-service.example.com/rlwatch"
155
+ headers:
156
+ Authorization: "Bearer your-token"
157
+ ```
158
+ Custom JSON template? See [`docs/alerts/webhook.md`](https://varun1724.github.io/rlwatch/alerts/webhook/).
159
+
160
+ ### Console
161
+ Always on. Rich-formatted panels show up in stderr regardless of other channels.
162
+
163
+ ---
164
+
165
+ ## Configuration
166
+
167
+ Generate a starter config:
168
+ ```bash
169
+ rlwatch init
170
+ ```
171
+ This writes `rlwatch.yaml` with every threshold at its default. Tweak to taste.
172
+
173
+ Resolution order: **defaults → YAML file → environment variables → `attach()` kwargs**. Later values win.
174
+
175
+ ---
176
+
177
+ ## CLI reference
178
+
179
+ | Command | What it does |
180
+ |---|---|
181
+ | `rlwatch init` | Write a starter `rlwatch.yaml` |
182
+ | `rlwatch runs` | List every monitored run in the local SQLite store |
183
+ | `rlwatch diagnose [--run-id ID]` | Print a retrospective report on a completed run |
184
+ | `rlwatch dashboard` | Launch the Streamlit dashboard at `localhost:8501` |
185
+
186
+ ---
187
+
188
+ ## How it stores data
189
+
190
+ Everything lives in a single SQLite file at `./rlwatch_logs/metrics.db`. Three tables: `runs`, `metrics`, `alerts`. WAL mode is on so the training loop writes and the dashboard reads concurrently without locking. Copy that `.db` file and you've copied the entire history of every run.
191
+
192
+ ---
193
+
194
+ ## Supported frameworks
195
+
196
+ - **HuggingFace TRL** — pass `attach(trainer=trainer)` for direct callback registration. See the [end-to-end tutorial](https://varun1724.github.io/rlwatch/tutorials/trl-grpo-end-to-end/) for a real GPT-2 + GRPO example that runs on CPU in ~5 minutes.
197
+ - **veRL** — `framework="manual"` + `monitor.log_step()`. Deep integration on the roadmap.
198
+ - **OpenRLHF** — `framework="manual"` + `monitor.log_step()`. Deep integration on the roadmap.
199
+ - **Anything else** — same as above. Every metric in `log_step` is optional; pass whatever your framework exposes.
200
+
201
+ ---
202
+
203
+ ## Docker
204
+
205
+ ```bash
206
+ docker build -t rlwatch .
207
+ docker run -p 8501:8501 rlwatch
208
+ ```
209
+
210
+ ---
211
+
212
+ ## Documentation
213
+
214
+ Full docs at **[varun1724.github.io/rlwatch](https://varun1724.github.io/rlwatch/)** — getting started, every detector explained in depth, alerts setup, configuration reference, the end-to-end TRL tutorial, and an FAQ.
215
+
216
+ ## Project direction
217
+
218
+ rlwatch is heading toward a hosted, team-oriented product. The local-first open-source library will stay free and useful on its own. See [`ROADMAP.md`](ROADMAP.md) for the full plan.
219
+
220
+ ## Contributing & testing
221
+
222
+ rlwatch is a monitoring library — if it has bugs, it costs someone a GPU
223
+ budget. The test harness is the most load-bearing part of the repo.
224
+
225
+ ```bash
226
+ pip install -e ".[dev]"
227
+ pytest -v # all five tiers
228
+ pytest --cov=rlwatch --cov-fail-under=90 # coverage gate (must pass to merge)
229
+ ```
230
+
231
+ The suite is organized into five tiers (unit / property / simulation /
232
+ integration / performance). See **[`TESTING.md`](TESTING.md)** for the
233
+ practical "how to run, write, and debug tests" guide and **[`CLAUDE.md`](CLAUDE.md)**
234
+ for the authoritative contract every PR has to meet.
235
+
236
+ ## License
237
+
238
+ MIT
@@ -0,0 +1,108 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "rlwatch"
7
+ version = "0.3.0"
8
+ description = "Real-time GRPO/PPO training instability detection for ML teams"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ {name = "Varun Saraf", email = "varunsaraf1724@gmail.com"}
14
+ ]
15
+ keywords = ["reinforcement-learning", "GRPO", "PPO", "training", "monitoring", "debugging"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Science/Research",
19
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Operating System :: POSIX :: Linux",
22
+ "Operating System :: MacOS",
23
+ "Programming Language :: Python :: 3",
24
+ "Programming Language :: Python :: 3.10",
25
+ "Programming Language :: Python :: 3.11",
26
+ "Programming Language :: Python :: 3.12",
27
+ "Typing :: Typed",
28
+ ]
29
+ dependencies = [
30
+ "pyyaml>=6.0",
31
+ "scipy>=1.10.0",
32
+ "rich>=13.0.0",
33
+ "slack-sdk>=3.20.0",
34
+ "click>=8.1.0",
35
+ "numpy>=1.24",
36
+ ]
37
+
38
+ [project.optional-dependencies]
39
+ # Streamlit dashboard. Not in core because the transitive deps are ~150MB
40
+ # and most users only need the alerting / log_step path.
41
+ dashboard = [
42
+ "streamlit>=1.30.0",
43
+ "plotly>=5.18.0",
44
+ "pandas>=2.0.0",
45
+ ]
46
+ torch = ["torch>=2.0.0"]
47
+ trl = ["trl>=0.7.0", "transformers>=4.35.0"]
48
+ # The end-to-end tutorial sets minimum known-working versions for
49
+ # examples/trl_grpo_tutorial.py. The monthly tutorial CI cron catches
50
+ # silent breakage on future TRL releases.
51
+ tutorial = [
52
+ "trl>=0.11.0",
53
+ "transformers>=4.45.0",
54
+ "torch>=2.1.0",
55
+ "datasets>=2.14.0",
56
+ ]
57
+ dev = [
58
+ "pytest>=7.0",
59
+ "pytest-asyncio>=0.21.0",
60
+ "pytest-cov>=4.1.0",
61
+ "pytest-benchmark>=4.0.0",
62
+ "hypothesis>=6.90.0",
63
+ "responses>=0.24.0",
64
+ # dev includes the dashboard so the dashboard tests can run.
65
+ "streamlit>=1.30.0",
66
+ "plotly>=5.18.0",
67
+ "pandas>=2.0.0",
68
+ ]
69
+
70
+ [project.scripts]
71
+ rlwatch = "rlwatch.cli:main"
72
+
73
+ [project.urls]
74
+ Homepage = "https://github.com/varun1724/rlwatch"
75
+ Documentation = "https://varun1724.github.io/rlwatch/"
76
+ Issues = "https://github.com/varun1724/rlwatch/issues"
77
+ Changelog = "https://github.com/varun1724/rlwatch/blob/main/CHANGELOG.md"
78
+
79
+ [tool.setuptools.packages.find]
80
+ where = ["src"]
81
+
82
+ [tool.setuptools.package-data]
83
+ "rlwatch" = ["py.typed"]
84
+
85
+ [tool.pytest.ini_options]
86
+ testpaths = ["tests"]
87
+ markers = [
88
+ "perf: performance benchmark tests (Tier 5)",
89
+ "integration: integration tests touching real SQLite/SMTP/HTTP (Tier 4)",
90
+ "property: Hypothesis property-based tests (Tier 2)",
91
+ "trl: requires the [trl] extra (transformers + trl)",
92
+ ]
93
+ filterwarnings = [
94
+ "ignore::DeprecationWarning:streamlit",
95
+ ]
96
+
97
+ [tool.coverage.run]
98
+ source = ["src/rlwatch"]
99
+ omit = [
100
+ # Streamlit dashboard is hard to test in-process; covered manually + by
101
+ # the dashboard smoke test in CI.
102
+ "*/dashboard.py",
103
+ ]
104
+
105
+ [tool.coverage.report]
106
+ fail_under = 90
107
+ show_missing = true
108
+ skip_covered = false
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,8 @@
1
+ """rlwatch - Real-time GRPO/PPO training instability detection."""
2
+
3
+ __version__ = "0.3.0"
4
+
5
+ from rlwatch.core import attach, log_step, get_monitor, RLWatch
6
+ from rlwatch.config import RLWatchConfig, load_config
7
+
8
+ __all__ = ["attach", "log_step", "get_monitor", "RLWatch", "RLWatchConfig", "load_config"]