codevigil 0.1.1__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codevigil-0.1.1 → codevigil-0.4.0}/.gitignore +2 -1
- {codevigil-0.1.1 → codevigil-0.4.0}/PKG-INFO +118 -36
- codevigil-0.4.0/README.md +207 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/__init__.py +1 -1
- codevigil-0.4.0/codevigil/aggregator.py +1012 -0
- codevigil-0.4.0/codevigil/analysis/__init__.py +26 -0
- codevigil-0.4.0/codevigil/analysis/cohort.py +221 -0
- codevigil-0.4.0/codevigil/analysis/compare.py +335 -0
- codevigil-0.4.0/codevigil/analysis/correlations.py +106 -0
- codevigil-0.4.0/codevigil/analysis/guards.py +145 -0
- codevigil-0.4.0/codevigil/analysis/processed_store.py +687 -0
- codevigil-0.4.0/codevigil/analysis/store.py +601 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/bootstrap.py +92 -18
- codevigil-0.4.0/codevigil/classifier.py +258 -0
- codevigil-0.4.0/codevigil/cli.py +2186 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/collectors/__init__.py +2 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/collectors/parse_health.py +47 -5
- codevigil-0.4.0/codevigil/collectors/prompts.py +63 -0
- codevigil-0.4.0/codevigil/collectors/read_edit_ratio.py +349 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/collectors/reasoning_loop.py +29 -2
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/collectors/stop_phrase.py +83 -2
- codevigil-0.4.0/codevigil/collectors/thinking.py +154 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/config.py +294 -121
- codevigil-0.4.0/codevigil/history/__init__.py +15 -0
- codevigil-0.4.0/codevigil/history/detail_cmd.py +184 -0
- codevigil-0.4.0/codevigil/history/diff_cmd.py +168 -0
- codevigil-0.4.0/codevigil/history/filters.py +217 -0
- codevigil-0.4.0/codevigil/history/heatmap_cmd.py +218 -0
- codevigil-0.4.0/codevigil/history/list_cmd.py +185 -0
- codevigil-0.4.0/codevigil/ingest.py +355 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/parser.py +475 -85
- codevigil-0.4.0/codevigil/renderers/_bars.py +67 -0
- codevigil-0.4.0/codevigil/renderers/terminal.py +977 -0
- codevigil-0.4.0/codevigil/report/__init__.py +21 -0
- codevigil-0.4.0/codevigil/report/loader.py +473 -0
- codevigil-0.4.0/codevigil/report/renderer.py +1052 -0
- codevigil-0.4.0/codevigil/turns.py +183 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/types.py +34 -1
- codevigil-0.4.0/codevigil/ui/progress.py +282 -0
- codevigil-0.4.0/codevigil/watch_roots.py +88 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/watcher.py +242 -10
- codevigil-0.4.0/codevigil/watcher_cache.py +277 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/docs/README.md +8 -6
- {codevigil-0.1.1 → codevigil-0.4.0}/pyproject.toml +4 -2
- codevigil-0.1.1/README.md +0 -126
- codevigil-0.1.1/codevigil/aggregator.py +0 -506
- codevigil-0.1.1/codevigil/cli.py +0 -732
- codevigil-0.1.1/codevigil/collectors/read_edit_ratio.py +0 -258
- codevigil-0.1.1/codevigil/renderers/terminal.py +0 -236
- {codevigil-0.1.1 → codevigil-0.4.0}/LICENSE +0 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/__main__.py +0 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/collectors/_text_match.py +0 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/errors.py +0 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/privacy.py +0 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/projects.py +0 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/registry.py +0 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/renderers/__init__.py +0 -0
- {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/renderers/json_file.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codevigil
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Local, privacy-preserving observability for Claude Code sessions.
|
|
5
5
|
Project-URL: Homepage, https://github.com/Mathews-Tom/codevigil
|
|
6
6
|
Project-URL: Issues, https://github.com/Mathews-Tom/codevigil/issues
|
|
@@ -221,15 +221,21 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
221
221
|
Classifier: Topic :: Software Development :: Quality Assurance
|
|
222
222
|
Classifier: Topic :: System :: Monitoring
|
|
223
223
|
Requires-Python: >=3.11
|
|
224
|
+
Requires-Dist: rich>=13
|
|
224
225
|
Description-Content-Type: text/markdown
|
|
225
226
|
|
|
226
227
|
# codevigil - Session Quality, Observed
|
|
227
228
|
|
|
228
229
|
Local, privacy-preserving observability for Claude Code sessions.
|
|
229
230
|
|
|
230
|
-
codevigil tails `~/.claude/projects/**/*.jsonl` on disk, computes signal metrics about reasoning and tool-use patterns, and surfaces them in a terminal dashboard or as JSON / markdown reports. **
|
|
231
|
+
codevigil tails `~/.claude/projects/**/*.jsonl` on disk, computes signal metrics about reasoning and tool-use patterns, and surfaces them in a rich terminal dashboard or as JSON / markdown reports. **Zero network egress, no data ever leaves your machine.**
|
|
231
232
|
|
|
232
|
-
Status: alpha. Python 3.11 and 3.12.
|
|
233
|
+
[GitHub](https://github.com/Mathews-Tom/codevigil)
|
|
234
|
+
[Changelog](CHANGELOG.md)
|
|
235
|
+
[Python](https://www.python.org/downloads/)
|
|
236
|
+
[License](LICENSE)
|
|
237
|
+
[CI](https://github.com/Mathews-Tom/codevigil/actions/workflows/ci.yml)
|
|
238
|
+
[Privacy](docs/privacy.md)
|
|
233
239
|
|
|
234
240
|
## Install
|
|
235
241
|
|
|
@@ -237,25 +243,44 @@ Status: alpha. Python 3.11 and 3.12.
|
|
|
237
243
|
uv tool install codevigil
|
|
238
244
|
```
|
|
239
245
|
|
|
240
|
-
|
|
246
|
+
`uv tool install` places the `codevigil` executable on your `PATH` inside an isolated environment that does not conflict with project virtualenvs. All subcommands, including the full `history` suite with colored panels and formatted tables, work out of the box.
|
|
241
247
|
|
|
242
|
-
|
|
248
|
+
Upgrade and uninstall:
|
|
249
|
+
|
|
250
|
+
```bash
|
|
251
|
+
uv tool upgrade codevigil
|
|
252
|
+
uv tool uninstall codevigil # leaves config and session data untouched
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
No `uv`? Install it from <https://docs.astral.sh/uv/getting-started/installation/>. Fallbacks: `pipx install codevigil` and `pip install --user codevigil` both work. See [docs/installation.md](docs/installation.md) for all supported paths and from-source installs.
|
|
243
256
|
|
|
244
257
|
## First run
|
|
245
258
|
|
|
246
259
|
```bash
|
|
247
|
-
codevigil
|
|
260
|
+
codevigil ingest # one-shot cold-ingest into persistent memory (first run only)
|
|
261
|
+
codevigil watch # project roll-up dashboard, resumes every file from its cached cursor
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
`codevigil ingest` walks every JSONL under `watch.roots`, parses them end-to-end, and writes a durable record (root-aware session key, raw session id, file id, cursor offset, collector state, metric summary) to the local SQLite store under `~/.local/state/codevigil/`. You run it once after install. Subsequent `codevigil watch` ticks seek past the saved cursor on every file, so the hot path only processes newly-appended events. If the store is absent on startup, `watch` will bootstrap it for you.
|
|
265
|
+
|
|
266
|
+
`codevigil watch` then prints a live **project-row** dashboard: one row per Claude Code project, with the fleet-worst severity, the active session count, and the aggregate metric summary. The top line shows fleet totals (session count, CRIT/WARN/OK tallies, project count, last-updated wall-clock tick). Every session's rolling-window collector state is restored from the store so restart does not erase your percentile baselines.
|
|
267
|
+
|
|
268
|
+
```text
|
|
269
|
+
codevigil [experimental thresholds] | sessions=3 crit=0 warn=1 ok=2 projects=2 updated=2026-04-16T10:22:00 | parse_confidence: 1.00
|
|
270
|
+
project: my-project | 2 active | WARN read_edit_ratio 3.1 | stop_phrase 1 | reasoning_loop 8.4 | thinking 0.82 | prompts 14
|
|
271
|
+
project: another-project | 1 active | OK read_edit_ratio 5.6 | stop_phrase 0 | reasoning_loop 4.1 | thinking 0.91 | prompts 7
|
|
248
272
|
```
|
|
249
273
|
|
|
250
|
-
|
|
274
|
+
Pass `--by-session` (or set `watch.display_mode = "session"`) to fall back to the 0.2.x one-block-per-session layout:
|
|
251
275
|
|
|
252
276
|
```text
|
|
253
|
-
|
|
254
|
-
session: a3f7c2d | project: my-project | 2m 34s ACTIVE
|
|
277
|
+
session: a3f7c2d | project: my-project | 2m 34s ACTIVE [task: debug_loop] [experimental]
|
|
255
278
|
──────────────────────────────────────────────────────────────
|
|
256
|
-
read_edit_ratio 5.2
|
|
257
|
-
stop_phrase 0
|
|
258
|
-
reasoning_loop 6.4
|
|
279
|
+
read_edit_ratio 5.2 OK [R:E 5.2 | research:mut 7.1] [↗3.1→4.2→5.2] [p68 of your baseline]
|
|
280
|
+
stop_phrase 0 OK [0 hits]
|
|
281
|
+
reasoning_loop 6.4 OK [6.4/1K tool calls | burst: 2] [↘8.1→7.2→6.4] [n/a]
|
|
282
|
+
thinking 0.87 OK [0.87 visible | chars med: 342 | sig med: 118]
|
|
283
|
+
prompts 11 OK [11 user turns]
|
|
259
284
|
──────────────────────────────────────────────────────────────
|
|
260
285
|
```
|
|
261
286
|
|
|
@@ -264,23 +289,41 @@ session: a3f7c2d | project: my-project | 2m 34s ACTIVE
|
|
|
264
289
|
## What else can it do
|
|
265
290
|
|
|
266
291
|
```bash
|
|
267
|
-
codevigil config check
|
|
268
|
-
codevigil
|
|
269
|
-
codevigil
|
|
270
|
-
codevigil
|
|
292
|
+
codevigil config check # show the resolved config and where each value came from
|
|
293
|
+
codevigil ingest # cold-ingest every session into local persistent memory
|
|
294
|
+
codevigil ingest --force # rebuild the store from scratch, ignoring existing rows
|
|
295
|
+
codevigil watch --by-session # one block per session (0.2.x layout)
|
|
296
|
+
codevigil report ~/.claude/projects # default: stacked today / 7d / 30d panels
|
|
297
|
+
codevigil report sessions/ --format markdown --from 2026-04-01 # explicit window → single-period mode
|
|
298
|
+
codevigil report ~/.claude/projects --group-by week # cohort trend table by ISO week
|
|
299
|
+
codevigil report ~/.claude/projects --group-by week --format csv # flat CSV for notebook consumption
|
|
300
|
+
codevigil report sessions/ --compare-periods 2026-03-01:2026-03-31,2026-04-01:2026-04-30
|
|
301
|
+
codevigil report sessions/ --pivot-date 2026-04-01 # before/after delta at a change point
|
|
302
|
+
codevigil report sessions/ --group-by week --experimental-correlations # append Pearson appendix [experimental]
|
|
303
|
+
codevigil report sessions/ --output-file ~/reports/april.md # write to an exact file path
|
|
304
|
+
codevigil export session.jsonl # NDJSON event stream on stdout, jq-friendly
|
|
271
305
|
codevigil export session.jsonl | jq 'select(.kind == "tool_call") | .payload.tool_name'
|
|
306
|
+
codevigil history list # list stored sessions
|
|
307
|
+
codevigil history list --task-type debug_loop --since 2026-04-01 --severity warn
|
|
308
|
+
codevigil history SESSION_ID # event, metric, and per-turn task-type timeline
|
|
309
|
+
codevigil history diff SESSION_A SESSION_B # side-by-side Markdown diff of two sessions
|
|
310
|
+
codevigil history heatmap SESSION_ID # tool × severity heatmap with proportional gradient bars
|
|
311
|
+
codevigil history heatmap --axis task_type # cross-tab metrics against experimental task labels
|
|
272
312
|
```
|
|
273
313
|
|
|
314
|
+
`codevigil report` with no date flags now renders three stacked windows — **today**, **7d**, and **30d** — in one invocation. Pass `--from` or `--to` to fall back to the original single-period mode. Scripts that depend on the old no-flag single-period output should pass `--from 1970-01-01` (or any open lower bound) to preserve the previous shape.
|
|
315
|
+
|
|
274
316
|
Full flag reference for every subcommand: [docs/cli.md](docs/cli.md).
|
|
275
317
|
|
|
276
318
|
## Configuration
|
|
277
319
|
|
|
278
|
-
codevigil resolves its configuration from a layered precedence chain: built-in defaults → `~/.config/codevigil/config.toml` → `CODEVIGIL_*` environment variables → CLI flags. Run `codevigil config check` to see every resolved key with its source.
|
|
320
|
+
codevigil resolves its configuration from a layered precedence chain: built-in defaults → `~/.config/codevigil/config.toml` → `CODEVIGIL_*` environment variables → CLI flags. `watch.roots` is the canonical multi-root setting; `watch.root` and `CODEVIGIL_WATCH_ROOT` remain supported as deprecated single-root aliases. Run `codevigil config check` to see every resolved key with its source and any deprecation notices.
|
|
279
321
|
|
|
280
322
|
A minimal `~/.config/codevigil/config.toml`:
|
|
281
323
|
|
|
282
324
|
```toml
|
|
283
325
|
[watch]
|
|
326
|
+
roots = ["~/.claude/projects"]
|
|
284
327
|
poll_interval = 1.0
|
|
285
328
|
|
|
286
329
|
[collectors.read_edit_ratio]
|
|
@@ -292,16 +335,53 @@ The complete key reference, env-var bindings, and validation rules live in [docs
|
|
|
292
335
|
|
|
293
336
|
## What gets measured
|
|
294
337
|
|
|
295
|
-
|
|
338
|
+
Five user-facing collectors plus an always-on integrity gate:
|
|
339
|
+
|
|
340
|
+
| Collector | Signal |
|
|
341
|
+
| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
|
|
342
|
+
| `read_edit_ratio` | Reads vs. mutations, blind-edit detection, file-tracking confidence |
|
|
343
|
+
| `stop_phrase` | Hits against ownership-dodging, permission-seeking, premature-stopping, and known-limitation phrase categories |
|
|
344
|
+
| `reasoning_loop` | Self-correction phrase rate per 1K tool calls plus longest consecutive burst |
|
|
345
|
+
| `thinking` | Visible-vs-redacted thinking-block ratio plus median visible / signature character lengths (headline signal for #42796 depth decline) |
|
|
346
|
+
| `prompts` | Cumulative user-turn count per session; feeds the #42796 "prompts per session" cohort mean |
|
|
347
|
+
| `parse_health` | Always-on. Flips to CRITICAL when parse confidence drops below 0.9 in any 50-line window |
|
|
348
|
+
|
|
349
|
+
`thinking` and `prompts` are descriptive counters — severity stays at OK by design. They exist to feed cohort trend reports, not to alarm. Threshold semantics for the three gated collectors, what each metric is sensitive to, and how to interpret CRITICAL signals live in [docs/collectors.md](docs/collectors.md).
|
|
296
350
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
351
|
+
## Persistent memory
|
|
352
|
+
|
|
353
|
+
0.4.0 adds first-class multi-root support on top of the local SQLite-backed processed-session store under `~/.local/state/codevigil/processed/`. Every finalised session now writes a root-aware record (`session_key`, raw `session_id`, cursor byte offset, collector state snapshot, and derived metric summary), and the watcher seeds each polled file from the cached cursor on startup instead of re-parsing JSONL from byte 0. Rolling-window collector state (the `read_edit_ratio` 50-event deque, the `reasoning_loop` burst counter) is restored verbatim across restarts, even when different roots contain the same `session_id`. Run `codevigil ingest` once after install; after that, `codevigil watch` processes only newly appended events on the hot path. Disable the cursor cache for reproducible cold-start benchmarks with `watch.cursor_cache_enabled = false`. Schema, migration policy, and the invariants the store upholds live in [docs/design.md](docs/design.md).
|
|
354
|
+
|
|
355
|
+
## Cohort trend reports
|
|
356
|
+
|
|
357
|
+
`codevigil report --group-by {day,week,project,model,permission_mode}` aggregates every session in the store into cohort cells and emits a Markdown trend table with a methodology header, Δ-vs-prior-row annotations on chronological dimensions, and threshold highlighting for cells crossing warn / critical. Cells with `n<5` are redacted with an `n<5` sentinel. Additional cohort-only flags:
|
|
358
|
+
|
|
359
|
+
- `--pivot-date YYYY-MM-DD` — split the corpus at a change point and emit a Before/After delta table.
|
|
360
|
+
- `--compare-periods A_START:A_END,B_START:B_END` — signed delta table + prose summary per metric.
|
|
361
|
+
- `--experimental-correlations` — Pearson appendix across per-session metric columns; pairs below `n=30` are dropped. Exploratory only — correlation is not causation, and the rendered output says so explicitly.
|
|
362
|
+
- `--format csv` — flat `dimension_value,metric_name,mean,stdev,n,min,max` for notebook consumption.
|
|
363
|
+
- `--format json` — versioned JSON cohort document (`schema_version=1`) for downstream pipelines.
|
|
364
|
+
- `--output-file PATH` — write to an exact file path (parent dirs created, must resolve under `$HOME`).
|
|
365
|
+
|
|
366
|
+
Both new default collectors (`thinking`, `prompts`) surface in cohort reports as `thinking_visible_ratio`, `thinking_visible_chars_median`, `thinking_signature_chars_median`, and `user_turns`. Full reference: [docs/cli.md](docs/cli.md).
|
|
367
|
+
|
|
368
|
+
## Task classifier `[experimental]`
|
|
369
|
+
|
|
370
|
+
The experimental task classifier labels each Claude Code turn as `exploration`, `mutation_heavy`, `debug_loop`, `planning`, or `mixed` using a two-stage cascade (tool-presence heuristic → keyword regex on the user message, stdlib `re` only, zero network, zero new dependencies). Session-level labels aggregate turn labels by majority vote. Labels surface in four places:
|
|
371
|
+
|
|
372
|
+
- **`history list`** — new `task_type` column and `--task-type <label>` filter
|
|
373
|
+
- **`history heatmap --axis task_type`** — cross-tab metrics against task labels
|
|
374
|
+
- **`history SESSION_ID`** — per-turn task-type headings in the event timeline
|
|
375
|
+
- **`codevigil watch`** — right-aligned task tag in each session header
|
|
376
|
+
|
|
377
|
+
Every surface is marked `[experimental]`. The classifier is opt-out via `[classifier]` in `~/.config/codevigil/config.toml`:
|
|
378
|
+
|
|
379
|
+
```toml
|
|
380
|
+
[classifier]
|
|
381
|
+
enabled = false
|
|
382
|
+
```
|
|
303
383
|
|
|
304
|
-
|
|
384
|
+
When disabled, the four surfaces degrade cleanly: no task column in list, no task tag in watch, no per-turn headings in detail, and `history heatmap --axis task_type` exits with a clear error. Category definitions, the cascade algorithm, and the calibration gate (≥85% agreement on a labeled corpus) are documented in [docs/classifier.md](docs/classifier.md).
|
|
305
385
|
|
|
306
386
|
## Privacy
|
|
307
387
|
|
|
@@ -315,20 +395,21 @@ The full privacy model and threat boundary: [docs/privacy.md](docs/privacy.md).
|
|
|
315
395
|
|
|
316
396
|
## Documentation
|
|
317
397
|
|
|
318
|
-
| Doc | What it covers
|
|
319
|
-
| -------------------------------------------------- |
|
|
320
|
-
| [docs/installation.md](docs/installation.md) | Install, upgrade, uninstall, from-source builds
|
|
321
|
-
| [docs/getting-started.md](docs/getting-started.md) | First-run walkthrough and interpreting the output
|
|
322
|
-
| [docs/cli.md](docs/cli.md) | Exhaustive CLI reference: every subcommand and flag
|
|
323
|
-
| [docs/configuration.md](docs/configuration.md) | Every config key, env binding, and validation rule
|
|
324
|
-
| [docs/collectors.md](docs/collectors.md) | What each metric measures and how to interpret it
|
|
325
|
-
| [docs/
|
|
326
|
-
| [docs/
|
|
327
|
-
| [
|
|
398
|
+
| Doc | What it covers |
|
|
399
|
+
| -------------------------------------------------- | ----------------------------------------------------- |
|
|
400
|
+
| [docs/installation.md](docs/installation.md) | Install, upgrade, uninstall, from-source builds |
|
|
401
|
+
| [docs/getting-started.md](docs/getting-started.md) | First-run walkthrough and interpreting the output |
|
|
402
|
+
| [docs/cli.md](docs/cli.md) | Exhaustive CLI reference: every subcommand and flag |
|
|
403
|
+
| [docs/configuration.md](docs/configuration.md) | Every config key, env binding, and validation rule |
|
|
404
|
+
| [docs/collectors.md](docs/collectors.md) | What each metric measures and how to interpret it |
|
|
405
|
+
| [docs/classifier.md](docs/classifier.md) | Experimental task classifier: categories and surfaces |
|
|
406
|
+
| [docs/privacy.md](docs/privacy.md) | Privacy guarantees and the threat model |
|
|
407
|
+
| [docs/design.md](docs/design.md) | Architecture, plugin boundaries, error taxonomy |
|
|
408
|
+
| [CHANGELOG.md](CHANGELOG.md) | Release notes |
|
|
328
409
|
|
|
329
410
|
## Experimental thresholds
|
|
330
411
|
|
|
331
|
-
The default
|
|
412
|
+
The shipped default thresholds were derived from a single user's session window — one user is not a population baseline. Every default ships with `experimental = true` and the watch header shows `[experimental thresholds]` until you either flip the flag in config or let bootstrap mode personalise the thresholds for your own workflow.
|
|
332
413
|
|
|
333
414
|
Bootstrap mode observes the first 10 sessions (configurable) with all severities pinned to `OK`, records the per-collector value distributions, then derives WARN at p80 and CRITICAL at p95 of _your_ local data, clamped by the literal-value hard caps. No manual tuning required. See [docs/collectors.md#experimental-thresholds-and-bootstrap](docs/collectors.md#experimental-thresholds-and-bootstrap).
|
|
334
415
|
|
|
@@ -341,6 +422,7 @@ uv sync --dev
|
|
|
341
422
|
uv run pytest
|
|
342
423
|
uv run mypy --strict codevigil
|
|
343
424
|
uv run ruff check .
|
|
425
|
+
uv run ruff format --check .
|
|
344
426
|
bash scripts/ci_privacy_grep.sh
|
|
345
427
|
```
|
|
346
428
|
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# codevigil - Session Quality, Observed
|
|
2
|
+
|
|
3
|
+
Local, privacy-preserving observability for Claude Code sessions.
|
|
4
|
+
|
|
5
|
+
codevigil tails `~/.claude/projects/**/*.jsonl` on disk, computes signal metrics about reasoning and tool-use patterns, and surfaces them in a rich terminal dashboard or as JSON / markdown reports. **Zero network egress, no data ever leaves your machine.**
|
|
6
|
+
|
|
7
|
+
[GitHub](https://github.com/Mathews-Tom/codevigil)
|
|
8
|
+
[Changelog](CHANGELOG.md)
|
|
9
|
+
[Python](https://www.python.org/downloads/)
|
|
10
|
+
[License](LICENSE)
|
|
11
|
+
[CI](https://github.com/Mathews-Tom/codevigil/actions/workflows/ci.yml)
|
|
12
|
+
[Privacy](docs/privacy.md)
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
uv tool install codevigil
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
`uv tool install` places the `codevigil` executable on your `PATH` inside an isolated environment that does not conflict with project virtualenvs. All subcommands, including the full `history` suite with colored panels and formatted tables, work out of the box.
|
|
21
|
+
|
|
22
|
+
Upgrade and uninstall:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
uv tool upgrade codevigil
|
|
26
|
+
uv tool uninstall codevigil # leaves config and session data untouched
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
No `uv`? Install it from <https://docs.astral.sh/uv/getting-started/installation/>. Fallbacks: `pipx install codevigil` and `pip install --user codevigil` both work. See [docs/installation.md](docs/installation.md) for all supported paths and from-source installs.
|
|
30
|
+
|
|
31
|
+
## First run
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
codevigil ingest # one-shot cold-ingest into persistent memory (first run only)
|
|
35
|
+
codevigil watch # project roll-up dashboard, resumes every file from its cached cursor
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
`codevigil ingest` walks every JSONL under `watch.roots`, parses them end-to-end, and writes a durable record (root-aware session key, raw session id, file id, cursor offset, collector state, metric summary) to the local SQLite store under `~/.local/state/codevigil/`. You run it once after install. Subsequent `codevigil watch` ticks seek past the saved cursor on every file, so the hot path only processes newly-appended events. If the store is absent on startup, `watch` will bootstrap it for you.
|
|
39
|
+
|
|
40
|
+
`codevigil watch` then prints a live **project-row** dashboard: one row per Claude Code project, with the fleet-worst severity, the active session count, and the aggregate metric summary. The top line shows fleet totals (session count, CRIT/WARN/OK tallies, project count, last-updated wall-clock tick). Every session's rolling-window collector state is restored from the store so restart does not erase your percentile baselines.
|
|
41
|
+
|
|
42
|
+
```text
|
|
43
|
+
codevigil [experimental thresholds] | sessions=3 crit=0 warn=1 ok=2 projects=2 updated=2026-04-16T10:22:00 | parse_confidence: 1.00
|
|
44
|
+
project: my-project | 2 active | WARN read_edit_ratio 3.1 | stop_phrase 1 | reasoning_loop 8.4 | thinking 0.82 | prompts 14
|
|
45
|
+
project: another-project | 1 active | OK read_edit_ratio 5.6 | stop_phrase 0 | reasoning_loop 4.1 | thinking 0.91 | prompts 7
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Pass `--by-session` (or set `watch.display_mode = "session"`) to fall back to the 0.2.x one-block-per-session layout:
|
|
49
|
+
|
|
50
|
+
```text
|
|
51
|
+
session: a3f7c2d | project: my-project | 2m 34s ACTIVE [task: debug_loop] [experimental]
|
|
52
|
+
──────────────────────────────────────────────────────────────
|
|
53
|
+
read_edit_ratio 5.2 OK [R:E 5.2 | research:mut 7.1] [↗3.1→4.2→5.2] [p68 of your baseline]
|
|
54
|
+
stop_phrase 0 OK [0 hits]
|
|
55
|
+
reasoning_loop 6.4 OK [6.4/1K tool calls | burst: 2] [↘8.1→7.2→6.4] [n/a]
|
|
56
|
+
thinking 0.87 OK [0.87 visible | chars med: 342 | sig med: 118]
|
|
57
|
+
prompts 11 OK [11 user turns]
|
|
58
|
+
──────────────────────────────────────────────────────────────
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
`Ctrl-C` exits cleanly. Walk through what every column means and how to interpret it in [docs/getting-started.md](docs/getting-started.md).
|
|
62
|
+
|
|
63
|
+
## What else can it do
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
codevigil config check # show the resolved config and where each value came from
|
|
67
|
+
codevigil ingest # cold-ingest every session into local persistent memory
|
|
68
|
+
codevigil ingest --force # rebuild the store from scratch, ignoring existing rows
|
|
69
|
+
codevigil watch --by-session # one block per session (0.2.x layout)
|
|
70
|
+
codevigil report ~/.claude/projects # default: stacked today / 7d / 30d panels
|
|
71
|
+
codevigil report sessions/ --format markdown --from 2026-04-01 # explicit window → single-period mode
|
|
72
|
+
codevigil report ~/.claude/projects --group-by week # cohort trend table by ISO week
|
|
73
|
+
codevigil report ~/.claude/projects --group-by week --format csv # flat CSV for notebook consumption
|
|
74
|
+
codevigil report sessions/ --compare-periods 2026-03-01:2026-03-31,2026-04-01:2026-04-30
|
|
75
|
+
codevigil report sessions/ --pivot-date 2026-04-01 # before/after delta at a change point
|
|
76
|
+
codevigil report sessions/ --group-by week --experimental-correlations # append Pearson appendix [experimental]
|
|
77
|
+
codevigil report sessions/ --output-file ~/reports/april.md # write to an exact file path
|
|
78
|
+
codevigil export session.jsonl # NDJSON event stream on stdout, jq-friendly
|
|
79
|
+
codevigil export session.jsonl | jq 'select(.kind == "tool_call") | .payload.tool_name'
|
|
80
|
+
codevigil history list # list stored sessions
|
|
81
|
+
codevigil history list --task-type debug_loop --since 2026-04-01 --severity warn
|
|
82
|
+
codevigil history SESSION_ID # event, metric, and per-turn task-type timeline
|
|
83
|
+
codevigil history diff SESSION_A SESSION_B # side-by-side Markdown diff of two sessions
|
|
84
|
+
codevigil history heatmap SESSION_ID # tool × severity heatmap with proportional gradient bars
|
|
85
|
+
codevigil history heatmap --axis task_type # cross-tab metrics against experimental task labels
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
`codevigil report` with no date flags now renders three stacked windows — **today**, **7d**, and **30d** — in one invocation. Pass `--from` or `--to` to fall back to the original single-period mode. Scripts that depend on the old no-flag single-period output should pass `--from 1970-01-01` (or any open lower bound) to preserve the previous shape.
|
|
89
|
+
|
|
90
|
+
Full flag reference for every subcommand: [docs/cli.md](docs/cli.md).
|
|
91
|
+
|
|
92
|
+
## Configuration
|
|
93
|
+
|
|
94
|
+
codevigil resolves its configuration from a layered precedence chain: built-in defaults → `~/.config/codevigil/config.toml` → `CODEVIGIL_*` environment variables → CLI flags. `watch.roots` is the canonical multi-root setting; `watch.root` and `CODEVIGIL_WATCH_ROOT` remain supported as deprecated single-root aliases. Run `codevigil config check` to see every resolved key with its source and any deprecation notices.
|
|
95
|
+
|
|
96
|
+
A minimal `~/.config/codevigil/config.toml`:
|
|
97
|
+
|
|
98
|
+
```toml
|
|
99
|
+
[watch]
|
|
100
|
+
roots = ["~/.claude/projects"]
|
|
101
|
+
poll_interval = 1.0
|
|
102
|
+
|
|
103
|
+
[collectors.read_edit_ratio]
|
|
104
|
+
warn_threshold = 5.0
|
|
105
|
+
critical_threshold = 2.5
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
The complete key reference, env-var bindings, and validation rules live in [docs/configuration.md](docs/configuration.md).
|
|
109
|
+
|
|
110
|
+
## What gets measured
|
|
111
|
+
|
|
112
|
+
Five user-facing collectors plus an always-on integrity gate:
|
|
113
|
+
|
|
114
|
+
| Collector | Signal |
|
|
115
|
+
| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
|
|
116
|
+
| `read_edit_ratio` | Reads vs. mutations, blind-edit detection, file-tracking confidence |
|
|
117
|
+
| `stop_phrase` | Hits against ownership-dodging, permission-seeking, premature-stopping, and known-limitation phrase categories |
|
|
118
|
+
| `reasoning_loop` | Self-correction phrase rate per 1K tool calls plus longest consecutive burst |
|
|
119
|
+
| `thinking` | Visible-vs-redacted thinking-block ratio plus median visible / signature character lengths (headline signal for #42796 depth decline) |
|
|
120
|
+
| `prompts` | Cumulative user-turn count per session; feeds the #42796 "prompts per session" cohort mean |
|
|
121
|
+
| `parse_health` | Always-on. Flips to CRITICAL when parse confidence drops below 0.9 in any 50-line window |
|
|
122
|
+
|
|
123
|
+
`thinking` and `prompts` are descriptive counters — severity stays at OK by design. They exist to feed cohort trend reports, not to alarm. Threshold semantics for the three gated collectors, what each metric is sensitive to, and how to interpret CRITICAL signals live in [docs/collectors.md](docs/collectors.md).
|
|
124
|
+
|
|
125
|
+
## Persistent memory
|
|
126
|
+
|
|
127
|
+
0.4.0 adds first-class multi-root support on top of the local SQLite-backed processed-session store under `~/.local/state/codevigil/processed/`. Every finalised session now writes a root-aware record (`session_key`, raw `session_id`, cursor byte offset, collector state snapshot, and derived metric summary), and the watcher seeds each polled file from the cached cursor on startup instead of re-parsing JSONL from byte 0. Rolling-window collector state (the `read_edit_ratio` 50-event deque, the `reasoning_loop` burst counter) is restored verbatim across restarts, even when different roots contain the same `session_id`. Run `codevigil ingest` once after install; after that, `codevigil watch` processes only newly appended events on the hot path. Disable the cursor cache for reproducible cold-start benchmarks with `watch.cursor_cache_enabled = false`. Schema, migration policy, and the invariants the store upholds live in [docs/design.md](docs/design.md).
|
|
128
|
+
|
|
129
|
+
## Cohort trend reports
|
|
130
|
+
|
|
131
|
+
`codevigil report --group-by {day,week,project,model,permission_mode}` aggregates every session in the store into cohort cells and emits a Markdown trend table with a methodology header, Δ-vs-prior-row annotations on chronological dimensions, and threshold highlighting for cells crossing warn / critical. Cells with `n<5` are redacted with an `n<5` sentinel. Additional cohort-only flags:
|
|
132
|
+
|
|
133
|
+
- `--pivot-date YYYY-MM-DD` — split the corpus at a change point and emit a Before/After delta table.
|
|
134
|
+
- `--compare-periods A_START:A_END,B_START:B_END` — signed delta table + prose summary per metric.
|
|
135
|
+
- `--experimental-correlations` — Pearson appendix across per-session metric columns; pairs with `n<30` are dropped. Exploratory only — correlation is not causation, and the rendered output says so explicitly.
|
|
136
|
+
- `--format csv` — flat `dimension_value,metric_name,mean,stdev,n,min,max` for notebook consumption.
|
|
137
|
+
- `--format json` — versioned JSON cohort document (`schema_version=1`) for downstream pipelines.
|
|
138
|
+
- `--output-file PATH` — write to an exact file path (parent dirs created, must resolve under `$HOME`).
|
|
139
|
+
|
|
140
|
+
Both new default collectors (`thinking`, `prompts`) surface in cohort reports as `thinking_visible_ratio`, `thinking_visible_chars_median`, `thinking_signature_chars_median`, and `user_turns`. Full reference: [docs/cli.md](docs/cli.md).
|
|
141
|
+
|
|
142
|
+
## Task classifier `[experimental]`
|
|
143
|
+
|
|
144
|
+
The experimental task classifier labels each Claude Code turn as `exploration`, `mutation_heavy`, `debug_loop`, `planning`, or `mixed` using a two-stage cascade (tool-presence heuristic → keyword regex on the user message, stdlib `re` only, zero network, zero new dependencies). Session-level labels aggregate turn labels by majority vote. Labels surface in four places:
|
|
145
|
+
|
|
146
|
+
- **`history list`** — new `task_type` column and `--task-type <label>` filter
|
|
147
|
+
- **`history heatmap --axis task_type`** — cross-tab metrics against task labels
|
|
148
|
+
- **`history SESSION_ID`** — per-turn task-type headings in the event timeline
|
|
149
|
+
- **`codevigil watch`** — right-aligned task tag in each session header
|
|
150
|
+
|
|
151
|
+
Every surface is marked `[experimental]`. The classifier is on by default; opt out via the `[classifier]` table in `~/.config/codevigil/config.toml`:
|
|
152
|
+
|
|
153
|
+
```toml
|
|
154
|
+
[classifier]
|
|
155
|
+
enabled = false
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
When disabled, the four surfaces degrade cleanly: no task column in list, no task tag in watch, no per-turn headings in detail, and `history heatmap --axis task_type` exits with a clear error. Category definitions, the cascade algorithm, and the calibration gate (≥85% agreement on a labeled corpus) are documented in [docs/classifier.md](docs/classifier.md).
|
|
159
|
+
|
|
160
|
+
## Privacy
|
|
161
|
+
|
|
162
|
+
Three independent enforcement layers ensure session data never leaves your machine:
|
|
163
|
+
|
|
164
|
+
- **Runtime import allowlist hook** installed at package init refuses any import of `socket`, `urllib`, `http.client`, `httpx`, `requests`, `aiohttp`, `ftplib`, `smtplib`, `ssl`, `subprocess`, or related transports from inside a `codevigil` module.
|
|
165
|
+
- **CI grep gate** re-checks the source tree for the same banned names on every push as a belt-and-suspenders second layer.
|
|
166
|
+
- **Filesystem scope check** refuses any read or write path outside `$HOME` via a `Path.resolve().is_relative_to(home)` check.
|
|
167
|
+
|
|
168
|
+
The full privacy model and threat boundary: [docs/privacy.md](docs/privacy.md).
|
|
169
|
+
|
|
170
|
+
## Documentation
|
|
171
|
+
|
|
172
|
+
| Doc | What it covers |
|
|
173
|
+
| -------------------------------------------------- | ----------------------------------------------------- |
|
|
174
|
+
| [docs/installation.md](docs/installation.md) | Install, upgrade, uninstall, from-source builds |
|
|
175
|
+
| [docs/getting-started.md](docs/getting-started.md) | First-run walkthrough and interpreting the output |
|
|
176
|
+
| [docs/cli.md](docs/cli.md) | Exhaustive CLI reference: every subcommand and flag |
|
|
177
|
+
| [docs/configuration.md](docs/configuration.md) | Every config key, env binding, and validation rule |
|
|
178
|
+
| [docs/collectors.md](docs/collectors.md) | What each metric measures and how to interpret it |
|
|
179
|
+
| [docs/classifier.md](docs/classifier.md) | Experimental task classifier: categories and surfaces |
|
|
180
|
+
| [docs/privacy.md](docs/privacy.md) | Privacy guarantees and the threat model |
|
|
181
|
+
| [docs/design.md](docs/design.md) | Architecture, plugin boundaries, error taxonomy |
|
|
182
|
+
| [CHANGELOG.md](CHANGELOG.md) | Release notes |
|
|
183
|
+
|
|
184
|
+
## Experimental thresholds
|
|
185
|
+
|
|
186
|
+
The shipped default thresholds were derived from a single user's session window — one user is not a population baseline. Every default ships with `experimental = true` and the watch header shows `[experimental thresholds]` until you either flip the flag in config or let bootstrap mode personalise the thresholds for your own workflow.
|
|
187
|
+
|
|
188
|
+
Bootstrap mode observes the first 10 sessions (configurable) with all severities pinned to `OK`, records the per-collector value distributions, then derives WARN at p80 and CRITICAL at p95 of _your_ local data, clamped by the literal-value hard caps. No manual tuning required. See [docs/collectors.md#experimental-thresholds-and-bootstrap](docs/collectors.md#experimental-thresholds-and-bootstrap).
|
|
189
|
+
|
|
190
|
+
## Contributing
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
git clone https://github.com/Mathews-Tom/codevigil
|
|
194
|
+
cd codevigil
|
|
195
|
+
uv sync --dev
|
|
196
|
+
uv run pytest
|
|
197
|
+
uv run mypy --strict codevigil
|
|
198
|
+
uv run ruff check .
|
|
199
|
+
uv run ruff format --check .
|
|
200
|
+
bash scripts/ci_privacy_grep.sh
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
All five gates must pass before a commit lands. The privacy grep runs as a separate CI job alongside the typecheck-and-test matrix on every PR.
|
|
204
|
+
|
|
205
|
+
## License
|
|
206
|
+
|
|
207
|
+
Apache License 2.0. See [LICENSE](LICENSE).
|