codevigil 0.1.1__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. {codevigil-0.1.1 → codevigil-0.4.0}/.gitignore +2 -1
  2. {codevigil-0.1.1 → codevigil-0.4.0}/PKG-INFO +118 -36
  3. codevigil-0.4.0/README.md +207 -0
  4. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/__init__.py +1 -1
  5. codevigil-0.4.0/codevigil/aggregator.py +1012 -0
  6. codevigil-0.4.0/codevigil/analysis/__init__.py +26 -0
  7. codevigil-0.4.0/codevigil/analysis/cohort.py +221 -0
  8. codevigil-0.4.0/codevigil/analysis/compare.py +335 -0
  9. codevigil-0.4.0/codevigil/analysis/correlations.py +106 -0
  10. codevigil-0.4.0/codevigil/analysis/guards.py +145 -0
  11. codevigil-0.4.0/codevigil/analysis/processed_store.py +687 -0
  12. codevigil-0.4.0/codevigil/analysis/store.py +601 -0
  13. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/bootstrap.py +92 -18
  14. codevigil-0.4.0/codevigil/classifier.py +258 -0
  15. codevigil-0.4.0/codevigil/cli.py +2186 -0
  16. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/collectors/__init__.py +2 -0
  17. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/collectors/parse_health.py +47 -5
  18. codevigil-0.4.0/codevigil/collectors/prompts.py +63 -0
  19. codevigil-0.4.0/codevigil/collectors/read_edit_ratio.py +349 -0
  20. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/collectors/reasoning_loop.py +29 -2
  21. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/collectors/stop_phrase.py +83 -2
  22. codevigil-0.4.0/codevigil/collectors/thinking.py +154 -0
  23. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/config.py +294 -121
  24. codevigil-0.4.0/codevigil/history/__init__.py +15 -0
  25. codevigil-0.4.0/codevigil/history/detail_cmd.py +184 -0
  26. codevigil-0.4.0/codevigil/history/diff_cmd.py +168 -0
  27. codevigil-0.4.0/codevigil/history/filters.py +217 -0
  28. codevigil-0.4.0/codevigil/history/heatmap_cmd.py +218 -0
  29. codevigil-0.4.0/codevigil/history/list_cmd.py +185 -0
  30. codevigil-0.4.0/codevigil/ingest.py +355 -0
  31. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/parser.py +475 -85
  32. codevigil-0.4.0/codevigil/renderers/_bars.py +67 -0
  33. codevigil-0.4.0/codevigil/renderers/terminal.py +977 -0
  34. codevigil-0.4.0/codevigil/report/__init__.py +21 -0
  35. codevigil-0.4.0/codevigil/report/loader.py +473 -0
  36. codevigil-0.4.0/codevigil/report/renderer.py +1052 -0
  37. codevigil-0.4.0/codevigil/turns.py +183 -0
  38. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/types.py +34 -1
  39. codevigil-0.4.0/codevigil/ui/progress.py +282 -0
  40. codevigil-0.4.0/codevigil/watch_roots.py +88 -0
  41. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/watcher.py +242 -10
  42. codevigil-0.4.0/codevigil/watcher_cache.py +277 -0
  43. {codevigil-0.1.1 → codevigil-0.4.0}/docs/README.md +8 -6
  44. {codevigil-0.1.1 → codevigil-0.4.0}/pyproject.toml +4 -2
  45. codevigil-0.1.1/README.md +0 -126
  46. codevigil-0.1.1/codevigil/aggregator.py +0 -506
  47. codevigil-0.1.1/codevigil/cli.py +0 -732
  48. codevigil-0.1.1/codevigil/collectors/read_edit_ratio.py +0 -258
  49. codevigil-0.1.1/codevigil/renderers/terminal.py +0 -236
  50. {codevigil-0.1.1 → codevigil-0.4.0}/LICENSE +0 -0
  51. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/__main__.py +0 -0
  52. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/collectors/_text_match.py +0 -0
  53. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/errors.py +0 -0
  54. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/privacy.py +0 -0
  55. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/projects.py +0 -0
  56. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/registry.py +0 -0
  57. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/renderers/__init__.py +0 -0
  58. {codevigil-0.1.1 → codevigil-0.4.0}/codevigil/renderers/json_file.py +0 -0
@@ -216,4 +216,5 @@ __marimo__/
216
216
  .opencode/
217
217
 
218
218
  # Developer workspace
219
- .docs/
219
+ .docs/
220
+ reports/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codevigil
3
- Version: 0.1.1
3
+ Version: 0.4.0
4
4
  Summary: Local, privacy-preserving observability for Claude Code sessions.
5
5
  Project-URL: Homepage, https://github.com/Mathews-Tom/codevigil
6
6
  Project-URL: Issues, https://github.com/Mathews-Tom/codevigil/issues
@@ -221,15 +221,21 @@ Classifier: Programming Language :: Python :: 3.12
221
221
  Classifier: Topic :: Software Development :: Quality Assurance
222
222
  Classifier: Topic :: System :: Monitoring
223
223
  Requires-Python: >=3.11
224
+ Requires-Dist: rich>=13
224
225
  Description-Content-Type: text/markdown
225
226
 
226
227
  # codevigil - Session Quality, Observed
227
228
 
228
229
  Local, privacy-preserving observability for Claude Code sessions.
229
230
 
230
- codevigil tails `~/.claude/projects/**/*.jsonl` on disk, computes signal metrics about reasoning and tool-use patterns, and surfaces them in a terminal dashboard or as JSON / markdown reports. **Stdlib-only runtime, zero network egress, no data ever leaves your machine.**
231
+ codevigil tails `~/.claude/projects/**/*.jsonl` on disk, computes signal metrics about reasoning and tool-use patterns, and surfaces them in a rich terminal dashboard or as JSON / markdown reports. **Zero network egress, no data ever leaves your machine.**
231
232
 
232
- Status: alpha. Python 3.11 and 3.12.
233
+ [![Status](https://img.shields.io/badge/status-beta-blue.svg)](https://github.com/Mathews-Tom/codevigil)
234
+ [![Version](https://img.shields.io/badge/version-0.4.0-informational.svg)](CHANGELOG.md)
235
+ [![Python](https://img.shields.io/badge/python-3.11%20%7C%203.12-blue.svg)](https://www.python.org/downloads/)
236
+ [![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)
237
+ [![CI](https://github.com/Mathews-Tom/codevigil/actions/workflows/ci.yml/badge.svg)](https://github.com/Mathews-Tom/codevigil/actions/workflows/ci.yml)
238
+ [![Privacy](https://img.shields.io/badge/network%20egress-zero-success.svg)](docs/privacy.md)
233
239
 
234
240
  ## Install
235
241
 
@@ -237,25 +243,44 @@ Status: alpha. Python 3.11 and 3.12.
237
243
  uv tool install codevigil
238
244
  ```
239
245
 
240
- That's it. `uv tool install` puts a `codevigil` executable on your `PATH` in an isolated environment so it does not interfere with your project virtualenvs. To upgrade later, run `uv tool upgrade codevigil`. To remove it, `uv tool uninstall codevigil`.
246
+ `uv tool install` places the `codevigil` executable on your `PATH` inside an isolated environment that does not conflict with project virtualenvs. All subcommands, including the full `history` suite with colored panels and formatted tables, work out of the box.
241
247
 
242
- If you don't have `uv`, install it from <https://docs.astral.sh/uv/getting-started/installation/>, or fall back to `pipx install codevigil` / `pip install --user codevigil`. See [docs/installation.md](docs/installation.md) for every supported path including from-source installs.
248
+ Upgrade and uninstall:
249
+
250
+ ```bash
251
+ uv tool upgrade codevigil
252
+ uv tool uninstall codevigil # leaves config and session data untouched
253
+ ```
254
+
255
+ No `uv`? Install it from <https://docs.astral.sh/uv/getting-started/installation/>. Fallbacks: `pipx install codevigil` and `pip install --user codevigil` both work. See [docs/installation.md](docs/installation.md) for all supported paths and from-source installs.
243
256
 
244
257
  ## First run
245
258
 
246
259
  ```bash
247
- codevigil watch
260
+ codevigil ingest # one-shot cold-ingest into persistent memory (first run only)
261
+ codevigil watch # project roll-up dashboard, resumes every file from its cached cursor
262
+ ```
263
+
264
+ `codevigil ingest` walks every JSONL file under `watch.roots`, parses each one end-to-end, and writes a durable record (root-aware session key, raw session id, file id, cursor offset, collector state, metric summary) to the local SQLite store under `~/.local/state/codevigil/`. Run it once after install. Subsequent `codevigil watch` ticks seek past the saved cursor on every file, so the hot path only processes newly-appended events. If the store is absent on startup, `watch` will bootstrap it for you.
265
+
266
+ `codevigil watch` then prints a live **project-row** dashboard: one row per Claude Code project, with the fleet-worst severity, the active session count, and the aggregate metric summary. The top line shows fleet totals (session count, CRIT/WARN/OK tallies, project count, last-updated wall-clock tick). Every session's rolling-window collector state is restored from the store, so a restart does not erase your percentile baselines.
267
+
268
+ ```text
269
+ codevigil [experimental thresholds] | sessions=3 crit=0 warn=1 ok=2 projects=2 updated=2026-04-16T10:22:00 | parse_confidence: 1.00
270
+ project: my-project | 2 active | WARN read_edit_ratio 3.1 | stop_phrase 1 | reasoning_loop 8.4 | thinking 0.82 | prompts 14
271
+ project: another-project | 1 active | OK read_edit_ratio 5.6 | stop_phrase 0 | reasoning_loop 4.1 | thinking 0.91 | prompts 7
248
272
  ```
249
273
 
250
- Tails every active session under `~/.claude/projects` and prints a live multi-session dashboard at one frame per second. Each session shows three metrics — read/edit ratio, stop-phrase hit count, reasoning loop rate — plus a header line with parse confidence and an `[experimental thresholds]` badge while you're still inside the bootstrap window.
274
+ Pass `--by-session` (or set `watch.display_mode = "session"`) to fall back to the 0.2.x one-block-per-session layout:
251
275
 
252
276
  ```text
253
- codevigil [experimental thresholds] | parse_confidence: 1.00
254
- session: a3f7c2d | project: my-project | 2m 34s ACTIVE
277
+ session: a3f7c2d | project: my-project | 2m 34s ACTIVE [task: debug_loop] [experimental]
255
278
  ──────────────────────────────────────────────────────────────
256
- read_edit_ratio 5.2 OK [R:E 5.2 | research:mut 7.1]
257
- stop_phrase 0 OK [0 hits]
258
- reasoning_loop 6.4 OK [6.4/1K tool calls | burst: 2]
279
+ read_edit_ratio 5.2 OK [R:E 5.2 | research:mut 7.1] [↗3.1→4.2→5.2] [p68 of your baseline]
280
+ stop_phrase 0 OK [0 hits]
281
+ reasoning_loop 6.4 OK [6.4/1K tool calls | burst: 2] [↘8.1→7.2→6.4] [n/a]
282
+ thinking 0.87 OK [0.87 visible | chars med: 342 | sig med: 118]
283
+ prompts 11 OK [11 user turns]
259
284
  ──────────────────────────────────────────────────────────────
260
285
  ```
261
286
 
@@ -264,23 +289,41 @@ session: a3f7c2d | project: my-project | 2m 34s ACTIVE
264
289
  ## What else can it do
265
290
 
266
291
  ```bash
267
- codevigil config check # show the resolved config and where each value came from
268
- codevigil report ~/.claude/projects # batch report over a tree of session files
269
- codevigil report sessions/ --format markdown --from 2026-04-01
270
- codevigil export session.jsonl # NDJSON event stream on stdout, jq-friendly
292
+ codevigil config check # show the resolved config and where each value came from
293
+ codevigil ingest # cold-ingest every session into local persistent memory
294
+ codevigil ingest --force # rebuild the store from scratch, ignoring existing rows
295
+ codevigil watch --by-session # one block per session (0.2.x layout)
296
+ codevigil report ~/.claude/projects # default: stacked today / 7d / 30d panels
297
+ codevigil report sessions/ --format markdown --from 2026-04-01 # explicit window → single-period mode
298
+ codevigil report ~/.claude/projects --group-by week # cohort trend table by ISO week
299
+ codevigil report ~/.claude/projects --group-by week --format csv # flat CSV for notebook consumption
300
+ codevigil report sessions/ --compare-periods 2026-03-01:2026-03-31,2026-04-01:2026-04-30
301
+ codevigil report sessions/ --pivot-date 2026-04-01 # before/after delta at a change point
302
+ codevigil report sessions/ --group-by week --experimental-correlations # append Pearson appendix [experimental]
303
+ codevigil report sessions/ --output-file ~/reports/april.md # write to an exact file path
304
+ codevigil export session.jsonl # NDJSON event stream on stdout, jq-friendly
271
305
  codevigil export session.jsonl | jq 'select(.kind == "tool_call") | .payload.tool_name'
306
+ codevigil history list # list stored sessions
307
+ codevigil history list --task-type debug_loop --since 2026-04-01 --severity warn
308
+ codevigil history SESSION_ID # event, metric, and per-turn task-type timeline
309
+ codevigil history diff SESSION_A SESSION_B # side-by-side Markdown diff of two sessions
310
+ codevigil history heatmap SESSION_ID # tool × severity heatmap with proportional gradient bars
311
+ codevigil history heatmap --axis task_type # cross-tab metrics against experimental task labels
272
312
  ```
273
313
 
314
+ `codevigil report` with no date flags now renders three stacked windows — **today**, **7d**, and **30d** — in one invocation. Pass `--from` or `--to` to fall back to the original single-period mode. Scripts that depend on the old no-flag single-period output should pass `--from 1970-01-01` (or any open lower bound) to preserve the previous shape.
315
+
274
316
  Full flag reference for every subcommand: [docs/cli.md](docs/cli.md).
275
317
 
276
318
  ## Configuration
277
319
 
278
- codevigil resolves its configuration from a layered precedence chain: built-in defaults → `~/.config/codevigil/config.toml` → `CODEVIGIL_*` environment variables → CLI flags. Run `codevigil config check` to see every resolved key with its source.
320
+ codevigil resolves its configuration from a layered precedence chain: built-in defaults → `~/.config/codevigil/config.toml` → `CODEVIGIL_*` environment variables → CLI flags. `watch.roots` is the canonical multi-root setting; `watch.root` and `CODEVIGIL_WATCH_ROOT` remain supported as deprecated single-root aliases. Run `codevigil config check` to see every resolved key with its source and any deprecation notices.
279
321
 
280
322
  A minimal `~/.config/codevigil/config.toml`:
281
323
 
282
324
  ```toml
283
325
  [watch]
326
+ roots = ["~/.claude/projects"]
284
327
  poll_interval = 1.0
285
328
 
286
329
  [collectors.read_edit_ratio]
@@ -292,16 +335,53 @@ The complete key reference, env-var bindings, and validation rules live in [docs
292
335
 
293
336
  ## What gets measured
294
337
 
295
- Three user-facing collectors plus an always-on integrity gate:
338
+ Five user-facing collectors plus an always-on integrity gate:
339
+
340
+ | Collector | Signal |
341
+ | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
342
+ | `read_edit_ratio` | Reads vs. mutations, blind-edit detection, file-tracking confidence |
343
+ | `stop_phrase` | Hits against ownership-dodging, permission-seeking, premature-stopping, and known-limitation phrase categories |
344
+ | `reasoning_loop` | Self-correction phrase rate per 1K tool calls plus longest consecutive burst |
345
+ | `thinking` | Visible-vs-redacted thinking-block ratio plus median visible / signature character lengths (headline signal for #42796 depth decline) |
346
+ | `prompts` | Cumulative user-turn count per session; feeds the #42796 "prompts per session" cohort mean |
347
+ | `parse_health` | Always-on. Flips to CRITICAL when parse confidence drops below 0.9 in any 50-line window |
348
+
349
+ `thinking` and `prompts` are descriptive counters — severity stays at OK by design. They exist to feed cohort trend reports, not to alarm. Threshold semantics for the three gated collectors, what each metric is sensitive to, and how to interpret CRITICAL signals live in [docs/collectors.md](docs/collectors.md).
296
350
 
297
- | Collector | Signal |
298
- | ----------------- | -------------------------------------------------------------------------------------------------------------- |
299
- | `read_edit_ratio` | Reads vs. mutations, blind-edit detection, file-tracking confidence |
300
- | `stop_phrase` | Hits against ownership-dodging, permission-seeking, premature-stopping, and known-limitation phrase categories |
301
- | `reasoning_loop` | Self-correction phrase rate per 1K tool calls plus longest consecutive burst |
302
- | `parse_health` | Always-on. Flips to CRITICAL when parse confidence drops below 0.9 in any 50-line window |
351
+ ## Persistent memory
352
+
353
+ 0.4.0 adds first-class multi-root support on top of the local SQLite-backed processed-session store under `~/.local/state/codevigil/processed/`. Every finalised session now writes a root-aware identity (`session_key`, raw `session_id`, cursor byte offset, collector state snapshot, and derived metric summary), and the watcher seeds each polled file from the cached cursor on startup instead of re-parsing JSONL from byte 0. Rolling-window collector state (the `read_edit_ratio` 50-event deque, the `reasoning_loop` burst counter) is restored verbatim across restarts, even when different roots contain the same `session_id`. Run `codevigil ingest` once after install; after that, `codevigil watch` only processes newly-appended events on the hot path. Disable the cursor cache for reproducible cold-start benchmarks with `watch.cursor_cache_enabled = false`. Schema, migration policy, and the invariants the store upholds live in [docs/design.md](docs/design.md).
354
+
355
+ ## Cohort trend reports
356
+
357
+ `codevigil report --group-by {day,week,project,model,permission_mode}` aggregates every session in the store into cohort cells and emits a Markdown trend table with a methodology header, Δ-vs-prior-row annotations on chronological dimensions, and threshold highlighting for cells crossing warn / critical. Cells with `n<5` are redacted with an `n<5` sentinel. Additional cohort-only flags:
358
+
359
+ - `--pivot-date YYYY-MM-DD` — split the corpus at a change point and emit a Before/After delta table.
360
+ - `--compare-periods A_START:A_END,B_START:B_END` — signed delta table + prose summary per metric.
361
+ - `--experimental-correlations` — Pearson appendix across per-session metric columns; pairs below `n=30` are dropped. Exploratory only — correlation is not causation, and the rendered output says so explicitly.
362
+ - `--format csv` — flat `dimension_value,metric_name,mean,stdev,n,min,max` for notebook consumption.
363
+ - `--format json` — versioned JSON cohort document (`schema_version=1`) for downstream pipelines.
364
+ - `--output-file PATH` — write to an exact file path (parent dirs created, must resolve under `$HOME`).
365
+
366
+ Both new default collectors (`thinking`, `prompts`) surface in cohort reports as `thinking_visible_ratio`, `thinking_visible_chars_median`, `thinking_signature_chars_median`, and `user_turns`. Full reference: [docs/cli.md](docs/cli.md).
367
+
368
+ ## Task classifier `[experimental]`
369
+
370
+ The experimental task classifier labels each Claude Code turn as `exploration`, `mutation_heavy`, `debug_loop`, `planning`, or `mixed` using a two-stage cascade (tool-presence heuristic → keyword regex on the user message, stdlib `re` only, zero network, zero new dependencies). Session-level labels aggregate turn labels by majority vote. Labels surface in four places:
371
+
372
+ - **`history list`** — new `task_type` column and `--task-type <label>` filter
373
+ - **`history heatmap --axis task_type`** — cross-tab metrics against task labels
374
+ - **`history SESSION_ID`** — per-turn task-type headings in the event timeline
375
+ - **`codevigil watch`** — right-aligned task tag in each session header
376
+
377
+ Every surface is marked `[experimental]`. The classifier is opt-out via `[classifier]` in `~/.config/codevigil/config.toml`:
378
+
379
+ ```toml
380
+ [classifier]
381
+ enabled = false
382
+ ```
303
383
 
304
- Threshold semantics, what each metric is sensitive to, and how to interpret CRITICAL signals: [docs/collectors.md](docs/collectors.md).
384
+ When disabled, the four surfaces degrade cleanly: no task column in list, no task tag in watch, no per-turn headings in detail, and `history heatmap --axis task_type` exits with a clear error. Category definitions, the cascade algorithm, and the calibration gate (≥85% agreement on a labeled corpus) are documented in [docs/classifier.md](docs/classifier.md).
305
385
 
306
386
  ## Privacy
307
387
 
@@ -315,20 +395,21 @@ The full privacy model and threat boundary: [docs/privacy.md](docs/privacy.md).
315
395
 
316
396
  ## Documentation
317
397
 
318
- | Doc | What it covers |
319
- | -------------------------------------------------- | --------------------------------------------------- |
320
- | [docs/installation.md](docs/installation.md) | Install, upgrade, uninstall, from-source builds |
321
- | [docs/getting-started.md](docs/getting-started.md) | First-run walkthrough and interpreting the output |
322
- | [docs/cli.md](docs/cli.md) | Exhaustive CLI reference: every subcommand and flag |
323
- | [docs/configuration.md](docs/configuration.md) | Every config key, env binding, and validation rule |
324
- | [docs/collectors.md](docs/collectors.md) | What each metric measures and how to interpret it |
325
- | [docs/privacy.md](docs/privacy.md) | Privacy guarantees and the threat model |
326
- | [docs/design.md](docs/design.md) | Architecture, plugin boundaries, error taxonomy |
327
- | [CHANGELOG.md](CHANGELOG.md) | Release notes |
398
+ | Doc | What it covers |
399
+ | -------------------------------------------------- | ----------------------------------------------------- |
400
+ | [docs/installation.md](docs/installation.md) | Install, upgrade, uninstall, from-source builds |
401
+ | [docs/getting-started.md](docs/getting-started.md) | First-run walkthrough and interpreting the output |
402
+ | [docs/cli.md](docs/cli.md) | Exhaustive CLI reference: every subcommand and flag |
403
+ | [docs/configuration.md](docs/configuration.md) | Every config key, env binding, and validation rule |
404
+ | [docs/collectors.md](docs/collectors.md) | What each metric measures and how to interpret it |
405
+ | [docs/classifier.md](docs/classifier.md) | Experimental task classifier: categories and surfaces |
406
+ | [docs/privacy.md](docs/privacy.md) | Privacy guarantees and the threat model |
407
+ | [docs/design.md](docs/design.md) | Architecture, plugin boundaries, error taxonomy |
408
+ | [CHANGELOG.md](CHANGELOG.md) | Release notes |
328
409
 
329
410
  ## Experimental thresholds
330
411
 
331
- The default v0.1 thresholds were derived from a single user's session window — one user is not a population baseline. Every default ships with `experimental = true` and the watch header shows `[experimental thresholds]` until you either flip the flag in config or let bootstrap mode personalise the thresholds for your own workflow.
412
+ The shipped default thresholds were derived from a single user's session window — one user is not a population baseline. Every default ships with `experimental = true` and the watch header shows `[experimental thresholds]` until you either flip the flag in config or let bootstrap mode personalise the thresholds for your own workflow.
332
413
 
333
414
  Bootstrap mode observes the first 10 sessions (configurable) with all severities pinned to `OK`, records the per-collector value distributions, then derives WARN at p80 and CRITICAL at p95 of _your_ local data, clamped by the literal-value hard caps. No manual tuning required. See [docs/collectors.md#experimental-thresholds-and-bootstrap](docs/collectors.md#experimental-thresholds-and-bootstrap).
334
415
 
@@ -341,6 +422,7 @@ uv sync --dev
341
422
  uv run pytest
342
423
  uv run mypy --strict codevigil
343
424
  uv run ruff check .
425
+ uv run ruff format --check .
344
426
  bash scripts/ci_privacy_grep.sh
345
427
  ```
346
428
 
@@ -0,0 +1,207 @@
1
+ # codevigil - Session Quality, Observed
2
+
3
+ Local, privacy-preserving observability for Claude Code sessions.
4
+
5
+ codevigil tails `~/.claude/projects/**/*.jsonl` on disk, computes signal metrics about reasoning and tool-use patterns, and surfaces them in a rich terminal dashboard or as JSON / markdown reports. **Zero network egress, no data ever leaves your machine.**
6
+
7
+ [![Status](https://img.shields.io/badge/status-beta-blue.svg)](https://github.com/Mathews-Tom/codevigil)
8
+ [![Version](https://img.shields.io/badge/version-0.4.0-informational.svg)](CHANGELOG.md)
9
+ [![Python](https://img.shields.io/badge/python-3.11%20%7C%203.12-blue.svg)](https://www.python.org/downloads/)
10
+ [![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)
11
+ [![CI](https://github.com/Mathews-Tom/codevigil/actions/workflows/ci.yml/badge.svg)](https://github.com/Mathews-Tom/codevigil/actions/workflows/ci.yml)
12
+ [![Privacy](https://img.shields.io/badge/network%20egress-zero-success.svg)](docs/privacy.md)
13
+
14
+ ## Install
15
+
16
+ ```bash
17
+ uv tool install codevigil
18
+ ```
19
+
20
+ `uv tool install` places the `codevigil` executable on your `PATH` inside an isolated environment that does not conflict with project virtualenvs. All subcommands, including the full `history` suite with colored panels and formatted tables, work out of the box.
21
+
22
+ Upgrade and uninstall:
23
+
24
+ ```bash
25
+ uv tool upgrade codevigil
26
+ uv tool uninstall codevigil # leaves config and session data untouched
27
+ ```
28
+
29
+ No `uv`? Install it from <https://docs.astral.sh/uv/getting-started/installation/>. Fallbacks: `pipx install codevigil` and `pip install --user codevigil` both work. See [docs/installation.md](docs/installation.md) for all supported paths and from-source installs.
30
+
31
+ ## First run
32
+
33
+ ```bash
34
+ codevigil ingest # one-shot cold-ingest into persistent memory (first run only)
35
+ codevigil watch # project roll-up dashboard, resumes every file from its cached cursor
36
+ ```
37
+
38
+ `codevigil ingest` walks every JSONL file under `watch.roots`, parses each one end-to-end, and writes a durable record (root-aware session key, raw session id, file id, cursor offset, collector state, metric summary) to the local SQLite store under `~/.local/state/codevigil/`. Run it once after install. Subsequent `codevigil watch` ticks seek past the saved cursor on every file, so the hot path only processes newly-appended events. If the store is absent on startup, `watch` will bootstrap it for you.
39
+
40
+ `codevigil watch` then prints a live **project-row** dashboard: one row per Claude Code project, with the fleet-worst severity, the active session count, and the aggregate metric summary. The top line shows fleet totals (session count, CRIT/WARN/OK tallies, project count, last-updated wall-clock tick). Every session's rolling-window collector state is restored from the store, so a restart does not erase your percentile baselines.
41
+
42
+ ```text
43
+ codevigil [experimental thresholds] | sessions=3 crit=0 warn=1 ok=2 projects=2 updated=2026-04-16T10:22:00 | parse_confidence: 1.00
44
+ project: my-project | 2 active | WARN read_edit_ratio 3.1 | stop_phrase 1 | reasoning_loop 8.4 | thinking 0.82 | prompts 14
45
+ project: another-project | 1 active | OK read_edit_ratio 5.6 | stop_phrase 0 | reasoning_loop 4.1 | thinking 0.91 | prompts 7
46
+ ```
47
+
48
+ Pass `--by-session` (or set `watch.display_mode = "session"`) to fall back to the 0.2.x one-block-per-session layout:
49
+
50
+ ```text
51
+ session: a3f7c2d | project: my-project | 2m 34s ACTIVE [task: debug_loop] [experimental]
52
+ ──────────────────────────────────────────────────────────────
53
+ read_edit_ratio 5.2 OK [R:E 5.2 | research:mut 7.1] [↗3.1→4.2→5.2] [p68 of your baseline]
54
+ stop_phrase 0 OK [0 hits]
55
+ reasoning_loop 6.4 OK [6.4/1K tool calls | burst: 2] [↘8.1→7.2→6.4] [n/a]
56
+ thinking 0.87 OK [0.87 visible | chars med: 342 | sig med: 118]
57
+ prompts 11 OK [11 user turns]
58
+ ──────────────────────────────────────────────────────────────
59
+ ```
60
+
61
+ `Ctrl-C` exits cleanly. Walk through what every column means and how to interpret it in [docs/getting-started.md](docs/getting-started.md).
62
+
63
+ ## What else can it do
64
+
65
+ ```bash
66
+ codevigil config check # show the resolved config and where each value came from
67
+ codevigil ingest # cold-ingest every session into local persistent memory
68
+ codevigil ingest --force # rebuild the store from scratch, ignoring existing rows
69
+ codevigil watch --by-session # one block per session (0.2.x layout)
70
+ codevigil report ~/.claude/projects # default: stacked today / 7d / 30d panels
71
+ codevigil report sessions/ --format markdown --from 2026-04-01 # explicit window → single-period mode
72
+ codevigil report ~/.claude/projects --group-by week # cohort trend table by ISO week
73
+ codevigil report ~/.claude/projects --group-by week --format csv # flat CSV for notebook consumption
74
+ codevigil report sessions/ --compare-periods 2026-03-01:2026-03-31,2026-04-01:2026-04-30
75
+ codevigil report sessions/ --pivot-date 2026-04-01 # before/after delta at a change point
76
+ codevigil report sessions/ --group-by week --experimental-correlations # append Pearson appendix [experimental]
77
+ codevigil report sessions/ --output-file ~/reports/april.md # write to an exact file path
78
+ codevigil export session.jsonl # NDJSON event stream on stdout, jq-friendly
79
+ codevigil export session.jsonl | jq 'select(.kind == "tool_call") | .payload.tool_name'
80
+ codevigil history list # list stored sessions
81
+ codevigil history list --task-type debug_loop --since 2026-04-01 --severity warn
82
+ codevigil history SESSION_ID # event, metric, and per-turn task-type timeline
83
+ codevigil history diff SESSION_A SESSION_B # side-by-side Markdown diff of two sessions
84
+ codevigil history heatmap SESSION_ID # tool × severity heatmap with proportional gradient bars
85
+ codevigil history heatmap --axis task_type # cross-tab metrics against experimental task labels
86
+ ```
87
+
88
+ `codevigil report` with no date flags now renders three stacked windows — **today**, **7d**, and **30d** — in one invocation. Pass `--from` or `--to` to fall back to the original single-period mode. Scripts that depend on the old no-flag single-period output should pass `--from 1970-01-01` (or any open lower bound) to preserve the previous shape.
89
+
90
+ Full flag reference for every subcommand: [docs/cli.md](docs/cli.md).
91
+
92
+ ## Configuration
93
+
94
+ codevigil resolves its configuration from a layered precedence chain: built-in defaults → `~/.config/codevigil/config.toml` → `CODEVIGIL_*` environment variables → CLI flags. `watch.roots` is the canonical multi-root setting; `watch.root` and `CODEVIGIL_WATCH_ROOT` remain supported as deprecated single-root aliases. Run `codevigil config check` to see every resolved key with its source and any deprecation notices.
95
+
96
+ A minimal `~/.config/codevigil/config.toml`:
97
+
98
+ ```toml
99
+ [watch]
100
+ roots = ["~/.claude/projects"]
101
+ poll_interval = 1.0
102
+
103
+ [collectors.read_edit_ratio]
104
+ warn_threshold = 5.0
105
+ critical_threshold = 2.5
106
+ ```
107
+
108
+ The complete key reference, env-var bindings, and validation rules live in [docs/configuration.md](docs/configuration.md).
109
+
110
+ ## What gets measured
111
+
112
+ Five user-facing collectors plus an always-on integrity gate:
113
+
114
+ | Collector | Signal |
115
+ | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
116
+ | `read_edit_ratio` | Reads vs. mutations, blind-edit detection, file-tracking confidence |
117
+ | `stop_phrase` | Hits against ownership-dodging, permission-seeking, premature-stopping, and known-limitation phrase categories |
118
+ | `reasoning_loop` | Self-correction phrase rate per 1K tool calls plus longest consecutive burst |
119
+ | `thinking` | Visible-vs-redacted thinking-block ratio plus median visible / signature character lengths (headline signal for #42796 depth decline) |
120
+ | `prompts` | Cumulative user-turn count per session; feeds the #42796 "prompts per session" cohort mean |
121
+ | `parse_health` | Always-on. Flips to CRITICAL when parse confidence drops below 0.9 in any 50-line window |
122
+
123
+ `thinking` and `prompts` are descriptive counters — severity stays at OK by design. They exist to feed cohort trend reports, not to alarm. Threshold semantics for the three gated collectors, what each metric is sensitive to, and how to interpret CRITICAL signals live in [docs/collectors.md](docs/collectors.md).
124
+
125
+ ## Persistent memory
126
+
127
+ 0.4.0 adds first-class multi-root support on top of the local SQLite-backed processed-session store under `~/.local/state/codevigil/processed/`. Every finalised session is now written as a root-aware record (`session_key`, raw `session_id`, cursor byte offset, collector state snapshot, and derived metric summary), and the watcher seeds each polled file from the cached cursor on startup instead of re-parsing JSONL from byte 0. Rolling-window collector state (the `read_edit_ratio` 50-event deque, the `reasoning_loop` burst counter) is restored verbatim across restarts, even when different roots contain the same `session_id`. Run `codevigil ingest` once after install; after that, `codevigil watch` processes only newly appended events on the hot path. For reproducible cold-start benchmarks, disable the cursor cache with `watch.cursor_cache_enabled = false`. Schema, migration policy, and the invariants the store upholds live in [docs/design.md](docs/design.md).
128
+
129
+ ## Cohort trend reports
130
+
131
+ `codevigil report --group-by {day,week,project,model,permission_mode}` aggregates every session in the store into cohort cells and emits a Markdown trend table with a methodology header, Δ-vs-prior-row annotations on chronological dimensions, and threshold highlighting for cells crossing warn / critical. Cells with a sample size below 5 are redacted and rendered as an `n<5` sentinel. Additional cohort-only flags:
132
+
133
+ - `--pivot-date YYYY-MM-DD` — split the corpus at a change point and emit a Before/After delta table.
134
+ - `--compare-periods A_START:A_END,B_START:B_END` — signed delta table + prose summary per metric.
135
+ - `--experimental-correlations` — Pearson appendix across per-session metric columns; metric pairs with `n<30` are dropped. Exploratory only — correlation is not causation, and the rendered output says so explicitly.
136
+ - `--format csv` — flat `dimension_value,metric_name,mean,stdev,n,min,max` for notebook consumption.
137
+ - `--format json` — versioned JSON cohort document (`schema_version=1`) for downstream pipelines.
138
+ - `--output-file PATH` — write to an exact file path (parent dirs created, must resolve under `$HOME`).
139
+
140
+ Both new default collectors (`thinking`, `prompts`) surface in cohort reports as `thinking_visible_ratio`, `thinking_visible_chars_median`, `thinking_signature_chars_median`, and `user_turns`. Full reference: [docs/cli.md](docs/cli.md).
141
+
142
+ ## Task classifier `[experimental]`
143
+
144
+ The experimental task classifier labels each Claude Code turn as `exploration`, `mutation_heavy`, `debug_loop`, `planning`, or `mixed` using a two-stage cascade (tool-presence heuristic → keyword regex on the user message, stdlib `re` only, zero network, zero new dependencies). Session-level labels aggregate turn labels by majority vote. Labels surface in four places:
145
+
146
+ - **`history list`** — new `task_type` column and `--task-type <label>` filter
147
+ - **`history heatmap --axis task_type`** — cross-tab metrics against task labels
148
+ - **`history SESSION_ID`** — per-turn task-type headings in the event timeline
149
+ - **`watch`** — right-aligned task tag in each session header
150
+
151
+ Every surface is marked `[experimental]`. The classifier is opt-out via `[classifier]` in `~/.config/codevigil/config.toml`:
152
+
153
+ ```toml
154
+ [classifier]
155
+ enabled = false
156
+ ```
157
+
158
+ When disabled, the four surfaces degrade cleanly: no task column in list, no task tag in watch, no per-turn headings in detail, and `history heatmap --axis task_type` exits with a clear error. Category definitions, the cascade algorithm, and the calibration gate (≥85% agreement on a labeled corpus) are documented in [docs/classifier.md](docs/classifier.md).
159
+
160
+ ## Privacy
161
+
162
+ Three independent enforcement layers ensure session data never leaves your machine:
163
+
164
+ - **Runtime import allowlist hook** installed at package init refuses any import of `socket`, `urllib`, `http.client`, `httpx`, `requests`, `aiohttp`, `ftplib`, `smtplib`, `ssl`, `subprocess`, or related transports from inside a `codevigil` module.
165
+ - **CI grep gate** re-checks the source tree for the same banned names on every push as a belt-and-suspenders second layer.
166
+ - **Filesystem scope check** refuses any read or write path outside `$HOME` via a `Path.resolve().is_relative_to(home)` check.
167
+
168
+ The full privacy model and threat boundary: [docs/privacy.md](docs/privacy.md).
169
+
170
+ ## Documentation
171
+
172
+ | Doc | What it covers |
173
+ | -------------------------------------------------- | ----------------------------------------------------- |
174
+ | [docs/installation.md](docs/installation.md) | Install, upgrade, uninstall, from-source builds |
175
+ | [docs/getting-started.md](docs/getting-started.md) | First-run walkthrough and interpreting the output |
176
+ | [docs/cli.md](docs/cli.md) | Exhaustive CLI reference: every subcommand and flag |
177
+ | [docs/configuration.md](docs/configuration.md) | Every config key, env binding, and validation rule |
178
+ | [docs/collectors.md](docs/collectors.md) | What each metric measures and how to interpret it |
179
+ | [docs/classifier.md](docs/classifier.md) | Experimental task classifier: categories and surfaces |
180
+ | [docs/privacy.md](docs/privacy.md) | Privacy guarantees and the threat model |
181
+ | [docs/design.md](docs/design.md) | Architecture, plugin boundaries, error taxonomy |
182
+ | [CHANGELOG.md](CHANGELOG.md) | Release notes |
183
+
184
+ ## Experimental thresholds
185
+
186
+ The shipped default thresholds were derived from a single user's session window — one user is not a population baseline. Every default ships with `experimental = true` and the watch header shows `[experimental thresholds]` until you either flip the flag in config or let bootstrap mode personalise the thresholds for your own workflow.
187
+
188
+ Bootstrap mode observes the first 10 sessions (configurable) with all severities pinned to `OK`, records the per-collector value distributions, then derives WARN at p80 and CRITICAL at p95 of _your_ local data, clamped by the literal-value hard caps. No manual tuning required. See [docs/collectors.md#experimental-thresholds-and-bootstrap](docs/collectors.md#experimental-thresholds-and-bootstrap).
189
+
190
+ ## Contributing
191
+
192
+ ```bash
193
+ git clone https://github.com/Mathews-Tom/codevigil
194
+ cd codevigil
195
+ uv sync --dev
196
+ uv run pytest
197
+ uv run mypy --strict codevigil
198
+ uv run ruff check .
199
+ uv run ruff format --check .
200
+ bash scripts/ci_privacy_grep.sh
201
+ ```
202
+
203
+ All five gates must pass before a commit lands. The privacy grep runs as a separate CI job alongside the typecheck-and-test matrix on every PR.
204
+
205
+ ## License
206
+
207
+ Apache License 2.0. See [LICENSE](LICENSE).
@@ -14,6 +14,6 @@ from codevigil.privacy import install as _install_privacy_hook
14
14
 
15
15
  _install_privacy_hook()
16
16
 
17
- __version__: str = "0.1.1"
17
+ __version__: str = "0.4.0"
18
18
 
19
19
  __all__ = ["PrivacyViolationError", "__version__"]