tokenclinic 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,46 @@
1
+ # Changelog
2
+
3
+ ## 0.1.1
4
+
5
+ - Fix the ast-grep native-binding loader to use the correct per-platform package
6
+ names (Linux `…-gnu`/`…-musl`, Windows `…-msvc`), so installs and CI also work
7
+ on platforms other than macOS. Degrades gracefully (no ast-grep findings) if the binding is absent.
8
+ - CI: GitHub Actions runs typecheck + tests on push/PR (test timeout raised to
9
+ 30s for the tsc-spawning apply-loop tests).
10
+
11
+ ## 0.1.0
12
+
13
+ First release. The full clinic loop — triage → diagnose → treat → bill — plus the
14
+ retroactive audit, the amortization engine, provider-agnostic pricing, and an
15
+ advisory mode for running inside a harness.
16
+
17
+ ### Commands
18
+ - `audit <logs.jsonl>` — retroactive audit over past LLM call logs; reports the
19
+ eliminable-class fraction (eliminable / routable / essential) and what the
20
+ clinic loop would have saved.
21
+ - `scan [path]` — read-only pre-flight: type-check + promoted local rules,
22
+ partition into local ($0) / model lanes, print an EOB with a savings
23
+ counterfactual. Writes a `.tokenclinic/` Health Record.
24
+ - `scan [path] --json` — machine report for a host agent (no model call):
25
+ findings + tight context packets + recommended routing + `advice`.
26
+ - `scan [path] --apply` — live: fix each escalation with the routed model
27
+ (structured-output patch), then re-run the checker to verify. Reverts any patch
28
+ that makes things worse. Needs `ANTHROPIC_API_KEY`.
29
+ - `learn [path]` — amortize a recurring class into a deterministic ast-grep rule
30
+ (validated against generated fixtures before promotion); promoted rules then run
31
+ on-device for $0 forever. Needs `ANTHROPIC_API_KEY`.
32
+
33
+ ### Pricing
34
+ - Resolves through llm-intel (the OpenRouter catalog → any provider) with a
35
+ committed offline snapshot fallback. Unknown models are surfaced, never priced
36
+ with a fabricated number. Routing is declarative via `.tokenclinic/routing.json`.
37
+
38
+ ### Harness
39
+ - Ships a Claude Code skill (`skill/token-clinic/`) that runs `scan --json` as a
40
+ pre-flight gate so the host agent fixes from the packets with its own model.
41
+
42
+ ### Notes
43
+ - Runtime: Bun. The type checker (`tsc`) is invoked at runtime, so `typescript` is
44
+ a runtime dependency.
45
+ - `--apply` and `learn` require an Anthropic API key; everything else is free and
46
+ offline-capable.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 mrdulasolutions
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,180 @@
1
+ # 🩺 Token Clinic
2
+
3
+ A pre-flight gate for coding agents. It runs cheap, deterministic analysis **on-device** before any model touches your code, routes only the irreducible work to the right-priced model, and prints a bill showing what you saved.
4
+
5
+ > Thesis: most tokens in agentic coding are wasted having an expensive model rediscover what a cheap deterministic tool already knows. **Don't pay Opus to find a missing import.**
6
+
7
+ ## The clinic loop
8
+
9
+ | Stage | What it does |
10
+ | --- | --- |
11
+ | **Triage** | Detect deps, run on-device analyzers (v1: `tsc`), normalize everything into one `Finding` schema, rank by signal. Most findings die here, for $0. |
12
+ | **Diagnose** | Partition findings: autofixable → handled locally; `needs-llm` → escalated with a *tight context packet* (the relevant lines, not the whole repo). |
13
+ | **Treat** | Route each escalated fix by difficulty: mechanical → Haiku, semantic → Sonnet, architectural → Opus. Apply, then re-run the source check — a fix isn't done until it verifies. |
14
+ | **Bill (EOB)** | Cost per fix + savings vs. the naive "dump the file at a top model" baseline. The screenshot-able receipt. |
15
+
16
+ ## Install
17
+
18
+ Runs on [Bun](https://bun.sh).
19
+
20
+ ```bash
21
+ # global CLI
22
+ bun add -g tokenclinic # or, from a clone: bun link
23
+ tokenclinic scan ./my-project
24
+
25
+ # or run from a clone without installing
26
+ bun install
27
+ bun run src/cli.ts scan ./my-project
28
+ ```
29
+
30
+ `scan` (incl. `--json`), `audit`, and `learn`'s clustering are free and offline-capable. `scan --apply` and `learn`'s synthesis call a model and need `ANTHROPIC_API_KEY`.
31
+
32
+ ## The commands
33
+
34
+ Token Clinic ships the strategically correct **first move** (the retroactive audit) and the **recurring product** (the live scan):
35
+
36
+ ```bash
37
+ # Approach A — measure the thesis from logs you already have ($0 risk, no code read)
38
+ bun run demo:audit # audits fixtures/sample-logs.jsonl
39
+ bun run src/cli.ts audit /path/to/your-llm-calls.jsonl
40
+
41
+ # Approach B — pre-flight scan of a repo (read-only, estimated EOB)
42
+ bun run demo # scans fixtures/sample-repo
43
+ bun run src/cli.ts scan /path/to/a/ts/project
44
+
45
+ # Approach B, live — actually fix + verify (needs ANTHROPIC_API_KEY)
46
+ ANTHROPIC_API_KEY=sk-ant-... bun run src/cli.ts scan /path/to/project --apply
47
+
48
+ # v2 — amortize a recurring class into a local rule (needs key), then it's $0 forever
49
+ ANTHROPIC_API_KEY=sk-ant-... bun run src/cli.ts learn /path/to/project
50
+ bun run src/cli.ts scan fixtures/with-rule # demo: a promoted rule running for $0
51
+ ```
52
+
53
+ ### `audit` — the retroactive audit (Approach A)
54
+
55
+ Run before building anything live. Ingests a JSONL of past LLM calls and prints the EOB **backwards** — what you spent, the *eliminable-class fraction* (the whole bet), and what the clinic loop would have saved. Runs entirely on exported logs, so there's no autofix risk and no code leaves the machine.
56
+
57
+ ```
58
+ 🩺 Token Clinic — retroactive audit · fixtures/sample-logs.jsonl
59
+ 12 calls · $0.20 spent · prices: snapshot (estimated — some calls bucketed heuristically)
60
+
61
+ ● eliminable 6 calls $0.09 42% of spend · killed on-device → $0
62
+ ● routable 3 calls $0.05 24% of spend · re-priced to cheapest tier
63
+ ● essential 3 calls $0.07 34% of spend · real reasoning → unchanged
64
+
65
+ eliminable-class fraction 42% (clearly large — build it)
66
+ projected spend $0.08 under the clinic loop
67
+ would have saved ~$0.12 (59% cheaper)
68
+ ```
69
+
70
+ Log format is one JSON object per line: `{ "model", "inputTokens", "outputTokens", "task"?, "category"? }`. A `category` is authoritative; without one, the call is bucketed heuristically from `task` and the audit is flagged `estimated`.
71
+
72
+ ### `scan` — the live pre-flight gate (Approach B)
73
+
74
+ Example output:
75
+
76
+ ```
77
+ 🩺 Token Clinic — fixtures/sample-repo
78
+ node project · 1 deps · 5 findings
79
+
80
+ ● TS2322 [semantic→sonnet-4-6] Type 'number' is not assignable to type 'string'.
81
+ ● TS6133 [local] 'unused' is declared but its value is never read.
82
+ ● TS2304 [mechanical→haiku-4-5] Cannot find name 'radius'.
83
+ ...
84
+
85
+ Explanation of Benefits (estimated — LLM step stubbed)
86
+ 5 findings
87
+ 1 fixed on-device · $0.00
88
+ 4 escalated to a model
89
+ clinic spend $0.0093
90
+ naive cost $0.12 (dump each file at the top model)
91
+ saved ~$0.11 (92% cheaper)
92
+ ```
93
+
94
+ ## Two fix modes
95
+
96
+ `scan` is read-only and free; `scan --apply` is the live loop.
97
+
98
+ - **`scan`** (default) — `DryRunFixer` estimates each escalation's cost from the real packet token count but does **not** call a model or touch files. The EOB is flagged `estimated`. Zero risk, zero spend.
99
+ - **`scan --apply`** — `AnthropicFixer` ([`@anthropic-ai/sdk`](https://github.com/anthropics/anthropic-sdk-typescript)) sends each tight packet to the routed model (Haiku/Sonnet/Opus), gets a corrected snippet via **structured output**, writes it, then **re-runs `tsc` to verify the finding is gone**. It loops — re-triaging each pass so line shifts are handled — until no escalatable findings remain. Costs are **exact**, from the API's `usage`; the EOB reads `(actual)`. Needs `ANTHROPIC_API_KEY` (it refuses cleanly without one).
100
+
101
+ ## What's real vs. still stubbed
102
+
103
+ **Real:** dep detection, `tsc` analysis + normalization, partition/routing, context-packet assembly, the Health Record, the `--apply` fix-and-verify loop, and — in `--apply` — exact token costs from the API.
104
+
105
+ **Still stubbed / placeholder:**
106
+ - Token *estimates* in read-only `scan` use chars/4 (real exact counts only arrive on `--apply`).
107
+ - Local autofix (the `[local]` lane) is still reported, not yet applied — that codemod path is v2.
108
+
109
+ ## Pricing & other providers
110
+
111
+ Prices resolve through [llm-intel](https://github.com/basisoasis/llm-intel) (the OpenRouter catalog) at command start, with a committed **offline snapshot** fallback so read-only `scan` never *requires* network. The footer shows which source was used (`prices: llm-intel` / `prices: snapshot`). An unknown model prices as `?` and is surfaced — never a fabricated number.
112
+
113
+ Because llm-intel is an OpenRouter catalog, **other providers come for free** on the cost side: anything keyed `openai/…`, `google/…`, etc. prices correctly. The split that makes this work:
114
+
115
+ - **Pricing, audit, EOB, and routing are provider-agnostic.** Routing is declarative — drop a `.tokenclinic/routing.json` mapping difficulty classes to *any* model id (`{ "semantic": "openai/gpt-4o" }`) and pricing resolves it.
116
+ - **Only the actual model call is provider-specific** — `--apply`/`learn` use the Anthropic SDK today; that single seam is what a future OpenRouter/LiteLLM client would swap, and nothing else changes.
117
+
118
+ ## Inside a harness (Claude Code) — advisory mode
119
+
120
+ Standalone, TokenClinic calls a model itself (`--apply`). **Inside a harness, it shouldn't** — the harness owns the model, the key, and the billing. So in-harness it runs as an **advisory pre-flight gate**: it does the $0 local elimination and hands the host agent a machine report; the agent does the reasoning fixes with *its own* model.
121
+
122
+ ```bash
123
+ tokenclinic scan <path> --json # read-only, NO model call — a report for an agent
124
+ ```
125
+
126
+ The report's `advice` is the contract:
127
+ - `advice.escalate` — the work list; each carries a `context.snippet` (fix from this, don't crawl the repo) and a `recommendedModel` (how hard the fix is).
128
+ - `advice.autoApply` — the `local` $0 lane (promoted rules + mechanical), apply directly.
129
+ - `eob` — the receipt to report back ("41 fixed locally for $0, 6 escalated, saved ~$0.40").
130
+
131
+ A drop-in Claude Code skill lives in [`skill/token-clinic/SKILL.md`](skill/token-clinic/SKILL.md) — it tells the agent to run `scan --json` before any fix pass and act on the `advice`. This is the original "install via a skill" path: TokenClinic's value in-harness is the elimination + tight packets + receipt, **not** the model call.
132
+
133
+ ## The Codebase Health Record
134
+
135
+ Each run writes `.tokenclinic/` into the scanned repo: `profile.json` (deps + analyzers) and an append-only `history.jsonl` (findings, spend, savings over time). v2 adds `rules/`, `quarantine/`, and `routing.json`. Every run reads it back, so every run gets cheaper and smarter — this is the compounding asset, not the router.
136
+
137
+ ## Roadmap
138
+
139
+ Sequenced A → B → C, per the [office-hours design](docs/design-token-clinic.md) — measure before you build, sell the receipt, price the moat last.
140
+
141
+ - **A — the audit (here):** `tokenclinic audit` over existing logs. Puts a real dollar number on the unverified core thesis (the eliminable-class fraction) with zero code and zero risk. Earns revenue as a paid/concierge audit. **Gate:** fraction clearly large (>40%) → build B; clearly small (<15%) → walk away.
142
+ - **B — the live scan (here):** `tokenclinic scan` — Triage + local autofix lane + escalation estimate + verify + EOB + Health Record. One language (TS). The recurring product, distributed as a self-controlled CLI (npm + GitHub Releases) — not an integration into harnesses you don't own.
143
+ - **C — sell the moat (later):** open-core. Triage + receipt is the free funnel; charge for the compounding **Health Record** (promoted rules + fixtures + learned routing), shared team-wide.
144
+
145
+ ### v2 — the amortization engine (`learn`) — built
146
+
147
+ When a `needs-llm` class recurs (≥3×), `tokenclinic learn` spends *one* model call to synthesize a deterministic check **as data, not code**: an [ast-grep](https://ast-grep.github.io/) rule object + test fixtures. The rule is **never trusted directly** — it must flag every positive fixture and none of the negatives (`src/amortize/validate.ts`) before it's promoted to `.tokenclinic/rules/`; failures go to `quarantine/`. Promoted rules then run on-device in every `scan` (`src/triage/analyzers/astgrep.ts`), landing in the `[local]` $0 lane. That class is **$0 forever** — pay once, run free.
148
+
149
+ ```
150
+ src/amortize/
151
+ cluster.ts # group recurring needs-llm findings (≥3×)
152
+ synthesize.ts # ONE model call → ast-grep rule + fixtures (key-gated)
153
+ validate.ts # the trust gate: rule must pass its fixtures
154
+ promote.ts # → .tokenclinic/rules/ (promoted) or quarantine/
155
+ sg.ts # ast-grep loader (@ast-grep/napi)
156
+ ```
157
+
158
+ Only *eliminable* (bucket-1) findings amortize this way; *routable* (bucket-2) tacit-judgment work is routed cheaper, never eliminated. Still future: the [fff](https://github.com/dmtrKovalenko/fff) text-pattern fast lane and fff-powered Diagnose retrieval.
159
+
160
+ ## Architecture
161
+
162
+ ```
163
+ src/
164
+ types.ts # Finding / EOB / CallRecord — the records every stage shares
165
+ pricing/ # llm-intel adapter + offline snapshot + id/unit normalize
166
+ audit/ # Approach A: log ingest + bucket classifier + backwards EOB
167
+ amortize/ # v2: cluster → synthesize → validate → promote (ast-grep rules)
168
+ detect/deps.ts # dependency profile
169
+ triage/ # analyzers (tsc + promoted ast-grep rules) → Finding[]
170
+ diagnose/ # partition + context-packet assembly
171
+ treat/ # model routing + Fixer seam (DryRun estimate / Anthropic live)
172
+ bill/eob.ts # cost accounting + savings counterfactual
173
+ record/health.ts # the .tokenclinic/ Health Record
174
+ scan.ts # read-only scan assembly + the --json advisory contract
175
+ cli.ts # audit / scan / scan --apply / learn — wires the loops together
176
+ docs/
177
+ design-token-clinic.md # the office-hours strategy (A → B → C)
178
+ skill/
179
+ token-clinic/SKILL.md # Claude Code skill — advisory pre-flight gate
180
+ ```
package/package.json ADDED
@@ -0,0 +1,67 @@
1
+ {
2
+ "name": "tokenclinic",
3
+ "version": "0.1.1",
4
+ "description": "Pre-flight gate for coding agents: cheap local analysis first, route only the irreducible to the right-priced model, print the bill.",
5
+ "type": "module",
6
+ "license": "MIT",
7
+ "author": "mrdulasolutions <matt@mrdula.solutions>",
8
+ "homepage": "https://github.com/mrdulasolutions/TokenClinic#readme",
9
+ "repository": {
10
+ "type": "git",
11
+ "url": "git+https://github.com/mrdulasolutions/TokenClinic.git"
12
+ },
13
+ "bugs": {
14
+ "url": "https://github.com/mrdulasolutions/TokenClinic/issues"
15
+ },
16
+ "keywords": [
17
+ "llm",
18
+ "tokens",
19
+ "cost",
20
+ "pricing",
21
+ "claude",
22
+ "anthropic",
23
+ "openrouter",
24
+ "ast-grep",
25
+ "code-review",
26
+ "static-analysis",
27
+ "cli",
28
+ "pre-flight",
29
+ "claude-code",
30
+ "agent"
31
+ ],
32
+ "bin": {
33
+ "tokenclinic": "./src/cli.ts"
34
+ },
35
+ "files": [
36
+ "src",
37
+ "skill",
38
+ "README.md",
39
+ "LICENSE",
40
+ "CHANGELOG.md"
41
+ ],
42
+ "engines": {
43
+ "bun": ">=1.0.0"
44
+ },
45
+ "scripts": {
46
+ "scan": "bun run src/cli.ts scan",
47
+ "audit": "bun run src/cli.ts audit",
48
+ "learn": "bun run src/cli.ts learn",
49
+ "demo": "bun run src/cli.ts scan fixtures/sample-repo",
50
+ "demo:audit": "bun run src/cli.ts audit fixtures/sample-logs.jsonl",
51
+ "test": "bun test --timeout 30000",
52
+ "typecheck": "tsc --noEmit",
53
+ "prepublishOnly": "bun run typecheck && bun run test"
54
+ },
55
+ "dependencies": {
56
+ "@anthropic-ai/sdk": "^0.106.0",
57
+ "@ast-grep/napi": "^0.44.0",
58
+ "@basisoasis/llm-intel": "^1.0.33",
59
+ "typescript": "^5.6.0"
60
+ },
61
+ "optionalDependencies": {
62
+ "@ast-grep/napi-darwin-arm64": "^0.44.0"
63
+ },
64
+ "devDependencies": {
65
+ "@types/node": "^22.0.0"
66
+ }
67
+ }
@@ -0,0 +1,76 @@
1
+ ---
2
+ name: token-clinic
3
+ description: >-
4
+ Pre-flight gate before editing code. Runs cheap on-device analysis first and
5
+ returns high-signal findings with tight context packets, so the model fixes
6
+ real problems instead of crawling the repo to rediscover them. Use BEFORE
7
+ starting a fix/refactor/cleanup pass on a TypeScript project, or when asked to
8
+ "clean up", "fix the errors", "lint", or "tighten up" a codebase.
9
+ ---
10
+
11
+ # Token Clinic — pre-flight gate
12
+
13
+ Token Clinic does the cheap, deterministic work on-device *before* you spend
14
+ model tokens: it runs the type checker plus the repo's promoted local rules,
15
+ eliminates what it can for $0, and hands you a tight packet for each remaining
16
+ problem. **It does not call a model — you do the reasoning fixes, with your own
17
+ model, from the packets it gives you.** That's the point: don't pay a premium
18
+ model to find a missing import a local tool already found.
19
+
20
+ ## When to use
21
+
22
+ Before any multi-file fix, refactor, lint, or "clean this up" pass on a
23
+ TypeScript project. Run it first; act on its output; don't re-derive its findings.
24
+
25
+ ## How to run
26
+
27
+ ```bash
28
+ tokenclinic scan <path> --json
29
+ ```
30
+
31
+ (If `tokenclinic` isn't on PATH, run from the repo: `bun run /path/to/TokenClinic/src/cli.ts scan <path> --json`.)
32
+
33
+ This prints a JSON report and **makes no API calls**. Parse it and act on `advice`.
34
+
35
+ ## What the report means
36
+
37
+ ```jsonc
38
+ {
39
+ "eob": { "fixedLocally": 1, "escalated": 4, "saved": 0.11, ... }, // the receipt
40
+ "findings": [
41
+ {
42
+ "rule": "TS2322", "file": "src/x.ts", "line": 4,
43
+ "lane": "model", // "local" = $0 cleanup; "model" = needs you
44
+ "recommendedModel": "claude-sonnet-4-6",
45
+ "context": { "snippet": "...the relevant lines...", "startLine": 1 }
46
+ }
47
+ ],
48
+ "advice": {
49
+ "autoApply": ["<ids of local-lane findings>"],
50
+ "escalate": [{ "id": "...", "file": "...", "line": 4, "recommendedModel": "..." }]
51
+ }
52
+ }
53
+ ```
54
+
55
+ ## What to do with it
56
+
57
+ 1. **`advice.escalate` is your work list.** Fix each one using its `context.snippet`
58
+ — do **not** open and re-read the whole file or crawl the repo; the packet is
59
+ the context you need. The `recommendedModel` tells you how hard Token Clinic
60
+ judged the fix (mechanical → cheap, semantic → mid, architectural → top); use
61
+ it to calibrate effort, not to switch models mid-session.
62
+ 2. **`advice.autoApply` (the `local` lane)** lists the $0 mechanical/promoted-rule hits.
63
+ Apply them directly if trivial and in scope; otherwise mention them — they don't
64
+ warrant deep reasoning.
65
+ 3. **Report the `eob`** (e.g. "41 fixed locally for $0, 6 escalated, ~$0.40 saved
66
+ vs. crawling the repo") so the user sees what the pre-flight saved.
67
+ 4. **Re-run `scan --json` after your fixes** to confirm the findings are gone.
68
+
69
+ ## Notes
70
+
71
+ - `scan` and `scan --json` are read-only and free; they never call a model.
72
+ - For a recurring class of finding, `tokenclinic learn <path>` (needs an API key)
73
+ amortizes it into a local rule so it's caught for $0 on every future scan.
74
+ - Standalone (outside this harness) `tokenclinic scan <path> --apply` will do the
75
+ fixes itself with its own key — but inside a harness, prefer `--json` and fix
76
+ with your own model, so spend and credentials stay on the harness's account.
@@ -0,0 +1,20 @@
1
+ import type { Finding, Cluster } from "../types";
2
+
3
/**
 * Bucket `needs-llm` findings by the rule that produced them and keep only the
 * buckets that recur often enough to be worth amortizing.
 *
 * @param findings Findings to group; anything whose `fixability` is not
 *   `"needs-llm"` is ignored.
 * @param min Minimum bucket size for a class to be reported (default 3, so
 *   one-offs are never amortized).
 * @returns One cluster per recurring rule, largest bucket first. Each cluster
 *   carries the message of the first finding seen for that rule.
 */
export function cluster(findings: Finding[], min = 3): Cluster[] {
  const byRule = new Map<string, Finding[]>();

  for (const finding of findings) {
    if (finding.fixability === "needs-llm") {
      const bucket = byRule.get(finding.rule);
      if (bucket === undefined) {
        byRule.set(finding.rule, [finding]);
      } else {
        bucket.push(finding);
      }
    }
  }

  const recurring: Cluster[] = [];
  for (const [rule, members] of byRule) {
    if (members.length >= min) {
      recurring.push({ rule, message: members[0].message, findings: members });
    }
  }

  // Most frequent class first; ties keep first-seen order (stable sort).
  recurring.sort((left, right) => right.findings.length - left.findings.length);
  return recurring;
}
@@ -0,0 +1,27 @@
1
+ import { mkdirSync, writeFileSync } from "node:fs";
2
+ import { join } from "node:path";
3
+ import type { GeneratedRule } from "../types";
4
+ import { validate } from "./validate";
5
+
6
+ // Validate a synthesized rule against its fixtures, then file it: promoted rules
7
+ // go to .tokenclinic/rules/ (and run for $0 in every future triage); failures go
8
+ // to .tokenclinic/quarantine/ for inspection, never executed.
9
+
10
/** Outcome of filing a synthesized rule: accepted into `rules/` or parked in `quarantine/`. */
export type PromotionStatus =
  | "promoted"
  | "quarantined";
11
+
12
/** Record of what `promote` did with one synthesized rule. */
export interface Promotion {
  /** Whether the rule passed validation (`"promoted"`) or not (`"quarantined"`). */
  status: PromotionStatus;
  /** The rule exactly as it was handed to `promote`. */
  rule: GeneratedRule;
  /** Validation failure messages; empty when the rule was promoted. */
  failures: string[];
  /** Location of the JSON file the rule was written to. */
  path: string;
}
18
+
19
/**
 * Validate a synthesized rule against its fixtures and file it on disk.
 *
 * Rules that pass go to `<root>/.tokenclinic/rules/`; rules that fail go to
 * `<root>/.tokenclinic/quarantine/`. The target directory is created if needed
 * and the rule is written as pretty-printed JSON.
 *
 * @param root Directory that contains (or will contain) `.tokenclinic/`.
 * @param rule The synthesized rule to validate and persist.
 * @returns The filing outcome, including validation failures and the path written.
 */
export function promote(root: string, rule: GeneratedRule): Promotion {
  const { ok, failures } = validate(rule);
  const dir = join(root, ".tokenclinic", ok ? "rules" : "quarantine");
  mkdirSync(dir, { recursive: true });

  // rule.id is authored by a model, so it must never be trusted as a path
  // component: "../x" or "a/b" would otherwise escape the target directory.
  // Anything outside a conservative file-name alphabet collapses to "-", and
  // leading dots/dashes are dropped so ".." or a dotfile name cannot survive.
  const fileStem =
    String(rule.id)
      .replace(/[^A-Za-z0-9._-]+/g, "-")
      .replace(/^[.-]+/, "") || "unnamed-rule";

  const path = join(dir, `${fileStem}.json`);
  writeFileSync(path, JSON.stringify(rule, null, 2));
  return { status: ok ? "promoted" : "quarantined", rule, failures, path };
}
@@ -0,0 +1,67 @@
1
+ import { createRequire } from "node:module";
2
+
3
+ // ast-grep loader + a thin match helper.
4
+ //
5
+ // Bun's global install cache symlinks @ast-grep/napi out of the project tree,
6
+ // which breaks napi's internal resolution of its platform binding. So we try the
7
+ // normal package first (works under npm/node and in CI) and fall back to the
8
+ // platform-specific binding package directly (works under Bun's global install cache).
9
+ const require = createRequire(import.meta.url);
10
+
11
/** The subset of the loaded ast-grep module that this file uses. */
interface Napi {
  /** Takes a language handle and source text; the result exposes `root()` for the top node. */
  parse: (
    lang: unknown,
    src: string,
  ) => { root: () => SgNode };
  /** Language handles keyed by name; `runRule` looks the requested language up here. */
  Lang: Record<string, unknown>;
}
15
/** The subset of an ast-grep node that this file uses. */
interface SgNode {
  /** Returns every node matching the given rule object. */
  findAll: (matcher: { rule: Record<string, unknown> }) => SgNode[];
  /** Start position of the node; `runRule` treats both fields as 0-based. */
  range: () => {
    start: { line: number; column: number };
  };
  /** Source text of the node. */
  text: () => string;
}
20
+
21
/**
 * List the platform-specific binding packages to try for the current process,
 * in preference order.
 *
 * Names follow the napi-rs convention: Linux packages end in a libc suffix
 * (`-gnu`, then `-musl`), Windows packages end in `-msvc`, and every other
 * platform (e.g. darwin) uses the bare `<platform>-<arch>` form.
 */
function bindingCandidates(): string[] {
  const base = "@ast-grep/napi";
  switch (process.platform) {
    case "linux":
      return ["gnu", "musl"].map((libc) => `${base}-linux-${process.arch}-${libc}`);
    case "win32":
      return [`${base}-win32-${process.arch}-msvc`];
    default:
      return [`${base}-${process.platform}-${process.arch}`];
  }
}
29
+
30
// Module handle kept after the first successful load so later calls skip resolution.
let cached: Napi | undefined;

/**
 * Load the ast-grep module, trying the regular package first and then each
 * platform-specific binding package in order.
 *
 * @returns The first module that loads; the result is cached for later calls.
 * @throws Error when neither the regular package nor any platform package loads.
 */
function napi(): Napi {
  if (cached) return cached;

  // Yield names lazily so the platform candidates are only computed once the
  // regular package has failed to load.
  function* moduleNames(): Generator<string> {
    yield "@ast-grep/napi";
    yield* bindingCandidates();
  }

  for (const name of moduleNames()) {
    try {
      cached = require(name) as Napi;
      return cached;
    } catch {
      // This candidate is missing or failed to load; move on to the next one.
    }
  }

  throw new Error("ast-grep native binding unavailable for this platform");
}
49
+
50
/** One rule hit, as reported by `runRule`. */
export interface SgMatch {
  /** Line where the match starts (1-based). */
  line: number;
  /** Column where the match starts (1-based). */
  col: number;
  /** Source text of the matched node. */
  text: string;
}
55
+
56
/**
 * Run an ast-grep rule object against a piece of source code.
 *
 * @param language Key into the module's `Lang` table (e.g. `"TypeScript"`).
 * @param code Source text to search.
 * @param rule ast-grep rule object, passed through as `{ rule }` to `findAll`.
 * @returns One entry per match, with 1-based line and column of the match start.
 * @throws Error when `language` is not a known language; errors from loading
 *   the binding or from a malformed rule also propagate. Callers decide whether
 *   a throw means "skip" or "invalid".
 */
export function runRule(language: string, code: string, rule: Record<string, unknown>): SgMatch[] {
  const { parse, Lang } = napi();

  // Own-property lookup only: a plain `Lang[language]` would also resolve
  // inherited keys such as "constructor" or "toString", letting a bogus
  // language name slip past the unknown-language check below.
  const lang = Object.prototype.hasOwnProperty.call(Lang, language) ? Lang[language] : undefined;
  if (lang === undefined) throw new Error(`unknown ast-grep language: ${language}`);

  const root = parse(lang, code).root();
  return root.findAll({ rule }).map((node) => {
    const { start } = node.range();
    // Positions are shifted by one to produce the 1-based values SgMatch documents.
    return { line: start.line + 1, col: start.column + 1, text: node.text() };
  });
}
@@ -0,0 +1,68 @@
1
+ import Anthropic from "@anthropic-ai/sdk";
2
+ import type { Cluster, GeneratedRule } from "../types";
3
+ import { buildContext } from "../diagnose/context";
4
+
5
+ // Spend ONE model call to turn a recurring class into a deterministic ast-grep
6
+ // rule + fixtures. The output is never trusted directly — it goes through the
7
+ // fixture gate (validate.ts) before it can be promoted. This is the only place
8
+ // in amortization that costs tokens; everything downstream is free and local.
9
+
10
// System prompt for the synthesis call, kept as one sentence per entry and
// joined with single spaces into the final prompt string.
const SYSTEM = [
  "You author ast-grep rules.",
  "Given several real examples of the same class of code problem, write ONE ast-grep rule that matches this class STRUCTURALLY for the given language, plus test fixtures.",
  "ast-grep patterns use metavariables like $A, $B for sub-expressions (e.g. `console.log($A)`).",
  "Keep the pattern minimal and precise.",
  "Positive fixtures MUST match; negative fixtures are valid, similar-looking code that MUST NOT match.",
  "Return the ast-grep rule object as a JSON string.",
].join(" ");
17
+
18
// JSON schema for the structured synthesis output. The rule itself travels as
// a JSON string (`ruleJson`) and is parsed by the caller.
const SCHEMA = (() => {
  const text = (description: string) => ({ type: "string", description });
  const textList = (description: string) => ({ type: "array", items: { type: "string" }, description });

  return {
    type: "object",
    properties: {
      id: text("short kebab-case rule id"),
      message: text("one-line description of what the rule flags"),
      ruleJson: text('ast-grep rule object as JSON, e.g. {"pattern":"console.log($A)"}'),
      positive: textList("code snippets the rule must flag (≥2)"),
      negative: textList("similar valid code the rule must NOT flag (≥2)"),
    },
    required: ["id", "message", "ruleJson", "positive", "negative"],
    additionalProperties: false,
  };
})();
30
+
31
/** Parse the model's structured output and shape-check it; returns null when it is unusable. */
function parseSynthesis(text: string, language: string, origin: string): GeneratedRule | null {
  const isStringArray = (v: unknown): v is string[] =>
    Array.isArray(v) && v.every((item) => typeof item === "string");

  try {
    const parsed: unknown = JSON.parse(text);
    if (typeof parsed !== "object" || parsed === null) return null;

    const { id, message, ruleJson, positive, negative } = parsed as Record<string, unknown>;
    if (typeof id !== "string" || id.trim() === "") return null;
    if (typeof message !== "string" || typeof ruleJson !== "string") return null;
    if (!isStringArray(positive) || !isStringArray(negative)) return null;

    // The rule arrives as a JSON string; it must decode to a plain object to be a rule at all.
    const rule: unknown = JSON.parse(ruleJson);
    if (typeof rule !== "object" || rule === null || Array.isArray(rule)) return null;

    return {
      id,
      language,
      message,
      severity: "warning",
      rule: rule as Record<string, unknown>,
      origin,
      fixtures: { positive, negative },
    };
  } catch {
    return null; // malformed JSON at either level
  }
}

/**
 * Spend one model call to turn a recurring finding class into an ast-grep rule
 * plus fixtures.
 *
 * Up to four findings from the cluster are sent as examples, each with the
 * context snippet built by `buildContext`. The reply is requested as structured
 * JSON matching `SCHEMA` and is shape-checked before being returned.
 *
 * @param root Project root passed to `buildContext` for each example.
 * @param cl The recurring cluster to amortize.
 * @param language Language name recorded on the rule and named in the prompt.
 * @returns The synthesized rule, or `null` when the model output is malformed
 *   or does not have the expected shape — the cluster simply isn't amortized
 *   this run. Errors from the API call itself propagate to the caller.
 */
export async function synthesize(root: string, cl: Cluster, language = "TypeScript"): Promise<GeneratedRule | null> {
  const client = new Anthropic();

  const examples = cl.findings
    .slice(0, 4)
    .map((f, i) => `Example ${i + 1} (${f.file}:${f.line}) — ${f.message}\n` + "```\n" + buildContext(root, f).snippet + "\n```")
    .join("\n\n");

  const user =
    `Language: ${language}\n` +
    `These ${cl.findings.length} findings are all "${cl.rule}: ${cl.message}".\n` +
    `Author an ast-grep rule that catches this class on-device, with fixtures.\n\n${examples}`;

  const res = await client.messages.create({
    // synthesis is a reasoning task — use the top model once; it pays for itself forever
    model: "claude-opus-4-8",
    max_tokens: 2000,
    system: SYSTEM,
    messages: [{ role: "user", content: user }],
    output_config: { format: { type: "json_schema", schema: SCHEMA } },
  });

  // No text block at all is treated like an empty object, which fails the shape check.
  const text = res.content.find((b) => b.type === "text")?.text ?? "{}";
  return parseSynthesis(text, language, cl.rule);
}
@@ -0,0 +1,51 @@
1
+ import type { GeneratedRule } from "../types";
2
+ import { runRule } from "./sg";
3
+
4
+ // The trust gate. A synthesized rule may only be promoted if it flags every
5
+ // positive fixture and flags none of the negatives — proving, on examples, that
6
+ // it catches the class without false positives. This is what stops a noisy
7
+ // LLM-authored rule from poisoning the "high-signal" promise.
8
+
9
/** Verdict of the fixture gate for one rule. */
export interface ValidationResult {
  /** True only when no failure was recorded. */
  ok: boolean;
  /** Human-readable reasons the rule was rejected; empty when `ok` is true. */
  failures: string[];
}
13
+
14
// Squash a snippet onto one line for failure messages: whitespace runs become
// single spaces, the ends are trimmed, and the result is cut to 60 characters.
const oneline = (s: string): string => {
  const words = s.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ").substring(0, 60);
};
15
+
16
/**
 * Check a synthesized rule against its own fixtures.
 *
 * The rule passes only if it can be run at all, has at least one positive and
 * one negative fixture, matches every positive fixture, and matches no
 * negative fixture.
 *
 * @param rule The rule, with its language, rule object and fixtures.
 * @returns `ok` plus the list of reasons it failed (empty when `ok`).
 */
export function validate(rule: GeneratedRule): ValidationResult {
  // Smoke test on empty source: a rule that cannot even run is rejected outright.
  try {
    runRule(rule.language, "", rule.rule);
  } catch (e) {
    return { ok: false, failures: [`rule is not runnable: ${(e as Error).message}`] };
  }

  // Number of matches in one fixture; a throw counts as zero matches, which is
  // a miss for a positive fixture and a pass for a negative one.
  const matchCount = (code: string): number => {
    try {
      return runRule(rule.language, code, rule.rule).length;
    } catch {
      return 0;
    }
  };

  const failures: string[] = [
    // Empty fixture lists must not let a rule pass trivially.
    ...(rule.fixtures.positive.length === 0 ? ["no positive fixtures"] : []),
    ...(rule.fixtures.negative.length === 0 ? ["no negative fixtures"] : []),
    ...rule.fixtures.positive
      .filter((code) => matchCount(code) < 1)
      .map((code) => `positive fixture not matched: ${oneline(code)}`),
    ...rule.fixtures.negative
      .filter((code) => matchCount(code) > 0)
      .map((code) => `negative fixture matched (false positive): ${oneline(code)}`),
  ];

  return { ok: failures.length === 0, failures };
}