npm - tokenclinic - Versions diffs - 0.1.1 - Mend

tokenclinic 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/CHANGELOG.md +46 -0
package/LICENSE +21 -0
package/README.md +180 -0
package/package.json +67 -0
package/skill/token-clinic/SKILL.md +76 -0
package/src/amortize/cluster.ts +20 -0
package/src/amortize/promote.ts +27 -0
package/src/amortize/sg.ts +67 -0
package/src/amortize/synthesize.ts +68 -0
package/src/amortize/validate.ts +51 -0
package/src/audit/audit.ts +76 -0
package/src/audit/classify.ts +56 -0
package/src/bill/eob.ts +77 -0
package/src/cli.ts +221 -0
package/src/detect/deps.ts +37 -0
package/src/diagnose/context.ts +27 -0
package/src/diagnose/partition.ts +37 -0
package/src/pricing/llmIntel.ts +41 -0
package/src/pricing/normalize.ts +35 -0
package/src/pricing/table.ts +50 -0
package/src/record/health.ts +44 -0
package/src/scan.ts +95 -0
package/src/treat/anthropic.ts +85 -0
package/src/treat/apply.ts +74 -0
package/src/treat/fixer.ts +38 -0
package/src/treat/route.ts +39 -0
package/src/triage/analyzers/astgrep.ts +74 -0
package/src/triage/analyzers/tsc.ts +49 -0
package/src/triage/index.ts +20 -0
package/src/types.ts +114 -0
package/src/util.ts +0 -0

package/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,46 @@
+# Changelog
+## 0.1.1
+- Fix the ast-grep native-binding loader to use the correct per-platform package
+  names (Linux `…-gnu`/`…-musl`, Windows `…-msvc`), so installs and CI work on
+  platforms other than macOS. Degrades gracefully (no ast-grep findings) if the
+  binding is absent.
+- CI: GitHub Actions runs typecheck + tests on push/PR (test timeout raised to
+  30s for the tsc-spawning apply-loop tests).
+## 0.1.0
+First release. The full clinic loop — triage → diagnose → treat → bill — plus the
+retroactive audit, the amortization engine, provider-agnostic pricing, and an
+advisory mode for running inside a harness.
+### Commands
+- `audit <logs.jsonl>` — retroactive audit over past LLM call logs; reports the
+  eliminable-class fraction (eliminable / routable / essential) and what the
+  clinic loop would have saved.
+- `scan [path]` — read-only pre-flight: type-check + promoted local rules,
+  partition into local ($0) / model lanes, print an EOB with a savings
+  counterfactual. Writes a `.tokenclinic/` Health Record.
+- `scan [path] --json` — machine report for a host agent (no model call):
+  findings + tight context packets + recommended routing + `advice`.
+- `scan [path] --apply` — live: fix each escalation with the routed model
+  (structured-output patch), then re-run the checker to verify. Reverts any patch
+  that makes things worse. Needs `ANTHROPIC_API_KEY`.
+- `learn [path]` — amortize a recurring class into a deterministic ast-grep rule
+  (validated against generated fixtures before promotion); promoted rules then run
+  on-device for $0 forever. Needs `ANTHROPIC_API_KEY`.
+### Pricing
+- Resolves through llm-intel (the OpenRouter catalog → any provider) with a
+  committed offline snapshot fallback. Unknown models are surfaced, never priced
+  with a fabricated number. Routing is declarative via `.tokenclinic/routing.json`.
+### Harness
+- Ships a Claude Code skill (`skill/token-clinic/`) that runs `scan --json` as a
+  pre-flight gate so the host agent fixes from the packets with its own model.
+### Notes
+- Runtime: Bun. The type checker (`tsc`) is invoked at runtime, so `typescript` is
+  a runtime dependency.
+- `--apply` and `learn` require an Anthropic API key; everything else is free and
+  offline-capable.

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 mrdulasolutions
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,180 @@
+# 🩺 Token Clinic
+A pre-flight gate for coding agents. It runs cheap, deterministic analysis **on-device** before any model touches your code, routes only the irreducible work to the right-priced model, and prints a bill showing what you saved.
+> Thesis: most tokens in agentic coding are wasted having an expensive model rediscover what a cheap deterministic tool already knows. **Don't pay Opus to find a missing import.**
+## The clinic loop
+| Stage | What it does |
+| --- | --- |
+| **Triage** | Detect deps, run on-device analyzers (v1: `tsc`), normalize everything into one `Finding` schema, rank by signal. Most findings die here, for $0. |
+| **Diagnose** | Partition findings: autofixable → handled locally; `needs-llm` → escalated with a *tight context packet* (the relevant lines, not the whole repo). |
+| **Treat** | Route each escalated fix by difficulty: mechanical → Haiku, semantic → Sonnet, architectural → Opus. Apply, then re-run the source check — a fix isn't done until it verifies. |
+| **Bill (EOB)** | Cost per fix + savings vs. the naive "dump the file at a top model" baseline. The screenshot-able receipt. |
+## Install
+Runs on [Bun](https://bun.sh).
+```bash
+# global CLI
+bun add -g tokenclinic        # or, from a clone: bun link
+tokenclinic scan ./my-project
+# or run from a clone without installing
+bun install
+bun run src/cli.ts scan ./my-project
+```
+`scan` (incl. `--json`), `audit`, and `learn`'s clustering are free and offline-capable. `scan --apply` and `learn`'s synthesis call a model and need `ANTHROPIC_API_KEY`.
+## The commands
+Token Clinic ships the strategically correct **first move** (the retroactive audit) and the **recurring product** (the live scan):
+```bash
+# Approach A — measure the thesis from logs you already have ($0 risk, no code read)
+bun run demo:audit                                  # audits fixtures/sample-logs.jsonl
+bun run src/cli.ts audit /path/to/your-llm-calls.jsonl
+# Approach B — pre-flight scan of a repo (read-only, estimated EOB)
+bun run demo                                        # scans fixtures/sample-repo
+bun run src/cli.ts scan /path/to/a/ts/project
+# Approach B, live — actually fix + verify (needs ANTHROPIC_API_KEY)
+ANTHROPIC_API_KEY=sk-ant-... bun run src/cli.ts scan /path/to/project --apply
+# v2 — amortize a recurring class into a local rule (needs key), then it's $0 forever
+ANTHROPIC_API_KEY=sk-ant-... bun run src/cli.ts learn /path/to/project
+bun run src/cli.ts scan fixtures/with-rule    # demo: a promoted rule running for $0
+```
+### `audit` — the retroactive audit (Approach A)
+Run before building anything live. Ingests a JSONL of past LLM calls and prints the EOB **backwards** — what you spent, the *eliminable-class fraction* (the whole bet), and what the clinic loop would have saved. Runs entirely on exported logs, so there's no autofix risk and no code leaves the machine.
+```
+🩺 Token Clinic — retroactive audit · fixtures/sample-logs.jsonl
+   12 calls · $0.20 spent · prices: snapshot (estimated — some calls bucketed heuristically)
+  ● eliminable   6 calls    $0.09  42% of spend · killed on-device → $0
+  ● routable     3 calls    $0.05  24% of spend · re-priced to cheapest tier
+  ● essential    3 calls    $0.07  34% of spend · real reasoning → unchanged
+  eliminable-class fraction  42%  (clearly large — build it)
+  projected spend            $0.08 under the clinic loop
+  would have saved ~$0.12  (59% cheaper)
+```
+Log format is one JSON object per line: `{ "model", "inputTokens", "outputTokens", "task"?, "category"? }`. A `category` is authoritative; without one, the call is bucketed heuristically from `task` and the audit is flagged `estimated`.
+### `scan` — the live pre-flight gate (Approach B)
+Example output:
+```
+🩺 Token Clinic — fixtures/sample-repo
+   node project · 1 deps · 5 findings
+  ● TS2322 [semantic→sonnet-4-6] Type 'number' is not assignable to type 'string'.
+  ● TS6133 [local]               'unused' is declared but its value is never read.
+  ● TS2304 [mechanical→haiku-4-5] Cannot find name 'radius'.
+  ...
+  Explanation of Benefits (estimated — LLM step stubbed)
+    5 findings
+    1 fixed on-device   · $0.00
+    4 escalated to a model
+  clinic spend   $0.0093
+  naive cost     $0.12  (dump each file at the top model)
+  saved ~$0.11  (92% cheaper)
+```
+## Two fix modes
+`scan` is read-only and free; `scan --apply` is the live loop.
+- **`scan`** (default) — `DryRunFixer` estimates each escalation's cost from the real packet token count but does **not** call a model or touch files. The EOB is flagged `estimated`. Zero risk, zero spend.
+- **`scan --apply`** — `AnthropicFixer` ([`@anthropic-ai/sdk`](https://github.com/anthropics/anthropic-sdk-typescript)) sends each tight packet to the routed model (Haiku/Sonnet/Opus), gets a corrected snippet via **structured output**, writes it, then **re-runs `tsc` to verify the finding is gone**. It loops — re-triaging each pass so line shifts are handled — until no escalatable findings remain. Costs are **exact**, from the API's `usage`; the EOB reads `(actual)`. Needs `ANTHROPIC_API_KEY` (it refuses cleanly without one).
+## What's real vs. still stubbed
+**Real:** dep detection, `tsc` analysis + normalization, partition/routing, context-packet assembly, the Health Record, the `--apply` fix-and-verify loop, and — in `--apply` — exact token costs from the API.
+**Still stubbed / placeholder:**
+- Token *estimates* in read-only `scan` use chars/4 (real exact counts only arrive on `--apply`).
+- Local autofix (the `[local]` lane) is still reported, not yet applied — that codemod path is v2.
+## Pricing & other providers
+Prices resolve through [llm-intel](https://github.com/basisoasis/llm-intel) (the OpenRouter catalog) at command start, with a committed **offline snapshot** fallback so read-only `scan` never *requires* network. The footer shows which source was used (`prices: llm-intel` / `prices: snapshot`). An unknown model prices as `?` and is surfaced — never a fabricated number.
+Because llm-intel is an OpenRouter catalog, **other providers come for free** on the cost side: anything keyed `openai/…`, `google/…`, etc. prices correctly. The split that makes this work:
+- **Pricing, audit, EOB, and routing are provider-agnostic.** Routing is declarative — drop a `.tokenclinic/routing.json` mapping difficulty classes to *any* model id (`{ "semantic": "openai/gpt-4o" }`) and pricing resolves it.
+- **Only the actual model call is provider-specific** — `--apply`/`learn` use the Anthropic SDK today; that single seam is what a future OpenRouter/LiteLLM client would swap, and nothing else changes.
+## Inside a harness (Claude Code) — advisory mode
+Standalone, TokenClinic calls a model itself (`--apply`). **Inside a harness, it shouldn't** — the harness owns the model, the key, and the billing. So in-harness it runs as an **advisory pre-flight gate**: it does the $0 local elimination and hands the host agent a machine report; the agent does the reasoning fixes with *its own* model.
+```bash
+tokenclinic scan <path> --json   # read-only, NO model call — a report for an agent
+```
+The report's `advice` is the contract:
+- `advice.escalate` — the work list; each carries a `context.snippet` (fix from this, don't crawl the repo) and a `recommendedModel` (how hard the fix is).
+- `advice.autoApply` — the `local` $0 lane (promoted rules + mechanical), apply directly.
+- `eob` — the receipt to report back ("41 fixed locally for $0, 6 escalated, saved ~$0.40").
+A drop-in Claude Code skill lives in [`skill/token-clinic/SKILL.md`](skill/token-clinic/SKILL.md) — it tells the agent to run `scan --json` before any fix pass and act on the `advice`. This is the original "install via a skill" path: TokenClinic's value in-harness is the elimination + tight packets + receipt, **not** the model call.
+## The Codebase Health Record
+Each run writes `.tokenclinic/` into the scanned repo: `profile.json` (deps + analyzers) and an append-only `history.jsonl` (findings, spend, savings over time). v2 adds `rules/`, `quarantine/`, and `routing.json`. Every run reads it back, so every run gets cheaper and smarter — this is the compounding asset, not the router.
+## Roadmap
+Sequenced A → B → C, per the [office-hours design](docs/) — measure before you build, sell the receipt, price the moat last.
+- **A — the audit (here):** `tokenclinic audit` over existing logs. Puts a real dollar number on the unverified core thesis (the eliminable-class fraction) with zero code and zero risk. Earns revenue as a paid/concierge audit. **Gate:** fraction clearly large (>40%) → build B; clearly small (<15%) → walk away.
+- **B — the live scan (here):** `tokenclinic scan` — Triage + local autofix lane + escalation estimate + verify + EOB + Health Record. One language (TS). The recurring product, distributed as a self-controlled CLI (npm + GitHub Releases) — not an integration into harnesses you don't own.
+- **C — sell the moat (later):** open-core. Triage + receipt is the free funnel; charge for the compounding **Health Record** (promoted rules + fixtures + learned routing), shared team-wide.
+### v2 — the amortization engine (`learn`) — built
+When a `needs-llm` class recurs (≥3×), `tokenclinic learn` spends *one* model call to synthesize a deterministic check **as data, not code**: an [ast-grep](https://ast-grep.github.io/) rule object + test fixtures. The rule is **never trusted directly** — it must flag every positive fixture and none of the negatives (`src/amortize/validate.ts`) before it's promoted to `.tokenclinic/rules/`; failures go to `quarantine/`. Promoted rules then run on-device in every `scan` (`src/triage/analyzers/astgrep.ts`), landing in the `[local]` $0 lane. That class is **$0 forever** — pay once, run free.
+```
+src/amortize/
+  cluster.ts      # group recurring needs-llm findings (≥3×)
+  synthesize.ts   # ONE model call → ast-grep rule + fixtures (key-gated)
+  validate.ts     # the trust gate: rule must pass its fixtures
+  promote.ts      # → .tokenclinic/rules/ (promoted) or quarantine/
+  sg.ts           # ast-grep loader (@ast-grep/napi)
+```
+Only *eliminable* (bucket-1) findings amortize this way; *routable* (bucket-2) tacit-judgment work is routed cheaper, never eliminated. Still future: the [fff](https://github.com/dmtrKovalenko/fff) text-pattern fast lane and fff-powered Diagnose retrieval.
+## Architecture
+```
+src/
+  types.ts            # Finding / EOB / CallRecord — the records every stage shares
+  pricing/            # llm-intel adapter + offline snapshot + id/unit normalize
+  audit/              # Approach A: log ingest + bucket classifier + backwards EOB
+  amortize/           # v2: cluster → synthesize → validate → promote (ast-grep rules)
+  detect/deps.ts      # dependency profile
+  triage/             # analyzers (tsc + promoted ast-grep rules) → Finding[]
+  diagnose/           # partition + context-packet assembly
+  treat/              # model routing + Fixer seam (DryRun estimate / Anthropic live)
+  bill/eob.ts         # cost accounting + savings counterfactual
+  record/health.ts    # the .tokenclinic/ Health Record
+  scan.ts             # read-only scan assembly + the --json advisory contract
+  cli.ts              # audit / scan / scan --apply / learn — wires the loops together
+docs/
+  design-token-clinic.md   # the office-hours strategy (A → B → C)
+skill/
+  token-clinic/SKILL.md    # Claude Code skill — advisory pre-flight gate
+```

package/package.json ADDED Viewed

@@ -0,0 +1,67 @@
+{
+  "name": "tokenclinic",
+  "version": "0.1.1",
+  "description": "Pre-flight gate for coding agents: cheap local analysis first, route only the irreducible to the right-priced model, print the bill.",
+  "type": "module",
+  "license": "MIT",
+  "author": "mrdulasolutions <matt@mrdula.solutions>",
+  "homepage": "https://github.com/mrdulasolutions/TokenClinic#readme",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/mrdulasolutions/TokenClinic.git"
+  },
+  "bugs": {
+    "url": "https://github.com/mrdulasolutions/TokenClinic/issues"
+  },
+  "keywords": [
+    "llm",
+    "tokens",
+    "cost",
+    "pricing",
+    "claude",
+    "anthropic",
+    "openrouter",
+    "ast-grep",
+    "code-review",
+    "static-analysis",
+    "cli",
+    "pre-flight",
+    "claude-code",
+    "agent"
+  ],
+  "bin": {
+    "tokenclinic": "./src/cli.ts"
+  },
+  "files": [
+    "src",
+    "skill",
+    "README.md",
+    "LICENSE",
+    "CHANGELOG.md"
+  ],
+  "engines": {
+    "bun": ">=1.0.0"
+  },
+  "scripts": {
+    "scan": "bun run src/cli.ts scan",
+    "audit": "bun run src/cli.ts audit",
+    "learn": "bun run src/cli.ts learn",
+    "demo": "bun run src/cli.ts scan fixtures/sample-repo",
+    "demo:audit": "bun run src/cli.ts audit fixtures/sample-logs.jsonl",
+    "test": "bun test --timeout 30000",
+    "typecheck": "tsc --noEmit",
+    "prepublishOnly": "bun run typecheck && bun run test"
+  },
+  "dependencies": {
+    "@anthropic-ai/sdk": "^0.106.0",
+    "@ast-grep/napi": "^0.44.0",
+    "@basisoasis/llm-intel": "^1.0.33",
+    "typescript": "^5.6.0"
+  },
+  "optionalDependencies": {
+    "@ast-grep/napi-darwin-arm64": "^0.44.0"
+  },
+  "devDependencies": {
+    "@types/node": "^22.0.0"
+  }
+}

package/skill/token-clinic/SKILL.md ADDED Viewed

@@ -0,0 +1,76 @@
+---
+name: token-clinic
+description: >-
+  Pre-flight gate before editing code. Runs cheap on-device analysis first and
+  returns high-signal findings with tight context packets, so the model fixes
+  real problems instead of crawling the repo to rediscover them. Use BEFORE
+  starting a fix/refactor/cleanup pass on a TypeScript project, or when asked to
+  "clean up", "fix the errors", "lint", or "tighten up" a codebase.
+---
+# Token Clinic — pre-flight gate
+Token Clinic does the cheap, deterministic work on-device *before* you spend
+model tokens: it runs the type checker plus the repo's promoted local rules,
+eliminates what it can for $0, and hands you a tight packet for each remaining
+problem. **It does not call a model — you do the reasoning fixes, with your own
+model, from the packets it gives you.** That's the point: don't pay a premium
+model to find a missing import a local tool already found.
+## When to use
+Before any multi-file fix, refactor, lint, or "clean this up" pass on a
+TypeScript project. Run it first; act on its output; don't re-derive its findings.
+## How to run
+```bash
+tokenclinic scan <path> --json
+```
+(If `tokenclinic` isn't on PATH, run from the repo: `bun run /path/to/TokenClinic/src/cli.ts scan <path> --json`.)
+This prints a JSON report and **makes no API calls**. Parse it and act on `advice`.
+## What the report means
+```jsonc
+{
+  "eob": { "fixedLocally": 1, "escalated": 4, "saved": 0.11, ... }, // the receipt
+  "findings": [
+    {
+      "rule": "TS2322", "file": "src/x.ts", "line": 4,
+      "lane": "model",                  // "local" = $0 cleanup; "model" = needs you
+      "recommendedModel": "claude-sonnet-4-6",
+      "context": { "snippet": "...the relevant lines...", "startLine": 1 }
+    }
+  ],
+  "advice": {
+    "autoApply": ["<ids of local-lane findings>"],
+    "escalate":  [{ "id": "...", "file": "...", "line": 4, "recommendedModel": "..." }]
+  }
+}
+```
+## What to do with it
+1. **`advice.escalate` is your work list.** Fix each one using its `context.snippet`
+   — do **not** open and re-read the whole file or crawl the repo; the packet is
+   the context you need. The `recommendedModel` tells you how hard Token Clinic
+   judged the fix (mechanical → cheap, semantic → mid, architectural → top); use
+   it to calibrate effort, not to switch models mid-session.
+2. **`advice.autoApply` (the `local` lane)** are $0 mechanical/promoted-rule hits.
+   Apply them directly if trivial and in scope; otherwise mention them — they don't
+   warrant deep reasoning.
+3. **Report the `eob`** (e.g. "41 fixed locally for $0, 6 escalated, ~$0.40 saved
+   vs. crawling the repo") so the user sees what the pre-flight saved.
+4. **Re-run `scan --json` after your fixes** to confirm the findings are gone.
+## Notes
+- `scan` and `scan --json` are read-only and free; they never call a model.
+- For a recurring class of finding, `tokenclinic learn <path>` (needs an API key)
+  amortizes it into a local rule so it's caught for $0 on every future scan.
+- Standalone (outside this harness) `tokenclinic scan <path> --apply` will do the
+  fixes itself with its own key — but inside a harness, prefer `--json` and fix
+  with your own model, so spend and credentials stay on the harness's account.

package/src/amortize/cluster.ts ADDED Viewed

@@ -0,0 +1,20 @@
+import type { Finding, Cluster } from "../types";
+// Bucket the needs-llm findings by the rule that produced them, keep only the
+// buckets with at least `min` members (default 3 — a one-off can't pay back the
+// single synthesis call), and return them largest bucket first.
+export function cluster(findings: Finding[], min = 3): Cluster[] {
+  const byRule = new Map<string, Finding[]>();
+  for (const finding of findings) {
+    if (finding.fixability === "needs-llm") {
+      const bucket = byRule.get(finding.rule);
+      if (bucket) bucket.push(finding);
+      else byRule.set(finding.rule, [finding]);
+    }
+  }
+  const clusters: Cluster[] = [];
+  for (const [rule, members] of byRule.entries()) {
+    // The first member's message stands in for the whole class.
+    if (members.length >= min) clusters.push({ rule, message: members[0].message, findings: members });
+  }
+  clusters.sort((a, b) => b.findings.length - a.findings.length);
+  return clusters;
+}

package/src/amortize/promote.ts ADDED Viewed

@@ -0,0 +1,27 @@
+import { mkdirSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+import type { GeneratedRule } from "../types";
+import { validate } from "./validate";
+// Validate a synthesized rule against its fixtures, then file it: promoted rules
+// go to .tokenclinic/rules/ (and run for $0 in every future triage); failures go
+// to .tokenclinic/quarantine/ for inspection, never executed.
+export type PromotionStatus = "promoted" | "quarantined"; // "promoted" when validation passed, "quarantined" otherwise
+export interface Promotion {
+  status: PromotionStatus;
+  rule: GeneratedRule; // the rule that was validated and written to disk
+  failures: string[]; // validation failure messages; empty for a promoted rule
+  path: string; // where the rule JSON was written
+}
+// Validate `rule` against its own fixtures and write it as JSON under
+// `<root>/.tokenclinic/rules/` when it passes, or `<root>/.tokenclinic/quarantine/`
+// when it does not (the directory is created if missing). Returns the outcome,
+// the validation failures and the path written; fs errors propagate.
+export function promote(root: string, rule: GeneratedRule): Promotion {
+  const { ok, failures } = validate(rule);
+  const sub = ok ? "rules" : "quarantine";
+  const dir = join(root, ".tokenclinic", sub);
+  mkdirSync(dir, { recursive: true });
+  // rule.id is model-authored (see synthesize.ts), so it must never be able to
+  // steer the write outside `dir`: every character other than [A-Za-z0-9._-] —
+  // path separators included — becomes "-". Well-formed kebab-case ids are
+  // unchanged; an empty id falls back to a fixed name instead of ".json".
+  const fileStem = String(rule.id).replace(/[^A-Za-z0-9._-]/g, "-") || "unnamed-rule";
+  const path = join(dir, `${fileStem}.json`);
+  writeFileSync(path, JSON.stringify(rule, null, 2));
+  return { status: ok ? "promoted" : "quarantined", rule, failures, path };
+}
+}

package/src/amortize/sg.ts ADDED Viewed

@@ -0,0 +1,67 @@
+import { createRequire } from "node:module";
+// ast-grep loader + a thin match helper.
+//
+// Bun's global install cache symlinks @ast-grep/napi out of the project tree,
+// which breaks napi's internal resolution of its platform binding. So we try the
+// normal package first (works under npm/node and in CI) and fall back to the
+// platform package directly (works under Bun on this machine).
+const require = createRequire(import.meta.url); // require() anchored at this module; napi() uses it to load packages by name
+interface Napi { // the part of the loaded ast-grep module that this file relies on
+  parse: (lang: unknown, src: string) => { root: () => SgNode }; // parse `src`; root() gives the node runRule searches from
+  Lang: Record<string, unknown>; // language name → handle passed to parse; runRule rejects names missing here
+}
+interface SgNode {
+  findAll: (matcher: { rule: Record<string, unknown> }) => SgNode[]; // nodes found for the given rule object
+  range: () => { start: { line: number; column: number } }; // start position; runRule adds 1 to each to report 1-based values
+  text: () => string; // reported as SgMatch.text
+}
+// Names of the per-platform binding packages worth trying, following napi-rs
+// naming: linux has one package per libc (-gnu first, then -musl), win32 ends in
+// -msvc, and every other platform is just `<platform>-<arch>`.
+function bindingCandidates(): string[] {
+  const { arch, platform } = process;
+  switch (platform) {
+    case "linux":
+      return [`@ast-grep/napi-linux-${arch}-gnu`, `@ast-grep/napi-linux-${arch}-musl`];
+    case "win32":
+      return [`@ast-grep/napi-win32-${arch}-msvc`];
+    default:
+      return [`@ast-grep/napi-${platform}-${arch}`];
+  }
+}
+// Memoized module handle — set by the first successful load in napi().
+let cached: Napi | undefined;
+// Load ast-grep once and reuse it. The regular package is attempted first, then
+// each platform binding package in order; the first one that loads is cached
+// and returned. Throws if every attempt fails.
+function napi(): Napi {
+  if (cached) return cached;
+  const attempts = ["@ast-grep/napi", ...bindingCandidates()];
+  for (const pkg of attempts) {
+    try {
+      cached = require(pkg) as Napi;
+      return cached;
+    } catch {
+      /* not loadable — move on to the next name */
+    }
+  }
+  throw new Error("ast-grep native binding unavailable for this platform");
+}
+export interface SgMatch {
+  line: number; // 1-based line where the match starts
+  col: number; // 1-based column where the match starts
+  text: string; // source text of the matched node
+}
+// Apply one ast-grep rule object to `code`, parsed as `language`, and report every
+// match with a 1-based start position and its text. An unknown language, a
+// malformed rule, or a missing native binding surfaces as a throw; the caller
+// chooses whether that means "skip" or "invalid".
+export function runRule(language: string, code: string, rule: Record<string, unknown>): SgMatch[] {
+  const { parse, Lang } = napi();
+  const lang = Lang[language];
+  if (lang === undefined) throw new Error(`unknown ast-grep language: ${language}`);
+  const tree = parse(lang, code);
+  const matches: SgMatch[] = [];
+  for (const node of tree.root().findAll({ rule })) {
+    const { start } = node.range();
+    // ast-grep positions are shifted by one to give editor-style 1-based values.
+    matches.push({ line: start.line + 1, col: start.column + 1, text: node.text() });
+  }
+  return matches;
+}

package/src/amortize/synthesize.ts ADDED Viewed

@@ -0,0 +1,68 @@
+import Anthropic from "@anthropic-ai/sdk";
+import type { Cluster, GeneratedRule } from "../types";
+import { buildContext } from "../diagnose/context";
+// Spend ONE model call to turn a recurring class into a deterministic ast-grep
+// rule + fixtures. The output is never trusted directly — it goes through the
+// fixture gate (validate.ts) before it can be promoted. This is the only place
+// in amortization that costs tokens; everything downstream is free and local.
+const SYSTEM = // system prompt sent with the single synthesis call in synthesize()
+  "You author ast-grep rules. Given several real examples of the same class of " +
+  "code problem, write ONE ast-grep rule that matches this class STRUCTURALLY for " +
+  "the given language, plus test fixtures. ast-grep patterns use metavariables like " +
+  "$A, $B for sub-expressions (e.g. `console.log($A)`). Keep the pattern minimal and " +
+  "precise. Positive fixtures MUST match; negative fixtures are valid, similar-looking " +
+  "code that MUST NOT match. Return the ast-grep rule object as a JSON string.";
+const SCHEMA = { // JSON schema passed to the API as the structured-output format in synthesize()
+  type: "object",
+  properties: {
+    id: { type: "string", description: "short kebab-case rule id" },
+    message: { type: "string", description: "one-line description of what the rule flags" },
+    ruleJson: { type: "string", description: 'ast-grep rule object as JSON, e.g. {"pattern":"console.log($A)"}' }, // a JSON string; synthesize() parses it into the rule object
+    positive: { type: "array", items: { type: "string" }, description: "code snippets the rule must flag (≥2)" },
+    negative: { type: "array", items: { type: "string" }, description: "similar valid code the rule must NOT flag (≥2)" },
+  },
+  required: ["id", "message", "ruleJson", "positive", "negative"],
+  additionalProperties: false,
+};
+// Ask the model, in one call, for an ast-grep rule plus fixtures covering the
+// class described by `cl`. Up to four of the cluster's findings are sent as
+// examples, each with the context snippet built from `root`.
+//
+// Returns the candidate rule (still untrusted — it must pass validation before
+// promotion), or null when the reply is not valid JSON or does not have the
+// expected shape. Errors from the API call itself propagate to the caller.
+export async function synthesize(root: string, cl: Cluster, language = "TypeScript"): Promise<GeneratedRule | null> {
+  const client = new Anthropic();
+  const examples = cl.findings
+    .slice(0, 4)
+    .map((f, i) => `Example ${i + 1} (${f.file}:${f.line}) — ${f.message}\n` + "```\n" + (buildContext(root, f).snippet) + "\n```")
+    .join("\n\n");
+  const user =
+    `Language: ${language}\n` +
+    `These ${cl.findings.length} findings are all "${cl.rule}: ${cl.message}".\n` +
+    `Author an ast-grep rule that catches this class on-device, with fixtures.\n\n${examples}`;
+  const res = await client.messages.create({
+    // synthesis is a reasoning task — use the top model once; it pays for itself forever
+    model: "claude-opus-4-8",
+    max_tokens: 2000,
+    system: SYSTEM,
+    messages: [{ role: "user", content: user }],
+    output_config: { format: { type: "json_schema", schema: SCHEMA } },
+  });
+  const text = res.content.find((b) => b.type === "text")?.text ?? "{}";
+  try {
+    // Model output is untrusted: check every field before building the rule, so a
+    // malformed reply becomes "not amortized this run" instead of a crash later
+    // in validation or promotion.
+    const o: unknown = JSON.parse(text);
+    if (!isRecord(o)) return null;
+    const { id, message, ruleJson, positive, negative } = o;
+    if (typeof id !== "string" || id === "") return null;
+    if (typeof message !== "string" || typeof ruleJson !== "string") return null;
+    if (!isStringArray(positive) || !isStringArray(negative)) return null;
+    // ruleJson is itself a JSON string and must decode to an object.
+    const rule: unknown = JSON.parse(ruleJson);
+    if (!isRecord(rule)) return null;
+    return {
+      id,
+      language,
+      message,
+      severity: "warning",
+      rule,
+      origin: cl.rule,
+      fixtures: { positive, negative },
+    };
+  } catch {
+    return null; // malformed output — the cluster simply isn't amortized this run
+  }
+}
+// True for JSON objects: non-null, typeof "object", and not an array.
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+// True when `value` is an array whose every element is a string.
+function isStringArray(value: unknown): value is string[] {
+  return Array.isArray(value) && value.every((item) => typeof item === "string");
+}

package/src/amortize/validate.ts ADDED Viewed

@@ -0,0 +1,51 @@
+import type { GeneratedRule } from "../types";
+import { runRule } from "./sg";
+// The trust gate. A synthesized rule may only be promoted if it flags every
+// positive fixture and flags none of the negatives — proving, on examples, that
+// it catches the class without false positives. This is what stops a noisy
+// LLM-authored rule from poisoning the "high-signal" promise.
+export interface ValidationResult {
+  ok: boolean; // true only when no failure was recorded
+  failures: string[]; // one message per failed check; empty when ok
+}
+// Collapse whitespace runs to single spaces, trim, and cap at 60 characters so a
+// fixture fits inside a one-line failure message.
+const oneline = (s: string) => {
+  const flattened = s.replace(/\s+/g, " ");
+  return flattened.trim().slice(0, 60);
+};
+// Gate a synthesized rule on its own fixtures: it passes only if it can run, has
+// at least one fixture of each kind, matches every positive fixture, and matches
+// no negative fixture. Returns every failure found; a rule that cannot run at all
+// is reported with that single failure.
+export function validate(rule: GeneratedRule): ValidationResult {
+  // Probe with empty source first — an unrunnable rule fails immediately.
+  try {
+    runRule(rule.language, "", rule.rule);
+  } catch (e) {
+    return { ok: false, failures: [`rule is not runnable: ${(e as Error).message}`] };
+  }
+  // Number of matches in one fixture; a throw while matching counts as zero, which
+  // is a miss for a positive fixture and a pass for a negative one.
+  const matchCount = (code: string): number => {
+    try {
+      return runRule(rule.language, code, rule.rule).length;
+    } catch {
+      return 0;
+    }
+  };
+  const failures: string[] = [];
+  // Empty fixture lists are failures, so a rule can't pass vacuously.
+  if (rule.fixtures.positive.length === 0) failures.push("no positive fixtures");
+  if (rule.fixtures.negative.length === 0) failures.push("no negative fixtures");
+  for (const code of rule.fixtures.positive) {
+    if (matchCount(code) < 1) failures.push(`positive fixture not matched: ${oneline(code)}`);
+  }
+  for (const code of rule.fixtures.negative) {
+    if (matchCount(code) > 0) failures.push(`negative fixture matched (false positive): ${oneline(code)}`);
+  }
+  return { ok: failures.length === 0, failures };
+}
+}