tokenclinic 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/LICENSE +21 -0
- package/README.md +180 -0
- package/package.json +67 -0
- package/skill/token-clinic/SKILL.md +76 -0
- package/src/amortize/cluster.ts +20 -0
- package/src/amortize/promote.ts +27 -0
- package/src/amortize/sg.ts +67 -0
- package/src/amortize/synthesize.ts +68 -0
- package/src/amortize/validate.ts +51 -0
- package/src/audit/audit.ts +76 -0
- package/src/audit/classify.ts +56 -0
- package/src/bill/eob.ts +77 -0
- package/src/cli.ts +221 -0
- package/src/detect/deps.ts +37 -0
- package/src/diagnose/context.ts +27 -0
- package/src/diagnose/partition.ts +37 -0
- package/src/pricing/llmIntel.ts +41 -0
- package/src/pricing/normalize.ts +35 -0
- package/src/pricing/table.ts +50 -0
- package/src/record/health.ts +44 -0
- package/src/scan.ts +95 -0
- package/src/treat/anthropic.ts +85 -0
- package/src/treat/apply.ts +74 -0
- package/src/treat/fixer.ts +38 -0
- package/src/treat/route.ts +39 -0
- package/src/triage/analyzers/astgrep.ts +74 -0
- package/src/triage/analyzers/tsc.ts +49 -0
- package/src/triage/index.ts +20 -0
- package/src/types.ts +114 -0
- package/src/util.ts +0 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import type { AuditResult, Bucket, CallRecord } from "../types";
|
|
3
|
+
import { cost, isPriced } from "../pricing/table";
|
|
4
|
+
import { classify } from "./classify";
|
|
5
|
+
|
|
6
|
+
// Approach A — the retroactive audit. Ingest a JSONL of past LLM calls and print
|
|
7
|
+
// the EOB backwards: total spend, the eliminable-class fraction (the bet), and the
|
|
8
|
+
// projected savings had the clinic loop been in place. No code is read, nothing is
|
|
9
|
+
// fixed — it runs entirely on exported logs, so it carries zero autofix or
|
|
10
|
+
// code-exfiltration risk. This is the move that measures the core thesis (Premise 2).
|
|
11
|
+
|
|
12
|
+
// Where routable work would have run instead — the cheapest tier (Premise 5: bucket 2
|
|
13
|
+
// is re-priced, not eliminated).
|
|
14
|
+
const ROUTABLE_TARGET = "claude-haiku-4-5";
|
|
15
|
+
|
|
16
|
+
/**
 * Read a JSONL export of past LLM calls and return the usable records.
 *
 * Blank lines, lines that are not valid JSON, JSON values that are not
 * objects, and records lacking a string `model` or finite numeric
 * `inputTokens` / `outputTokens` are skipped — a partial or noisy log must not
 * abort the audit, and a record without both token counts cannot be priced
 * (it would turn every downstream total into NaN).
 *
 * @param path - Path of the JSONL file; read synchronously as UTF-8.
 * @returns The accepted records, in file order.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
export function parseLog(path: string): CallRecord[] {
  const records: CallRecord[] = [];
  for (const line of readFileSync(path, "utf8").split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      // skip malformed lines — partial logs shouldn't abort the audit
      continue;
    }
    if (typeof parsed !== "object" || parsed === null) continue;

    const candidate = parsed as Partial<CallRecord>;
    const priceable =
      typeof candidate.model === "string" &&
      Number.isFinite(candidate.inputTokens) &&
      Number.isFinite(candidate.outputTokens);
    if (priceable) records.push(candidate as CallRecord);
  }
  return records;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Run the retroactive audit over already-parsed call records.
 *
 * Every call is priced, classified into a bucket, and added to that bucket's
 * count and spend; alongside, the cost it would have had under the clinic
 * loop is accumulated via `projectedCost`.
 *
 * @param calls - Records to audit.
 * @returns Totals, the per-bucket breakdown, the eliminable share of spend
 *   (0 when nothing was spent), projected spend and savings, whether any call
 *   was bucketed heuristically, and how many calls had no known price.
 */
export function audit(calls: CallRecord[]): AuditResult {
  const tally = () => ({ count: 0, spend: 0 });
  const byBucket: AuditResult["byBucket"] = {
    eliminable: tally(),
    routable: tally(),
    essential: tally(),
  };

  let spend = 0;
  let projectedSpend = 0;
  let estimated = false;
  let unpriced = 0;

  calls.forEach((record) => {
    if (!isPriced(record.model)) unpriced += 1;

    const paid = cost(record.model, record.inputTokens, record.outputTokens);
    const verdict = classify(record);
    // One heuristic bucketing is enough to flag the whole audit as estimated.
    if (verdict.estimated) estimated = true;

    const slot = byBucket[verdict.bucket];
    spend += paid;
    slot.count += 1;
    slot.spend += paid;

    projectedSpend += projectedCost(verdict.bucket, record);
  });

  // Guard the division: an empty or entirely unpriced log has zero spend.
  const eliminableFraction = spend > 0 ? byBucket.eliminable.spend / spend : 0;
  const projectedSaved = spend - projectedSpend;

  return {
    calls: calls.length,
    spend,
    byBucket,
    eliminableFraction,
    projectedSpend,
    projectedSaved,
    estimated,
    unpriced,
  };
}
|
|
70
|
+
|
|
71
|
+
/**
 * What a single call would have cost under the clinic loop.
 *
 * - eliminable → 0 (killed on-device)
 * - routable   → re-priced at ROUTABLE_TARGET, but never above what the call
 *                actually cost: a call whose own model is unpriced (actual
 *                cost 0) or already cheaper than the target must not show up
 *                as a projected cost increase
 * - essential  → unchanged
 *
 * @param bucket - Bucket the call was classified into.
 * @param call - The call being re-priced.
 * @returns Projected cost, in the same unit `cost()` returns.
 */
function projectedCost(bucket: Bucket, call: CallRecord): number {
  if (bucket === "eliminable") return 0; // killed on-device

  const actual = cost(call.model, call.inputTokens, call.outputTokens);
  if (bucket !== "routable") return actual; // essential — unchanged

  const rerouted = cost(ROUTABLE_TARGET, call.inputTokens, call.outputTokens);
  return Math.min(actual, rerouted);
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import type { Bucket, CallRecord } from "../types";
|
|
2
|
+
|
|
3
|
+
// Bucket a past call into the two-bucket economics (Premise 5).
|
|
4
|
+
//
|
|
5
|
+
// Authoritative path: if the log carries a `category` (instrumented or concierge
|
|
6
|
+
// tagging), trust it. Heuristic path: otherwise infer from the task text and flag
|
|
7
|
+
// the whole audit as estimated. The heuristic rules are intentionally explicit and
|
|
8
|
+
// printed in the report — the audit's credibility depends on the bucketing being
|
|
9
|
+
// inspectable, not magic.
|
|
10
|
+
|
|
11
|
+
// Explicit `category` tag → bucket. Keys are lowercase; lookups go through
// bucketForCategory() so that only the table's own keys can ever match.
const CATEGORY_BUCKET: Record<string, Bucket> = {
  import: "eliminable",
  lint: "eliminable",
  type: "eliminable",
  format: "eliminable",
  syntax: "eliminable",
  refactor: "routable",
  boilerplate: "routable",
  docs: "routable",
  test: "routable",
  architecture: "essential",
  design: "essential",
  reasoning: "essential",
  debug: "essential",
};

// Heuristic rules, tried against the task text when no usable category exists.
const ELIMINABLE_PATTERNS = [
  /\bimport\b/i,
  /\blint\b/i,
  /eslint|prettier|format/i,
  /unused/i,
  /semicolon|\bsyntax\b/i,
  /\bmissing\b/i,
  /\btypo\b/i,
  /type error|type mismatch|not assignable/i,
];

const ROUTABLE_PATTERNS = [
  /boilerplate/i,
  /docstring|\bcomment\b/i,
  /test stub|scaffold/i,
  /\brename\b/i,
  /simple refactor/i,
];

/**
 * Resolve a log-supplied category tag to a bucket.
 *
 * The tag comes from log data, so it is checked to be a string and matched
 * against own properties only. A bare `CATEGORY_BUCKET[tag]` would also
 * "match" inherited members such as `constructor` or `toString` and hand back
 * a value that is not a bucket at all.
 */
function bucketForCategory(category: unknown): Bucket | undefined {
  if (typeof category !== "string") return undefined;
  const key = category.trim().toLowerCase();
  return Object.prototype.hasOwnProperty.call(CATEGORY_BUCKET, key) ? CATEGORY_BUCKET[key] : undefined;
}

/**
 * Bucket one past call.
 *
 * Authoritative path: a recognised `category` decides the bucket and the
 * result is not estimated. Heuristic path: otherwise the task text is matched
 * against the eliminable patterns, then the routable ones, and falls back to
 * "essential"; every heuristic result is flagged `estimated: true`.
 *
 * @param call - The logged call; only `category` and `task` are read.
 * @returns The bucket and whether it was inferred heuristically.
 */
export function classify(call: CallRecord): { bucket: Bucket; estimated: boolean } {
  const tagged = bucketForCategory(call.category);
  if (tagged) return { bucket: tagged, estimated: false };

  const task = call.task ?? "";
  if (ELIMINABLE_PATTERNS.some((re) => re.test(task))) return { bucket: "eliminable", estimated: true };
  if (ROUTABLE_PATTERNS.some((re) => re.test(task))) return { bucket: "routable", estimated: true };
  return { bucket: "essential", estimated: true };
}
|
package/src/bill/eob.ts
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import type { Finding, EOB } from "../types";
|
|
4
|
+
import { cost, estimateTokens } from "../pricing/table";
|
|
5
|
+
|
|
6
|
+
// Bill: turn resolved Findings into an Explanation Of Benefits.
|
|
7
|
+
//
|
|
8
|
+
// The savings number is a counterfactual: for each finding, the *naive* approach
|
|
9
|
+
// dumps the whole containing file at a top model to find-and-fix it. The clinic
|
|
10
|
+
// loop instead fixes autofixable findings for $0 and escalates the rest with a
|
|
11
|
+
// tight packet to a routed (often cheaper) model. saved = naive - clinic.
|
|
12
|
+
//
|
|
13
|
+
// Honest caveat: in v1 the per-escalation cost is an estimate (DryRunFixer), so
|
|
14
|
+
// the EOB is flagged estimated. The token counts underlying it are real.
|
|
15
|
+
const NAIVE_MODEL = "claude-opus-4-8"; // model the naive whole-file baseline is priced at
const NAIVE_OUTPUT_TOKENS = 300; // flat output allowance charged per naive pass

/**
 * Turn resolved findings into an Explanation Of Benefits.
 *
 * Each finding adds one naive pass to the baseline (its whole file as input at
 * NAIVE_MODEL plus NAIVE_OUTPUT_TOKENS of output). Autofix findings are booked
 * under the "local" key at zero cost, ignored findings are only counted, and
 * everything else counts as escalated, contributing its `resolution` cost when
 * one is present.
 *
 * @param root - Directory that finding file paths are joined onto.
 * @param findings - Findings to bill.
 * @param estimated - Copied onto the result unchanged; defaults to true.
 * @returns Counts, clinic spend, naive cost, their difference, and a per-key breakdown.
 */
export function buildEOB(root: string, findings: Finding[], estimated = true): EOB {
  // Each file is read and measured at most once; unreadable files count as 0 tokens.
  const tokensByFile = new Map<string, number>();
  const tokensFor = (file: string): number => {
    const cached = tokensByFile.get(file);
    if (cached !== undefined) return cached;

    let measured: number;
    try {
      measured = estimateTokens(readFileSync(join(root, file), "utf8"));
    } catch {
      measured = 0;
    }
    tokensByFile.set(file, measured);
    return measured;
  };

  const byModel: EOB["byModel"] = {};
  const book = (key: string, amount: number) => {
    const entry = (byModel[key] ??= { count: 0, cost: 0 });
    entry.count += 1;
    entry.cost += amount;
  };

  let spend = 0;
  let naiveCost = 0;
  let fixedLocally = 0;
  let escalated = 0;
  let ignored = 0;

  for (const finding of findings) {
    // Naive baseline: every finding would have cost a full-file pass at the top model.
    naiveCost += cost(NAIVE_MODEL, tokensFor(finding.file), NAIVE_OUTPUT_TOKENS);

    switch (finding.fixability) {
      case "autofix":
        fixedLocally += 1;
        book("local", 0);
        break;
      case "ignore":
        ignored += 1;
        break;
      default:
        escalated += 1;
        if (finding.resolution) {
          spend += finding.resolution.cost;
          book(finding.resolution.model, finding.resolution.cost);
        }
    }
  }

  const saved = naiveCost - spend;
  return {
    total: findings.length,
    fixedLocally,
    escalated,
    ignored,
    spend,
    naiveCost,
    saved,
    byModel,
    estimated,
  };
}
|
package/src/cli.ts
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
import { resolve, relative } from "node:path";
|
|
3
|
+
import type { Finding, EOB, AuditResult, Bucket } from "./types";
|
|
4
|
+
import { detectDeps } from "./detect/deps";
|
|
5
|
+
import { triage } from "./triage";
|
|
6
|
+
import { partition } from "./diagnose/partition";
|
|
7
|
+
import { assembleScan, toReport } from "./scan";
|
|
8
|
+
import { AnthropicFixer } from "./treat/anthropic";
|
|
9
|
+
import { runApplyLoop } from "./treat/apply";
|
|
10
|
+
import { loadRouting, routedModels } from "./treat/route";
|
|
11
|
+
import { loadPrices } from "./pricing/llmIntel";
|
|
12
|
+
import { cluster } from "./amortize/cluster";
|
|
13
|
+
import { synthesize } from "./amortize/synthesize";
|
|
14
|
+
import { promote } from "./amortize/promote";
|
|
15
|
+
import { buildEOB } from "./bill/eob";
|
|
16
|
+
import { writeHealthRecord } from "./record/health";
|
|
17
|
+
import { parseLog, audit } from "./audit/audit";
|
|
18
|
+
import { c, usd } from "./util";
|
|
19
|
+
|
|
20
|
+
// ── scan (Approach B): read-only pre-flight, estimated EOB ──────────────────
|
|
21
|
+
/**
 * Read-only pre-flight scan: assemble findings and the EOB, persist the
 * health record, then print either the machine report (`json`) or the human
 * summary.
 *
 * @param target - Project path; resolved to an absolute root.
 * @param json - When true, print only the JSON report and return.
 */
async function scan(target: string, json: boolean) {
  const root = resolve(target);
  const now = new Date().toISOString();

  loadRouting(root);
  const prices = await loadPrices(routedModels());
  const data = await assembleScan(root);
  const { deps, findings, eob } = data;
  const recordDir = writeHealthRecord(root, deps, findings, eob, now);

  if (json) {
    // The in-harness contract: emit findings + tight packets + recommended
    // routing for a host agent to act on. No model is called here.
    const report = toReport(root, prices.source, data);
    console.log(JSON.stringify(report, null, 2));
    return;
  }

  const cwd = process.cwd();
  const where = relative(cwd, root) || ".";
  const depCount = Object.keys(deps.deps).length;

  console.log(`\n${c.bold("🩺 Token Clinic")} ${c.dim(`— ${where}`)}`);
  console.log(c.dim(` ${deps.manager} project · ${depCount} deps · ${findings.length} findings · prices: ${prices.source}\n`));
  findings.forEach((finding) => printFinding(finding));
  printEOB(eob);
  console.log(c.dim(` health record → ${relative(cwd, recordDir)}/\n`));
}
|
|
44
|
+
|
|
45
|
+
// ── scan --apply (Approach B, live): iterative fix + verify ─────────────────
|
|
46
|
+
/**
 * `scan --apply`: run the live fix-and-verify loop and bill it with the
 * costs recorded on each fixed finding.
 *
 * Exits with status 1 when ANTHROPIC_API_KEY is not set. Otherwise prints one
 * line per attempted fix, builds a non-estimated EOB from the fixed findings
 * plus the pre-existing findings that did not need a model, writes the health
 * record, and prints the EOB.
 *
 * @param target - Project path; resolved to an absolute root.
 */
async function scanApply(target: string) {
  if (!process.env.ANTHROPIC_API_KEY) {
    console.log(`${c.red("scan --apply needs a model")} — set ANTHROPIC_API_KEY, then re-run.`);
    console.log(c.dim(" (without it, run plain `tokenclinic scan` for the estimated EOB.)"));
    process.exit(1);
  }

  const root = resolve(target);
  const now = new Date().toISOString();
  loadRouting(root);
  await loadPrices(routedModels());
  const deps = detectDeps(root);

  const { before, fixed } = await runApplyLoop(root, new AnthropicFixer());

  for (const f of fixed) {
    const ok = f.resolution?.verified;
    // Same fallback laneTag() uses: a finding without a resolution shows "?"
    // rather than the literal text "undefined".
    const model = f.resolution?.model.replace("claude-", "") ?? "?";
    console.log(
      ` ${ok ? c.green("✓") : c.yellow("✗")} ${c.bold(f.rule)} ` +
        `${c.dim(`${f.file}:${f.line}`)} → ${model} ${c.dim(usd(f.resolution?.cost ?? 0))}`,
    );
  }

  // EOB: real costs from `fixed`, plus the locally-fixable / ignored buckets from `before`.
  const reported = [...before.filter((f) => f.fixability !== "needs-llm"), ...fixed];
  const eob = buildEOB(root, reported, false);
  const recordDir = writeHealthRecord(root, deps, reported, eob, now);

  const verified = fixed.filter((f) => f.resolution?.verified).length;
  console.log(`\n${c.bold("🩺 Token Clinic")} ${c.dim(`— applied ${verified}/${fixed.length} fixes verified`)}\n`);
  printEOB(eob);
  console.log(c.dim(` health record → ${relative(process.cwd(), recordDir)}/\n`));
}
|
|
79
|
+
|
|
80
|
+
// ── learn (v2): amortize recurring classes into local rules ─────────────────
|
|
81
|
+
// Minimum number of occurrences passed to cluster() for a class to be worth amortizing.
const CLUSTER_MIN = 3;

/**
 * `learn`: for every recurring cluster of findings, ask for a synthesized
 * rule and try to promote it, reporting promoted / quarantined per cluster.
 *
 * Exits with status 1 when ANTHROPIC_API_KEY is not set.
 *
 * @param target - Project path; resolved to an absolute root.
 */
async function learn(target: string) {
  if (!process.env.ANTHROPIC_API_KEY) {
    console.log(`${c.red("learn needs a model")} — set ANTHROPIC_API_KEY, then re-run.`);
    process.exit(1);
  }

  const root = resolve(target);
  loadRouting(root);
  const clusters = cluster(partition(triage(root)), CLUSTER_MIN);

  const where = relative(process.cwd(), root) || ".";
  console.log(`\n${c.bold("🩺 Token Clinic — learn")} ${c.dim(`— ${where}`)}`);

  if (clusters.length === 0) {
    console.log(c.dim(` no needs-llm class recurs ≥${CLUSTER_MIN}× — nothing to amortize yet.\n`));
    return;
  }

  for (const group of clusters) {
    // Progress prefix without a newline; the outcome is appended to the same line.
    process.stdout.write(` synthesizing ${c.bold(group.rule)} ${c.dim(`(${group.findings.length}×)`)} … `);

    const rule = await synthesize(root, group);
    if (!rule) {
      console.log(c.yellow("no usable rule returned"));
      continue;
    }

    const outcome = promote(root, rule);
    if (outcome.status === "promoted") {
      console.log(`${c.green("✓ promoted")} ${c.dim(`→ rules/${rule.id}.json — this class is now $0 forever`)}`);
    } else {
      console.log(`${c.yellow("⚠ quarantined")} ${c.dim(rule.id)}`);
      outcome.failures.forEach((failure) => console.log(c.dim(` ${failure}`)));
    }
  }

  console.log(c.dim(`\n promoted rules run on every future ${c.bold("scan")} — re-run scan to see the new $0 lane.\n`));
}
|
|
117
|
+
|
|
118
|
+
// ── shared rendering ────────────────────────────────────────────────────────
|
|
119
|
+
/**
 * Print one finding as two lines: a severity dot, rule, lane tag and message,
 * then the dimmed `file:line` location. Errors get a red dot, everything else yellow.
 */
function printFinding(f: Finding) {
  const dot = f.severity === "error" ? c.red("●") : c.yellow("●");
  const headline = [dot, c.bold(f.rule), laneTag(f), f.message].join(" ");
  const location = c.dim(`${f.file}:${f.line}`);

  console.log(` ${headline}`);
  console.log(` ${location}`);
}
|
|
124
|
+
|
|
125
|
+
/**
 * Coloured lane tag for a finding: `[local]` for autofix, `[ignore]` for
 * ignored, otherwise `[difficulty→model]` with the "claude-" prefix removed
 * and "?" when the finding carries no resolution.
 */
function laneTag(f: Finding): string {
  switch (f.fixability) {
    case "autofix":
      return c.green("[local]");
    case "ignore":
      return c.dim("[ignore]");
    default: {
      const tier = f.resolution?.model.replace("claude-", "") ?? "?";
      return c.cyan(`[${f.difficulty}→${tier}]`);
    }
  }
}
|
|
130
|
+
|
|
131
|
+
/**
 * Render an Explanation of Benefits: finding counts, one line per model key
 * (the "local" key is skipped), then clinic spend, naive cost and the saving.
 */
function printEOB(eob: EOB) {
  const padCount = (n: number) => String(n).padStart(3);
  const divider = c.dim(" ─────────────────────────────────────────");
  const flag = c.dim(eob.estimated ? " (estimated — LLM step stubbed)" : " (actual)");

  console.log(`${c.bold(" Explanation of Benefits")}${flag}`);
  console.log(divider);
  console.log(` ${padCount(eob.total)} findings`);
  console.log(` ${c.green(padCount(eob.fixedLocally))} fixed on-device ${c.dim("· $0.00")}`);
  console.log(` ${c.cyan(padCount(eob.escalated))} escalated to a model`);

  Object.entries(eob.byModel)
    .filter(([model]) => model !== "local")
    .forEach(([model, entry]) => {
      console.log(` ${c.dim("→")} ${model.padEnd(20)} ${entry.count}× ${c.dim(usd(entry.cost))}`);
    });

  console.log(divider);
  console.log(` clinic spend ${c.bold(usd(eob.spend))}`);
  console.log(` naive cost ${c.dim(usd(eob.naiveCost))} ${c.dim("(dump each file at the top model)")}`);

  const headline = c.green(c.bold(`saved ~${usd(eob.saved)}`));
  const ratio = c.dim(`(${pct(eob.saved, eob.naiveCost)} cheaper)`);
  console.log(` ${headline} ${ratio}\n`);
}
|
|
147
|
+
|
|
148
|
+
// Whole-percent share of `saved` in `base`; "0%" whenever `base` is not a positive number.
const pct = (saved: number, base: number) => {
  if (!(base > 0)) return "0%";
  const rounded = Math.round((saved / base) * 100);
  return `${rounded}%`;
};
|
|
149
|
+
|
|
150
|
+
// ── audit (Approach A): retroactive audit over existing call logs ────────────
|
|
151
|
+
/**
 * `audit`: parse a JSONL call log, warm prices for the models it mentions,
 * and print the retroactive audit.
 *
 * Exits with status 1 when no path is given, or when the log cannot be read
 * (a mistyped path gets a one-line message instead of an uncaught exception).
 *
 * @param logPath - Path to the JSONL log, as given on the command line.
 */
async function runAudit(logPath: string | undefined) {
  if (!logPath) {
    console.log("usage: tokenclinic audit <logs.jsonl>");
    process.exit(1);
  }

  const absolute = resolve(logPath);
  let calls: ReturnType<typeof parseLog>;
  try {
    calls = parseLog(absolute);
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    console.log(`${c.red("audit could not read the log")} — ${reason}`);
    process.exit(1);
  }

  // Price against the log's own models. The callback parameter is named `call`
  // so it does not shadow the imported colour helper `c`.
  const prices = await loadPrices(calls.map((call) => call.model));
  reportAudit(relative(process.cwd(), absolute), audit(calls), prices.source);
}
|
|
160
|
+
|
|
161
|
+
// Colour applied to each bucket's row in the audit report.
const BUCKET_LABEL: Record<Bucket, (s: string) => string> = {
  eliminable: c.green,
  routable: c.cyan,
  essential: c.dim,
};

// Short explanation printed at the end of each bucket's row.
const BUCKET_NOTE: Record<Bucket, string> = {
  eliminable: "killed on-device → $0",
  routable: "re-priced to cheapest tier",
  essential: "real reasoning → unchanged",
};

/**
 * Print the retroactive audit: header, optional unpriced-call warning, one
 * row per bucket, then the eliminable fraction with its verdict (≥40% large,
 * <15% small, otherwise murky), projected spend and projected saving.
 *
 * @param source - Label for the audited log, shown in the header.
 * @param a - Audit result to render.
 * @param priceSource - Where prices came from, shown in the header.
 */
function reportAudit(source: string, a: AuditResult, priceSource: string) {
  console.log(`\n${c.bold("🩺 Token Clinic — retroactive audit")} ${c.dim(`· ${source}`)}`);

  let flag = "";
  if (a.estimated) flag = c.dim(" (estimated — some calls bucketed heuristically)");
  console.log(c.dim(` ${a.calls} calls · ${usd(a.spend)} spent · prices: ${priceSource}${flag}\n`));

  if (a.unpriced > 0) {
    console.log(c.yellow(` ⚠ ${a.unpriced} call(s) had no known price — excluded from cost\n`));
  }

  const order: Bucket[] = ["eliminable", "routable", "essential"];
  order.forEach((bucket) => {
    const row = a.byBucket[bucket];
    const share = a.spend > 0 ? Math.round((row.spend / a.spend) * 100) : 0;
    const paint = BUCKET_LABEL[bucket];

    const left = ` ${paint("●")} ${paint(bucket.padEnd(11))} ${String(row.count).padStart(2)} calls `;
    const right = `${usd(row.spend).padStart(8)} ${c.dim(`${String(share).padStart(2)}% of spend · ${BUCKET_NOTE[bucket]}`)}`;
    console.log(left + right);
  });

  const frac = Math.round(a.eliminableFraction * 100);
  let verdict: string;
  if (frac >= 40) {
    verdict = c.green("clearly large — build it");
  } else if (frac < 15) {
    verdict = c.yellow("clearly small — walk away");
  } else {
    verdict = c.yellow("murky — instrument deeper");
  }

  console.log(c.dim("\n ─────────────────────────────────────────"));
  console.log(` eliminable-class fraction ${c.bold(`${frac}%`)} ${c.dim(`(${verdict})`)}`);
  console.log(` projected spend ${c.bold(usd(a.projectedSpend))} ${c.dim("under the clinic loop")}`);

  const headline = c.green(c.bold(`would have saved ~${usd(a.projectedSaved)}`));
  const ratio = c.dim(`(${pct(a.projectedSaved, a.spend)} cheaper)`);
  console.log(` ${headline} ${ratio}\n`);
}
|
|
197
|
+
|
|
198
|
+
// ── entry ────────────────────────────────────────────────────────────────────
|
|
199
|
+
// Command dispatch. The first argument selects the command; the first
// remaining argument that does not start with "-" is taken as the path.
const [cmd, ...rest] = process.argv.slice(2);
const apply = ["--apply", "--fix"].some((flagName) => rest.includes(flagName));
const json = rest.includes("--json");
const path = rest.find((a) => !a.startsWith("-"));

switch (cmd) {
  case "scan":
    if (apply) {
      await scanApply(path ?? ".");
    } else {
      await scan(path ?? ".", json);
    }
    break;
  case "audit":
    await runAudit(path);
    break;
  case "learn":
    await learn(path ?? ".");
    break;
  default:
    // Unknown or missing command: print usage and exit non-zero.
    console.log(
      [
        "usage:",
        " tokenclinic audit <logs.jsonl> retroactive audit over past LLM calls",
        " tokenclinic scan [path] read-only pre-flight (estimated EOB)",
        " tokenclinic scan [path] --json machine report for a host agent (no model call)",
        " tokenclinic scan [path] --apply live: fix + verify (needs ANTHROPIC_API_KEY)",
        " tokenclinic learn [path] amortize recurring classes → local rules (needs key)",
      ].join("\n"),
    );
    process.exit(1);
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
|
|
4
|
+
// Dependency detection. v1 reads package.json + the lockfile to identify the
|
|
5
|
+
// package manager and installed deps. This is what later drives dep-aware
|
|
6
|
+
// analyzer auto-configuration (enable the react-hooks rules iff react is a dep,
|
|
7
|
+
// etc.) — for v1 we just surface the profile in the report and Health Record.
|
|
8
|
+
|
|
9
|
+
/** Package manager name plus the merged dependency map of a project. */
export interface DepProfile {
  manager: string;
  deps: Record<string, string>;
}

// Lockfile → manager, checked in order; the first file that exists wins.
// yarn.lock is listed last so every project that previously resolved to
// bun / pnpm / npm still resolves the same way.
const LOCKFILE_MANAGERS: ReadonlyArray<readonly [string, string]> = [
  ["bun.lock", "bun"],
  ["bun.lockb", "bun"],
  ["pnpm-lock.yaml", "pnpm"],
  ["package-lock.json", "npm"],
  ["yarn.lock", "yarn"],
];

// True for non-null, non-array objects — the only shape a manifest or a dependency map can have.
function isPlainObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

// A manifest section as a dependency map, or {} when it is absent or not an object.
function depMap(section: unknown): Record<string, string> {
  return isPlainObject(section) ? (section as Record<string, string>) : {};
}

/**
 * Detect the package manager and declared dependencies of the project at `root`.
 *
 * Returns `{ manager: "unknown", deps: {} }` when package.json is missing,
 * unreadable, not valid JSON, or valid JSON that is not an object (e.g. a file
 * containing just `null`, which parses fine but has no properties to read).
 * Otherwise the manager comes from the first known lockfile present ("node"
 * when none is), and `deps` merges `dependencies` with `devDependencies`,
 * the latter winning on a name clash.
 *
 * @param root - Project directory to inspect.
 */
export function detectDeps(root: string): DepProfile {
  const pkgPath = join(root, "package.json");
  if (!existsSync(pkgPath)) return { manager: "unknown", deps: {} };

  let pkg: unknown;
  try {
    pkg = JSON.parse(readFileSync(pkgPath, "utf8"));
  } catch {
    return { manager: "unknown", deps: {} };
  }
  if (!isPlainObject(pkg)) return { manager: "unknown", deps: {} };

  const hit = LOCKFILE_MANAGERS.find(([lockfile]) => existsSync(join(root, lockfile)));
  const manager = hit ? hit[1] : "node";

  return {
    manager,
    deps: { ...depMap(pkg.dependencies), ...depMap(pkg.devDependencies) },
  };
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import type { Finding, ContextPacket } from "../types";
|
|
4
|
+
import { estimateTokens } from "../pricing/table";
|
|
5
|
+
|
|
6
|
+
// Build the tight context packet handed to the model — the few lines around the
|
|
7
|
+
// finding, not the whole file. This is the core token-saving move of Diagnose.
|
|
8
|
+
//
|
|
9
|
+
// v1 slices a fixed line radius. v2 swaps this for fff
|
|
10
|
+
// (https://github.com/dmtrKovalenko/fff): fast, frecency-ranked retrieval of the
|
|
11
|
+
// *related* symbols/definitions, not just the physically-adjacent lines.
|
|
12
|
+
// Number of lines kept on each side of the finding's line.
const RADIUS = 15;

/**
 * Build the context packet for a finding: the lines within RADIUS of the
 * finding's line, joined with "\n", the 1-based line number the snippet
 * starts at, and a token estimate of the snippet.
 *
 * When the file cannot be read the packet is empty: blank snippet, the
 * finding's own line as `startLine`, and a zero token estimate.
 *
 * @param root - Directory the finding's file path is joined onto.
 * @param f - Finding to build context for; `f.line` is treated as 1-based.
 */
export function buildContext(root: string, f: Finding): ContextPacket {
  let lines: string[];
  try {
    const text = readFileSync(join(root, f.file), "utf8");
    lines = text.split("\n");
  } catch {
    return { snippet: "", startLine: f.line, tokensEstimate: 0 };
  }

  // Convert to a 0-based index, then clamp the window to the file's bounds.
  const anchor = f.line - 1;
  const from = Math.max(0, anchor - RADIUS);
  const to = Math.min(lines.length, anchor + RADIUS + 1);
  const snippet = lines.slice(from, to).join("\n");

  return {
    snippet,
    startLine: from + 1,
    tokensEstimate: estimateTokens(snippet),
  };
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import type { Finding, Fixability, DifficultyClass } from "../types";
|
|
2
|
+
|
|
3
|
+
// Partition: decide, per finding, whether it can be fixed on-device for $0, or
|
|
4
|
+
// must escalate to a model — and if so, how hard the fix is (which routes it to
|
|
5
|
+
// a model tier in Treat).
|
|
6
|
+
//
|
|
7
|
+
// v1 uses a small hand-curated map of TS error codes. v2 replaces the static
|
|
8
|
+
// maps with the generated-rule engine: a recurring needs-llm class gets
|
|
9
|
+
// synthesized into an autofix rule, after which it lands in AUTOFIX for free.
|
|
10
|
+
|
|
11
|
+
// Unused declarations — safely removable by a deterministic codemod. $0.
|
|
12
|
+
// Codes fixed on-device without a model.
const AUTOFIX = new Set(["TS6133", "TS6138", "TS6192", "TS6196", "TS6198"]);

// Codes that still go to a model but are classed as mechanical (missing import, typo, simple syntax).
const MECHANICAL = new Set(["TS2304", "TS2307", "TS2552", "TS1005", "TS1109", "TS1003"]);

/**
 * Assign a lane to every finding, in place, and return the same array.
 *
 * Findings whose `source` starts with "ast-grep:" keep the fixability they
 * arrived with and are only marked "mechanical". All others get both
 * fixability and difficulty from their rule code.
 *
 * @param findings - Findings to classify; the objects are mutated.
 * @returns The `findings` array that was passed in.
 */
export function partition(findings: Finding[]): Finding[] {
  findings.forEach((finding) => {
    if (finding.source.startsWith("ast-grep:")) {
      // Promoted-rule hit: leave its fixability alone, don't consult the TS-code sets.
      finding.difficulty = "mechanical";
      return;
    }
    const lane = classify(finding.rule);
    finding.fixability = lane.fixability;
    finding.difficulty = lane.difficulty;
  });
  return findings;
}

// Lane for a rule code: AUTOFIX → autofix/mechanical, MECHANICAL →
// needs-llm/mechanical, anything else → needs-llm/semantic.
function classify(rule: string): { fixability: Fixability; difficulty: DifficultyClass } {
  const local = AUTOFIX.has(rule);
  const fixability: Fixability = local ? "autofix" : "needs-llm";
  const difficulty: DifficultyClass = local || MECHANICAL.has(rule) ? "mechanical" : "semantic";
  return { fixability, difficulty };
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { tmpdir } from "node:os";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { getModelInfo, type LLMIntelConfigInput } from "@basisoasis/llm-intel";
|
|
4
|
+
import { setPrice } from "./table";
|
|
5
|
+
import { toOpenRouterId, perMillion } from "./normalize";
|
|
6
|
+
|
|
7
|
+
// Warm-up: resolve the given model ids against llm-intel (the OpenRouter catalog)
|
|
8
|
+
// once at command start and populate the price book. Keeps the hot path sync —
|
|
9
|
+
// cost() never awaits. Fully non-fatal: offline or unknown ids just fall through
|
|
10
|
+
// to the committed snapshot, so the read-only path keeps working with no network.
|
|
11
|
+
|
|
12
|
+
/** Outcome of a price warm-up: how many ids were priced live, and the source label to report. */
export interface PriceLoad {
  loaded: number;
  source: "llm-intel" | "snapshot";
}

// Options handed to every getModelInfo() call: OpenRouter as the provider,
// with a cache directory under the OS temp dir and a 24-hour TTL.
const OPTS: LLMIntelConfigInput = {
  provider: "openrouter",
  cacheDir: join(tmpdir(), "tokenclinic-llm-intel"),
  cacheTtl: 24 * 60 * 60 * 1000, // 24h
};

/**
 * Look up each distinct id through llm-intel and record its price in the
 * price book. Ids are resolved one after another; any lookup that throws, has
 * no pricing, or uses a non-token unit is skipped without failing the call.
 *
 * @param ids - Model ids to warm; duplicates are resolved once.
 * @returns The number of ids priced, and "llm-intel" if at least one was, else "snapshot".
 */
export async function loadPrices(ids: string[]): Promise<PriceLoad> {
  const distinct = Array.from(new Set(ids));
  let loaded = 0;

  for (const id of distinct) {
    try {
      const info = await getModelInfo(toOpenRouterId(id), OPTS);
      const pricing = info?.data?.pricing;
      if (pricing) {
        const inputPerM = perMillion(pricing.input.amount, pricing.input.unit);
        const outputPerM = perMillion(pricing.output.amount, pricing.output.unit);
        const usable = inputPerM !== null && outputPerM !== null;
        if (usable) {
          setPrice(id, { inputPerM, outputPerM });
          loaded += 1;
        }
      }
    } catch {
      // offline, rate-limited, or unknown id — snapshot covers it
    }
  }

  const source: PriceLoad["source"] = loaded > 0 ? "llm-intel" : "snapshot";
  return { loaded, source };
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
// Model-id normalization between TokenClinic's bare ids and llm-intel's
|
|
2
|
+
// OpenRouter-style `provider/model` ids — and unit conversion to $/1M tokens.
|
|
3
|
+
|
|
4
|
+
// Explicit bare id → OpenRouter id overrides. Held in a Map so that a model id
// taken from user data can never resolve to an inherited Object.prototype
// member (a plain-object lookup of "constructor" returns a function).
const BARE_TO_OPENROUTER: ReadonlyMap<string, string> = new Map([
  ["claude-opus-4-8", "anthropic/claude-opus-4-8"],
  ["claude-sonnet-4-6", "anthropic/claude-sonnet-4-6"],
  ["claude-haiku-4-5", "anthropic/claude-haiku-4-5"],
]);

/**
 * Convert a TokenClinic model id to an OpenRouter-style `provider/model` id.
 *
 * Ids that already contain "/" are returned untouched, so other providers
 * work without special-casing. A bare id uses its explicit override when one
 * exists and is otherwise prefixed with "anthropic/".
 *
 * @param id - Bare or provider-prefixed model id.
 * @returns Always a string of the form `provider/model`.
 */
export function toOpenRouterId(id: string): string {
  if (id.includes("/")) return id;
  return BARE_TO_OPENROUTER.get(id) ?? `anthropic/${id}`;
}
|
|
17
|
+
|
|
18
|
+
// llm-intel reports an amount + a unit; normalize everything to $/1M tokens.
|
|
19
|
+
// Non-token units (per_image / per_request) aren't token pricing → null.
|
|
20
|
+
/**
 * Normalize a catalog price (amount + unit) to USD per 1M tokens.
 *
 * - "free" is 0 regardless of what the amount field holds.
 * - Token units are scaled: per_million ×1, per_thousand ×1 000, per_token ×1 000 000.
 * - Any other unit (per_image, per_request, …) is not token pricing → null.
 * - A missing or non-numeric amount → null. `Number()` would turn `null`,
 *   `""`, `[]` and `false` into 0, i.e. invent a $0 price from an absent value.
 * - A negative amount → null; a price below zero would produce negative costs.
 *   NOTE(review): some catalogs appear to use negative sentinels for
 *   variable pricing — confirm against llm-intel's output.
 *
 * @param amount - number, bigint, numeric string, or an object whose string form is numeric.
 * @param unit - llm-intel pricing unit.
 * @returns USD per 1M tokens, or null when no token price can be derived.
 */
export function perMillion(amount: unknown, unit: string): number | null {
  if (unit === "free") return 0;

  const n = parseAmount(amount);
  if (n === null) return null;

  switch (unit) {
    case "per_million_tokens":
      return n;
    case "per_thousand_tokens":
      return n * 1_000;
    case "per_token":
      return n * 1_000_000;
    default:
      return null;
  }
}

// Parse an amount into a finite, non-negative number; null for anything else.
function parseAmount(amount: unknown): number | null {
  let n: number;
  if (typeof amount === "number") {
    n = amount;
  } else if (typeof amount === "bigint") {
    n = Number(amount);
  } else if (typeof amount === "string" || (typeof amount === "object" && amount !== null)) {
    // Objects are read through their string form (decimal-like wrappers).
    const text = String(amount).trim();
    if (text === "") return null;
    n = Number(text);
  } else {
    return null; // undefined, null, boolean, symbol, function
  }
  return Number.isFinite(n) && n >= 0 ? n : null;
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
// Pricing intelligence.
|
|
2
|
+
//
|
|
3
|
+
// A cost resolves in this order:
|
|
4
|
+
// 1. the live price book — populated at command start from llm-intel
|
|
5
|
+
// (the OpenRouter catalog; any provider's model prices for free)
|
|
6
|
+
// 2. a committed offline SNAPSHOT — so read-only `scan` never *requires*
|
|
7
|
+
// network and the routed Anthropic models are always priced
|
|
8
|
+
// 3. unknown → null (cost 0, surfaced separately; never a fabricated number)
|
|
9
|
+
//
|
|
10
|
+
// See ./llmIntel.ts for the warm-up and ./normalize.ts for id/unit handling.
|
|
11
|
+
|
|
12
|
+
/** Price of one model, in USD per 1M input / output tokens. */
export interface ModelPrice {
  inputPerM: number;
  outputPerM: number;
}

// Offline fallback (USD per 1M tokens). Overridden by llm-intel when reachable.
// A Map rather than an object literal: ids can come from user-supplied logs,
// and a plain-object lookup of "constructor" / "toString" returns an inherited
// member — which would make the model look priced and turn cost() into NaN.
const SNAPSHOT: ReadonlyMap<string, ModelPrice> = new Map([
  ["claude-haiku-4-5", { inputPerM: 1.0, outputPerM: 5.0 }],
  ["claude-sonnet-4-6", { inputPerM: 3.0, outputPerM: 15.0 }],
  ["claude-opus-4-8", { inputPerM: 5.0, outputPerM: 25.0 }],
]);

// Live price book; entries here take precedence over SNAPSHOT.
const book = new Map<string, ModelPrice>();

/** Record (or replace) the live price for `id`. */
export function setPrice(id: string, p: ModelPrice): void {
  book.set(id, p);
}

/** Drop every live price, leaving only the snapshot. */
export function resetPrices(): void {
  book.clear();
}

/**
 * Resolve a model's price: the live book first, then the snapshot.
 *
 * @returns The price, or null when the id is known to neither.
 */
export function priceOf(id: string): ModelPrice | null {
  return book.get(id) ?? SNAPSHOT.get(id) ?? null;
}

/** True when `priceOf(id)` would return a price. */
export function isPriced(id: string): boolean {
  return priceOf(id) !== null;
}

/**
 * Cost in USD of one call.
 *
 * @param model - Model id to price.
 * @param tokensIn - Input token count.
 * @param tokensOut - Output token count.
 * @returns 0 for an unknown model — callers surface that via isPriced()
 *   rather than this function inventing a price.
 */
export function cost(model: string, tokensIn: number, tokensOut: number): number {
  const p = priceOf(model);
  if (!p) return 0;
  return (tokensIn / 1e6) * p.inputPerM + (tokensOut / 1e6) * p.outputPerM;
}

// Rough token estimate for the read-only path: characters / 4, rounded up.
export const estimateTokens = (text: string) => Math.ceil(text.length / 4);
|