ultracost 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +53 -2
- package/NOTICE +16 -3
- package/README.md +62 -51
- package/bin/cli.js +514 -117
- package/docs/ESTIMATES.md +24 -0
- package/docs/PUBLISHING.md +37 -35
- package/docs/TESTING.md +4 -15
- package/docs/architecture.md +28 -0
- package/docs/policy.md +25 -2
- package/package.json +1 -1
- package/src/classify.js +125 -0
- package/src/cost.js +54 -0
- package/src/detect.js +93 -0
- package/src/estimate.js +18 -0
- package/src/guard.js +244 -166
- package/src/index.js +7 -1
- package/src/lexer.js +227 -0
- package/src/log.js +20 -13
- package/src/loop.js +143 -0
- package/src/paths.js +10 -0
- package/src/policy.js +14 -0
- package/src/render.js +211 -0
- package/src/rules.js +17 -5
- package/src/transcript.js +186 -0
- package/templates/hooks/reinject.mjs +21 -18
- package/templates/hooks/workflow-gate.mjs +51 -45
- package/templates/policy.default.json +15 -2
package/src/rules.js
CHANGED
|
@@ -59,19 +59,31 @@ e.g. \`agent(task, { model: 'sonnet', effort: 'low' })\` for a mechanical scan.
|
|
|
59
59
|
|
|
60
60
|
Before launching a dynamic workflow:
|
|
61
61
|
1. Draft the workflow script with per-stage \`model\` and \`effort\` set.
|
|
62
|
-
2. Write the draft to a temp file and
|
|
63
|
-
|
|
62
|
+
2. Write the draft to a temp file and estimate it: \`/ultracost:check <file>\` to verify
|
|
63
|
+
pins, then the cost estimate — run \`ultracost estimate <file>\`, or under the plugin
|
|
64
|
+
\`node "$CLAUDE_PLUGIN_ROOT/bin/cli.js" estimate <file>\` (no global \`ultracost\` bin
|
|
65
|
+
is required). It reports the agent count, model mix, and cost versus an
|
|
66
|
+
all-\`${policy.tiers[policy.default].model}\` baseline.
|
|
64
67
|
3. Show the estimate and use the AskUserQuestion tool to offer three options:
|
|
65
68
|
**Approve** (launch it), **Cancel** (do not launch), **Modify** (restructure to
|
|
66
69
|
cut cost — drop unneeded stages, move mechanical stages to a cheaper tier and
|
|
67
70
|
lower effort, reduce fan-out — then re-estimate and ask again).
|
|
68
|
-
4. Launch the workflow only after Approve.
|
|
71
|
+
4. Launch the workflow only after Approve. The \`PreToolUse\` cost gate also stops the
|
|
72
|
+
launch automatically with these numbers, so this holds even if the steps are skipped.
|
|
69
73
|
|
|
70
|
-
Verify any script with \`/ultracost:check\` or \`ultracost check
|
|
71
|
-
stages missing a model pin
|
|
74
|
+
Verify any script with \`/ultracost:check\` (the plugin command) or \`ultracost check
|
|
75
|
+
<script>\` on the CLI — it flags stages missing a model pin, a pin that mismatches the
|
|
76
|
+
work the prompt describes, and effort over the model's cap.
|
|
72
77
|
${MARKER_END}`;
|
|
73
78
|
}
|
|
74
79
|
|
|
80
|
+
/**
 * The routing block without its HTML marker lines.
 *
 * This is the single source for the SessionStart hook injection (reinject.mjs)
 * and the routing skill (skills/ultracost/SKILL.md), so neither can drift from
 * policy.json.
 *
 * @param {object} policy - Active policy, handed unchanged to compileRules().
 * @returns {string} The compiled block minus its first and last line, trimmed.
 */
export function routingGuidance(policy) {
  const compiledLines = compileRules(policy).split('\n');
  // The first and last lines are where the markers sit; keep only what is between.
  const inner = compiledLines.slice(1, -1);
  return inner.join('\n').trim();
}
|
|
86
|
+
|
|
75
87
|
export function replaceBlock(content, block) {
|
|
76
88
|
const re = new RegExp(`${MARKER_START}[\\s\\S]*?${MARKER_END}`);
|
|
77
89
|
if (!re.test(content)) return null;
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs';
import { homedir } from 'node:os';
import { basename, join, relative, sep } from 'node:path';
import { sumUsage } from './cost.js';
|
|
5
|
+
|
|
6
|
+
// Read Claude Code's local session transcripts (offline) so ultracost can reconcile
|
|
7
|
+
// its estimate against real token usage and learn from it. Clean-room reimplementation
|
|
8
|
+
// of the well-known parse+dedup contract: assistant lines carry message.usage; the
|
|
9
|
+
// same message can recur across files (resumed sessions, sidechain replays) so we dedup
|
|
10
|
+
// on message.id + requestId; dynamic-workflow agent() stages live in their own
|
|
11
|
+
// subagents/workflows/wf_<id>/agent-<aid>.jsonl files next to a journal.jsonl.
|
|
12
|
+
|
|
13
|
+
// Expand a leading "~" (alone, or followed by "/") to the user's home directory.
// Any other path, including "~name/...", is returned unchanged.
const expandTilde = (p) => {
  const isHomeRelative = p === '~' || p.startsWith('~/');
  if (!isHomeRelative) return p;
  return join(homedir(), p.slice(1));
};
|
|
14
|
+
|
|
15
|
+
/**
 * List every existing Claude Code `projects/` directory.
 *
 * When CLAUDE_CONFIG_DIR is set it is read as a comma-separated list; each entry
 * may be a config dir (containing `projects/`) or a `projects` dir itself.
 * Otherwise the XDG config location (`$XDG_CONFIG_HOME/claude`, falling back to
 * `~/.config/claude`) and `~/.claude` are checked.
 *
 * @param {object} [env=process.env] - Environment variables to consult.
 * @returns {string[]} Existing projects directories, duplicates removed, in
 *   discovery order.
 */
export function projectsDirs(env = process.env) {
  const found = [];

  // Accept `dir/projects` when it exists; otherwise accept `dir` itself if it is
  // an existing directory entry literally named "projects".
  const consider = (dir) => {
    const nested = join(dir, 'projects');
    if (existsSync(nested)) {
      found.push(nested);
      return;
    }
    if (basename(dir) === 'projects' && existsSync(dir)) found.push(dir);
  };

  const configured = env.CLAUDE_CONFIG_DIR;
  if (configured) {
    for (const raw of configured.split(',')) {
      const entry = raw.trim();
      if (entry) consider(expandTilde(entry));
    }
  } else {
    const xdgClaude = env.XDG_CONFIG_HOME
      ? join(env.XDG_CONFIG_HOME, 'claude')
      : join(homedir(), '.config', 'claude');
    consider(xdgClaude);
    consider(join(homedir(), '.claude'));
  }

  return Array.from(new Set(found));
}
|
|
31
|
+
|
|
32
|
+
/**
 * Recursively collect the files under `dir` whose full path satisfies `test`.
 *
 * Entries whose name starts with "." are skipped, and anything that cannot be
 * listed or stat'ed is silently ignored (best-effort scan).
 *
 * @param {string} dir - Directory to scan.
 * @param {(fullPath: string) => boolean} test - Predicate applied to non-directory entries.
 * @param {string[]} [out=[]] - Accumulator, shared across the recursion.
 * @returns {string[]} `out`, with every matching path appended in traversal order.
 */
function walk(dir, test, out = []) {
  let entries;
  try {
    entries = readdirSync(dir);
  } catch {
    return out; // missing or unreadable directory: nothing to add
  }

  const visible = entries.filter((entry) => !entry.startsWith('.'));
  for (const entry of visible) {
    const target = join(dir, entry);
    let info;
    try {
      info = statSync(target);
    } catch {
      continue; // entry vanished or is unreadable
    }
    if (info.isDirectory()) {
      walk(target, test, out);
      continue;
    }
    if (test(target)) out.push(target);
  }
  return out;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Turn one transcript line into a normalized usage record.
 *
 * @param {string} line - One JSONL line from a transcript file.
 * @returns {{id: ?string, requestId: ?string, model: ?string, usage: object,
 *   ts: ?string, isSidechain: boolean} | null} The record, or null when the line
 *   is not valid JSON, is flagged `isApiErrorMessage`, carries no `message.usage`,
 *   or has a non-assistant `message.role` without `type: 'assistant'`.
 */
export function parseUsageLine(line) {
  let record;
  try {
    record = JSON.parse(line);
  } catch {
    return null;
  }
  if (!record || record.isApiErrorMessage) return null;

  const message = record.message;
  if (!message || !message.usage) return null;

  // A line is rejected only when it names a role other than assistant AND the
  // envelope type is not assistant either.
  const namesOtherRole = Boolean(message.role) && message.role !== 'assistant';
  if (namesOtherRole && record.type !== 'assistant') return null;

  const { id, model, usage } = message;
  return {
    id: id || null,
    requestId: record.requestId || null,
    model: model || null,
    usage,
    ts: record.timestamp || null,
    isSidechain: Boolean(record.isSidechain),
  };
}
|
|
64
|
+
|
|
65
|
+
/**
 * Read one transcript file and return the usage records it contains.
 *
 * @param {string} file - Path to a .jsonl transcript.
 * @returns {object[]} One record per line that parseUsageLine() accepts, in file
 *   order; an empty array when the file cannot be read.
 */
function readUsage(file) {
  let contents;
  try {
    contents = readFileSync(file, 'utf8');
  } catch {
    return [];
  }
  return contents
    .split('\n')
    .filter((row) => row.trim()) // skip blank lines
    .map((row) => parseUsageLine(row))
    .filter((record) => record); // drop lines that carry no usage
}
|
|
76
|
+
|
|
77
|
+
/**
 * Remove repeated usage records, keyed on message id + requestId.
 *
 * Records without an id are always kept. When two records share a key, the
 * first one's position is retained but the copy with the most output tokens
 * wins (a sidechain/replay tie-break); on an equal count the earlier copy stays.
 *
 * @param {Iterable<object>} entries - Records shaped like parseUsageLine() output.
 * @returns {object[]} The de-duplicated records.
 */
export function dedupe(entries) {
  const slotByKey = new Map();
  const result = [];

  for (const entry of entries) {
    if (!entry.id) {
      result.push(entry);
      continue;
    }

    const key = `${entry.id}:${entry.requestId || ''}`;
    if (!slotByKey.has(key)) {
      slotByKey.set(key, result.length);
      result.push(entry);
      continue;
    }

    const slot = slotByKey.get(key);
    const incoming = entry.usage.output_tokens || 0;
    const current = result[slot].usage.output_tokens || 0;
    if (incoming > current) result[slot] = entry;
  }

  return result;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Classify a transcript file by its path: 'main', 'subagent', or 'workflow-stage'
 * (the ultracode dynamic-workflow agent() stage). Separation is by PATH, never by
 * sessionId (subagent files inherit the parent's sessionId).
 *
 * @param {string} file - Path of the .jsonl transcript.
 * @param {string} [projectsDir] - The projects directory the file was found under.
 *   When `file` lies inside it, segments are taken relative to it; otherwise the
 *   path is split as-is.
 * @returns {{kind: 'main', project: string, sessionId: string, file: string}
 *   | {kind: 'subagent', project: string, parentSessionId: string, agentId: string, file: string}
 *   | {kind: 'workflow-stage', project: string, parentSessionId: string, wfId: string,
 *      agentId: string, file: string}}
 */
export function classifyTranscriptFile(file, projectsDir) {
  // Strip the projects dir only on a separator boundary. This tolerates a
  // trailing separator on projectsDir (a fixed `length + 1` would eat the first
  // character of the project name) and does not mistake a sibling such as
  // `projects-old` for the projects dir itself.
  let rel = file;
  if (projectsDir) {
    const base = projectsDir.endsWith(sep) ? projectsDir : projectsDir + sep;
    if (file.startsWith(base)) rel = file.slice(base.length);
  }

  const parts = rel.split(sep);
  const project = parts[0];
  const sub = parts.indexOf('subagents');
  if (sub === -1) {
    return { kind: 'main', project, sessionId: basename(file, '.jsonl'), file };
  }

  // The segment just before `subagents` names the parent session.
  const parentSessionId = parts[sub - 1];
  const agentId = basename(file, '.jsonl').replace(/^agent-/, '');
  const wfId = parts[sub + 2] || '';
  if (parts[sub + 1] === 'workflows' && wfId.startsWith('wf_')) {
    return { kind: 'workflow-stage', project, parentSessionId, wfId, agentId, file };
  }
  return { kind: 'subagent', project, parentSessionId, agentId, file };
}
|
|
110
|
+
|
|
111
|
+
/**
 * Gather usage records from every transcript, tag each with its file
 * classification, and de-duplicate them globally.
 *
 * @param {object} [options]
 * @param {object} [options.env=process.env] - Environment used to locate projects dirs.
 * @param {?string} [options.root=null] - When set, scan only this directory
 *   instead of the discovered projects dirs.
 * @returns {object[]} Usage records merged with their classifyTranscriptFile()
 *   fields, after dedupe().
 */
export function readTranscripts({ env = process.env, root = null } = {}) {
  const roots = root ? [root] : projectsDirs(env);
  const isTranscript = (path) => path.endsWith('.jsonl');

  const records = roots.flatMap((dir) =>
    walk(dir, isTranscript).flatMap((file) => {
      const classification = classifyTranscriptFile(file, dir);
      // Classification fields are spread last, so they win on a name clash.
      return readUsage(file).map((entry) => ({ ...entry, ...classification }));
    })
  );

  return dedupe(records);
}
|
|
123
|
+
|
|
124
|
+
/**
 * Read a workflow journal and map each agentId to its key.
 *
 * An agent first seen without a key is recorded as null; any later line that
 * carries a key for that agent overwrites the entry. Blank and unparseable
 * lines are skipped.
 *
 * @param {string} file - Path to the journal .jsonl file.
 * @returns {Object<string, ?string>} agentId -> key (or null); empty when the
 *   file is missing or unreadable.
 */
function readJournal(file) {
  const map = {};
  if (!existsSync(file)) return map;

  let contents;
  try {
    contents = readFileSync(file, 'utf8');
  } catch {
    return map;
  }

  for (const row of contents.split('\n')) {
    if (!row.trim()) continue;

    let record;
    try {
      record = JSON.parse(row);
    } catch {
      continue;
    }
    if (!record || !record.agentId) continue;

    const id = record.agentId;
    if (record.key) {
      map[id] = record.key;
    } else if (!(id in map)) {
      map[id] = null;
    }
  }
  return map;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Every dynamic-workflow run on disk, newest first, with per-stage token sums.
 * This is what `reconcile` / the savings ledger compare against the estimate.
 *
 * A run is a `.../subagents/workflows/wf_<id>` directory holding at least one
 * `agent-*.jsonl` file with usage in it.
 *
 * @param {object} [options]
 * @param {object} [options.env=process.env] - Environment used to locate projects dirs.
 * @param {?string} [options.root=null] - When set, scan only this directory
 *   (treated as a projects dir) instead of the discovered ones.
 * @returns {Array<{wfId: string, dir: string, project: string,
 *   parentSessionId: string, stages: object[], mtime: number}>} Runs sorted by the
 *   workflow directory's mtime, descending. Each stage carries `agentId`,
 *   `stageKey` (from journal.jsonl, or null), `model` (from the stage's last
 *   usage record), `usage` (summed via sumUsage) and `lines` (record count).
 */
export function locateWorkflowRuns({ env = process.env, root = null } = {}) {
  const dirs = root ? [root] : projectsDirs(env);
  const runs = [];

  for (const dir of dirs) {
    // Workflow dir -> its path segments RELATIVE to the scanned dir. Segments
    // are taken relative to `dir` (as classifyTranscriptFile does), not searched
    // for in the absolute path. Otherwise an ancestor directory named
    // `projects` or `subagents`, or a `root` with no `projects` segment at all,
    // yields the wrong project / parent session.
    const wfDirs = new Map();
    walk(dir, (f) => {
      const rel = relative(dir, f).split(sep);
      const sub = rel.indexOf('subagents');
      if (sub !== -1 && rel[sub + 1] === 'workflows' && (rel[sub + 2] || '').startsWith('wf_')) {
        const wfParts = rel.slice(0, sub + 3);
        wfDirs.set(join(dir, ...wfParts), wfParts);
      }
      return false; // used only for its side effect; collect no files
    });

    for (const [wfDir, wfParts] of wfDirs) {
      let names;
      try {
        names = readdirSync(wfDir);
      } catch {
        continue;
      }

      const journal = readJournal(join(wfDir, 'journal.jsonl'));
      const stages = names
        .filter((f) => /^agent-.*\.jsonl$/.test(f))
        .map((f) => {
          const agentId = f.slice('agent-'.length, -'.jsonl'.length);
          const entries = dedupe(readUsage(join(wfDir, f)));
          return {
            agentId,
            stageKey: journal[agentId] || null,
            model: entries.length ? entries[entries.length - 1].model : null,
            usage: sumUsage(entries.map((e) => e.usage)),
            lines: entries.length
          };
        })
        .filter((s) => s.lines > 0);
      if (!stages.length) continue;

      let mtime = 0;
      try {
        mtime = statSync(wfDir).mtimeMs;
      } catch {
        /* ignore: keep mtime 0 so the run sorts last */
      }

      runs.push({
        wfId: basename(wfDir),
        dir: wfDir,
        project: wfParts[0],
        parentSessionId: wfParts[wfParts.indexOf('subagents') - 1],
        stages,
        mtime
      });
    }
  }

  return runs.sort((a, b) => b.mtime - a.mtime);
}
|
|
@@ -1,21 +1,15 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
// ultracost SessionStart hook. Injects the model-routing policy as context at the
|
|
3
3
|
// start of every session (and after compaction), so workflow authoring sees it
|
|
4
|
-
// without relying on the model choosing to open a skill.
|
|
5
|
-
//
|
|
4
|
+
// without relying on the model choosing to open a skill.
|
|
5
|
+
//
|
|
6
|
+
// The injected text is COMPILED from the active policy via src/rules.js — the single
|
|
7
|
+
// source of truth. It is no longer a hand-maintained copy, so it cannot drift from
|
|
8
|
+
// policy.json (or from the CLAUDE.md block and the routing skill). Pure node, reads
|
|
9
|
+
// the hook JSON from stdin, emits SessionStart additionalContext. No npm dependency.
|
|
6
10
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
Per-stage model: every agent() stage sets an explicit \`model\` in its options rather than inheriting the session model; haiku is not used.
|
|
10
|
-
- opus for coding and reasoning: writing/editing/refactoring/deleting code; debugging; designing APIs, schemas, or architecture; non-trivial tests; code review; security and performance analysis; planning; synthesis. The orchestrator/planner and the final consolidation stage are always opus.
|
|
11
|
-
- sonnet for pre-planned mechanical and support work: applying an already-decided edit; search, grep, and file discovery; collecting/listing/extracting; running tests and reporting; gathering or summarizing context for an opus stage.
|
|
12
|
-
When a stage is ambiguous, opus is the default.
|
|
13
|
-
|
|
14
|
-
Per-stage effort: also set \`effort\` per stage, choosing the lowest level that fits, bounded by model (sonnet up to high, opus up to xhigh): low = trivial deterministic work (listing/globbing, simple extraction, formatting); medium = light judgment on a small surface; high = standard coding/analysis; xhigh = hard cross-file reasoning, adversarial review, planning, final synthesis.
|
|
15
|
-
|
|
16
|
-
Pre-flight cost gate: before launching a workflow, draft the script with per-stage model and effort, write it to a temp file, run \`ultracost estimate <file>\` to get the agent count, model mix, and cost vs an all-opus baseline, then use the AskUserQuestion tool to offer three options — Approve (launch), Cancel (do not launch), or Modify (restructure to cut cost: drop unneeded stages, move mechanical stages to sonnet and lower effort, reduce fan-out; then re-estimate and ask again). Launch only after Approve.
|
|
17
|
-
|
|
18
|
-
Verify scripts with the /ultracost:check command or \`ultracost check <script>\`, which flags any agent() stage missing a model.`;
|
|
11
|
+
import { loadPolicy } from '../../src/policy.js';
|
|
12
|
+
import { routingGuidance } from '../../src/rules.js';
|
|
19
13
|
|
|
20
14
|
async function readStdin() {
|
|
21
15
|
if (process.stdin.isTTY) return '';
|
|
@@ -25,17 +19,26 @@ async function readStdin() {
|
|
|
25
19
|
return data;
|
|
26
20
|
}
|
|
27
21
|
|
|
28
|
-
// Only wired to SessionStart (all sources), so emit the policy unconditionally.
|
|
29
22
|
// Parsing stdin is best-effort; a missing/invalid payload still injects the policy.
|
|
23
|
+
try { await readStdin(); } catch {}
|
|
24
|
+
|
|
25
|
+
let context;
|
|
30
26
|
try {
|
|
31
|
-
|
|
32
|
-
|
|
27
|
+
const { policy } = loadPolicy();
|
|
28
|
+
context = routingGuidance(policy);
|
|
29
|
+
} catch {
|
|
30
|
+
// Fail open with a minimal reminder rather than injecting nothing.
|
|
31
|
+
context =
|
|
32
|
+
'ultracost: route every agent() stage explicitly — pin a per-stage model (opus for ' +
|
|
33
|
+
'coding/reasoning, sonnet for pre-planned mechanical and search work; never haiku) and ' +
|
|
34
|
+
'an effort level. Verify with /ultracost:check before launching a dynamic workflow.';
|
|
35
|
+
}
|
|
33
36
|
|
|
34
37
|
process.stdout.write(
|
|
35
38
|
JSON.stringify({
|
|
36
39
|
hookSpecificOutput: {
|
|
37
40
|
hookEventName: 'SessionStart',
|
|
38
|
-
additionalContext:
|
|
41
|
+
additionalContext: context
|
|
39
42
|
}
|
|
40
43
|
})
|
|
41
44
|
);
|
|
@@ -2,44 +2,39 @@
|
|
|
2
2
|
// ultracost deterministic cost gate — ON BY DEFAULT (PreToolUse, matcher "Workflow").
|
|
3
3
|
// The plugin registers this in hooks/hooks.json so EVERY dynamic-workflow launch
|
|
4
4
|
// pauses before it runs — it does not depend on the model choosing to ask. It reads
|
|
5
|
-
// the drafted script from tool_input.script, runs the static guard + cost estimate
|
|
6
|
-
//
|
|
7
|
-
//
|
|
5
|
+
// the drafted script from tool_input.script, runs the static guard + cost estimate
|
|
6
|
+
// (calibrated from your real usage when available), enforces the policy budget caps,
|
|
7
|
+
// and returns a permission decision with an aligned mini cost table up front, so an
|
|
8
|
+
// accidental all-Opus fan-out (or an over-budget launch) can't slip through.
|
|
8
9
|
//
|
|
9
10
|
// A PreToolUse hook runs in EVERY permission mode (bypass only auto-approves the
|
|
10
11
|
// "ask" path; a "deny" is honored regardless of mode). So the gate is mode-aware:
|
|
11
|
-
// it
|
|
12
|
-
// the modes where an "ask" can't pause.
|
|
12
|
+
// it hard-denies a problem workflow in the modes where an "ask" can't pause.
|
|
13
13
|
//
|
|
14
14
|
// Modes (env ULTRACOST_GATE):
|
|
15
|
-
// (unset) mode-aware default. Clean (all pinned) -> ask + estimate,
|
|
16
|
-
// Problem (unpinned/banned/inherit) -> ask +
|
|
17
|
-
// acceptEdits
|
|
18
|
-
//
|
|
19
|
-
// strict deny on ANY problem, in every mode; ask (with estimate) when all
|
|
20
|
-
// ask never escalate to deny — always ask (
|
|
21
|
-
// off disable entirely
|
|
22
|
-
// Auto Mode, CI), where an unanswered "ask" is denied (the gate fails closed).
|
|
15
|
+
// (unset) mode-aware default. Clean (all pinned, within budget) -> ask + estimate,
|
|
16
|
+
// every mode. Problem (unpinned/banned/inherit) -> ask + warning in default
|
|
17
|
+
// /acceptEdits/auto; DENY in bypassPermissions/dontAsk. Budget exceeded ->
|
|
18
|
+
// DENY in every mode (a hard cap).
|
|
19
|
+
// strict deny on ANY problem, in every mode; ask (with estimate) when all clean.
|
|
20
|
+
// ask never escalate to deny — always ask (opts out of budget + mode denies).
|
|
21
|
+
// off disable entirely (headless `claude -p`, Auto Mode, CI).
|
|
23
22
|
//
|
|
24
23
|
// Residual limitation: Claude Code currently skips PreToolUse hooks for subagents
|
|
25
|
-
// dispatched under bypassPermissions (anthropics/claude-code#43772)
|
|
26
|
-
// agent there can evade the gate. The top-level Workflow launch is still gated.
|
|
24
|
+
// dispatched under bypassPermissions (anthropics/claude-code#43772).
|
|
27
25
|
|
|
28
26
|
import { loadPolicy } from '../../src/policy.js';
|
|
29
27
|
import { estimateText } from '../../src/estimate.js';
|
|
30
28
|
import { analyze, CODES } from '../../src/guard.js';
|
|
29
|
+
import { applyCalibration, spentToday } from '../../src/loop.js';
|
|
31
30
|
|
|
32
31
|
const money = (x) => '$' + Number(x).toFixed(4);
|
|
33
32
|
const MODE = process.env.ULTRACOST_GATE;
|
|
34
|
-
// Modes where an "ask" decision won't actually pause the user, so a problem
|
|
35
|
-
// workflow must be denied instead to be enforced.
|
|
36
33
|
const ESCALATE_MODES = new Set(['bypassPermissions', 'dontAsk']);
|
|
37
34
|
|
|
38
|
-
//
|
|
39
|
-
//
|
|
40
|
-
//
|
|
41
|
-
// (anthropics/claude-code#24059) — without systemMessage the estimate would be
|
|
42
|
-
// computed but invisible. For "deny" the reason renders too; we set both regardless.
|
|
35
|
+
// systemMessage is the documented channel for surfacing text to the USER from a hook
|
|
36
|
+
// (hooks have no TTY); Claude Code does NOT render permissionDecisionReason for an
|
|
37
|
+
// "ask" (anthropics/claude-code#24059), so we send both.
|
|
43
38
|
function decide(decision, message) {
|
|
44
39
|
process.stdout.write(JSON.stringify({
|
|
45
40
|
systemMessage: message,
|
|
@@ -62,65 +57,76 @@ async function readStdin() {
|
|
|
62
57
|
return d;
|
|
63
58
|
}
|
|
64
59
|
|
|
65
|
-
// Explicit opt-out for automation / headless / CI.
|
|
66
60
|
if (MODE === 'off') process.exit(0);
|
|
67
61
|
|
|
68
62
|
let evt = {};
|
|
69
63
|
try {
|
|
70
64
|
evt = JSON.parse(await readStdin());
|
|
71
65
|
} catch {
|
|
72
|
-
process.exit(0);
|
|
66
|
+
process.exit(0);
|
|
73
67
|
}
|
|
74
68
|
|
|
75
|
-
// Only govern the Workflow tool; every other tool passes untouched.
|
|
76
69
|
if (evt?.tool_name !== 'Workflow') process.exit(0);
|
|
77
|
-
|
|
78
70
|
const permMode = evt?.permission_mode;
|
|
79
71
|
|
|
80
|
-
// A workflow IS launching: always pause. Show numbers when the script is readable.
|
|
81
72
|
const script = evt?.tool_input?.script;
|
|
82
73
|
if (typeof script !== 'string') {
|
|
83
74
|
ask('ultracost cost gate: a dynamic workflow is about to launch, but its script could not be read to estimate cost. Approve to launch, or deny and review.');
|
|
84
75
|
}
|
|
85
76
|
|
|
77
|
+
/**
 * Render the estimate as a multi-line cost table, which is far more scannable
 * than one dense line.
 *
 * @param {object} e - Estimate; reads `agents`, `modelMix`, `cost` and
 *   `assumptions.sessionModel`.
 * @returns {string} Three newline-joined rows: agent count, model mix, and the
 *   tiered cost versus the all-session-model baseline.
 */
function costTable(e) {
  const a = e.agents;

  // With fan-out groups the total is an assumption, so show how it was derived.
  let agents;
  if (a.fanoutGroups) {
    agents = `~${a.assumedTotal} (${a.known} fixed + ${a.fanoutGroups} fan-out x ~${a.assumedPerFanout})`;
  } else {
    agents = `${a.known}`;
  }

  const mixParts = [];
  for (const [model, count] of Object.entries(e.modelMix)) {
    mixParts.push(`${count}x ${model}`);
  }
  const mix = mixParts.join(', ') || 'none';

  const rows = [
    ` agents ${agents}`,
    ` model mix ${mix}`,
    ` tiered ${money(e.cost.tiered)} vs all-${e.assumptions.sessionModel} ${money(e.cost.baseline)} (save ${money(e.cost.savings)}, ${e.cost.savingsPct}%)`
  ];
  return rows.join('\n');
}
|
|
90
|
+
|
|
86
91
|
try {
|
|
87
92
|
const { policy } = loadPolicy();
|
|
88
|
-
const e = estimateText(script, policy);
|
|
93
|
+
const e = estimateText(script, applyCalibration(policy));
|
|
89
94
|
const { stages, findings } = analyze(script, policy);
|
|
90
95
|
|
|
91
96
|
const unpinned = findings.filter((f) => f.code === CODES.NOOPTS || f.code === CODES.MISSING).length;
|
|
92
97
|
const banned = findings.filter((f) => f.code === CODES.BANNED).length;
|
|
93
98
|
const inherit = findings.filter((f) => f.code === CODES.INHERIT).length;
|
|
99
|
+
const table = costTable(e);
|
|
94
100
|
|
|
95
|
-
|
|
96
|
-
const
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
const
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
101
|
+
// 1) Budget caps — a hard pre-flight stop in every mode (unless =ask opts out).
|
|
102
|
+
const budget = policy.budget || {};
|
|
103
|
+
const today = spentToday();
|
|
104
|
+
const overRun = budget.perRun != null && e.cost.tiered > budget.perRun;
|
|
105
|
+
const overDay = budget.perDay != null && today + e.cost.tiered > budget.perDay;
|
|
106
|
+
if ((overRun || overDay) && MODE !== 'ask') {
|
|
107
|
+
const why = overRun
|
|
108
|
+
? `est. ${money(e.cost.tiered)} exceeds budget.perRun ${money(budget.perRun)}`
|
|
109
|
+
: `today's spend ${money(today)} + est. ${money(e.cost.tiered)} exceeds budget.perDay ${money(budget.perDay)}`;
|
|
110
|
+
deny(`\u26a0 ultracost budget: ${why}.\nultracost estimate:\n${table}\nReduce the workflow (cheaper tiers, fewer stages, less fan-out) and relaunch, or raise the cap in policy.json.`);
|
|
111
|
+
}
|
|
104
112
|
|
|
113
|
+
// 2) Pinning problems.
|
|
105
114
|
const problems = [];
|
|
106
115
|
if (unpinned) problems.push(`${unpinned}/${stages} stage(s) NOT pinned -> will inherit ${e.assumptions.sessionModel}`);
|
|
107
116
|
if (banned) problems.push(`${banned} stage(s) pin a banned model`);
|
|
108
117
|
if (inherit) problems.push(`${inherit} stage(s) use model:'inherit'`);
|
|
109
118
|
|
|
110
119
|
if (problems.length) {
|
|
111
|
-
const head = `\u26a0 ultracost: ${problems.join('; ')}
|
|
112
|
-
// Hard-deny when forced (strict) or when the current mode wouldn't surface an
|
|
113
|
-
// ask anyway (bypassPermissions/dontAsk). ULTRACOST_GATE=ask opts out of the
|
|
114
|
-
// mode-aware escalation and always asks.
|
|
120
|
+
const head = `\u26a0 ultracost: ${problems.join('; ')}.`;
|
|
115
121
|
const hard = MODE === 'strict' || (MODE !== 'ask' && ESCALATE_MODES.has(permMode));
|
|
116
122
|
if (hard) {
|
|
117
|
-
deny(`${head}estimate
|
|
123
|
+
deny(`${head}\nultracost estimate:\n${table}\nPin every stage (opus for reasoning, sonnet for mechanical work) and relaunch.`);
|
|
118
124
|
}
|
|
119
|
-
ask(`${head}estimate
|
|
125
|
+
ask(`${head}\nultracost estimate:\n${table}\nDeny and ask me to pin every stage, or approve to run as-is.`);
|
|
120
126
|
}
|
|
121
127
|
|
|
122
|
-
|
|
128
|
+
// 3) Clean.
|
|
129
|
+
ask(`ultracost estimate:\n${table}\nApprove to launch, or deny and ask me to make it cheaper.`);
|
|
123
130
|
} catch {
|
|
124
|
-
// Estimator/policy failure must not silently let an unpriced fan-out through.
|
|
125
131
|
ask('ultracost cost gate: a dynamic workflow is about to launch (cost estimate unavailable). Approve to launch, or deny and review.');
|
|
126
132
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version":
|
|
2
|
+
"version": 2,
|
|
3
3
|
"neverUse": ["haiku"],
|
|
4
4
|
"allowInherit": false,
|
|
5
5
|
"default": "opus",
|
|
@@ -32,6 +32,18 @@
|
|
|
32
32
|
},
|
|
33
33
|
"maxByModel": { "sonnet": "high", "opus": "xhigh" }
|
|
34
34
|
},
|
|
35
|
+
"classify": {
|
|
36
|
+
"_note": "Extra keyword signals (merged with the built-in rubric) for the UC006 wrong-tier check and `ultracost explain`. The opening imperative verb of a prompt is weighted most.",
|
|
37
|
+
"keywords": {
|
|
38
|
+
"opus": ["architecture", "threat-model", "migrate", "redesign", "tradeoff"],
|
|
39
|
+
"sonnet": ["lint", "stub", "boilerplate", "transcribe", "tally"]
|
|
40
|
+
}
|
|
41
|
+
},
|
|
42
|
+
"budget": {
|
|
43
|
+
"_note": "Pre-flight caps enforced by the cost gate. null = no cap. perRun is per workflow launch; perDay sums the savings-ledger spend for the current day.",
|
|
44
|
+
"perRun": null,
|
|
45
|
+
"perDay": null
|
|
46
|
+
},
|
|
35
47
|
"pricing": {
|
|
36
48
|
"_unit": "USD per million tokens",
|
|
37
49
|
"_source": "https://platform.claude.com/docs/en/about-claude/pricing.md",
|
|
@@ -44,6 +56,7 @@
|
|
|
44
56
|
"estimation": {
|
|
45
57
|
"tokensPerStage": { "input": 2000, "output": 1200 },
|
|
46
58
|
"effortOutputMultiplier": { "low": 0.4, "medium": 1, "high": 1.8, "xhigh": 3, "max": 4 },
|
|
47
|
-
"assumedFanout": 5
|
|
59
|
+
"assumedFanout": 5,
|
|
60
|
+
"cacheMultipliers": { "cacheRead": 0.1, "cacheWrite": 1.25 }
|
|
48
61
|
}
|
|
49
62
|
}
|