ultracost 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/rules.js CHANGED
@@ -59,19 +59,31 @@ e.g. \`agent(task, { model: 'sonnet', effort: 'low' })\` for a mechanical scan.
59
59
 
60
60
  Before launching a dynamic workflow:
61
61
  1. Draft the workflow script with per-stage \`model\` and \`effort\` set.
62
- 2. Write the draft to a temp file and run \`ultracost estimate <file>\` to get the
63
- agent count, model mix, and cost versus an all-\`${policy.tiers[policy.default].model}\` baseline.
62
+ 2. Write the draft to a temp file and estimate it: \`/ultracost:check <file>\` to verify
63
+ pins, then the cost estimate run \`ultracost estimate <file>\`, or under the plugin
64
+ \`node "$CLAUDE_PLUGIN_ROOT/bin/cli.js" estimate <file>\` (no global \`ultracost\` bin
65
+ is required). It reports the agent count, model mix, and cost versus an
66
+ all-\`${policy.tiers[policy.default].model}\` baseline.
64
67
  3. Show the estimate and use the AskUserQuestion tool to offer three options:
65
68
  **Approve** (launch it), **Cancel** (do not launch), **Modify** (restructure to
66
69
  cut cost — drop unneeded stages, move mechanical stages to a cheaper tier and
67
70
  lower effort, reduce fan-out — then re-estimate and ask again).
68
- 4. Launch the workflow only after Approve.
71
+ 4. Launch the workflow only after Approve. The \`PreToolUse\` cost gate also stops the
72
+ launch automatically with these numbers, so this holds even if the steps are skipped.
69
73
 
70
- Verify any script with \`/ultracost:check\` or \`ultracost check <script>\` — it flags
71
- stages missing a model pin.
74
+ Verify any script with \`/ultracost:check\` (the plugin command) or \`ultracost check
75
+ <script>\` on the CLI — it flags stages missing a model pin, a pin that mismatches the
76
+ work the prompt describes, and effort over the model's cap.
72
77
  ${MARKER_END}`;
73
78
  }
74
79
 
80
/**
 * Routing block text with the surrounding HTML marker lines removed.
 *
 * Single source for the SessionStart hook injection (reinject.mjs) and the routing
 * skill (skills/ultracost/SKILL.md), so neither can drift from policy.json.
 *
 * @param {object} policy - Active policy, handed unchanged to compileRules().
 * @returns {string} compileRules() output without its first and last line, trimmed.
 */
export function routingGuidance(policy) {
  const lines = compileRules(policy).split('\n');
  const inner = lines.slice(1, -1); // drop the opening and closing marker lines
  return inner.join('\n').trim();
}
86
+
75
87
  export function replaceBlock(content, block) {
76
88
  const re = new RegExp(`${MARKER_START}[\\s\\S]*?${MARKER_END}`);
77
89
  if (!re.test(content)) return null;
@@ -0,0 +1,186 @@
1
+ import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs';
2
+ import { join, sep, basename } from 'node:path';
3
+ import { homedir } from 'node:os';
4
+ import { sumUsage } from './cost.js';
5
+
6
+ // Read Claude Code's local session transcripts (offline) so ultracost can reconcile
7
+ // its estimate against real token usage and learn from it. Clean-room reimplementation
8
+ // of the well-known parse+dedup contract: assistant lines carry message.usage; the
9
+ // same message can recur across files (resumed sessions, sidechain replays) so we dedup
10
+ // on message.id + requestId; dynamic-workflow agent() stages live in their own
11
+ // subagents/workflows/wf_<id>/agent-<aid>.jsonl files next to a journal.jsonl.
12
+
13
// Resolve a leading `~` (on its own, or followed by `/`) against the user's home
// directory; any other path is returned unchanged.
const expandTilde = (p) => {
  const isHomeRelative = p === '~' || p.startsWith('~/');
  return isHomeRelative ? join(homedir(), p.slice(1)) : p;
};
14
+
15
/**
 * Locate every Claude Code `projects/` directory that exists on disk.
 *
 * When CLAUDE_CONFIG_DIR is set it is the only source: a comma-separated list whose
 * entries are each either a config dir (containing `projects/`) or a directory named
 * `projects` itself; a leading `~` is expanded. Otherwise two locations are probed:
 * $XDG_CONFIG_HOME/claude (or ~/.config/claude when XDG_CONFIG_HOME is unset) and
 * ~/.claude.
 *
 * @param {object} [env=process.env] - Environment variables to read.
 * @returns {string[]} Existing projects directories in discovery order, de-duplicated.
 */
export function projectsDirs(env = process.env) {
  const found = new Set();

  // Accept `dir` if it contains `projects/`, or if it is itself an existing `projects` dir.
  const consider = (dir) => {
    const nested = join(dir, 'projects');
    if (existsSync(nested)) {
      found.add(nested);
    } else if (basename(dir) === 'projects' && existsSync(dir)) {
      found.add(dir);
    }
  };

  if (env.CLAUDE_CONFIG_DIR) {
    for (const raw of env.CLAUDE_CONFIG_DIR.split(',')) {
      const entry = raw.trim();
      if (entry) consider(expandTilde(entry));
    }
    return [...found];
  }

  const xdgClaude = env.XDG_CONFIG_HOME
    ? join(env.XDG_CONFIG_HOME, 'claude')
    : join(homedir(), '.config', 'claude');
  consider(xdgClaude);
  consider(join(homedir(), '.claude'));
  return [...found];
}
31
+
32
/**
 * Recursively collect the files below `dir` that `test` accepts.
 *
 * Entries whose name starts with `.` are skipped. A directory that cannot be listed,
 * or an entry that cannot be stat'ed, is ignored instead of aborting the walk.
 *
 * @param {string} dir - Directory to scan.
 * @param {(path: string) => boolean} test - Called with each non-directory path.
 * @param {string[]} [out=[]] - Accumulator shared across the recursion.
 * @returns {string[]} `out`, with every accepted path appended.
 */
function walk(dir, test, out = []) {
  let entries;
  try {
    entries = readdirSync(dir);
  } catch {
    return out; // missing or unreadable directory contributes nothing
  }

  entries
    .filter((entry) => !entry.startsWith('.'))
    .forEach((entry) => {
      const path = join(dir, entry);
      let info;
      try {
        info = statSync(path);
      } catch {
        return; // entry is inaccessible: skip it
      }
      if (info.isDirectory()) {
        walk(path, test, out);
      } else if (test(path)) {
        out.push(path);
      }
    });

  return out;
}
45
+
46
/**
 * Turn one transcript line into a normalized usage record.
 *
 * Returns null when the line is not valid JSON, is flagged `isApiErrorMessage`, has no
 * `message.usage`, or carries a `message.role` other than 'assistant' while the line's
 * own `type` is not 'assistant' either.
 *
 * @param {string} line - One raw JSONL line.
 * @returns {{id: ?string, requestId: ?string, model: ?string, usage: *, ts: ?string,
 *   isSidechain: boolean}|null} The record, with absent fields normalized to null.
 */
export function parseUsageLine(line) {
  let record;
  try {
    record = JSON.parse(line);
  } catch {
    return null;
  }
  if (!record || record.isApiErrorMessage) return null;

  const message = record.message;
  if (!message || !message.usage) return null;

  const hasOtherRole = message.role && message.role !== 'assistant';
  if (hasOtherRole && record.type !== 'assistant') return null;

  return {
    id: message.id || null,
    requestId: record.requestId || null,
    model: message.model || null,
    usage: message.usage,
    ts: record.timestamp || null,
    isSidechain: Boolean(record.isSidechain)
  };
}
64
+
65
/**
 * Read one transcript file and return every usage record it contains, in file order.
 * Blank lines and lines parseUsageLine() rejects are dropped; an unreadable file
 * yields an empty list.
 *
 * @param {string} file - Path to a JSONL transcript.
 * @returns {object[]} Records produced by parseUsageLine().
 */
function readUsage(file) {
  let text;
  try {
    text = readFileSync(file, 'utf8');
  } catch {
    return []; // unreadable transcript is treated as empty
  }
  return text
    .split('\n')
    .filter((line) => line.trim())
    .map((line) => parseUsageLine(line))
    .filter(Boolean);
}
76
+
77
/**
 * Collapse repeated usage records.
 *
 * Records are keyed on `id` + `requestId`. A record without an `id` is always kept.
 * When two records share a key, the slot of the first one is retained but filled with
 * whichever copy reports strictly more `usage.output_tokens` (a sidechain/replay
 * tie-break).
 *
 * @param {Iterable<object>} entries - Records shaped like parseUsageLine() output.
 * @returns {object[]} De-duplicated records in first-seen order.
 */
export function dedupe(entries) {
  const slotByKey = new Map();
  const result = [];
  const outputOf = (entry) => entry.usage.output_tokens || 0;

  for (const entry of entries) {
    if (!entry.id) {
      result.push(entry);
      continue;
    }
    const key = `${entry.id}:${entry.requestId || ''}`;
    if (!slotByKey.has(key)) {
      slotByKey.set(key, result.length);
      result.push(entry);
      continue;
    }
    const slot = slotByKey.get(key);
    if (outputOf(entry) > outputOf(result[slot])) result[slot] = entry;
  }
  return result;
}
91
+
92
/**
 * Classify a transcript file by its path: 'main', 'subagent', or 'workflow-stage'
 * (a dynamic-workflow agent() stage). Separation is by PATH, never by sessionId,
 * because subagent files inherit the parent's sessionId.
 *
 * The projects root is stripped only on a whole-segment match, so a sibling directory
 * that merely shares the prefix (e.g. `projects-old`) is left alone, and a root given
 * with a trailing separator does not eat the first character of the project name.
 *
 * @param {string} file - Path to a `.jsonl` transcript.
 * @param {string} [projectsDir] - Projects root the file was found under.
 * @returns {object} `{ kind, project, file }` plus `sessionId` for 'main',
 *   `parentSessionId` and `agentId` for 'subagent', and additionally `wfId` for
 *   'workflow-stage'.
 */
export function classifyTranscriptFile(file, projectsDir) {
  let rel = file;
  if (projectsDir) {
    const rootPrefix = projectsDir.endsWith(sep) ? projectsDir : `${projectsDir}${sep}`;
    if (file.startsWith(rootPrefix)) rel = file.slice(rootPrefix.length);
  }

  const parts = rel.split(sep);
  const project = parts[0];
  const sub = parts.indexOf('subagents');
  if (sub === -1) {
    return { kind: 'main', project, sessionId: basename(file, '.jsonl'), file };
  }

  // The segment just before `subagents` is the parent session's directory.
  const parentSessionId = parts[sub - 1];
  const agentId = basename(file, '.jsonl').replace(/^agent-/, '');
  const isWorkflowStage = parts[sub + 1] === 'workflows' && (parts[sub + 2] || '').startsWith('wf_');
  if (isWorkflowStage) {
    return { kind: 'workflow-stage', project, parentSessionId, wfId: parts[sub + 2], agentId, file };
  }
  return { kind: 'subagent', project, parentSessionId, agentId, file };
}
110
+
111
/**
 * Gather the usage records from every `.jsonl` transcript, tag each with its file's
 * classification, and de-duplicate across all files.
 *
 * @param {object} [options]
 * @param {object} [options.env=process.env] - Environment used to find projects dirs.
 * @param {?string} [options.root=null] - When set, scan only this directory.
 * @returns {object[]} Usage records merged with classifyTranscriptFile() fields.
 */
export function readTranscripts({ env = process.env, root = null } = {}) {
  const roots = root ? [root] : projectsDirs(env);
  const isTranscript = (path) => path.endsWith('.jsonl');

  const records = roots.flatMap((projectsRoot) =>
    walk(projectsRoot, isTranscript).flatMap((file) => {
      const origin = classifyTranscriptFile(file, projectsRoot);
      return readUsage(file).map((entry) => ({ ...entry, ...origin }));
    })
  );
  return dedupe(records);
}
123
+
124
/**
 * Read a workflow `journal.jsonl` into an `agentId -> key` lookup.
 *
 * For each agentId the last line carrying a truthy `key` wins; an agentId that only
 * appears without a key maps to null. Unparseable lines are skipped, and a missing or
 * unreadable file yields an empty lookup.
 *
 * The lookup has no prototype, so agent ids that happen to match Object.prototype
 * members (`constructor`, `toString`, ...) are recorded and looked up like any other
 * id instead of resolving to inherited functions.
 *
 * @param {string} file - Path to the journal file.
 * @returns {Object<string, ?string>} Prototype-less map of agentId to key.
 */
function readJournal(file) {
  const map = Object.create(null);
  let text;
  try {
    text = readFileSync(file, 'utf8');
  } catch {
    return map; // missing or unreadable journal: nothing to map
  }
  for (const line of text.split('\n')) {
    if (!line.trim()) continue;
    let j;
    try {
      j = JSON.parse(line);
    } catch {
      continue;
    }
    if (!j || !j.agentId) continue;
    if (j.key) {
      map[j.agentId] = j.key;
    } else if (!(j.agentId in map)) {
      map[j.agentId] = null;
    }
  }
  return map;
}
137
+
138
/**
 * Find every dynamic-workflow run on disk, newest first (by the wf_ directory's
 * mtime), with per-stage token sums. This is what `reconcile` / the savings ledger
 * compare against the estimate.
 *
 * A run is a `.../subagents/workflows/wf_<id>/` directory containing at least one
 * `agent-*.jsonl` file with usage records. Stage keys come from the sibling
 * `journal.jsonl` when present.
 *
 * `project` is derived relative to the directory being scanned, so it is also correct
 * when `root` points at a directory whose path has no `projects` segment; the previous
 * first-`projects`-segment lookup is kept only as a fallback.
 *
 * @param {object} [options]
 * @param {object} [options.env=process.env] - Environment used to find projects dirs.
 * @param {?string} [options.root=null] - When set, scan only this directory.
 * @returns {Array<{wfId: string, dir: string, project: string, parentSessionId: string,
 *   stages: object[], mtime: number}>} Runs sorted by descending mtime.
 */
export function locateWorkflowRuns({ env = process.env, root = null } = {}) {
  const dirs = root ? [root] : projectsDirs(env);
  const runs = [];
  for (const dir of dirs) {
    const rootPrefix = dir.endsWith(sep) ? dir : `${dir}${sep}`;

    // walk() is used purely for traversal: the predicate records each wf_ directory it
    // sees a file under and accepts nothing.
    const wfDirs = new Set();
    walk(dir, (f) => {
      const p = f.split(sep);
      const sub = p.indexOf('subagents');
      if (sub !== -1 && p[sub + 1] === 'workflows' && (p[sub + 2] || '').startsWith('wf_')) {
        wfDirs.add(p.slice(0, sub + 3).join(sep));
      }
      return false;
    });

    for (const wfDir of wfDirs) {
      let names;
      try {
        names = readdirSync(wfDir);
      } catch {
        continue;
      }
      const journal = readJournal(join(wfDir, 'journal.jsonl'));
      const stages = names
        .filter((f) => /^agent-.*\.jsonl$/.test(f))
        .map((f) => {
          const agentId = f.slice('agent-'.length, -'.jsonl'.length);
          const entries = dedupe(readUsage(join(wfDir, f)));
          return {
            agentId,
            stageKey: journal[agentId] || null,
            // The stage's model is taken from its last usage record.
            model: entries.length ? entries[entries.length - 1].model : null,
            usage: sumUsage(entries.map((e) => e.usage)),
            lines: entries.length
          };
        })
        .filter((s) => s.lines > 0);
      if (!stages.length) continue;

      const parts = wfDir.split(sep);
      const project = wfDir.startsWith(rootPrefix)
        ? wfDir.slice(rootPrefix.length).split(sep)[0]
        : parts[parts.indexOf('projects') + 1];
      let mtime = 0;
      try {
        mtime = statSync(wfDir).mtimeMs;
      } catch {
        /* keep 0 so the run sorts last */
      }
      runs.push({
        wfId: basename(wfDir),
        dir: wfDir,
        project,
        parentSessionId: parts[parts.indexOf('subagents') - 1],
        stages,
        mtime
      });
    }
  }
  return runs.sort((a, b) => b.mtime - a.mtime);
}
@@ -1,21 +1,15 @@
1
1
  #!/usr/bin/env node
2
2
  // ultracost SessionStart hook. Injects the model-routing policy as context at the
3
3
  // start of every session (and after compaction), so workflow authoring sees it
4
- // without relying on the model choosing to open a skill. Pure node, reads the hook
5
- // JSON from stdin, emits SessionStart additionalContext. No bash or jq dependency.
4
+ // without relying on the model choosing to open a skill.
5
+ //
6
+ // The injected text is COMPILED from the active policy via src/rules.js — the single
7
+ // source of truth. It is no longer a hand-maintained copy, so it cannot drift from
8
+ // policy.json (or from the CLAUDE.md block and the routing skill). Pure node, reads
9
+ // the hook JSON from stdin, emits SessionStart additionalContext. No npm dependency.
6
10
 
7
- const POLICY = `This project follows the ultracost model-routing and cost policy for Claude Code dynamic workflows (ultracode).
8
-
9
- Per-stage model: every agent() stage sets an explicit \`model\` in its options rather than inheriting the session model; haiku is not used.
10
- - opus for coding and reasoning: writing/editing/refactoring/deleting code; debugging; designing APIs, schemas, or architecture; non-trivial tests; code review; security and performance analysis; planning; synthesis. The orchestrator/planner and the final consolidation stage are always opus.
11
- - sonnet for pre-planned mechanical and support work: applying an already-decided edit; search, grep, and file discovery; collecting/listing/extracting; running tests and reporting; gathering or summarizing context for an opus stage.
12
- When a stage is ambiguous, opus is the default.
13
-
14
- Per-stage effort: also set \`effort\` per stage, choosing the lowest level that fits, bounded by model (sonnet up to high, opus up to xhigh): low = trivial deterministic work (listing/globbing, simple extraction, formatting); medium = light judgment on a small surface; high = standard coding/analysis; xhigh = hard cross-file reasoning, adversarial review, planning, final synthesis.
15
-
16
- Pre-flight cost gate: before launching a workflow, draft the script with per-stage model and effort, write it to a temp file, run \`ultracost estimate <file>\` to get the agent count, model mix, and cost vs an all-opus baseline, then use the AskUserQuestion tool to offer three options — Approve (launch), Cancel (do not launch), or Modify (restructure to cut cost: drop unneeded stages, move mechanical stages to sonnet and lower effort, reduce fan-out; then re-estimate and ask again). Launch only after Approve.
17
-
18
- Verify scripts with the /ultracost:check command or \`ultracost check <script>\`, which flags any agent() stage missing a model.`;
11
+ import { loadPolicy } from '../../src/policy.js';
12
+ import { routingGuidance } from '../../src/rules.js';
19
13
 
20
14
  async function readStdin() {
21
15
  if (process.stdin.isTTY) return '';
@@ -25,17 +19,26 @@ async function readStdin() {
25
19
  return data;
26
20
  }
27
21
 
28
- // Only wired to SessionStart (all sources), so emit the policy unconditionally.
29
22
// Drain stdin on a best-effort basis; a missing/invalid payload still injects the policy.
try {
  await readStdin();
} catch {}

// Minimal reminder used when the policy cannot be loaded or compiled, so the hook
// fails open rather than injecting nothing.
const FALLBACK_CONTEXT = [
  'ultracost: route every agent() stage explicitly — pin a per-stage model (opus for ',
  'coding/reasoning, sonnet for pre-planned mechanical and search work; never haiku) and ',
  'an effort level. Verify with /ultracost:check before launching a dynamic workflow.'
].join('');

// Compile the routing guidance from the active policy, or fall back to the reminder.
function buildContext() {
  try {
    const { policy } = loadPolicy();
    return routingGuidance(policy);
  } catch {
    return FALLBACK_CONTEXT;
  }
}

const hookOutput = {
  hookSpecificOutput: {
    hookEventName: 'SessionStart',
    additionalContext: buildContext()
  }
};
process.stdout.write(JSON.stringify(hookOutput));
@@ -2,44 +2,39 @@
2
2
  // ultracost deterministic cost gate — ON BY DEFAULT (PreToolUse, matcher "Workflow").
3
3
  // The plugin registers this in hooks/hooks.json so EVERY dynamic-workflow launch
4
4
  // pauses before it runs — it does not depend on the model choosing to ask. It reads
5
- // the drafted script from tool_input.script, runs the static guard + cost estimate,
6
- // and returns a permission decision with the numbers AND any unpinned-stage warning
7
- // up front, so an accidental all-Opus fan-out can't slip through.
5
+ // the drafted script from tool_input.script, runs the static guard + cost estimate
6
+ // (calibrated from your real usage when available), enforces the policy budget caps,
7
+ // and returns a permission decision with an aligned mini cost table up front, so an
8
+ // accidental all-Opus fan-out (or an over-budget launch) can't slip through.
8
9
  //
9
10
  // A PreToolUse hook runs in EVERY permission mode (bypass only auto-approves the
10
11
  // "ask" path; a "deny" is honored regardless of mode). So the gate is mode-aware:
11
- // it reads `permission_mode` from the event and hard-denies a problem workflow in
12
- // the modes where an "ask" can't pause.
12
+ // it hard-denies a problem workflow in the modes where an "ask" can't pause.
13
13
  //
14
14
  // Modes (env ULTRACOST_GATE):
15
- // (unset) mode-aware default. Clean (all pinned) -> ask + estimate, every mode.
16
- // Problem (unpinned/banned/inherit) -> ask + warning in default /
17
- // acceptEdits / auto (an ask surfaces there); DENY in bypassPermissions /
18
- // dontAsk (an ask is auto-approved/won't pause there, so we block instead).
19
- // strict deny on ANY problem, in every mode; ask (with estimate) when all pinned.
20
- // ask never escalate to deny — always ask (opt out of the mode-aware deny).
21
- // off disable entirely — for non-interactive runs (headless `claude -p`,
22
- // Auto Mode, CI), where an unanswered "ask" is denied (the gate fails closed).
15
+ // (unset) mode-aware default. Clean (all pinned, within budget) -> ask + estimate,
16
+ // every mode. Problem (unpinned/banned/inherit) -> ask + warning in default
17
+ // /acceptEdits/auto; DENY in bypassPermissions/dontAsk. Budget exceeded ->
18
+ // DENY in every mode (a hard cap).
19
+ // strict deny on ANY problem, in every mode; ask (with estimate) when all clean.
20
+ // ask never escalate to deny — always ask (opts out of budget + mode denies).
21
+ // off disable entirely — for non-interactive runs (headless `claude -p`, Auto Mode, CI), where an unanswered "ask" is denied.
23
22
  //
24
23
  // Residual limitation: Claude Code currently skips PreToolUse hooks for subagents
25
- // dispatched under bypassPermissions (anthropics/claude-code#43772), so a nested
26
- // agent there can evade the gate. The top-level Workflow launch is still gated.
24
+ // dispatched under bypassPermissions (anthropics/claude-code#43772).
27
25
 
28
26
  import { loadPolicy } from '../../src/policy.js';
29
27
  import { estimateText } from '../../src/estimate.js';
30
28
  import { analyze, CODES } from '../../src/guard.js';
29
+ import { applyCalibration, spentToday } from '../../src/loop.js';
31
30
 
32
31
  const money = (x) => '$' + Number(x).toFixed(4);
33
32
  const MODE = process.env.ULTRACOST_GATE;
34
- // Modes where an "ask" decision won't actually pause the user, so a problem
35
- // workflow must be denied instead to be enforced.
36
33
  const ESCALATE_MODES = new Set(['bypassPermissions', 'dontAsk']);
37
34
 
38
- // `systemMessage` is the documented channel for surfacing text to the USER from a
39
- // hook (hooks have no TTY). We send it alongside permissionDecisionReason because
40
- // Claude Code does NOT render the reason for an "ask" decision in the TUI
41
- // (anthropics/claude-code#24059) — without systemMessage the estimate would be
42
- // computed but invisible. For "deny" the reason renders too; we set both regardless.
35
+ // systemMessage is the documented channel for surfacing text to the USER from a hook
36
+ // (hooks have no TTY); Claude Code does NOT render permissionDecisionReason for an
37
+ // "ask" (anthropics/claude-code#24059), so we send both.
43
38
  function decide(decision, message) {
44
39
  process.stdout.write(JSON.stringify({
45
40
  systemMessage: message,
@@ -62,65 +57,76 @@ async function readStdin() {
62
57
  return d;
63
58
  }
64
59
 
65
- // Explicit opt-out for automation / headless / CI.
66
60
  if (MODE === 'off') process.exit(0);
67
61
 
68
62
  let evt = {};
69
63
  try {
70
64
  evt = JSON.parse(await readStdin());
71
65
  } catch {
72
- process.exit(0); // can't parse the event -> stay out of the way
66
+ process.exit(0);
73
67
  }
74
68
 
75
- // Only govern the Workflow tool; every other tool passes untouched.
76
69
  if (evt?.tool_name !== 'Workflow') process.exit(0);
77
-
78
70
  const permMode = evt?.permission_mode;
79
71
 
80
- // A workflow IS launching: always pause. Show numbers when the script is readable.
81
72
  const script = evt?.tool_input?.script;
82
73
  if (typeof script !== 'string') {
83
74
  ask('ultracost cost gate: a dynamic workflow is about to launch, but its script could not be read to estimate cost. Approve to launch, or deny and review.');
84
75
  }
85
76
 
77
/**
 * Render an estimate as a multi-line cost summary (agents, model mix, tiered cost
 * versus the all-session-model baseline) — far more scannable than one dense line.
 *
 * @param {object} e - Estimate; reads `agents`, `modelMix`, `cost` and
 *   `assumptions.sessionModel`.
 * @returns {string} Three lines joined with '\n'.
 */
function costTable(e) {
  const { agents: a, modelMix, cost, assumptions } = e;

  // With fan-out groups the total is an assumption, so show how it was derived.
  let agentSummary = `${a.known}`;
  if (a.fanoutGroups) {
    agentSummary = `~${a.assumedTotal} (${a.known} fixed + ${a.fanoutGroups} fan-out x ~${a.assumedPerFanout})`;
  }

  const mixParts = Object.entries(modelMix).map(([model, count]) => `${count}x ${model}`);
  const mix = mixParts.join(', ') || 'none';

  const rows = [
    ` agents ${agentSummary}`,
    ` model mix ${mix}`,
    ` tiered ${money(cost.tiered)} vs all-${assumptions.sessionModel} ${money(cost.baseline)} (save ${money(cost.savings)}, ${cost.savingsPct}%)`
  ];
  return rows.join('\n');
}
90
+
86
91
  try {
87
92
  const { policy } = loadPolicy();
88
- const e = estimateText(script, policy);
93
+ const e = estimateText(script, applyCalibration(policy));
89
94
  const { stages, findings } = analyze(script, policy);
90
95
 
91
96
  const unpinned = findings.filter((f) => f.code === CODES.NOOPTS || f.code === CODES.MISSING).length;
92
97
  const banned = findings.filter((f) => f.code === CODES.BANNED).length;
93
98
  const inherit = findings.filter((f) => f.code === CODES.INHERIT).length;
99
+ const table = costTable(e);
94
100
 
95
- const a = e.agents;
96
- const agents = a.fanoutGroups
97
- ? `~${a.assumedTotal} (${a.known} fixed + ${a.fanoutGroups} fan-out x ~${a.assumedPerFanout})`
98
- : `${a.known}`;
99
- const mix = Object.entries(e.modelMix).map(([k, v]) => `${v}x ${k}`).join(', ') || 'none';
100
- const estLine =
101
- `${agents} agents; model mix ${mix}; ` +
102
- `est. ${money(e.cost.tiered)} vs all-${e.assumptions.sessionModel} baseline ${money(e.cost.baseline)} ` +
103
- `(save ${money(e.cost.savings)}, ${e.cost.savingsPct}%).`;
101
+ // 1) Budget caps — a hard pre-flight stop in every mode (unless =ask opts out).
102
+ const budget = policy.budget || {};
103
+ const today = spentToday();
104
+ const overRun = budget.perRun != null && e.cost.tiered > budget.perRun;
105
+ const overDay = budget.perDay != null && today + e.cost.tiered > budget.perDay;
106
+ if ((overRun || overDay) && MODE !== 'ask') {
107
+ const why = overRun
108
+ ? `est. ${money(e.cost.tiered)} exceeds budget.perRun ${money(budget.perRun)}`
109
+ : `today's spend ${money(today)} + est. ${money(e.cost.tiered)} exceeds budget.perDay ${money(budget.perDay)}`;
110
+ deny(`\u26a0 ultracost budget: ${why}.\nultracost estimate:\n${table}\nReduce the workflow (cheaper tiers, fewer stages, less fan-out) and relaunch, or raise the cap in policy.json.`);
111
+ }
104
112
 
113
+ // 2) Pinning problems.
105
114
  const problems = [];
106
115
  if (unpinned) problems.push(`${unpinned}/${stages} stage(s) NOT pinned -> will inherit ${e.assumptions.sessionModel}`);
107
116
  if (banned) problems.push(`${banned} stage(s) pin a banned model`);
108
117
  if (inherit) problems.push(`${inherit} stage(s) use model:'inherit'`);
109
118
 
110
119
  if (problems.length) {
111
- const head = `\u26a0 ultracost: ${problems.join('; ')}. `;
112
- // Hard-deny when forced (strict) or when the current mode wouldn't surface an
113
- // ask anyway (bypassPermissions/dontAsk). ULTRACOST_GATE=ask opts out of the
114
- // mode-aware escalation and always asks.
120
+ const head = `\u26a0 ultracost: ${problems.join('; ')}.`;
115
121
  const hard = MODE === 'strict' || (MODE !== 'ask' && ESCALATE_MODES.has(permMode));
116
122
  if (hard) {
117
- deny(`${head}estimate: ${estLine} Pin every stage (opus for reasoning, sonnet for mechanical work) and relaunch.`);
123
+ deny(`${head}\nultracost estimate:\n${table}\nPin every stage (opus for reasoning, sonnet for mechanical work) and relaunch.`);
118
124
  }
119
- ask(`${head}estimate: ${estLine} Deny and ask me to pin every stage, or approve to run as-is.`);
125
+ ask(`${head}\nultracost estimate:\n${table}\nDeny and ask me to pin every stage, or approve to run as-is.`);
120
126
  }
121
127
 
122
- ask(`ultracost estimate: ${estLine} Approve to launch, or deny and ask me to make it cheaper.`);
128
+ // 3) Clean.
129
+ ask(`ultracost estimate:\n${table}\nApprove to launch, or deny and ask me to make it cheaper.`);
123
130
  } catch {
124
- // Estimator/policy failure must not silently let an unpriced fan-out through.
125
131
  ask('ultracost cost gate: a dynamic workflow is about to launch (cost estimate unavailable). Approve to launch, or deny and review.');
126
132
  }
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": 1,
2
+ "version": 2,
3
3
  "neverUse": ["haiku"],
4
4
  "allowInherit": false,
5
5
  "default": "opus",
@@ -32,6 +32,18 @@
32
32
  },
33
33
  "maxByModel": { "sonnet": "high", "opus": "xhigh" }
34
34
  },
35
+ "classify": {
36
+ "_note": "Extra keyword signals (merged with the built-in rubric) for the UC006 wrong-tier check and `ultracost explain`. The opening imperative verb of a prompt is weighted most.",
37
+ "keywords": {
38
+ "opus": ["architecture", "threat-model", "migrate", "redesign", "tradeoff"],
39
+ "sonnet": ["lint", "stub", "boilerplate", "transcribe", "tally"]
40
+ }
41
+ },
42
+ "budget": {
43
+ "_note": "Pre-flight caps enforced by the cost gate. null = no cap. perRun is per workflow launch; perDay sums the savings-ledger spend for the current day.",
44
+ "perRun": null,
45
+ "perDay": null
46
+ },
35
47
  "pricing": {
36
48
  "_unit": "USD per million tokens",
37
49
  "_source": "https://platform.claude.com/docs/en/about-claude/pricing.md",
@@ -44,6 +56,7 @@
44
56
  "estimation": {
45
57
  "tokensPerStage": { "input": 2000, "output": 1200 },
46
58
  "effortOutputMultiplier": { "low": 0.4, "medium": 1, "high": 1.8, "xhigh": 3, "max": 4 },
47
- "assumedFanout": 5
59
+ "assumedFanout": 5,
60
+ "cacheMultipliers": { "cacheRead": 0.1, "cacheWrite": 1.25 }
48
61
  }
49
62
  }