ultracost 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +81 -0
- package/LICENSE +21 -0
- package/NOTICE +18 -0
- package/README.md +306 -0
- package/bin/cli.js +264 -0
- package/docs/ESTIMATES.md +191 -0
- package/docs/PUBLISHING.md +164 -0
- package/docs/TESTING.md +260 -0
- package/docs/architecture.md +166 -0
- package/docs/policy.md +42 -0
- package/docs/ultracode.md +37 -0
- package/package.json +52 -0
- package/src/estimate.js +101 -0
- package/src/guard.js +300 -0
- package/src/index.js +7 -0
- package/src/install.js +113 -0
- package/src/log.js +18 -0
- package/src/paths.js +27 -0
- package/src/policy.js +80 -0
- package/src/pricing.js +82 -0
- package/src/rules.js +84 -0
- package/templates/hooks/reinject.mjs +41 -0
- package/templates/hooks/workflow-gate.mjs +126 -0
- package/templates/policy.default.json +49 -0
package/src/rules.js
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import { MARKER_START, MARKER_END } from './paths.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Render the policy data into the managed CLAUDE.md block so the prose can't
 * drift from the policy it describes.
 *
 * @param {object} policy - Parsed ultracost policy. Reads `tiers`, `default`,
 *   `tieBreaker`, and the optional `rules`, `neverUse`, `alwaysOpus`,
 *   `effort.byComplexity` and `effort.maxByModel` entries.
 * @returns {string} Markdown wrapped in MARKER_START / MARKER_END.
 * @throws {TypeError} When a rule, or `policy.default`, names a tier that is
 *   not defined in `policy.tiers`.
 */
export function compileRules(policy) {
  const tiers = policy.tiers ?? {};

  // Name the offending tier instead of failing with an opaque
  // "cannot read properties of undefined".
  const tierOf = (name) => {
    const tier = tiers[name];
    if (!tier) throw new TypeError(`ultracost policy: unknown tier "${name}"`);
    return tier;
  };

  const tierLine = (name) => {
    const t = tierOf(name);
    const effort = t.effort ? ` @ \`${t.effort}\`` : '';
    return `\`${t.model}\`${effort}`;
  };

  // The default tier's model is quoted in three places; resolve it once.
  const defaultModel = tierOf(policy.default).model;

  // Optional lists default to empty, matching how `effort` is already treated.
  const ruleLines = (policy.rules ?? [])
    .map((r) => `- **${tierLine(r.tier)}** — ${r.label ? `${r.label}: ` : ''}${r.when}`)
    .join('\n');

  const never = (policy.neverUse ?? []).map((m) => `\`${m}\``).join(', ');

  const pinnedStages = policy.alwaysOpus ?? [];
  const alwaysOpus = pinnedStages.length
    ? `\n- These stages are always \`${defaultModel}\`: ${pinnedStages.map((s) => `\`${s}\``).join(', ')}.`
    : '';

  const effortLines = Object.entries(policy.effort?.byComplexity || {})
    .map(([lvl, desc]) => `- \`${lvl}\` — ${desc}`)
    .join('\n');
  const effortMax = Object.entries(policy.effort?.maxByModel || {})
    .map(([m, lvl]) => `\`${m}\` up to \`${lvl}\``)
    .join(', ');

  return `${MARKER_START}
# Subagent & workflow model routing (managed by ultracost)

Route every subagent and every dynamic-workflow stage **explicitly**. Never let a
stage inherit the session model by default. Never use ${never || 'banned models'}.

## Tiers

${ruleLines}

**Decision rule:** if a stage must DECIDE how to write or change code, use the
\`${defaultModel}\` tier. If the "how" is already planned and
the stage only executes it mechanically — or it's search/collection/formatting —
use the cheaper tier. When in doubt, use \`${policy.tieBreaker}\`.

## Hard rules

- Never use ${never || 'banned models'}, ever.
- Pin the model per stage via the per-invocation \`model\` param, e.g.
\`agent(task, { model: 'sonnet' })\`. Do **not** follow any built-in guidance to
omit the per-agent model override.${alwaysOpus}

## Effort per stage

Also set \`effort\` per stage, choosing the lowest level that fits the work, bounded
by the model (${effortMax || '`sonnet` up to `high`, `opus` up to `xhigh`'}):

${effortLines}

e.g. \`agent(task, { model: 'sonnet', effort: 'low' })\` for a mechanical scan.

## Pre-flight cost gate (ultracode)

Before launching a dynamic workflow:
1. Draft the workflow script with per-stage \`model\` and \`effort\` set.
2. Write the draft to a temp file and run \`ultracost estimate <file>\` to get the
agent count, model mix, and cost versus an all-\`${defaultModel}\` baseline.
3. Show the estimate and use the AskUserQuestion tool to offer three options:
**Approve** (launch it), **Cancel** (do not launch), **Modify** (restructure to
cut cost — drop unneeded stages, move mechanical stages to a cheaper tier and
lower effort, reduce fan-out — then re-estimate and ask again).
4. Launch the workflow only after Approve.

Verify any script with \`/ultracost:check\` or \`ultracost check <script>\` — it flags
stages missing a model pin.
${MARKER_END}`;
}
|
|
74
|
+
|
|
75
|
+
/**
 * Swap the marker-delimited managed block inside `content` for `block`.
 *
 * @param {string} content - File text that may contain a managed block.
 * @param {string} block - Replacement text, inserted verbatim.
 * @returns {string|null} The updated content, or null when `content` has no
 *   managed block to replace.
 */
export function replaceBlock(content, block) {
  // The markers are literal text: escape anything RegExp would read as syntax.
  const literal = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const re = new RegExp(`${literal(MARKER_START)}[\\s\\S]*?${literal(MARKER_END)}`);
  if (!re.test(content)) return null;
  // Use a replacer function so `$&`, `$1`, `$$`, ... inside the block (e.g. a
  // price in the policy text) are not expanded as replacement patterns.
  return content.replace(re, () => block);
}
|
|
80
|
+
|
|
81
|
+
/**
 * Remove the first marker-delimited managed block from `content`.
 *
 * The block and the newlines directly around it collapse into a single
 * newline, and the result is trimmed at both ends.
 *
 * @param {string} content - File text that may contain a managed block.
 * @returns {string} The trimmed content without the block.
 */
export function stripBlock(content) {
  // The markers are literal text: escape anything RegExp would read as syntax.
  const literal = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const re = new RegExp(
    `\\n*${literal(MARKER_START)}[\\s\\S]*?${literal(MARKER_END)}\\n*`,
  );
  return content.replace(re, '\n').trim();
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// ultracost SessionStart hook. Injects the model-routing policy as context at the
|
|
3
|
+
// start of every session (and after compaction), so workflow authoring sees it
|
|
4
|
+
// without relying on the model choosing to open a skill. Pure node, reads the hook
|
|
5
|
+
// JSON from stdin, emits SessionStart additionalContext. No bash or jq dependency.
|
|
6
|
+
|
|
7
|
+
const POLICY = `This project follows the ultracost model-routing and cost policy for Claude Code dynamic workflows (ultracode).
|
|
8
|
+
|
|
9
|
+
Per-stage model: every agent() stage sets an explicit \`model\` in its options rather than inheriting the session model; haiku is not used.
|
|
10
|
+
- opus for coding and reasoning: writing/editing/refactoring/deleting code; debugging; designing APIs, schemas, or architecture; non-trivial tests; code review; security and performance analysis; planning; synthesis. The orchestrator/planner and the final consolidation stage are always opus.
|
|
11
|
+
- sonnet for pre-planned mechanical and support work: applying an already-decided edit; search, grep, and file discovery; collecting/listing/extracting; running tests and reporting; gathering or summarizing context for an opus stage.
|
|
12
|
+
When a stage is ambiguous, opus is the default.
|
|
13
|
+
|
|
14
|
+
Per-stage effort: also set \`effort\` per stage, choosing the lowest level that fits, bounded by model (sonnet up to high, opus up to xhigh): low = trivial deterministic work (listing/globbing, simple extraction, formatting); medium = light judgment on a small surface; high = standard coding/analysis; xhigh = hard cross-file reasoning, adversarial review, planning, final synthesis.
|
|
15
|
+
|
|
16
|
+
Pre-flight cost gate: before launching a workflow, draft the script with per-stage model and effort, write it to a temp file, run \`ultracost estimate <file>\` to get the agent count, model mix, and cost vs an all-opus baseline, then use the AskUserQuestion tool to offer three options — Approve (launch), Cancel (do not launch), or Modify (restructure to cut cost: drop unneeded stages, move mechanical stages to sonnet and lower effort, reduce fan-out; then re-estimate and ask again). Launch only after Approve.
|
|
17
|
+
|
|
18
|
+
Verify scripts with the /ultracost:check command or \`ultracost check <script>\`, which flags any agent() stage missing a model.`;
|
|
19
|
+
|
|
20
|
+
/**
 * Collect everything piped to stdin as one UTF-8 string.
 *
 * @returns {Promise<string>} The piped text, or '' when stdin is a terminal
 *   (nothing is read in that case).
 */
async function readStdin() {
  const { stdin } = process;
  if (stdin.isTTY) {
    return '';
  }

  stdin.setEncoding('utf8');
  const pieces = [];
  for await (const piece of stdin) {
    pieces.push(piece);
  }
  return pieces.join('');
}
|
|
27
|
+
|
|
28
|
+
// This hook is registered for SessionStart only (all sources), so the policy is
// emitted on every run. Stdin is consumed on a best-effort basis: a missing or
// unreadable payload must not stop the injection.
try {
  await readStdin();
} catch {
  // Deliberately ignored: the payload is not used to build the output.
}

const hookOutput = {
  hookSpecificOutput: {
    hookEventName: 'SessionStart',
    additionalContext: POLICY,
  },
};

process.stdout.write(JSON.stringify(hookOutput));
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// ultracost deterministic cost gate — ON BY DEFAULT (PreToolUse, matcher "Workflow").
|
|
3
|
+
// The plugin registers this in hooks/hooks.json so EVERY dynamic-workflow launch
|
|
4
|
+
// pauses before it runs — it does not depend on the model choosing to ask. It reads
|
|
5
|
+
// the drafted script from tool_input.script, runs the static guard + cost estimate,
|
|
6
|
+
// and returns a permission decision with the numbers AND any unpinned-stage warning
|
|
7
|
+
// up front, so an accidental all-Opus fan-out can't slip through.
|
|
8
|
+
//
|
|
9
|
+
// A PreToolUse hook runs in EVERY permission mode (bypass only auto-approves the
|
|
10
|
+
// "ask" path; a "deny" is honored regardless of mode). So the gate is mode-aware:
|
|
11
|
+
// it reads `permission_mode` from the event and hard-denies a problem workflow in
|
|
12
|
+
// the modes where an "ask" can't pause.
|
|
13
|
+
//
|
|
14
|
+
// Modes (env ULTRACOST_GATE):
|
|
15
|
+
// (unset) mode-aware default. Clean (all pinned) -> ask + estimate, every mode.
|
|
16
|
+
// Problem (unpinned/banned/inherit) -> ask + ⚠ warning in default /
|
|
17
|
+
// acceptEdits / auto (an ask surfaces there); DENY in bypassPermissions /
|
|
18
|
+
// dontAsk (an ask is auto-approved/won't pause there, so we block instead).
|
|
19
|
+
// strict deny on ANY problem, in every mode; ask (with estimate) when all pinned.
|
|
20
|
+
// ask never escalate to deny — always ask (opt out of the mode-aware deny).
|
|
21
|
+
// off disable entirely — for non-interactive runs (headless `claude -p`,
|
|
22
|
+
// Auto Mode, CI), where an unanswered "ask" is denied (the gate fails closed).
|
|
23
|
+
//
|
|
24
|
+
// Residual limitation: Claude Code currently skips PreToolUse hooks for subagents
|
|
25
|
+
// dispatched under bypassPermissions (anthropics/claude-code#43772), so a nested
|
|
26
|
+
// agent there can evade the gate. The top-level Workflow launch is still gated.
|
|
27
|
+
|
|
28
|
+
import { loadPolicy } from '../../src/policy.js';
|
|
29
|
+
import { estimateText } from '../../src/estimate.js';
|
|
30
|
+
import { analyze, CODES } from '../../src/guard.js';
|
|
31
|
+
|
|
32
|
+
// Format a number as dollars with four decimal places, e.g. 1.5 -> "$1.5000".
const money = (x) => `$${Number(x).toFixed(4)}`;

// Raw ULTRACOST_GATE setting; undefined when the variable is not set.
const { ULTRACOST_GATE: MODE } = process.env;

// Permission modes in which an "ask" decision does not actually pause the
// user, so a problem workflow has to be denied instead to be enforced.
const ESCALATE_MODES = new Set();
ESCALATE_MODES.add('bypassPermissions');
ESCALATE_MODES.add('dontAsk');
|
|
37
|
+
|
|
38
|
+
// `systemMessage` is the documented channel for surfacing text to the USER from a
|
|
39
|
+
// hook (hooks have no TTY). We send it alongside permissionDecisionReason because
|
|
40
|
+
// Claude Code does NOT render the reason for an "ask" decision in the TUI
|
|
41
|
+
// (anthropics/claude-code#24059) — without systemMessage the estimate would be
|
|
42
|
+
// computed but invisible. For "deny" the reason renders too; we set both regardless.
|
|
43
|
+
/**
 * Write a PreToolUse permission decision to stdout and end the process.
 *
 * The same text is sent as both `systemMessage` and
 * `permissionDecisionReason`. This function never returns: the process exits
 * with status 0 right after the write, so callers use it as a terminator.
 *
 * @param {string} decision - Permission decision, e.g. 'ask' or 'deny'.
 * @param {string} message - Text to surface with the decision.
 */
function decide(decision, message) {
  const payload = {
    systemMessage: message,
    hookSpecificOutput: {
      hookEventName: 'PreToolUse',
      permissionDecision: decision,
      permissionDecisionReason: message,
    },
  };
  const serialized = JSON.stringify(payload);
  process.stdout.write(serialized);
  process.exit(0);
}

// Shorthands for the two decisions this gate emits.
const ask = (reason) => decide('ask', reason);
const deny = (reason) => decide('deny', reason);
|
|
56
|
+
|
|
57
|
+
/**
 * Read the hook event piped to stdin as a UTF-8 string.
 *
 * @returns {Promise<string>} The piped text, or '' when stdin is a terminal
 *   (nothing is read in that case).
 */
async function readStdin() {
  const input = process.stdin;
  if (input.isTTY) {
    return '';
  }

  input.setEncoding('utf8');
  let text = '';
  for await (const part of input) {
    text += part;
  }
  return text;
}
|
|
64
|
+
|
|
65
|
+
// Explicit opt-out for automation / headless / CI.
if (MODE === 'off') {
  process.exit(0);
}

let evt = {};
try {
  const rawEvent = await readStdin();
  evt = JSON.parse(rawEvent);
} catch {
  // The event can't be parsed, so exit without emitting any decision.
  process.exit(0);
}

// Only the Workflow tool is governed; every other tool passes untouched.
if (evt?.tool_name !== 'Workflow') {
  process.exit(0);
}

const permMode = evt?.permission_mode;

// From here on a workflow IS launching, so every path ends in a decision.
// ask()/deny() exit the process, which is what stops execution after each call.
const script = evt?.tool_input?.script;
if (typeof script !== 'string') {
  ask('ultracost cost gate: a dynamic workflow is about to launch, but its script could not be read to estimate cost. Approve to launch, or deny and review.');
}

try {
  const { policy } = loadPolicy();
  const e = estimateText(script, policy);
  const { stages, findings } = analyze(script, policy);

  // Number of findings whose code is one of the given codes.
  const countOf = (...codes) =>
    findings.filter((f) => codes.some((code) => f.code === code)).length;
  const unpinned = countOf(CODES.NOOPTS, CODES.MISSING);
  const banned = countOf(CODES.BANNED);
  const inherit = countOf(CODES.INHERIT);

  const a = e.agents;
  let agents = `${a.known}`;
  if (a.fanoutGroups) {
    agents = `~${a.assumedTotal} (${a.known} fixed + ${a.fanoutGroups} fan-out x ~${a.assumedPerFanout})`;
  }

  const mixParts = Object.entries(e.modelMix).map(([k, v]) => `${v}x ${k}`);
  const mix = mixParts.join(', ') || 'none';

  const estLine = [
    `${agents} agents; model mix ${mix}; `,
    `est. ${money(e.cost.tiered)} vs all-${e.assumptions.sessionModel} baseline ${money(e.cost.baseline)} `,
    `(save ${money(e.cost.savings)}, ${e.cost.savingsPct}%).`,
  ].join('');

  // Each entry is only built when its count is non-zero.
  const problems = [
    unpinned && `${unpinned}/${stages} stage(s) NOT pinned -> will inherit ${e.assumptions.sessionModel}`,
    banned && `${banned} stage(s) pin a banned model`,
    inherit && `${inherit} stage(s) use model:'inherit'`,
  ].filter(Boolean);

  if (problems.length > 0) {
    const head = `\u26a0 ultracost: ${problems.join('; ')}. `;
    // Hard-deny when forced (strict), or when the current permission mode would
    // not surface an ask anyway. ULTRACOST_GATE=ask opts out of that escalation.
    const escalates = MODE !== 'ask' && ESCALATE_MODES.has(permMode);
    if (MODE === 'strict' || escalates) {
      deny(`${head}estimate: ${estLine} Pin every stage (opus for reasoning, sonnet for mechanical work) and relaunch.`);
    }
    ask(`${head}estimate: ${estLine} Deny and ask me to pin every stage, or approve to run as-is.`);
  }

  ask(`ultracost estimate: ${estLine} Approve to launch, or deny and ask me to make it cheaper.`);
} catch {
  // An estimator/policy failure must not silently let an unpriced fan-out through.
  ask('ultracost cost gate: a dynamic workflow is about to launch (cost estimate unavailable). Approve to launch, or deny and review.');
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 1,
|
|
3
|
+
"neverUse": ["haiku"],
|
|
4
|
+
"allowInherit": false,
|
|
5
|
+
"default": "opus",
|
|
6
|
+
"tieBreaker": "opus",
|
|
7
|
+
"tiers": {
|
|
8
|
+
"opus": { "model": "opus", "effort": "xhigh" },
|
|
9
|
+
"sonnet": { "model": "sonnet", "effort": "high" }
|
|
10
|
+
},
|
|
11
|
+
"alwaysOpus": ["orchestrator", "planner", "final-synthesis", "consolidation"],
|
|
12
|
+
"rules": [
|
|
13
|
+
{
|
|
14
|
+
"tier": "opus",
|
|
15
|
+
"label": "Coding & reasoning",
|
|
16
|
+
"when": "anything requiring judgment or a decision: writing/editing/refactoring/deleting code, debugging, fixing errors, designing APIs/schemas/data models/architecture, non-trivial tests, code review, security/performance analysis, cross-file reasoning, adversarial review, planning, synthesis, final consolidation"
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"tier": "sonnet",
|
|
20
|
+
"label": "Pre-planned mechanical & support",
|
|
21
|
+
"when": "the how is already decided and the stage just applies it: mechanically applying a specified edit across many files, search/grep/glob, file discovery, collecting/listing/extracting, reformatting, mechanical renames, running tests and reporting results, routine git operations, gathering or summarizing context for an opus stage to consume"
|
|
22
|
+
}
|
|
23
|
+
],
|
|
24
|
+
"effort": {
|
|
25
|
+
"range": ["low", "medium", "high", "xhigh"],
|
|
26
|
+
"default": "high",
|
|
27
|
+
"byComplexity": {
|
|
28
|
+
"low": "trivial deterministic work with no real judgment: listing or globbing files, simple field extraction, formatting, mechanical renames following a given pattern",
|
|
29
|
+
"medium": "light judgment on a small surface: a single straightforward edit, summarizing one source, classifying short inputs",
|
|
30
|
+
"high": "standard coding and analysis: most refactors, per-file review, writing non-trivial tests, multi-step but well-scoped work",
|
|
31
|
+
"xhigh": "hard reasoning: cross-file architecture and design, adversarial review, planning, and final synthesis/consolidation"
|
|
32
|
+
},
|
|
33
|
+
"maxByModel": { "sonnet": "high", "opus": "xhigh" }
|
|
34
|
+
},
|
|
35
|
+
"pricing": {
|
|
36
|
+
"_unit": "USD per million tokens",
|
|
37
|
+
"_source": "https://platform.claude.com/docs/en/about-claude/pricing.md",
|
|
38
|
+
"_asOf": "2026-06-13",
|
|
39
|
+
"_models": { "opus": "Claude Opus 4.8", "sonnet": "Claude Sonnet 4.6", "haiku": "Claude Haiku 4.5" },
|
|
40
|
+
"opus": { "input": 5, "output": 25 },
|
|
41
|
+
"sonnet": { "input": 3, "output": 15 },
|
|
42
|
+
"haiku": { "input": 1, "output": 5 }
|
|
43
|
+
},
|
|
44
|
+
"estimation": {
|
|
45
|
+
"tokensPerStage": { "input": 2000, "output": 1200 },
|
|
46
|
+
"effortOutputMultiplier": { "low": 0.4, "medium": 1, "high": 1.8, "xhigh": 3, "max": 4 },
|
|
47
|
+
"assumedFanout": 5
|
|
48
|
+
}
|
|
49
|
+
}
|