ultracost 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +81 -0
- package/LICENSE +21 -0
- package/NOTICE +18 -0
- package/README.md +306 -0
- package/bin/cli.js +264 -0
- package/docs/ESTIMATES.md +191 -0
- package/docs/PUBLISHING.md +164 -0
- package/docs/TESTING.md +260 -0
- package/docs/architecture.md +166 -0
- package/docs/policy.md +42 -0
- package/docs/ultracode.md +37 -0
- package/package.json +52 -0
- package/src/estimate.js +101 -0
- package/src/guard.js +300 -0
- package/src/index.js +7 -0
- package/src/install.js +113 -0
- package/src/log.js +18 -0
- package/src/paths.js +27 -0
- package/src/policy.js +80 -0
- package/src/pricing.js +82 -0
- package/src/rules.js +84 -0
- package/templates/hooks/reinject.mjs +41 -0
- package/templates/hooks/workflow-gate.mjs +126 -0
- package/templates/policy.default.json +49 -0
package/docs/TESTING.md
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
# Testing guide
|
|
2
|
+
|
|
3
|
+
A step-by-step manual test for ultracost — from a zero-risk sandbox install through a live
|
|
4
|
+
Claude Code `ultracode` run. Every command is copy-pasteable.
|
|
5
|
+
|
|
6
|
+
Each step is tagged:
|
|
7
|
+
|
|
8
|
+
- **[safe]** — read-only or fully sandboxed; cannot affect your real `~/.claude`.
|
|
9
|
+
- **[touches ~/.claude]** — writes to or registers state in your real Claude Code config.
|
|
10
|
+
Reversible; cleanup is in the last section.
|
|
11
|
+
|
|
12
|
+
All commands assume the repo is at `~/projects/ultracost`:
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
cd ~/projects/ultracost
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
> The plugin steps (Step 3, and the live run in Step 5) require the plugin package
|
|
19
|
+
> (`.claude-plugin/`, `skills/`, `commands/`, `hooks/`) to be present in the repo. The npm
|
|
20
|
+
> steps need only `bin/`, `src/`, and `templates/`.
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Step 1 — Sandbox install **[safe]**
|
|
25
|
+
|
|
26
|
+
Install into a throwaway config dir via `CLAUDE_CONFIG_DIR`. Nothing under your real
|
|
27
|
+
`~/.claude` is read or written.
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
SANDBOX=$(mktemp -d)
|
|
31
|
+
CLAUDE_CONFIG_DIR="$SANDBOX" node bin/cli.js init
|
|
32
|
+
CLAUDE_CONFIG_DIR="$SANDBOX" node bin/cli.js status
|
|
33
|
+
CLAUDE_CONFIG_DIR="$SANDBOX" node bin/cli.js doctor
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Inspect exactly what `init` wrote:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
ls -R "$SANDBOX"
|
|
40
|
+
cat "$SANDBOX/ultracost/policy.json"
|
|
41
|
+
cat "$SANDBOX/CLAUDE.md"
|
|
42
|
+
cat "$SANDBOX/settings.json"
|
|
43
|
+
cat "$SANDBOX/ultracost/reinject.mjs"
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Expect:
|
|
47
|
+
|
|
48
|
+
- `ultracost/policy.json` — the default quality-first policy.
|
|
49
|
+
- `CLAUDE.md` — a `<!-- ultracost:start -->` … `<!-- ultracost:end -->` routing block.
|
|
50
|
+
- `settings.json` — a `SessionStart` hook with matcher `startup|resume|clear|compact` and
|
|
51
|
+
command `node "<SANDBOX>/ultracost/reinject.mjs"`.
|
|
52
|
+
- `ultracost/reinject.mjs` — the node re-inject hook (no bash/jq).
|
|
53
|
+
|
|
54
|
+
Throw the sandbox away:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
rm -rf "$SANDBOX"
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Step 2 — Deterministic proof **[safe]**
|
|
63
|
+
|
|
64
|
+
Read-only checks. `audit` only reads your real workflow scripts; it never writes.
|
|
65
|
+
|
|
66
|
+
Audit your real history (the proof point — most stages inherit the session model):
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
node bin/cli.js audit ~/.claude/projects
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Expect a stage/pin breakdown with a high `unpinned ratio`. If you have no workflow
|
|
73
|
+
scripts yet, it reports none found — generate one in Step 5, then re-run.
|
|
74
|
+
|
|
75
|
+
Confirm the guard is clean on a correctly-pinned script:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
node bin/cli.js check examples/workflow.good.js
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Expect: `1 file(s) scanned — every agent() stage pins a model.` and exit code `0`:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
node bin/cli.js check examples/workflow.good.js; echo "exit: $?"
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Optional — confirm the guard *catches* problems and that JSON output works. Make a
|
|
88
|
+
throwaway script with two unpinned stages in a temp dir (nothing under the repo or
|
|
89
|
+
`~/.claude` is touched):
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
BADDIR=$(mktemp -d)
|
|
93
|
+
cat > "$BADDIR/bad.js" <<'EOF'
|
|
94
|
+
agent("plan the work");
|
|
95
|
+
agent("apply the decided edit", { label: "apply" });
|
|
96
|
+
EOF
|
|
97
|
+
node bin/cli.js check "$BADDIR/bad.js" # expect UC001 + UC002, exit 1
|
|
98
|
+
node bin/cli.js check "$BADDIR/bad.js" --json # same findings, machine-readable
|
|
99
|
+
rm -rf "$BADDIR"
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Expect `UC001` (no options object) on the first stage and `UC002` (options object,
|
|
103
|
+
no `model`) on the second, a non-zero exit, and a `findings` array in the JSON form.
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## Step 3 — Plugin (local, pre-publish) **[touches ~/.claude]**
|
|
108
|
+
|
|
109
|
+
Two ways to load the plugin from your working copy. Option A registers a marketplace
|
|
110
|
+
(persistent until removed); Option B loads it for one session only.
|
|
111
|
+
|
|
112
|
+
### Option A — local marketplace install
|
|
113
|
+
|
|
114
|
+
Inside Claude Code:
|
|
115
|
+
|
|
116
|
+
```text
|
|
117
|
+
/plugin marketplace add ~/projects/ultracost
|
|
118
|
+
/plugin install ultracost@ultracost
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Or non-interactively from a shell:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
claude plugin marketplace add ~/projects/ultracost
|
|
125
|
+
claude plugin install ultracost@ultracost
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Option B — session-scoped load (most contained)
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
claude --plugin-dir ~/projects/ultracost
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Verify the plugin loaded
|
|
135
|
+
|
|
136
|
+
Inside the session:
|
|
137
|
+
|
|
138
|
+
```text
|
|
139
|
+
/help
|
|
140
|
+
/ultracost:check ~/projects/ultracost/examples/workflow.good.js
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Expect:
|
|
144
|
+
|
|
145
|
+
- `/help` lists the `ultracost` plugin and the `/ultracost:check` command.
|
|
146
|
+
- `/ultracost:check` on `workflow.good.js` reports a clean scan.
|
|
147
|
+
- The routing-policy **skill** is available — ask Claude to author a small workflow and
|
|
148
|
+
confirm it pins `model:` per stage (it should, because the skill is loaded).
|
|
149
|
+
|
|
150
|
+
If you edited the plugin and want to reload without restarting:
|
|
151
|
+
|
|
152
|
+
```text
|
|
153
|
+
/reload-plugins
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## Step 4 — npm (local link) **[touches ~/.claude]**
|
|
159
|
+
|
|
160
|
+
Link the package so the `ultracost` binary is on your PATH:
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
npm link
|
|
164
|
+
ultracost --version
|
|
165
|
+
ultracost audit ~/.claude/projects # read-only [safe]
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
`ultracost init` writes to your real `~/.claude`. To keep it contained, run it sandboxed:
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
SANDBOX=$(mktemp -d) && CLAUDE_CONFIG_DIR="$SANDBOX" ultracost init   # [safe] — rm -rf "$SANDBOX" when done
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
Or, to test the real install path (reversible via `ultracost uninstall`):
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
ultracost init # [touches ~/.claude]
|
|
178
|
+
ultracost status
|
|
179
|
+
ultracost doctor
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
Unlink when done:
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
npm unlink -g ultracost
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Step 5 — Live Claude Code CLI run **[touches ~/.claude]**
|
|
191
|
+
|
|
192
|
+
End-to-end test against a real `ultracode` session. This is the one that proves ultracost
|
|
193
|
+
changes Claude's behavior.
|
|
194
|
+
|
|
195
|
+
1. Make sure routing is active — either the plugin is installed (Step 3) **or** the npm
|
|
196
|
+
CLI is installed (`ultracost init`, Step 4).
|
|
197
|
+
2. Start Claude Code and turn on ultracode:
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
claude
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
```text
|
|
204
|
+
/effort ultracode
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
3. Give it a small workflow prompt, for example:
|
|
208
|
+
|
|
209
|
+
```text
|
|
210
|
+
Refactor the error handling across these three files and review the result.
|
|
211
|
+
Use a dynamic workflow with a planning stage, a parallel apply stage, and a review stage.
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
4. When the run starts, Claude prints the path of the workflow script it authored, under
|
|
215
|
+
`~/.claude/projects/<project>/workflows/scripts/`. Check the newest one with the guard:
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
ultracost check "$(ls -t ~/.claude/projects/*/workflows/scripts/*.js | head -1)"
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
**Eyeball the result:** with ultracost active, the mechanical/apply stages should be
|
|
222
|
+
pinned to `sonnet` and the planning/review stages to `opus` — the guard reports a clean
|
|
223
|
+
scan. As a before/after, run the same command in a session *without* ultracost and you
|
|
224
|
+
should see `UC001`/`UC002` findings.
|
|
225
|
+
|
|
226
|
+
5. **Confirm the policy injection.** The `SessionStart` hook injects the routing policy as
|
|
227
|
+
context at the start of every session (and again after compaction). It is delivered as
|
|
228
|
+
`additionalContext`, so it shapes Claude's behavior without appearing as a chat message.
|
|
229
|
+
To confirm it is wired and firing, run the hook directly and check it returns the policy:
|
|
230
|
+
|
|
231
|
+
```text
|
|
232
|
+
   printf '{"source":"startup"}' | node "${CLAUDE_CONFIG_DIR:-$HOME/.claude}/ultracost/reinject.mjs"
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
You should get a JSON object whose `additionalContext` states the routing policy. The
|
|
236
|
+
plugin attaches the same hook on every `SessionStart` source (`startup|resume|clear|compact`).
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
## Cleanup
|
|
241
|
+
|
|
242
|
+
Undo everything the steps above can install.
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
# npm CLI install (Step 4)
|
|
246
|
+
ultracost uninstall # removes the CLAUDE.md block, hook, settings entry, policy dir
|
|
247
|
+
npm unlink -g ultracost
|
|
248
|
+
|
|
249
|
+
# plugin install (Step 3, Option A)
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
Inside Claude Code:
|
|
253
|
+
|
|
254
|
+
```text
|
|
255
|
+
/plugin uninstall ultracost@ultracost
|
|
256
|
+
/plugin marketplace remove ultracost
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
`--plugin-dir` (Step 3, Option B) leaves nothing behind — it ends with the session.
|
|
260
|
+
Sandbox dirs from Steps 1 and 4 are gone once you `rm -rf` them.
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# Architecture
|
|
2
|
+
|
|
3
|
+
ultracost is **one shared core** (`src/`) with a single source of truth (`policy.json`),
|
|
4
|
+
exposed through **two delivery surfaces**: a Claude Code **plugin** (primary) and an
|
|
5
|
+
**npm CLI** (secondary). Both compile from the same policy, so the routing rules can never
|
|
6
|
+
drift between them.
|
|
7
|
+
|
|
8
|
+
```mermaid
|
|
9
|
+
flowchart TD
|
|
10
|
+
subgraph core["src/ — shared core"]
|
|
11
|
+
POL["policy.js<br/>(loads/validates policy.json)"]
|
|
12
|
+
RUL["rules.js<br/>(rule compiler)"]
|
|
13
|
+
GRD["guard.js<br/>(static analysis + audit)"]
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
subgraph plugin["Claude Code plugin — PRIMARY"]
|
|
17
|
+
SK["skills/ultracost<br/>routing policy (always-relevant)"]
|
|
18
|
+
CMD["/ultracost:check command"]
|
|
19
|
+
HK["hooks/hooks.json<br/>SessionStart + PreToolUse(Workflow)"]
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
subgraph cli["npm CLI — secondary"]
|
|
23
|
+
BIN["bin/cli.js<br/>init · check · audit · doctor · status · uninstall"]
|
|
24
|
+
CM["~/.claude/CLAUDE.md block"]
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
POL --> RUL
|
|
28
|
+
RUL --> SK
|
|
29
|
+
RUL --> CM
|
|
30
|
+
BIN --> CM
|
|
31
|
+
GRD --> CMD
|
|
32
|
+
GRD --> BIN
|
|
33
|
+
HK --> RE["reinject.mjs<br/>(node, no bash/jq)"]
|
|
34
|
+
HK --> WG["workflow-gate.mjs<br/>(PreToolUse cost gate, default-on)"]
|
|
35
|
+
GRD --> WG
|
|
36
|
+
BIN --> RE
|
|
37
|
+
|
|
38
|
+
classDef ft fill:#1f6feb,stroke:#0b3d91,color:#fff;
|
|
39
|
+
class POL,RUL,GRD ft;
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## The two surfaces
|
|
43
|
+
|
|
44
|
+
| | Plugin (primary) | npm CLI (secondary) |
|
|
45
|
+
|---|---|---|
|
|
46
|
+
| **Install** | `/plugin marketplace add danielkremen818/ultracost` → `/plugin install ultracost@ultracost` | `npx ultracost init` |
|
|
47
|
+
| **Routing guidance** | **`SessionStart` hook** injects the policy as context (no file mutation); a skill ships alongside for explicit reference | block injected into `~/.claude/CLAUDE.md` |
|
|
48
|
+
| **Guard** | `/ultracost:check` command (runs `guard.js`) | `ultracost check` / `ultracost audit` |
|
|
49
|
+
| **Policy injection** | `hooks/hooks.json` → `node "${CLAUDE_PLUGIN_ROOT}/templates/hooks/reinject.mjs"` (all `SessionStart` sources) | `node "<config>/ultracost/reinject.mjs"`, registered in `settings.json` |
|
|
50
|
+
| **Cost gate** | `hooks/hooks.json` → `PreToolUse` (matcher `Workflow`) runs `workflow-gate.mjs` by default, pausing every launch with an estimate (`ULTRACOST_GATE=off` to disable) | run `ultracost estimate <script>` manually, or wire the same hook |
|
|
51
|
+
| **Best for** | day-to-day use, sharing, marketplace discovery | CI, scripting, the CLAUDE.md-injection path |
|
|
52
|
+
|
|
53
|
+
The skill and the CLAUDE.md block are both rendered by `rules.js` from the same policy, so
|
|
54
|
+
they say the same thing.
|
|
55
|
+
|
|
56
|
+
## 1. Policy (`src/policy.js`)
|
|
57
|
+
|
|
58
|
+
Loads, validates, and normalizes `policy.json`. Resolution order:
|
|
59
|
+
|
|
60
|
+
1. explicit `--policy`/path argument
|
|
61
|
+
2. installed `<config>/ultracost/policy.json`
|
|
62
|
+
3. bundled `templates/policy.default.json`
|
|
63
|
+
|
|
64
|
+
`<config>` is `~/.claude` by default, or `$CLAUDE_CONFIG_DIR` when set — all ultracost paths
|
|
65
|
+
hang off it, so a relocated Claude Code config is honored automatically.
|
|
66
|
+
|
|
67
|
+
Validation rejects a `default` tier that isn't defined and any tier whose `model` is in
|
|
68
|
+
`neverUse`, so a broken policy can never be installed.
|
|
69
|
+
|
|
70
|
+
## 2. Rules + install (`src/rules.js`, `src/install.js`)
|
|
71
|
+
|
|
72
|
+
`compileRules(policy)` renders a deterministic Markdown block (between
|
|
73
|
+
`<!-- ultracost:start -->` / `<!-- ultracost:end -->`) so the prose rules can never drift from
|
|
74
|
+
the policy data. The same compiled rules back both the plugin skill and the CLAUDE.md block.
|
|
75
|
+
|
|
76
|
+
On the **CLI** path, `install()`:
|
|
77
|
+
|
|
78
|
+
- writes the policy to `<config>/ultracost/policy.json`
|
|
79
|
+
- injects/updates the block in `<config>/CLAUDE.md` (the **canonical** user-global memory —
|
|
80
|
+
`~/CLAUDE.md` is *not* loaded reliably and is never used)
|
|
81
|
+
- copies the re-inject hook to `<config>/ultracost/reinject.mjs` and registers it for
|
|
82
|
+
`SessionStart` (matcher `startup|resume|clear|compact`) in `settings.json` as
|
|
83
|
+
`node "<config>/ultracost/reinject.mjs"`
|
|
84
|
+
|
|
85
|
+
All `settings.json` access is defensive: a missing file is created; an invalid file is
|
|
86
|
+
reported, never overwritten.
|
|
87
|
+
|
|
88
|
+
On the **plugin** path, the skill, command, and hook are bundled in the package, so nothing
|
|
89
|
+
in your config is mutated — the plugin loader wires them up.
|
|
90
|
+
|
|
91
|
+
## 3. The policy-injection hook (`templates/hooks/reinject.mjs`)
|
|
92
|
+
|
|
93
|
+
A cross-platform **node** script — no bash or `jq` dependency. It reads the hook JSON from
|
|
94
|
+
stdin and emits the routing policy as `SessionStart` `additionalContext`. Claude Code adds
|
|
95
|
+
`SessionStart` context **before the first prompt**, so the policy is present the moment Claude
|
|
96
|
+
authors a workflow — not dependent on the model choosing to open a skill (a model-invoked
|
|
97
|
+
skill is only *offered*, and is reliably ignored during fast workflow authoring). It fires on
|
|
98
|
+
every `SessionStart` source, so the policy is injected at startup/resume/clear and re-injected
|
|
99
|
+
after compaction (when Claude drops earlier context). The text is phrased as factual project
|
|
100
|
+
information, per the hooks docs, so it is treated as context rather than tripping
|
|
101
|
+
prompt-injection defenses.
|
|
102
|
+
|
|
103
|
+
- **CLI** registers it as `node "<config>/ultracost/reinject.mjs"`.
|
|
104
|
+
- **Plugin** registers it via `hooks/hooks.json` as
|
|
105
|
+
`node "${CLAUDE_PLUGIN_ROOT}/templates/hooks/reinject.mjs"`.
|
|
106
|
+
|
|
107
|
+
Both attach to `SessionStart` with matcher `startup|resume|clear|compact`.
|
|
108
|
+
|
|
109
|
+
## 4. Workflow Guard (`src/guard.js`)
|
|
110
|
+
|
|
111
|
+
A heuristic, dependency-free static analyzer for the JavaScript workflow scripts Claude Code
|
|
112
|
+
authors.
|
|
113
|
+
|
|
114
|
+
```mermaid
|
|
115
|
+
sequenceDiagram
|
|
116
|
+
participant U as You
|
|
117
|
+
participant CC as Claude Code (ultracode)
|
|
118
|
+
participant FS as workflow script
|
|
119
|
+
participant FT as ultracost check / /ultracost:check
|
|
120
|
+
|
|
121
|
+
U->>CC: prompt
|
|
122
|
+
CC->>FS: writes agent()/parallel()/phase() script
|
|
123
|
+
Note over CC,FS: SessionStart-injected policy / CLAUDE.md block guide per-stage model pins
|
|
124
|
+
U->>FT: check <script>
|
|
125
|
+
FT->>FS: scan agent() call sites
|
|
126
|
+
FT-->>U: flag stages missing a model pin (exit 1 in CI)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
It finds every `agent(` call (ignoring `subagent(`, `obj.agent(`), captures the argument
|
|
130
|
+
list with a string-aware paren matcher (handles nested calls and multiline), and classifies
|
|
131
|
+
each stage: missing options (`UC001`), missing model (`UC002`), banned model (`UC003`),
|
|
132
|
+
`inherit` (`UC004`), or dynamic/variable options (`UC005`). Ambiguous cases are **warnings**;
|
|
133
|
+
clear ones are **errors**.
|
|
134
|
+
|
|
135
|
+
The scan is **string- and comment-aware**: a single pre-pass records the spans inside
|
|
136
|
+
`'...'`, `"..."`, `` `...` `` literals and `//` / `/* */` comments, and any `agent(` token in
|
|
137
|
+
those spans is treated as prose (e.g. text inside a prompt) and never counted as a call. This
|
|
138
|
+
is what makes the output trustworthy on real scripts, whose prompts routinely contain the
|
|
139
|
+
word `agent(`.
|
|
140
|
+
|
|
141
|
+
`ultracost audit [dir]` reuses the same analysis over `<dir>/**/workflows/scripts/*.js` and
|
|
142
|
+
aggregates totals (stages, pinned, unpinned, banned, inherit, dynamic, and the unpinned
|
|
143
|
+
ratio) — the "audit your history" feature and the project's headline evidence.
|
|
144
|
+
|
|
145
|
+
`--fix` inserts the default tier's model into the unambiguous cases, processing matches
|
|
146
|
+
back-to-front so edits don't shift later offsets.
|
|
147
|
+
|
|
148
|
+
## Why this shape
|
|
149
|
+
|
|
150
|
+
- **Data over prose:** the policy is machine-checkable and user-editable; the rules text is
|
|
151
|
+
generated for both surfaces from the same source.
|
|
152
|
+
- **Defense in depth:** the `SessionStart` hook (and CLAUDE.md rule on the CLI path) *injects*
|
|
153
|
+
the policy as always-on context, and the guard *verifies* the output. The model can ignore
|
|
154
|
+
guidance; it cannot ignore a failing CI check. (Verified on a live run: a model-invoked skill
|
|
155
|
+
alone left every stage unpinned; the injected policy made the same prompt pin all stages.)
|
|
156
|
+
- **Narrow on purpose:** ultracost only routes the workflow/ultracode path. That keeps it
|
|
157
|
+
composable with a general router if you run one.
|
|
158
|
+
|
|
159
|
+
## Limitations
|
|
160
|
+
|
|
161
|
+
- The guard is a heuristic scanner, not a full JS parser. The string/comment pre-pass
|
|
162
|
+
removes the common false positives (prompt text containing `agent(`), but deeply unusual
|
|
163
|
+
constructs could still be misclassified. Dynamic/variable options are reported as warnings
|
|
164
|
+
(`UC005`) rather than guessed.
|
|
165
|
+
- `--fix` only handles the unambiguous cases (`UC001`, `UC002`); banned models and `inherit`
|
|
166
|
+
are reported for you to resolve, never silently rewritten.
|
package/docs/policy.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Policy reference
|
|
2
|
+
|
|
3
|
+
The policy lives at `~/.claude/ultracost/policy.json` after install. Edit it, then run
|
|
4
|
+
`ultracost init` to recompile the `~/.claude/CLAUDE.md` rules from it.
|
|
5
|
+
|
|
6
|
+
```json
|
|
7
|
+
{
|
|
8
|
+
"version": 1,
|
|
9
|
+
"neverUse": ["haiku"],
|
|
10
|
+
"allowInherit": false,
|
|
11
|
+
"default": "opus",
|
|
12
|
+
"tieBreaker": "opus",
|
|
13
|
+
"tiers": {
|
|
14
|
+
"opus": { "model": "opus", "effort": "xhigh" },
|
|
15
|
+
"sonnet": { "model": "sonnet", "effort": "high" }
|
|
16
|
+
},
|
|
17
|
+
"alwaysOpus": ["orchestrator", "planner", "final-synthesis", "consolidation"],
|
|
18
|
+
"rules": [
|
|
19
|
+
{ "tier": "opus", "label": "Coding & reasoning", "when": "..." },
|
|
20
|
+
{ "tier": "sonnet", "label": "Mechanical & support", "when": "..." }
|
|
21
|
+
]
|
|
22
|
+
}
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
| Field | Type | Meaning |
|
|
26
|
+
|-------|------|---------|
|
|
27
|
+
| `version` | number | Policy schema version. |
|
|
28
|
+
| `neverUse` | string[] | Models that must never be used. Matched by alias or substring, so `haiku` also bans `claude-haiku-4-5`. The guard raises `UC003` on these. |
|
|
29
|
+
| `allowInherit` | boolean | If `false`, `model: 'inherit'` is an error (`UC004`). |
|
|
30
|
+
| `default` | string | Tier used by `--fix` and recommended as the fallback. Must exist in `tiers`. |
|
|
31
|
+
| `tieBreaker` | string | Tier the rules tell Claude to use "when in doubt". |
|
|
32
|
+
| `tiers` | object | Named tiers. Each has `model` (alias or full id) and optional `effort`. A tier whose `model` is in `neverUse` is rejected at load time. |
|
|
33
|
+
| `alwaysOpus` | string[] | Stage roles that must always use the default tier (orchestrator, final synthesis, …). Rendered into the rules. |
|
|
34
|
+
| `rules` | object[] | Human/LLM-facing routing guidance. Each has `tier`, optional `label`, and `when` (the natural-language criteria). |
|
|
35
|
+
|
|
36
|
+
## Notes on effort
|
|
37
|
+
|
|
38
|
+
`xhigh` is currently available only on Opus 4.8. Sonnet tiers should use `high` (or `max`) — ultracost never downgrades the *model* to obtain more thinking; it keeps the model and uses that model's top effort.
|
|
39
|
+
|
|
40
|
+
## Switching to a cost-first policy
|
|
41
|
+
|
|
42
|
+
Edit the tiers/rules — for example, add a `haiku` tier, remove it from `neverUse`, and route search/format stages to it. ultracost is unopinionated about the contents; it only guarantees that whatever you decide is pinned explicitly on every stage.
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Why ultracode needs per-stage routing
|
|
2
|
+
|
|
3
|
+
`ultracode` (Claude Code v2.1.154+, shipped May 2026 with Opus 4.8) combines two
|
|
4
|
+
things: **`xhigh` reasoning effort** and **automatic dynamic-workflow orchestration**.
|
|
5
|
+
Both interact badly with model defaults.
|
|
6
|
+
|
|
7
|
+
## The three compounding defaults
|
|
8
|
+
|
|
9
|
+
1. **`xhigh` is Opus-only.** Turning on ultracode forces the *session* onto Opus 4.8 —
|
|
10
|
+
you cannot run an ultracode session on Sonnet.
|
|
11
|
+
2. **Subagents inherit the session model.** With no per-stage override, every spawned
|
|
12
|
+
stage is Opus 4.8.
|
|
13
|
+
3. **Workflow-authoring guidance says to omit the per-agent model.** So inheritance is
|
|
14
|
+
the path of least resistance, and the whole fan-out lands on Opus.
|
|
15
|
+
|
|
16
|
+
The result is documented in
|
|
17
|
+
[anthropics/claude-code#66023](https://github.com/anthropics/claude-code/issues/66023):
|
|
18
|
+
a single prompt spawned **46 Opus subagents (~3M tokens)** with no cost warning.
|
|
19
|
+
|
|
20
|
+
## Why the usual levers don't fit
|
|
21
|
+
|
|
22
|
+
- **`/model sonnet` on the session** cascades a cheap model to the fan-out — but it's
|
|
23
|
+
incompatible with ultracode, because ultracode requires Opus for `xhigh`.
|
|
24
|
+
- **`CLAUDE_CODE_SUBAGENT_MODEL`** is a global override that beats per-invocation and
|
|
25
|
+
per-agent settings — so it *defeats* a mixed, per-stage policy.
|
|
26
|
+
|
|
27
|
+
That leaves exactly one correct lever: **pin the model per stage inside the workflow
|
|
28
|
+
script** (`agent(task, { model: 'sonnet' })`). ultracost makes that the default behavior
|
|
29
|
+
(via a `SessionStart` hook that injects the policy as context — plus the CLAUDE.md rule on
|
|
30
|
+
the CLI path) and **verifies it** (via the guard).
|
|
31
|
+
|
|
32
|
+
## ultracost's stance
|
|
33
|
+
|
|
34
|
+
Quality-first, not cost-first: coding and reasoning stay on Opus @ `xhigh`; only
|
|
35
|
+
pre-planned mechanical execution and search/collection drop to Sonnet; Haiku is never
|
|
36
|
+
used. The biggest win isn't the savings from moving a few stages to Sonnet — it's stopping a 40-agent
|
|
37
|
+
fan-out from running Opus *by accident* on work that was already planned.
|
package/package.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "ultracost",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "Per-stage model routing for Claude Code dynamic workflows (ultracode). Quality-first policy, CLAUDE.md rule injection, and a workflow-script guard that catches subagent stages that would silently inherit Opus.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"ultracost": "./bin/cli.js"
|
|
8
|
+
},
|
|
9
|
+
"exports": {
|
|
10
|
+
".": "./src/index.js"
|
|
11
|
+
},
|
|
12
|
+
"scripts": {
|
|
13
|
+
"test": "node --test \"tests/**/*.test.js\"",
|
|
14
|
+
"check": "node --check bin/cli.js && node --check src/index.js",
|
|
15
|
+
"ultracost": "node bin/cli.js"
|
|
16
|
+
},
|
|
17
|
+
"keywords": [
|
|
18
|
+
"claude",
|
|
19
|
+
"claude-code",
|
|
20
|
+
"ultracode",
|
|
21
|
+
"dynamic-workflows",
|
|
22
|
+
"subagents",
|
|
23
|
+
"model-routing",
|
|
24
|
+
"token-optimization",
|
|
25
|
+
"cost-optimization",
|
|
26
|
+
"opus",
|
|
27
|
+
"sonnet"
|
|
28
|
+
],
|
|
29
|
+
"author": "Daniel Kremen",
|
|
30
|
+
"license": "MIT",
|
|
31
|
+
"repository": {
|
|
32
|
+
"type": "git",
|
|
33
|
+
"url": "git+https://github.com/danielkremen818/ultracost.git"
|
|
34
|
+
},
|
|
35
|
+
"bugs": {
|
|
36
|
+
"url": "https://github.com/danielkremen818/ultracost/issues"
|
|
37
|
+
},
|
|
38
|
+
"homepage": "https://github.com/danielkremen818/ultracost#readme",
|
|
39
|
+
"files": [
|
|
40
|
+
"bin/",
|
|
41
|
+
"src/",
|
|
42
|
+
"templates/",
|
|
43
|
+
"docs/",
|
|
44
|
+
"README.md",
|
|
45
|
+
"LICENSE",
|
|
46
|
+
"NOTICE",
|
|
47
|
+
"CHANGELOG.md"
|
|
48
|
+
],
|
|
49
|
+
"engines": {
|
|
50
|
+
"node": ">=24"
|
|
51
|
+
}
|
|
52
|
+
}
|
package/src/estimate.js
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import { readFileSync } from 'node:fs';
|
|
2
|
+
import { stageList } from './guard.js';
|
|
3
|
+
|
|
4
|
+
// Pricing buckets, checked in this order by priceKey().
const PRICE_KEYS = ['opus', 'sonnet', 'haiku'];

/**
 * Resolve a model alias or full model id to its pricing key.
 *
 * The model is stringified and lower-cased, then matched by substring against
 * PRICE_KEYS in order. Anything that matches none of them is priced as 'opus'.
 *
 * @param {*} model - Model alias or id (coerced with String()).
 * @returns {string} One of the PRICE_KEYS entries.
 */
export function priceKey(model) {
  const needle = String(model).toLowerCase();
  for (const candidate of PRICE_KEYS) {
    if (needle.includes(candidate)) return candidate;
  }
  return 'opus';
}
|
|
11
|
+
|
|
12
|
+
/**
 * Round a number to four decimal places.
 *
 * @param {number} x - Value to round.
 * @returns {number} x rounded to the nearest 1e-4.
 */
function round(x) {
  const scale = 1e4;
  const scaled = Math.round(x * scale);
  return scaled / scale;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Output-token multiplier for an effort level.
 *
 * Looks the effort up in policy.estimation.effortOutputMultiplier, falls back to
 * the entry for policy.effort.default, and finally to 1.
 *
 * Only the table's own properties are consulted. The effort name is taken from
 * the workflow script being estimated, so a name such as 'constructor' or
 * 'toString' must not resolve to an inherited Object.prototype member: that
 * would yield a function instead of a number and turn the estimate into NaN.
 *
 * @param {string} [effort] - Effort level pinned on the stage.
 * @param {object} policy - Policy with estimation.effortOutputMultiplier and an
 *   optional effort.default.
 * @returns {*} The table entry for the effort (or for the default effort), else 1.
 */
function effortMultiplier(effort, policy) {
  const table = policy.estimation.effortOutputMultiplier;
  const ownEntry = (key) => (Object.hasOwn(table, key) ? table[key] : undefined);
  return ownEntry(effort) ?? ownEntry(policy.effort?.default) ?? 1;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Default effort for a model.
 *
 * Picks the first tier whose model maps to the same pricing key as `model` and
 * returns that tier's effort. When no tier matches, or the matching tier has no
 * effort, the result is policy.effort.default, or 'high' if that is unset.
 *
 * @param {*} model - Model alias or id pinned on the stage.
 * @param {object} policy - Policy with `tiers` and an optional `effort.default`.
 * @returns {string} Effort level to assume for the model.
 */
function tierEffortFor(model, policy) {
  const wanted = priceKey(model);
  const tier = Object.values(policy.tiers).find(
    (candidate) => priceKey(candidate.model) === wanted
  );
  const fallback = policy.effort?.default || 'high';
  return tier ? tier.effort || fallback : fallback;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Estimated cost of a single stage on the given model and effort.
 *
 * Uses the policy's per-stage token assumption. Input tokens are charged at the
 * model's input rate and output tokens, scaled by the effort multiplier, at its
 * output rate. Token counts are divided by 1e6 before the rate is applied. A
 * model with no pricing entry is charged at the opus rates.
 *
 * @param {*} model - Model alias or id.
 * @param {string} effort - Effort level used to scale output tokens.
 * @param {object} policy - Policy with `pricing` and `estimation.tokensPerStage`.
 * @returns {number} Estimated cost for one stage.
 */
function stageCost(model, effort, policy) {
  const rates = policy.pricing[priceKey(model)] || policy.pricing.opus;
  const { input, output } = policy.estimation.tokensPerStage;
  const inputCost = (input / 1e6) * rates.input;
  const outputCost = ((output * effortMultiplier(effort, policy)) / 1e6) * rates.output;
  return inputCost + outputCost;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Estimate agent count, model mix, and a tiered-vs-baseline cost for a workflow
 * script.
 *
 * Baseline prices every stage on the session model: the default tier's model
 * and effort, falling back to 'opus' and 'xhigh'. Tiered prices each stage with
 * the model and effort it actually pins. A stage with no model is priced like
 * the session, so it saves nothing. Stages flagged as fan-out are weighted by
 * the assumed fan-out size; all others count once.
 *
 * @param {string} text - Workflow script source, passed to stageList().
 * @param {object} policy - Policy providing tiers, default, pricing and estimation.
 * @param {object} [opts] - Options.
 * @param {number} [opts.assumedFanout] - Overrides policy.estimation.assumedFanout.
 * @returns {object} `{ agents, modelMix, cost, stages, assumptions }`.
 */
export function estimateText(text, policy, opts = {}) {
  const assumedFanout = opts.assumedFanout ?? policy.estimation.assumedFanout;
  const defaultTier = policy.tiers[policy.default];
  const sessionModel = defaultTier?.model || 'opus';
  const sessionEffort = defaultTier?.effort || 'xhigh';

  // Price each stage twice: as pinned (tiered) and as the session would run it (baseline).
  const stages = stageList(text).map((stage) => {
    const pinned = !!stage.model;
    const model = pinned ? stage.model : sessionModel;
    let effort = stage.effort;
    if (!effort) {
      effort = pinned ? tierEffortFor(stage.model, policy) : sessionEffort;
    }
    return {
      line: stage.line,
      fanout: stage.fanout,
      pinned,
      model,
      effort,
      tieredCost: stageCost(model, effort, policy),
      baselineCost: stageCost(sessionModel, sessionEffort, policy)
    };
  });

  // One pass over the priced stages collects the totals, counts and the model mix.
  let tiered = 0;
  let baseline = 0;
  let known = 0;
  let fanoutGroups = 0;
  const modelMix = {};
  for (const stage of stages) {
    const multiplier = stage.fanout ? assumedFanout : 1;
    tiered += stage.tieredCost * multiplier;
    baseline += stage.baselineCost * multiplier;
    if (stage.fanout) {
      fanoutGroups += 1;
    } else {
      known += 1;
    }
    const bucket = priceKey(stage.model);
    modelMix[bucket] = (modelMix[bucket] || 0) + multiplier;
  }

  // A zero (or otherwise falsy) baseline reports 0% rather than dividing by it.
  let savingsPct = 0;
  if (baseline) {
    savingsPct = Math.round((1 - tiered / baseline) * 100);
  }

  return {
    agents: {
      known,
      fanoutGroups,
      assumedPerFanout: assumedFanout,
      assumedTotal: known + fanoutGroups * assumedFanout
    },
    modelMix,
    cost: {
      tiered: round(tiered),
      baseline: round(baseline),
      savings: round(baseline - tiered),
      savingsPct
    },
    stages,
    assumptions: {
      sessionModel,
      pricing: policy.pricing,
      pricingAsOf: policy.pricing?._asOf,
      tokensPerStage: policy.estimation.tokensPerStage,
      effortOutputMultiplier: policy.estimation.effortOutputMultiplier,
      assumedFanout,
      note: 'Estimate. Unpinned stages inherit the session model and save nothing. Fan-out groups assume N items each; total scales linearly with the real item count.'
    }
  };
}
|
|
98
|
+
|
|
99
|
+
/**
 * Estimate a workflow script stored on disk.
 *
 * Reads the file synchronously as UTF-8 and hands its contents to estimateText().
 *
 * @param {string} file - Path of the workflow script.
 * @param {object} policy - Policy forwarded to estimateText().
 * @param {object} [opts] - Options forwarded to estimateText().
 * @returns {object} The estimate produced by estimateText().
 */
export function estimateFile(file, policy, opts) {
  const source = readFileSync(file, 'utf8');
  return estimateText(source, policy, opts);
}
|