ultracost 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -1
- package/NOTICE +16 -3
- package/README.md +77 -12
- package/bin/cli.js +514 -117
- package/docs/ESTIMATES.md +24 -0
- package/docs/PUBLISHING.md +41 -34
- package/docs/architecture.md +19 -1
- package/docs/policy.md +25 -2
- package/package.json +1 -1
- package/src/classify.js +125 -0
- package/src/cost.js +54 -0
- package/src/detect.js +93 -0
- package/src/estimate.js +18 -0
- package/src/guard.js +244 -166
- package/src/index.js +7 -1
- package/src/lexer.js +227 -0
- package/src/log.js +20 -13
- package/src/loop.js +143 -0
- package/src/paths.js +10 -0
- package/src/policy.js +14 -0
- package/src/render.js +211 -0
- package/src/rules.js +17 -5
- package/src/transcript.js +186 -0
- package/templates/hooks/reinject.mjs +21 -18
- package/templates/hooks/workflow-gate.mjs +51 -45
- package/templates/policy.default.json +15 -2
package/docs/ESTIMATES.md
CHANGED
|
@@ -175,6 +175,30 @@ model invokes, rather than from the kernel. This is documented, not hidden.
|
|
|
175
175
|
is denied, so set `ULTRACOST_GATE=off` there. The 3-option AskUserQuestion menu needs a
|
|
176
176
|
TUI session.
|
|
177
177
|
|
|
178
|
+
## The closed loop (calibration, reconcile, ledger, budget)
|
|
179
|
+
|
|
180
|
+
The estimate above is *static* — it runs before the workflow. Phase 2 closes the loop by
|
|
181
|
+
reading the workflow's real token usage back from local transcripts (offline) and feeding it
|
|
182
|
+
forward:
|
|
183
|
+
|
|
184
|
+
- **`ultracost reconcile [--last|<wfId>]`** matches a real run's per-stage token usage
|
|
185
|
+
(`subagents/workflows/wf_*/agent-*.jsonl` + `journal.jsonl`) against the all-opus baseline,
|
|
186
|
+
using cache-aware pricing (`estimation.cacheMultipliers`, default cache-read `0.1x` / cache-write
|
|
187
|
+
`1.25x` input). Per-stage attribution is by file path + `isSidechain`/`agentId`, never `sessionId`
|
|
188
|
+
(subagent files inherit the parent session id).
|
|
189
|
+
- **`ultracost calibrate`** turns those per-stage token sizes into a prior
|
|
190
|
+
(`~/.claude/ultracost/calibration.json`), dropping outliers beyond `3x` / below `0.2x` the median.
|
|
191
|
+
`estimate`, `explain`, `simulate`, and the gate use it automatically when present, replacing the
|
|
192
|
+
flat `tokensPerStage` default with your measured numbers.
|
|
193
|
+
- **`ultracost usage`** reports real cost split across the main loop, plain subagents, and
|
|
194
|
+
dynamic-workflow stages.
|
|
195
|
+
- **`ultracost ledger`** persists per-run savings (`~/.claude/ultracost/ledger.jsonl`, idempotent
|
|
196
|
+
per workflow id) and reports the cumulative total versus all-opus.
|
|
197
|
+
- **Budget guard.** `budget.perRun` / `budget.perDay` make the `PreToolUse` gate **deny** a launch
|
|
198
|
+
whose estimate would exceed the cap (per-day reads the ledger's spend for the current day).
|
|
199
|
+
|
|
200
|
+
All of this is offline and Claude-Code-only; nothing leaves the machine.
|
|
201
|
+
|
|
178
202
|
## Validation (live, multi-domain)
|
|
179
203
|
|
|
180
204
|
Drafted by Claude under the plugin across domains; each script guard-clean (every stage
|
package/docs/PUBLISHING.md
CHANGED
|
@@ -5,9 +5,9 @@ first, then work down the distribution list.
|
|
|
5
5
|
|
|
6
6
|
> **External-site note.** Anthropic plugin/marketplace facts below were verified against
|
|
7
7
|
> the official docs (`code.claude.com/docs/en/plugins`,
|
|
8
|
-
> `code.claude.com/docs/en/plugin-marketplaces`) on **2026-06-14**.
|
|
9
|
-
> (awesome lists, auto-trackers)
|
|
10
|
-
> submission rules on each site before relying on them, since they change.
|
|
8
|
+
> `code.claude.com/docs/en/plugin-marketplaces`) on **2026-06-14**. The third-party
|
|
9
|
+
> directory mechanics (awesome lists, auto-trackers) were also checked on **2026-06-14** —
|
|
10
|
+
> confirm their current submission rules on each site before relying on them, since they change.
|
|
11
11
|
|
|
12
12
|
---
|
|
13
13
|
|
|
@@ -17,13 +17,13 @@ The GitHub handle is set to `danielkremen818` across the repo. If you fork or mo
|
|
|
17
17
|
update the handle in every file that ships:
|
|
18
18
|
|
|
19
19
|
- [x] `package.json` — `repository.url`, `bugs.url`, `homepage`.
|
|
20
|
-
- [x] `README.md` — the npm install command (`npx ultracost init`) and the npm/CI badge URLs.
|
|
20
|
+
- [x] `README.md` — the plugin install commands (`/plugin marketplace add danielkremen818/ultracost` → `/plugin install ultracost@ultracost`), the npm install command (`npx ultracost init`), and the npm/CI badge URLs.
|
|
21
21
|
- [x] `CHANGELOG.md` — the `[Unreleased]`/release compare links.
|
|
22
22
|
- [x] `.claude-plugin/plugin.json` — `homepage` and `repository`; also confirm `author` and `version`.
|
|
23
23
|
- [ ] `LICENSE` and `NOTICE` — confirm the copyright holder.
|
|
24
24
|
|
|
25
|
-
Names that must stay consistent across the plugin package and the docs (so the
|
|
26
|
-
plugin install
|
|
25
|
+
Names that must stay consistent across the plugin package and the docs (so the live
|
|
26
|
+
plugin install keeps working):
|
|
27
27
|
|
|
28
28
|
- Marketplace name: **`ultracost`** and plugin name: **`ultracost`** → the plugin resolves
|
|
29
29
|
as `ultracost@ultracost`.
|
|
@@ -67,37 +67,44 @@ Anthropic runs a public community marketplace, `anthropics/claude-plugins-commun
|
|
|
67
67
|
users add with `/plugin marketplace add anthropics/claude-plugins-community` and install
|
|
68
68
|
from as `@claude-community`. Approved plugins also surface on `claude.com/plugins`.
|
|
69
69
|
|
|
70
|
-
Submit through the in-app directory form. The
|
|
71
|
-
**`clau.de/plugin-directory-submission
|
|
72
|
-
canonical submission entry points:
|
|
70
|
+
Submit a **public GitHub link** (or a zip) through the in-app directory form. The short link
|
|
71
|
+
**`clau.de/plugin-directory-submission`** redirects to the canonical entry points:
|
|
73
72
|
|
|
74
|
-
- **claude.ai:** `claude.ai/admin-settings/directory/submissions/plugins/new` — requires a
|
|
75
|
-
Team or Enterprise org with directory-management access (org Owners have it by default).
|
|
76
73
|
- **Console:** `platform.claude.com/plugins/submit` — for individual authors not in a
|
|
77
74
|
Team/Enterprise org.
|
|
75
|
+
- **claude.ai:** `claude.ai/admin-settings/directory/submissions/plugins/new` — requires a
|
|
76
|
+
Team or Enterprise org with directory-management access (org Owners have it by default).
|
|
78
77
|
|
|
79
78
|
What to know:
|
|
80
79
|
|
|
81
|
-
- Submissions
|
|
82
|
-
|
|
83
|
-
-
|
|
84
|
-
|
|
85
|
-
|
|
80
|
+
- Submissions run `claude plugin validate` **plus an automated safety screening** — pass the
|
|
81
|
+
`validate` check locally first.
|
|
82
|
+
- On approval the plugin is **pinned to a commit SHA**, **synced nightly** (expect a delay
|
|
83
|
+
before it appears), and also shown at `claude.com/plugins`. Future pushes **auto-mirror** —
|
|
84
|
+
no re-submission needed.
|
|
86
85
|
- The separate **official** marketplace (`claude-plugins-official`) is curated by Anthropic
|
|
87
86
|
at its discretion — there's no application; the submission form does not add to it.
|
|
88
87
|
|
|
89
|
-
### 2. Your own marketplace repo
|
|
88
|
+
### 2. Your own marketplace repo (live now)
|
|
90
89
|
|
|
91
|
-
ultracost ships its own `.claude-plugin/marketplace.json`, so the repo
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
plugin distribution is published, surface the marketplace-add + install steps here and in the
|
|
95
|
-
README and launch posts.
|
|
90
|
+
ultracost ships its own `.claude-plugin/marketplace.json`, so the repo **is** a self-hosted
|
|
91
|
+
plugin marketplace — no extra hosting required. Users install straight from it inside Claude
|
|
92
|
+
Code:
|
|
96
93
|
|
|
97
|
-
|
|
94
|
+
```text
|
|
95
|
+
/plugin marketplace add danielkremen818/ultracost
|
|
96
|
+
/plugin install ultracost@ultracost
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
These are the commands the README leads with; keep them in sync across the README, this doc,
|
|
100
|
+
and launch posts.
|
|
98
101
|
|
|
99
|
-
|
|
100
|
-
|
|
102
|
+
### 3. awesome-claude-code (hesreallyhim, ~46k stars)
|
|
103
|
+
|
|
104
|
+
A large, high-traffic curated list. **Submit via the issue form only** —
|
|
105
|
+
`https://github.com/hesreallyhim/awesome-claude-code/issues/new?template=recommend-resource.yml`.
|
|
106
|
+
**Do not open a PR** (PRs are auto-closed and trigger a submission cooldown). Their bar, which
|
|
107
|
+
ultracost already meets:
|
|
101
108
|
|
|
102
109
|
- **Evidence-based claims** — lead with the audit finding (most real `ultracode` stages are
|
|
103
110
|
unpinned; even Anthropic's bundled `deep-research` workflow pins zero stages) and a short
|
|
@@ -106,17 +113,17 @@ Submit via the repo's contribution form/PR process. Their bar, which ultracost a
|
|
|
106
113
|
- **No telemetry, no network calls** — ultracost is a local static analyzer + file installer;
|
|
107
114
|
it makes no outbound requests.
|
|
108
115
|
|
|
109
|
-
### 4.
|
|
110
|
-
|
|
111
|
-
These sites index public Claude Code plugin/marketplace repos automatically; a public repo
|
|
112
|
-
with a valid `marketplace.json` is usually enough. Per the plan:
|
|
116
|
+
### 4. Third-party directories (passive + light intake)
|
|
113
117
|
|
|
114
|
-
|
|
115
|
-
- `claudecodemarketplace.com`
|
|
116
|
-
- `claudecodeplugins.dev`
|
|
118
|
+
These sites index public Claude Code plugin/marketplace repos. Intake differs per site:
|
|
117
119
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
+
- **`claudemarketplaces.com`** — **no submission form**; it auto-crawls GitHub daily for repos
|
|
121
|
+
with a valid `.claude-plugin/marketplace.json`. Quality gate: **5+ GitHub stars**. Listed
|
|
122
|
+
within ~24h of meeting the bar.
|
|
123
|
+
- **`buildwithclaude.com`** — open a PR at `buildwithclaude.com/contribute` (repo
|
|
124
|
+
`davepoon/buildwithclaude`); it also indexes GitHub on its own.
|
|
125
|
+
- **ClaudePluginHub (`claudepluginhub.com`)** — submit the repo URL for fast indexing;
|
|
126
|
+
otherwise auto-discovered via GitHub Code Search.
|
|
120
127
|
|
|
121
128
|
### 5. npm publish + GitHub release
|
|
122
129
|
|
package/docs/architecture.md
CHANGED
|
@@ -49,11 +49,29 @@ flowchart TD
|
|
|
49
49
|
class POL,RUL,GRD ft;
|
|
50
50
|
```
|
|
51
51
|
|
|
52
|
+
## Phase 2 modules (precision, visuals, closed loop)
|
|
53
|
+
|
|
54
|
+
The shared core grew three capability groups, all zero-dependency:
|
|
55
|
+
|
|
56
|
+
- **Precision** — `lexer.js` (a hand-rolled JS tokenizer) backs `guard.js`, and `classify.js`
|
|
57
|
+
scores a prompt's tier so the guard can flag wrong-tier (`UC006`), over-effort (`UC007`), and
|
|
58
|
+
off-opus `alwaysOpus` roles (`UC008`).
|
|
59
|
+
- **Visuals** — `render.js` (truecolor/256/16 with `NO_COLOR`/`FORCE_COLOR`, ANSI-aware width via
|
|
60
|
+
`util.stripVTControlCharacters` + `Intl.Segmenter`, tables/bars/sparklines/panels) backs `log.js`
|
|
61
|
+
and every command; the cost gate emits an aligned multi-line table.
|
|
62
|
+
- **Closed loop** — `transcript.js` reads local session transcripts and attributes tokens per
|
|
63
|
+
workflow stage, `cost.js` prices them (cache-aware), and `loop.js` reconciles, calibrates, and
|
|
64
|
+
keeps the savings ledger. `detect.js` tells `status`/`doctor`/`init` how ultracost is delivered
|
|
65
|
+
(plugin vs CLI vs both) so they never misreport or double-install.
|
|
66
|
+
|
|
67
|
+
The SessionStart hook (`reinject.mjs`) and the routing skill are both compiled from
|
|
68
|
+
`rules.js`, so the CLAUDE.md block, the injected context, and the skill cannot drift.
|
|
69
|
+
|
|
52
70
|
## The two surfaces
|
|
53
71
|
|
|
54
72
|
| | Plugin (primary) | npm CLI (secondary) |
|
|
55
73
|
|---|---|---|
|
|
56
|
-
| **Install** |
|
|
74
|
+
| **Install** | `/plugin marketplace add danielkremen818/ultracost` → `/plugin install ultracost@ultracost` | `npx ultracost init` |
|
|
57
75
|
| **Routing guidance** | **`SessionStart` hook** injects the policy as context (no file mutation); a skill ships alongside for explicit reference | block injected into `~/.claude/CLAUDE.md` |
|
|
58
76
|
| **Guard** | `/ultracost:check` command (runs `guard.js`) | `ultracost check` / `ultracost audit` |
|
|
59
77
|
| **Policy injection** | `hooks/hooks.json` → `node "${CLAUDE_PLUGIN_ROOT}/templates/hooks/reinject.mjs"` (all `SessionStart` sources) | `node "<config>/ultracost/reinject.mjs"`, registered in `settings.json` |
|
package/docs/policy.md
CHANGED
|
@@ -5,7 +5,7 @@ The policy lives at `~/.claude/ultracost/policy.json` after install. Edit it, th
|
|
|
5
5
|
|
|
6
6
|
```json
|
|
7
7
|
{
|
|
8
|
-
"version":
|
|
8
|
+
"version": 2,
|
|
9
9
|
"neverUse": ["haiku"],
|
|
10
10
|
"allowInherit": false,
|
|
11
11
|
"default": "opus",
|
|
@@ -30,8 +30,31 @@ The policy lives at `~/.claude/ultracost/policy.json` after install. Edit it, th
|
|
|
30
30
|
| `default` | string | Tier used by `--fix` and recommended as the fallback. Must exist in `tiers`. |
|
|
31
31
|
| `tieBreaker` | string | Tier the rules tell Claude to use "when in doubt". |
|
|
32
32
|
| `tiers` | object | Named tiers. Each has `model` (alias or full id) and optional `effort`. A tier whose `model` is in `neverUse` is rejected at load time. |
|
|
33
|
-
| `alwaysOpus` | string[] | Stage roles that must always use the default tier (orchestrator, final synthesis, …). Rendered into the rules
|
|
33
|
+
| `alwaysOpus` | string[] | Stage roles that must always use the default tier (orchestrator, final synthesis, …). Rendered into the rules **and** enforced by the guard: a stage whose prompt reads like one of these roles but pins a cheaper tier raises `UC008`. |
|
|
34
34
|
| `rules` | object[] | Human/LLM-facing routing guidance. Each has `tier`, optional `label`, and `when` (the natural-language criteria). |
|
|
35
|
+
| `classify.keywords` | object | Optional extra `opus`/`sonnet` keyword signals, merged with the built-in rubric, used by the `UC006` wrong-tier check and `ultracost explain`. The opening imperative verb of a prompt is weighted most. |
|
|
36
|
+
| `budget.perRun` | number\|null | Pre-flight cap (USD) on a single workflow launch. When the estimate exceeds it, the cost gate **denies** the launch. `null` = no cap. |
|
|
37
|
+
| `budget.perDay` | number\|null | Pre-flight cap (USD) on a day's spend; the gate sums today's recorded ledger spend plus the new estimate. `null` = no cap. |
|
|
38
|
+
| `estimation.cacheMultipliers` | object | `cacheRead` / `cacheWrite` factors applied to cached input tokens when pricing real transcript usage (`usage`/`reconcile`/`ledger`). Defaults `0.1` / `1.25`. |
|
|
39
|
+
|
|
40
|
+
## New guard codes (v2)
|
|
41
|
+
|
|
42
|
+
| Code | Severity | Meaning |
|
|
43
|
+
|------|----------|---------|
|
|
44
|
+
| `UC006` | warning | The pinned model disagrees with the work the prompt describes (e.g. a `refactor` stage on `sonnet`, or a `grep` stage on `opus`). Heuristic; only fires on a confident, literal prompt. |
|
|
45
|
+
| `UC007` | warning | The pinned `effort` exceeds the model's `effort.maxByModel` cap (e.g. `sonnet` @ `xhigh`). |
|
|
46
|
+
| `UC008` | warning | A stage that reads like an `alwaysOpus` role pins a non-default tier. |
|
|
47
|
+
|
|
48
|
+
`UC006`–`UC008` are warnings — they never change the exit code on their own (only the
|
|
49
|
+
pin-presence errors `UC001`–`UC004` do). The wrong-tier scoring is deterministic and offline.
|
|
50
|
+
|
|
51
|
+
## The closed loop
|
|
52
|
+
|
|
53
|
+
`ultracost calibrate` writes a token prior learned from your real runs to
|
|
54
|
+
`~/.claude/ultracost/calibration.json`; `estimate`, `explain`, `simulate`, and the cost gate
|
|
55
|
+
use it automatically when present. `ultracost ledger` persists per-run savings to
|
|
56
|
+
`~/.claude/ultracost/ledger.jsonl`. Both are local and offline. See
|
|
57
|
+
[ESTIMATES.md](./ESTIMATES.md) for the cost model and reconciliation details.
|
|
35
58
|
|
|
36
59
|
## Notes on effort
|
|
37
60
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ultracost",
|
|
3
|
-
"version": "0.2.1",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "Per-stage model routing for Claude Code dynamic workflows (ultracode). Quality-first policy, CLAUDE.md rule injection, and a workflow-script guard that catches subagent stages that would silently inherit Opus.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
package/src/classify.js
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import { tierModel } from './policy.js';
|
|
2
|
+
|
|
3
|
+
// Deterministic, offline keyword scorer that maps a stage's prompt to the tier the
|
|
4
|
+
// work *reads like*, so the guard can flag a pin that disagrees with the task
|
|
5
|
+
// (UC006) without an LLM. The imperative verb that opens a prompt is the strongest
|
|
6
|
+
// signal ("List ...", "Design ...", "Apply ..."), so the first matched keyword is
|
|
7
|
+
// weighted heavily and later words only break ties. Keyword lists are reused from
|
|
8
|
+
// the public model-router rubrics (smart-router / model-matchmaker / model-changer)
|
|
9
|
+
// and can be extended per policy via policy.classify.keywords.
|
|
10
|
+
|
|
11
|
+
// Built-in keyword rubric, one list per tier. Each row is a space-separated run of
// lower-case keywords; rows are flattened into a single array per tier, in order.
const DEFAULT_KEYWORDS = {
  opus: [
    'design architect architecture refactor rewrite debug review',
    'audit analyze analyse plan planning synthesize synthesise',
    'synthesis consolidate evaluate assess optimize optimise',
    'investigate diagnose reason implement security vulnerability'
  ].flatMap((row) => row.split(' ')),
  sonnet: [
    'list find search grep glob collect gather extract fetch',
    'read scan enumerate count format rename apply run execute',
    'summarize summarise copy move retrieve lookup locate file',
    'files tests'
  ].flatMap((row) => row.split(' '))
};
|
|
25
|
+
|
|
26
|
+
// Words that identify each built-in alwaysOpus role inside a prompt. 'plan' is left
// out on purpose: it is too ambiguous (e.g. "the plan glob"). Roles without an entry
// here are matched on their own long words instead.
const ROLE_SYNONYMS = {
  orchestrator: 'orchestrator orchestrate'.split(' '),
  planner: 'planner'.split(' '),
  'final-synthesis': 'synthesis synthesize synthesise'.split(' '),
  consolidation: 'consolidation consolidate'.split(' ')
};
|
|
34
|
+
|
|
35
|
+
// Lower-case the input and return its maximal runs of ASCII letters, in order.
// Falsy input (null, undefined, '', 0) yields an empty list.
const words = (s) => String(s || '').toLowerCase().match(/[a-z]+/g) ?? [];
|
|
36
|
+
|
|
37
|
+
// Build the keyword lookup set for one tier: the built-in rubric plus any extra
// keywords from policy.classify.keywords[tier], lower-cased.
//
// policy.json is hand-edited, so the configured value is normalized first: a single
// string counts as one keyword, and any other non-array value is ignored instead of
// throwing on `.map`. A tier with no built-in list contributes no defaults.
function keywordSet(tier, policy) {
  const builtin = DEFAULT_KEYWORDS[tier] ?? [];
  const configured = policy?.classify?.keywords?.[tier];
  let extra = [];
  if (Array.isArray(configured)) extra = configured;
  else if (typeof configured === 'string') extra = [configured];
  return new Set([...builtin, ...extra.map((w) => String(w).toLowerCase())]);
}
|
|
41
|
+
|
|
42
|
+
// Reduce a model alias or full id to a tier name ('opus' | 'sonnet' | 'haiku') by
// case-insensitive substring match. Anything that mentions neither 'sonnet' nor
// 'haiku' is reported as 'opus'.
export function tierOfModel(model) {
  const id = String(model).toLowerCase();
  for (const tier of ['sonnet', 'haiku']) {
    if (id.includes(tier)) return tier;
  }
  return 'opus';
}
|
|
49
|
+
|
|
50
|
+
// Score a prompt against the opus and sonnet keyword sets.
//
// Every prompt word found in a set adds to that tier's score: 3 points for the first
// matched keyword (the leading verb dominates), 1 point for each later one. A word
// present in both sets counts as opus.
//
// Returns { tier, confidence, scores, matched }:
//   tier        'opus' | 'sonnet', or null when the scores tie
//   confidence  'none' on a tie; 'high' when the top score is >= 3 and the margin
//               is >= 2; otherwise 'low'
//   scores      the per-tier totals
//   matched     the matched keywords in prompt order
export function classifyPrompt(prompt, policy = {}) {
  const lexicon = {
    opus: keywordSet('opus', policy),
    sonnet: keywordSet('sonnet', policy)
  };
  const scores = { opus: 0, sonnet: 0 };
  const matched = [];

  for (const word of words(prompt)) {
    let hit = null;
    if (lexicon.opus.has(word)) hit = 'opus';
    else if (lexicon.sonnet.has(word)) hit = 'sonnet';
    if (hit === null) continue;

    const weight = matched.length === 0 ? 3 : 1;
    scores[hit] += weight;
    matched.push(word);
  }

  const margin = Math.abs(scores.opus - scores.sonnet);
  const top = Math.max(scores.opus, scores.sonnet);

  let tier = null;
  let confidence = 'none';
  if (scores.opus !== scores.sonnet) {
    tier = scores.opus > scores.sonnet ? 'opus' : 'sonnet';
    confidence = top >= 3 && margin >= 2 ? 'high' : 'low';
  }
  return { tier, confidence, scores, matched };
}
|
|
68
|
+
|
|
69
|
+
// Return the first role in `roles` that the prompt mentions, or null when none does.
//
// A role with an entry in ROLE_SYNONYMS is matched on those synonyms; any other role
// is matched on its own words of five or more letters.
//
// The synonym lookup is an own-property check: a custom role named after an inherited
// Object member (e.g. "constructor") would otherwise resolve to a function and make
// `.some` throw. A null `roles` (alwaysOpus explicitly set to null in the policy) is
// treated as "no roles".
function matchedRole(prompt, roles = []) {
  if (roles == null) return null;
  const present = new Set(words(prompt));
  for (const role of roles) {
    const syns = Object.hasOwn(ROLE_SYNONYMS, role)
      ? ROLE_SYNONYMS[role]
      : words(role).filter((w) => w.length >= 5);
    if (syns.some((s) => present.has(s))) return role;
  }
  return null;
}
|
|
77
|
+
|
|
78
|
+
// Position of `effort` on the effort ladder: policy.effort.range when set, else
// low < medium < high < xhigh. Returns -1 when the effort is not on the ladder.
const effortRank = (effort, policy) =>
  (policy?.effort?.range || ['low', 'medium', 'high', 'xhigh']).indexOf(effort);
|
|
82
|
+
|
|
83
|
+
// Advisory (warning-level) findings for a stage whose model is a valid literal pin:
//   UC006 (CODES.WRONGTIER)   the pinned model disagrees with the work the prompt describes,
//   UC007 (CODES.OVEREFFORT)  the effort exceeds the model's cap,
//   UC008 (CODES.ALWAYSOPUS)  an alwaysOpus role is pinned to a non-default tier.
//
// Parameters:
//   { model, effort, prompt }  the stage's pinned model, optional effort, optional prompt
//   policy                     the loaded policy (default, alwaysOpus, effort.*, classify.*)
//   CODES                      the guard's code table (WRONGTIER / OVEREFFORT / ALWAYSOPUS)
//
// Returns partial finding objects ({ code, severity, message }); the caller adds
// file/line/column. Conservative by design — only fires on confident signals.
export function semanticFindings({ model, effort, prompt }, policy, CODES) {
  const out = [];
  const mtier = tierOfModel(model);
  const defaultModel = tierModel(policy.default, policy);
  const defaultTier = tierOfModel(defaultModel);

  if (prompt) {
    // UC006: only a high-confidence classification may contradict the pin.
    const c = classifyPrompt(prompt, policy);
    if (c.tier && c.confidence === 'high' && c.tier !== mtier) {
      out.push({
        code: CODES.WRONGTIER,
        severity: 'warn',
        message: `stage reads like ${c.tier} work (${c.matched.slice(0, 3).join(', ')}) but pins "${model}" — consider model: '${c.tier}'`
      });
    }

    // UC008: a prompt that names an alwaysOpus role must stay on the default tier.
    const role = matchedRole(prompt, policy.alwaysOpus);
    if (role && mtier !== defaultTier) {
      out.push({
        code: CODES.ALWAYSOPUS,
        severity: 'warn',
        message: `stage looks like the "${role}" role (policy.alwaysOpus) but pins "${model}" — these stay on ${defaultModel}`
      });
    }
  }

  if (effort) {
    const cap = policy?.effort?.maxByModel?.[mtier];
    if (cap) {
      const effortIdx = effortRank(effort, policy);
      const capIdx = effortRank(cap, policy);
      // UC007: both values must sit on the effort ladder. A cap that is not on it
      // ranks -1, which every valid effort would otherwise "exceed".
      if (effortIdx !== -1 && capIdx !== -1 && effortIdx > capIdx) {
        out.push({
          code: CODES.OVEREFFORT,
          severity: 'warn',
          message: `effort '${effort}' exceeds the '${cap}' cap for ${mtier} (policy.effort.maxByModel)`
        });
      }
    }
  }

  return out;
}
|
package/src/cost.js
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
// Turn real transcript token usage into USD, using the policy's per-model rates plus
|
|
2
|
+
// cache multipliers (cache reads bill at ~0.1x input, cache writes at ~1.25x — the
|
|
3
|
+
// pattern Claude Code's own cost math uses). Model ids are resolved by substring so
|
|
4
|
+
// both aliases (claude-opus-4-8) and dated ids (claude-sonnet-4-6-20250929) price.
|
|
5
|
+
|
|
6
|
+
// Tier names probed, in this order, as substrings of a model id.
const PRICE_KEYS = ['opus', 'sonnet', 'haiku'];

// Look up the per-MTok price entry for a model alias or id. The first PRICE_KEYS
// name contained in the lower-cased id selects the entry; an unrecognized id is
// priced as opus. Falls back to policy.pricing.opus, then to { input: 5, output: 25 }.
export function modelPrice(model, policy) {
  const id = String(model || '').toLowerCase();
  let key = 'opus';
  for (const candidate of PRICE_KEYS) {
    if (id.includes(candidate)) {
      key = candidate;
      break;
    }
  }
  const table = policy?.pricing;
  return table?.[key] || table?.opus || { input: 5, output: 25 };
}
|
|
13
|
+
|
|
14
|
+
// Cache-creation token count for one usage object. A numeric flat field
// (cache_creation_input_tokens) wins; otherwise the two nested ephemeral buckets
// under cache_creation are added together; with neither present the count is 0.
function cacheCreate(u) {
  const flat = u.cache_creation_input_tokens;
  if (typeof flat === 'number') return flat;

  const nested = u.cache_creation;
  if (!nested) return 0;
  return (nested.ephemeral_5m_input_tokens || 0) + (nested.ephemeral_1h_input_tokens || 0);
}
|
|
22
|
+
|
|
23
|
+
// Fold a list of message.usage objects into a single normalized record with the four
// flat counters (input, output, cache-creation, cache-read). Falsy entries are skipped.
export function sumUsage(list) {
  const totals = {
    input_tokens: 0,
    output_tokens: 0,
    cache_creation_input_tokens: 0,
    cache_read_input_tokens: 0
  };
  for (const entry of list) {
    if (entry) {
      totals.input_tokens += entry.input_tokens || 0;
      totals.output_tokens += entry.output_tokens || 0;
      totals.cache_creation_input_tokens += cacheCreate(entry);
      totals.cache_read_input_tokens += entry.cache_read_input_tokens || 0;
    }
  }
  return totals;
}
|
|
35
|
+
|
|
36
|
+
// Price one usage record in USD. `price` is { input, output } per million tokens.
// Cached tokens bill at the input rate scaled by policy.estimation.cacheMultipliers
// (cacheRead default 0.1, cacheWrite default 1.25). A missing usage record costs 0.
export function costFromUsage(usage, price, policy) {
  const factors = policy?.estimation?.cacheMultipliers || { cacheRead: 0.1, cacheWrite: 1.25 };
  const record = usage || {};

  const freshInput = (record.input_tokens || 0) * price.input;
  const generated = (record.output_tokens || 0) * price.output;
  const cacheReads = (record.cache_read_input_tokens || 0) * price.input * (factors.cacheRead ?? 0.1);
  const cacheWrites = (record.cache_creation_input_tokens || 0) * price.input * (factors.cacheWrite ?? 1.25);

  return (freshInput + generated + cacheReads + cacheWrites) / 1e6;
}
|
|
51
|
+
|
|
52
|
+
// Total tokens billed (for display) — every bucket counts as a token moved.
// A missing usage record (null/undefined) totals 0 instead of throwing, which
// matches how costFromUsage treats a missing record.
export const totalTokens = (u) => {
  const usage = u || {};
  return (
    (usage.input_tokens || 0) +
    (usage.output_tokens || 0) +
    (usage.cache_creation_input_tokens || 0) +
    (usage.cache_read_input_tokens || 0)
  );
};
|
package/src/detect.js
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { existsSync, readFileSync, readdirSync } from 'node:fs';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
import {
|
|
4
|
+
CLAUDE_MD, SETTINGS, SETTINGS_LOCAL, HOOK_PATH, POLICY_PATH,
|
|
5
|
+
PLUGIN_CACHE_DIR, PLUGIN_ID, MARKER_START
|
|
6
|
+
} from './paths.js';
|
|
7
|
+
|
|
8
|
+
// How ultracost is actually wired into Claude Code. The plugin ships its hooks via
|
|
9
|
+
// plugins/cache/<owner>/<name>/<version>/hooks/hooks.json (resolved with
|
|
10
|
+
// ${CLAUDE_PLUGIN_ROOT}); the legacy npm CLI writes ~/.claude/CLAUDE.md + a
|
|
11
|
+
// SessionStart hook in settings.json. status/doctor/init read this so they stop
|
|
12
|
+
// reporting the plugin as "off" and refuse to double-install.
|
|
13
|
+
|
|
14
|
+
const BYPASS_MODES = new Set(['bypassPermissions', 'dontAsk']);
|
|
15
|
+
|
|
16
|
+
// Read and parse a JSON file, with a three-way result:
//   null       the path does not exist
//   undefined  the path exists but could not be read or parsed as JSON
//   otherwise  the parsed value
function readJson(p) {
  if (existsSync(p)) {
    try {
      const raw = readFileSync(p, 'utf8');
      return JSON.parse(raw);
    } catch {
      return undefined;
    }
  }
  return null;
}
|
|
21
|
+
|
|
22
|
+
// True when the given settings object has a truthy enabledPlugins[PLUGIN_ID] entry.
const pluginEnabledIn = (s) => Boolean(s?.enabledPlugins?.[PLUGIN_ID]);
|
|
23
|
+
|
|
24
|
+
// True when any SessionStart hook group in the settings object contains a hook whose
// command string mentions 'ultracost'.
//
// The settings files are hand-edited, so malformed shapes are skipped rather than
// thrown on: a null group, a `hooks` value that is not an array, or a null hook entry.
const hookHasUltracost = (s) => {
  const groups = s?.hooks?.SessionStart;
  if (!Array.isArray(groups)) return false;
  return groups.some(
    (group) =>
      Array.isArray(group?.hooks) &&
      group.hooks.some((hook) => typeof hook?.command === 'string' && hook.command.includes('ultracost'))
  );
};
|
|
27
|
+
|
|
28
|
+
// Inspect the plugin cache directory and report the newest cached version plus which
// hooks its hooks/hooks.json declares.
//
// Returns { cacheDir, version, hooks: { sessionStart, preToolUse } }. When the cache
// directory is missing, unreadable, or has no non-hidden entries, cacheDir and
// version are null and both hook flags are false.
function pluginCache() {
  const absent = () => ({ cacheDir: null, version: null, hooks: { sessionStart: false, preToolUse: false } });
  if (!existsSync(PLUGIN_CACHE_DIR)) return absent();

  let versions;
  try {
    versions = readdirSync(PLUGIN_CACHE_DIR).filter((v) => !v.startsWith('.'));
  } catch {
    versions = [];
  }
  if (!versions.length) return absent();

  // Numeric-aware ordering: a plain lexicographic sort ranks "0.9.0" above "0.10.0",
  // which would report an outdated cache entry as the newest one.
  const version = versions
    .sort((a, b) => a.localeCompare(b, 'en', { numeric: true }))
    .at(-1);
  const cacheDir = join(PLUGIN_CACHE_DIR, version);
  const hj = readJson(join(cacheDir, 'hooks', 'hooks.json'));
  return {
    cacheDir,
    version,
    hooks: { sessionStart: !!hj?.hooks?.SessionStart, preToolUse: !!hj?.hooks?.PreToolUse }
  };
}
|
|
42
|
+
|
|
43
|
+
// Report how ultracost is wired into Claude Code.
//
// Reads settings.json and settings.local.json, the plugin cache, CLAUDE.md, and the
// CLI's hook/policy paths, then returns:
//   verdict          'both' | 'plugin' | 'cli' | 'none'
//   plugin           { enabled, enabledIn, cacheDir, version, hooks, ok }
//   cli              { rules, hook, settingsHook, policy, ok }
//   permissionMode   permissions.defaultMode (settings.local.json overrides settings.json)
//   skipDangerous    whether skipDangerousModePermissionPrompt is set
//   bypass           true for a bypass permission mode or when skipDangerous is set
//   gateEnv          env.ULTRACOST_GATE
//   settingsInvalid  true when either settings file exists but is not valid JSON
export function detectDelivery(env = process.env) {
  const settings = readJson(SETTINGS);
  const local = readJson(SETTINGS_LOCAL);

  const enabledIn = [
    ['settings.json', settings],
    ['settings.local.json', local]
  ]
    .filter(([, cfg]) => pluginEnabledIn(cfg))
    .map(([label]) => label);
  const enabled = enabledIn.length > 0;

  const cache = pluginCache();
  const { cacheDir, version, hooks } = cache;
  const plugin = {
    enabled,
    enabledIn,
    cacheDir,
    version,
    hooks,
    // Enablement alone is not enough, and neither is the cache: a stale cache left
    // behind by /plugin uninstall must not read as active.
    ok: enabled && hooks.sessionStart && hooks.preToolUse
  };

  const rules = existsSync(CLAUDE_MD) && readFileSync(CLAUDE_MD, 'utf8').includes(MARKER_START);
  const settingsHook = hookHasUltracost(settings) || hookHasUltracost(local);
  const cli = {
    rules,
    hook: existsSync(HOOK_PATH),
    settingsHook,
    policy: existsSync(POLICY_PATH),
    ok: rules && settingsHook
  };

  // Local permissions override the shared ones key by key.
  const perm = { ...(settings?.permissions || {}), ...(local?.permissions || {}) };
  const permissionMode = perm.defaultMode;
  const skipFlag =
    perm.skipDangerousModePermissionPrompt ??
    settings?.skipDangerousModePermissionPrompt ??
    local?.skipDangerousModePermissionPrompt;
  const skipDangerous = !!skipFlag;

  let verdict = 'none';
  if (plugin.ok && cli.ok) verdict = 'both';
  else if (plugin.ok) verdict = 'plugin';
  else if (cli.ok) verdict = 'cli';

  return {
    verdict,
    plugin,
    cli,
    permissionMode,
    skipDangerous,
    bypass: BYPASS_MODES.has(permissionMode) || skipDangerous,
    gateEnv: env.ULTRACOST_GATE,
    settingsInvalid: settings === undefined || local === undefined
  };
}
|
package/src/estimate.js
CHANGED
|
@@ -99,3 +99,21 @@ export function estimateText(text, policy, opts = {}) {
|
|
|
99
99
|
// Estimate a workflow script on disk: read it as UTF-8 and hand the text to estimateText.
export function estimateFile(file, policy, opts) {
  const text = readFileSync(file, 'utf8');
  return estimateText(text, policy, opts);
}
|
|
102
|
+
|
|
103
|
+
// Cost of one workflow under three routing scenarios, for `ultracost simulate`:
//   allOpus    every stage on the policy's default tier (the unguided ultracode default)
//   allSonnet  every stage on the sonnet tier (aggressive cost-first)
//   tiered     the per-stage pins as written (taken from estimateText)
// A fan-out stage is weighted by policy.estimation.assumedFanout; any other stage counts once.
// Returns { stages, allOpus, allSonnet, tiered }, where `stages` is the stage count.
export function scenarioTotals(text, policy) {
  const stages = stageList(text);
  const { assumedFanout } = policy.estimation;

  // Total for running every stage on one model/effort pair.
  const uniformTotal = (model, effort) => {
    let total = 0;
    for (const stage of stages) {
      const multiplier = stage.fanout ? assumedFanout : 1;
      total = total + stageCost(model, effort, policy) * multiplier;
    }
    return total;
  };

  const defaultTier = policy.tiers[policy.default] || { model: 'opus', effort: 'xhigh' };
  const sonnetTier = policy.tiers.sonnet || { model: 'sonnet', effort: 'high' };

  const allOpus = round(uniformTotal(defaultTier.model, defaultTier.effort));
  const allSonnet = round(uniformTotal(sonnetTier.model, sonnetTier.effort || 'high'));
  const tiered = round(estimateText(text, policy).cost.tiered);

  return { stages: stages.length, allOpus, allSonnet, tiered };
}
|