@pugi/cli 0.1.0-beta.19 → 0.1.0-beta.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/cost/rate-card.js +129 -0
- package/dist/core/cost/tracker.js +221 -0
- package/dist/core/engine/tool-bridge.js +18 -0
- package/dist/core/repl/session.js +47 -8
- package/dist/core/repl/slash-commands.js +9 -3
- package/dist/core/retry-budget/budget.js +284 -0
- package/dist/core/retry-budget/index.js +5 -0
- package/dist/runtime/cli.js +39 -0
- package/dist/runtime/commands/cost.js +199 -0
- package/dist/runtime/version.js +1 -1
- package/dist/tui/cost-table.js +111 -0
- package/package.json +2 -2
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rate card for the `pugi cost` / `/cost` / `/usage` surface — L19 sprint.
|
|
3
|
+
*
|
|
4
|
+
* Distinct from `core/repl/model-pricing.ts` on purpose:
|
|
5
|
+
*
|
|
6
|
+
* - `model-pricing.ts` powers the TUI cost meter (per-turn flash, status
|
|
7
|
+
* row USD). Its ladder is keyed against the live Anvil model slugs and
|
|
8
|
+
* intentionally inflates an honest worst-case figure via the Sonnet
|
|
9
|
+
* fallback so an operator on a quiet model never gets billed by a
|
|
10
|
+
* surprise. It rounds to USD per 1M tokens at runtime.
|
|
11
|
+
*
|
|
12
|
+
* - `rate-card.ts` (this file) powers the persisted `/cost` table the
|
|
13
|
+
* operator reads to plan budget. It distinguishes open-weight models
|
|
14
|
+
* ($0 / $0 — infra cost only) from hosted closed models so the table
|
|
15
|
+
* does not double-charge an operator running a self-hosted Qwen or
|
|
16
|
+
* Kimi behind Pugi. The L19 spec calls these out by name.
|
|
17
|
+
*
|
|
18
|
+
* Both ladders intentionally agree on Anthropic Claude family pricing so
|
|
19
|
+
* the TUI flash and the persisted table cannot disagree on a Claude turn.
|
|
20
|
+
* If they diverge, the per-model-pricing ladder wins for live UI; the
|
|
21
|
+
* rate card here wins for the persisted `.pugi/cost.json` aggregate.
|
|
22
|
+
*
|
|
23
|
+
* Prices are USD per 1,000,000 tokens, sourced from the L19 spec
|
|
24
|
+
* (2026-05-27) which mirrors provider list-price pages as of that date.
|
|
25
|
+
*/
|
|
26
|
+
/**
 * Price table for exact model-slug matches, in USD per 1,000,000 tokens.
 *
 * Each value carries an `input` and an `output` rate; rows priced at
 * $0 / $0 also carry a `note` explaining why. Only the outer table is
 * frozen — the individual rate objects are not.
 */
export const RATES_PER_MTOKEN = Object.freeze(Object.fromEntries([
    // Anthropic Claude family (hosted, billed).
    ['claude-opus-4-7', { input: 15, output: 75 }],
    ['claude-opus-4-6', { input: 15, output: 75 }],
    ['claude-sonnet-4-6', { input: 3, output: 15 }],
    ['claude-sonnet-4-5', { input: 3, output: 15 }],
    ['claude-haiku-4-5-20251001', { input: 1, output: 5 }],
    ['claude-haiku-4-5', { input: 1, output: 5 }],
    // Open-weight models are priced at zero per token; the `note` field
    // records the reason so a $0 row is not mistaken for a broken one.
    ['qwen3-coder-480b-instruct-fp8', { input: 0, output: 0, note: 'open-weight' }],
    ['kimi-k2.6', { input: 0, output: 0, note: 'open-weight' }],
    ['deepseek-v4-pro', { input: 0, output: 0, note: 'open-weight' }],
]));
|
|
46
|
+
/**
 * Family-prefix fallback table, consulted only when the exact slug is not
 * in `RATES_PER_MTOKEN`. Entries are `[prefix, rate]` pairs and are tested
 * in order with `startsWith`, so the first matching prefix wins.
 */
const FAMILY_FALLBACKS = [
    ['claude-opus-', { input: 15, output: 75 }],
    ['claude-sonnet-', { input: 3, output: 15 }],
    ['claude-haiku-', { input: 1, output: 5 }],
    ['qwen', { input: 0, output: 0, note: 'open-weight' }],
    ['kimi', { input: 0, output: 0, note: 'open-weight' }],
    ['deepseek', { input: 0, output: 0, note: 'open-weight' }],
];
/**
 * Rate used when a slug matches neither the exact table nor any family
 * prefix. Deliberately non-zero so an unrecognised model is not silently
 * reported as free.
 */
const DEFAULT_RATE = { input: 3, output: 15, note: 'unknown model — Sonnet-tier estimate' };
/**
 * Look up the rate for a model slug.
 *
 * Resolution order:
 *   1. Exact (own-property) match in `RATES_PER_MTOKEN`.
 *   2. First matching prefix in `FAMILY_FALLBACKS`.
 *   3. `DEFAULT_RATE`.
 *
 * Never throws; non-string and empty slugs resolve to `DEFAULT_RATE`.
 *
 * @param {unknown} model - Model slug to price.
 * @returns {{input: number, output: number, note?: string}} USD per
 *   1,000,000 tokens for input and output.
 */
export function rateFor(model) {
    if (typeof model !== 'string' || model.length === 0)
        return DEFAULT_RATE;
    // Own-property check on purpose: a bare `RATES_PER_MTOKEN[model]` read
    // also returns members inherited from Object.prototype, so a slug such
    // as "constructor" or "toString" would come back as a function instead
    // of a rate and the caller's arithmetic would collapse to NaN.
    if (Object.hasOwn(RATES_PER_MTOKEN, model))
        return RATES_PER_MTOKEN[model];
    const family = FAMILY_FALLBACKS.find(([prefix]) => model.startsWith(prefix));
    return family ? family[1] : DEFAULT_RATE;
}
|
|
89
|
+
/**
 * Price one (model, inputTokens, outputTokens) triple in USD.
 *
 * Token counts that are not finite positive numbers are treated as 0, and
 * a result that is not a finite positive number is reported as 0, so the
 * function never returns a negative or NaN cost.
 *
 * @param {string} model - Model slug, resolved through `rateFor`.
 * @param {number} inputTokens - Input token count.
 * @param {number} outputTokens - Output token count.
 * @returns {number} Cost in USD, always >= 0.
 */
export function estimateUsd(model, inputTokens, outputTokens) {
    const { input: inputRate, output: outputRate } = rateFor(model);
    // Anything that is not a finite positive number collapses to zero.
    const positiveOrZero = (n) => (Number.isFinite(n) && n > 0 ? n : 0);
    const billedIn = positiveOrZero(inputTokens);
    const billedOut = positiveOrZero(outputTokens);
    // Rates are quoted per 1,000,000 tokens.
    return positiveOrZero((billedIn * inputRate + billedOut * outputRate) / 1_000_000);
}
|
|
101
|
+
/**
 * Render a USD amount for the `/cost` table.
 *
 *   - `>= 0.01`            -> two decimals (`$0.46`).
 *   - `> 0` and `< 0.01`   -> three decimals (`$0.003`); amounts that
 *                             round below a tenth of a cent print `$0.000`.
 *   - `0`, negative, NaN, non-number -> `$0.00`.
 *
 * NOTE(review): the original comment says this intentionally mirrors
 * `formatCostUsd` in `model-pricing.ts`; that file is not visible here.
 *
 * @param {number} value - Amount in USD.
 * @returns {string} The formatted amount, prefixed with `$`.
 */
export function formatUsd(value) {
    const billable = Number.isFinite(value) && value > 0;
    if (!billable)
        return '$0.00';
    // Sub-cent amounts get one extra decimal place.
    const decimals = value >= 0.01 ? 2 : 3;
    return `$${value.toFixed(decimals)}`;
}
|
|
119
|
+
/**
 * Render a token count for the `/cost` table with comma thousands
 * separators (`14,300`), rounding fractional counts down.
 *
 * @param {number} value - Token count.
 * @returns {string} The grouped integer, or `'0'` for zero, negative,
 *   non-finite and non-number input.
 */
export function formatTokensWithCommas(value) {
    const countable = Number.isFinite(value) && value > 0;
    return countable ? Math.floor(value).toLocaleString('en-US') : '0';
}
|
|
129
|
+
//# sourceMappingURL=rate-card.js.map
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Persisted per-session cost tracker — L19 sprint (2026-05-27).
|
|
3
|
+
*
|
|
4
|
+
* Mission: every Anvil-mediated LLM call goes through `recordCall`, which
|
|
5
|
+
* aggregates per-model token + USD totals and atomically persists them
|
|
6
|
+
* to `.pugi/cost.json` so the operator can read `/cost` across REPL
|
|
7
|
+
* restarts and reconcile a 14-min session that crossed a process boundary.
|
|
8
|
+
*
|
|
9
|
+
* Why a fresh module instead of bolting onto `core/repl/session.ts`?
|
|
10
|
+
*
|
|
11
|
+
* - `session.ts` accumulates in-memory state for the live TUI status
|
|
12
|
+
* row, which is by-design ephemeral and cleared on REPL boot. The
|
|
13
|
+
* operator's "what did I spend across the project?" question needs
|
|
14
|
+
* a durable surface that survives a process restart.
|
|
15
|
+
* - L19 also has to read `--all-sessions` (last 30 days). The natural
|
|
16
|
+
* store for that is a per-workspace history of session aggregates,
|
|
17
|
+
* which is easy with the JSON file pattern below and would be
|
|
18
|
+
* awkward stitched into the REPL reducer.
|
|
19
|
+
*
|
|
20
|
+
* On-disk shape (single JSON file, atomic tmp+rename writes):
|
|
21
|
+
*
|
|
22
|
+
* {
|
|
23
|
+
* "version": 1,
|
|
24
|
+
* "current": { sessionId, startedAt, models: { <slug>: ModelEntry } },
|
|
25
|
+
* "history": [
|
|
26
|
+
* { sessionId, startedAt, endedAt, models: { ... } }
|
|
27
|
+
* ]
|
|
28
|
+
* }
|
|
29
|
+
*
|
|
30
|
+
* History rotation: when `recordCall` is invoked with a sessionId
|
|
31
|
+
* different from `current.sessionId`, the existing `current` row is
|
|
32
|
+
* stamped with `endedAt = now()` and pushed onto `history`, then a new
|
|
33
|
+
* `current` row is initialised. History is capped at 90 entries (the L19
|
|
34
|
+
* `--all-sessions` window is 30 days; 90 gives a generous buffer for
|
|
35
|
+
* operators on >1 session/day cadence without unbounded growth).
|
|
36
|
+
*
|
|
37
|
+
* The tracker is workspace-scoped — every workspace has its own
|
|
38
|
+
* `.pugi/cost.json`. This matches the existing `.pugi/events.jsonl` /
|
|
39
|
+
* `.pugi/index.json` pattern and means a multi-repo operator's costs
|
|
40
|
+
* are billed against the repo they were incurred in.
|
|
41
|
+
*/
|
|
42
|
+
import { existsSync, mkdirSync, readFileSync, renameSync, writeFileSync, unlinkSync } from 'node:fs';
|
|
43
|
+
import { dirname, resolve } from 'node:path';
|
|
44
|
+
import { estimateUsd } from './rate-card.js';
|
|
45
|
+
/** Schema version stamped into `.pugi/cost.json`; raise it whenever the persisted shape changes. */
export const COST_FILE_SCHEMA_VERSION = 1;
/** Upper bound on the number of rotated session rows kept in the `history` array of `.pugi/cost.json`. */
export const COST_HISTORY_CAP = 90;
|
|
49
|
+
/**
 * Build a cost tracker backed by `<workspaceRoot>/.pugi/cost.json`.
 *
 * The file is read once, when the tracker is created. Afterwards the
 * tracker mutates its in-memory copy and rewrites the whole file (tmp
 * file + rename) on every `recordCall`, `resetCurrent` and `flush`.
 *
 * @param {object} opts
 * @param {string} opts.workspaceRoot - Directory that owns `.pugi/cost.json`.
 * @param {() => (string|null|undefined)} opts.sessionIdProvider - Returns
 *   the id of the live session; a falsy id turns `recordCall` into a no-op.
 * @param {() => number} [opts.now] - Clock returning epoch milliseconds;
 *   defaults to `Date.now`.
 * @returns {object} Tracker with `recordCall`, `current`, `history`,
 *   `aggregateWithin`, `resetCurrent` and `flush`.
 */
export function createCostTracker(opts) {
    const filePath = resolve(opts.workspaceRoot, '.pugi/cost.json');
    const now = opts.now ?? Date.now;
    const state = readOrInit(filePath);
    /**
     * Add token and call counts to `models[slug]`, creating the entry on
     * first use. Both the read and the write are own-property operations:
     * a plain `models[slug]` read would pick up members inherited from
     * Object.prototype (slug "constructor", "toString", ...) and turn the
     * totals into NaN, and a plain assignment to "__proto__" would replace
     * the prototype instead of storing an entry.
     */
    function addUsage(models, slug, inputTokens, outputTokens, calls) {
        const own = Object.hasOwn(models, slug) ? models[slug] : undefined;
        const prior = own ?? { input: 0, output: 0, callCount: 0 };
        Object.defineProperty(models, slug, {
            value: {
                input: prior.input + inputTokens,
                output: prior.output + outputTokens,
                callCount: prior.callCount + calls,
            },
            writable: true,
            enumerable: true,
            configurable: true,
        });
    }
    /**
     * Return the `current` row for `sessionId`, rotating first when the
     * stored row belongs to a different session: the old row is stamped
     * with `endedAt` and pushed to the front of `history`, which is then
     * trimmed to `COST_HISTORY_CAP` entries (oldest dropped).
     */
    function ensureCurrent(sessionId) {
        if (state.current && state.current.sessionId === sessionId) {
            return state.current;
        }
        if (state.current) {
            const ended = {
                ...state.current,
                endedAt: new Date(now()).toISOString(),
            };
            state.history = [ended, ...state.history].slice(0, COST_HISTORY_CAP);
        }
        state.current = {
            sessionId,
            startedAt: new Date(now()).toISOString(),
            models: {},
        };
        return state.current;
    }
    /**
     * Write the whole state to disk: serialise to `<file>.tmp`, then rename
     * over the real file. On failure the tmp file is removed (best effort)
     * and the original error is rethrown.
     */
    function persist() {
        try {
            mkdirSync(dirname(filePath), { recursive: true });
        }
        catch {
            // best-effort directory create; the write below surfaces the real
            // error if the parent is genuinely unwritable
        }
        const tmp = `${filePath}.tmp`;
        try {
            writeFileSync(tmp, JSON.stringify(state, null, 2), 'utf8');
            renameSync(tmp, filePath);
        }
        catch (error) {
            try {
                unlinkSync(tmp);
            }
            catch {
                // the tmp file was never created or is already gone
            }
            throw error;
        }
    }
    return {
        /**
         * Add one call to the current session's per-model totals and
         * persist. Token counts that are not finite positive numbers count
         * as 0; a missing or empty model slug is filed under `'unknown'`.
         */
        recordCall(input) {
            const sessionId = opts.sessionIdProvider();
            if (!sessionId)
                return;
            const current = ensureCurrent(sessionId);
            const slug = typeof input.model === 'string' && input.model.length > 0 ? input.model : 'unknown';
            const safeIn = Number.isFinite(input.inputTokens) && input.inputTokens > 0 ? input.inputTokens : 0;
            const safeOut = Number.isFinite(input.outputTokens) && input.outputTokens > 0 ? input.outputTokens : 0;
            addUsage(current.models, slug, safeIn, safeOut, 1);
            persist();
        },
        /** The live session row as loaded or last mutated, or `null`. */
        current() {
            return state.current;
        },
        /** Rotated session rows, newest first. */
        history() {
            return state.history;
        },
        /**
         * Sum per-model totals over the `current` row (always included,
         * whatever its start time) and every history row whose `startedAt`
         * parses to a time at or after `now - withinDays`.
         *
         * @param {number} withinDays - Window size in days.
         * @returns {object} Synthetic row with `sessionId: 'aggregate'`.
         * @throws {RangeError} When `withinDays` does not yield a valid
         *   cutoff date (e.g. NaN), because `toISOString` rejects it.
         */
        aggregateWithin(withinDays) {
            const cutoffMs = now() - withinDays * 24 * 60 * 60 * 1000;
            const aggregate = {
                sessionId: 'aggregate',
                startedAt: new Date(cutoffMs).toISOString(),
                endedAt: new Date(now()).toISOString(),
                models: {},
            };
            const rows = [];
            if (state.current)
                rows.push(state.current);
            for (const row of state.history) {
                const stamp = Date.parse(row.startedAt);
                if (Number.isFinite(stamp) && stamp >= cutoffMs)
                    rows.push(row);
            }
            for (const row of rows) {
                for (const [slug, entry] of Object.entries(row.models)) {
                    addUsage(aggregate.models, slug, entry.input, entry.output, entry.callCount);
                }
            }
            return aggregate;
        },
        /**
         * Drop the `current` row without touching `history`, persist, and
         * return the row that was dropped (or `null`).
         */
        resetCurrent() {
            const wiped = state.current;
            state.current = null;
            persist();
            return wiped;
        },
        /** Write the in-memory state to disk as-is. */
        flush() {
            persist();
        },
    };
}
|
|
148
|
+
/**
 * Sum the USD cost of every model entry in an aggregate row. Pricing is
 * delegated to `estimateUsd`, called once per model slug with that
 * entry's input and output token totals.
 *
 * @param {{models: Object<string, {input: number, output: number}>}} aggregate
 * @returns {number} Total in USD; 0 when there are no model entries.
 */
export function totalUsd(aggregate) {
    return Object.entries(aggregate.models).reduce(
        (sum, [slug, usage]) => sum + estimateUsd(slug, usage.input, usage.output),
        0,
    );
}
|
|
160
|
+
/**
 * Add up input and output token counts across every model entry of an
 * aggregate row.
 *
 * @param {{models: Object<string, {input: number, output: number}>}} aggregate
 * @returns {{input: number, output: number}} Summed token counts.
 */
export function totalTokens(aggregate) {
    const sums = { input: 0, output: 0 };
    Object.values(aggregate.models).forEach((usage) => {
        sums.input += usage.input;
        sums.output += usage.output;
    });
    return sums;
}
|
|
173
|
+
/**
 * Load the persisted cost state from `filePath`.
 *
 * Returns a fresh empty state when the file is missing, unreadable, not
 * valid JSON, or not a JSON object — this function never throws. Within a
 * valid object, a non-numeric `version` falls back to the current schema
 * version, a `current` that fails `isAggregate` becomes `null`, and
 * `history` keeps only the rows that pass `isAggregate`.
 *
 * @param {string} filePath - Absolute path of the cost file.
 * @returns {{version: number, current: (object|null), history: object[]}}
 */
function readOrInit(filePath) {
    const blank = () => ({ version: COST_FILE_SCHEMA_VERSION, current: null, history: [] });
    if (!existsSync(filePath)) {
        return blank();
    }
    try {
        const parsed = JSON.parse(readFileSync(filePath, 'utf8'));
        const isRecord = Boolean(parsed) && typeof parsed === 'object' && !Array.isArray(parsed);
        if (!isRecord) {
            return blank();
        }
        const { version, current, history } = parsed;
        return {
            version: typeof version === 'number' ? version : COST_FILE_SCHEMA_VERSION,
            current: isAggregate(current) ? current : null,
            history: Array.isArray(history) ? history.filter(isAggregate) : [],
        };
    }
    catch {
        // Unreadable or corrupt file: start over rather than fail the caller.
        return blank();
    }
}
|
|
200
|
+
/**
 * Shape guard for a session row read back from `.pugi/cost.json`.
 *
 * A row is accepted only when it is a plain (non-array) object with string
 * `sessionId` and `startedAt` fields and a plain-object `models` map whose
 * every entry carries finite, non-negative numeric `input`, `output` and
 * `callCount`. Checking the entries keeps a hand-edited or corrupted file
 * from feeding strings or nulls into the token and USD sums.
 *
 * @param {unknown} v - Candidate row.
 * @returns {boolean} True when `v` is safe to aggregate.
 */
function isAggregate(v) {
    const isPlainRecord = (x) => Boolean(x) && typeof x === 'object' && !Array.isArray(x);
    const isCount = (n) => typeof n === 'number' && Number.isFinite(n) && n >= 0;
    const isModelEntry = (entry) => isPlainRecord(entry)
        && isCount(entry.input)
        && isCount(entry.output)
        && isCount(entry.callCount);
    if (!isPlainRecord(v))
        return false;
    if (typeof v.sessionId !== 'string' || typeof v.startedAt !== 'string')
        return false;
    if (!isPlainRecord(v.models))
        return false;
    return Object.values(v.models).every(isModelEntry);
}
|
|
210
|
+
/**
 * Test helper: delete `<workspaceRoot>/.pugi/cost.json` if it exists.
 * Unlike the tracker's `resetCurrent()`, this removes the whole file,
 * history included. Does nothing when the file is absent.
 *
 * @param {string} workspaceRoot - Directory that owns `.pugi/cost.json`.
 * @returns {void}
 */
export function _danger_wipeCostFile_forTests(workspaceRoot) {
    const target = resolve(workspaceRoot, '.pugi/cost.json');
    if (!existsSync(target))
        return;
    unlinkSync(target);
}
|
|
221
|
+
//# sourceMappingURL=tracker.js.map
|
|
@@ -12,6 +12,7 @@ import { buildMcpToolDefs, defaultNonInteractiveMcpPrompt, dispatchMcpTool, MCP_
|
|
|
12
12
|
import { buildDenialContext, DENIAL_REMINDER_THRESHOLD, } from '../denial-tracking/state.js';
|
|
13
13
|
import { stripInternalFields } from './strip-internal-fields.js';
|
|
14
14
|
import { applyAskAnswer, gate as permissionGate, getToolClass, PermissionDenied, } from '../permissions/index.js';
|
|
15
|
+
import { RetryBudget, RetryBudgetExhausted, hashArgs } from '../retry-budget/index.js';
|
|
15
16
|
/**
|
|
16
17
|
* Tool-bridge: turns the abstract tool registry into:
|
|
17
18
|
* 1. An OpenAI-shaped tools schema for `EngineLoopClient.send`.
|
|
@@ -492,6 +493,9 @@ function requireString(obj, key) {
|
|
|
492
493
|
}
|
|
493
494
|
export function buildExecutor(input) {
|
|
494
495
|
const { kind, ctx, hooks, sessionId, askUserBridge, interactive, allowFetch, allowSearch, agentDispatch, mcpRegistry, permissionMode, permissionAlwaysCache, permissionAsk, } = input;
|
|
496
|
+
// Leak L31: per-cycle budget. Default to a fresh instance scoped to
|
|
497
|
+
// this executor's closure lifetime; tests pass their own.
|
|
498
|
+
const retryBudget = input.retryBudget ?? new RetryBudget();
|
|
495
499
|
const mcpPrompt = input.mcpPrompt ?? defaultNonInteractiveMcpPrompt;
|
|
496
500
|
const workspaceRoot = input.workspaceRoot ?? ctx.root;
|
|
497
501
|
const planMode = kind === 'plan';
|
|
@@ -608,6 +612,20 @@ export function buildExecutor(input) {
|
|
|
608
612
|
if (ctx.cancellation && ctx.cancellation.isAborted) {
|
|
609
613
|
throw recordDenial(name, argsForTracking, `OPERATOR_ABORTED: ${name} refused — operator cancelled the dispatch.`);
|
|
610
614
|
}
|
|
615
|
+
// Leak L31 — per-cycle tool retry budget. Same tool + same canonical
|
|
616
|
+
// args = same bucket. Once the cap is hit we throw a typed sentinel
|
|
617
|
+
// so the model is forced out of a repair loop. We gate AFTER
|
|
618
|
+
// permission (denied calls do not burn budget) and BEFORE PreToolUse
|
|
619
|
+
// hooks (hook-blocked retries DO count — the model still issued the
|
|
620
|
+
// same call). The `recordAttempt` fires unconditionally so warn-only
|
|
621
|
+
// mode (PUGI_RETRY_BUDGET_DISABLED=1) still tracks the pattern for
|
|
622
|
+
// diagnostics.
|
|
623
|
+
const argHash = hashArgs(argsRaw);
|
|
624
|
+
const budgetDecision = retryBudget.shouldAllow(name, argHash);
|
|
625
|
+
retryBudget.recordAttempt(name, argHash);
|
|
626
|
+
if (!budgetDecision.allowed) {
|
|
627
|
+
throw new RetryBudgetExhausted(name, budgetDecision.cap, argHash);
|
|
628
|
+
}
|
|
611
629
|
// Fire PreToolUse hooks. The match grammar takes the tool name and
|
|
612
630
|
// (when extractable) the target path. Each new tool dispatch starts a
|
|
613
631
|
// fresh dedup batch so a hook fires once per dispatch, not once per
|
|
@@ -633,7 +633,7 @@ export class ReplSession {
|
|
|
633
633
|
return verdict;
|
|
634
634
|
}
|
|
635
635
|
case 'cost': {
|
|
636
|
-
this.dispatchCost();
|
|
636
|
+
await this.dispatchCost();
|
|
637
637
|
return verdict;
|
|
638
638
|
}
|
|
639
639
|
case 'quota': {
|
|
@@ -1166,13 +1166,21 @@ export class ReplSession {
|
|
|
1166
1166
|
this.appendSystemLine(`/diff failed: ${this.errorMessage(error)}`);
|
|
1167
1167
|
}
|
|
1168
1168
|
}
|
|
1169
|
-
dispatchCost() {
|
|
1169
|
+
async dispatchCost() {
|
|
1170
1170
|
// α7 cost-meter sprint — full breakdown matching the TUI status row
|
|
1171
1171
|
// footer. The session totals line mirrors the footer format
|
|
1172
1172
|
// (`↑ <in> ↓ <out> · $X.XX · <elapsed>`) so the operator scans the
|
|
1173
1173
|
// same numbers in two places. Per-turn list shows the last 5 turns
|
|
1174
1174
|
// oldest → newest; an empty list renders one system line so the
|
|
1175
1175
|
// operator knows the surface is wired (`No completed turns yet.`).
|
|
1176
|
+
//
|
|
1177
|
+
// L19 (2026-05-27) — after the in-memory recap, also render the
|
|
1178
|
+
// persisted per-model table from `.pugi/cost.json`. That surface
|
|
1179
|
+
// survives a REPL restart and answers the "what did I spend on
|
|
1180
|
+
// claude-opus vs qwen this week?" question the in-memory recap can
|
|
1181
|
+
// not. Errors loading the file collapse to a single warning line so
|
|
1182
|
+
// the in-memory recap (the older, well-tested surface) is never
|
|
1183
|
+
// gated behind a fresh dependency.
|
|
1176
1184
|
const { sessionTokensIn, sessionTokensOut, sessionCostUsd, sessionStartedAtEpochMs, recentTurns, agents, } = this.state;
|
|
1177
1185
|
const active = agents.filter((a) => a.status === 'queued' || a.status === 'thinking').length;
|
|
1178
1186
|
const elapsedMs = Math.max(0, this.now() - sessionStartedAtEpochMs);
|
|
@@ -1181,13 +1189,44 @@ export class ReplSession {
|
|
|
1181
1189
|
this.appendSystemLine(`Active dispatches: ${active} of cap.`);
|
|
1182
1190
|
if (recentTurns.length === 0) {
|
|
1183
1191
|
this.appendSystemLine('No completed turns yet — brief the workforce to charge the meter.');
|
|
1184
|
-
return;
|
|
1185
1192
|
}
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1193
|
+
else {
|
|
1194
|
+
this.appendSystemLine(`Recent turns (last ${recentTurns.length}):`);
|
|
1195
|
+
for (let i = 0; i < recentTurns.length; i += 1) {
|
|
1196
|
+
const turn = recentTurns[i];
|
|
1197
|
+
const idx = (i + 1).toString().padStart(2, ' ');
|
|
1198
|
+
this.appendSystemLine(` ${idx}. ↑ ${formatTokens(turn.tokensIn)} ↓ ${formatTokens(turn.tokensOut)} · ${formatCostUsd(turn.costUsd)}`);
|
|
1199
|
+
}
|
|
1200
|
+
}
|
|
1201
|
+
// L19: append the persisted per-model table from .pugi/cost.json.
|
|
1202
|
+
try {
|
|
1203
|
+
const [{ createCostTracker }, { renderCostForSlash }] = await Promise.all([
|
|
1204
|
+
import('../cost/tracker.js'),
|
|
1205
|
+
import('../../runtime/commands/cost.js'),
|
|
1206
|
+
]);
|
|
1207
|
+
const workspaceRoot = this.options.workspace?.workspaceCwd ?? process.cwd();
|
|
1208
|
+
const sessionId = this.state.sessionId ?? 'no-session';
|
|
1209
|
+
const tracker = createCostTracker({
|
|
1210
|
+
workspaceRoot,
|
|
1211
|
+
sessionIdProvider: () => sessionId,
|
|
1212
|
+
now: () => this.now(),
|
|
1213
|
+
});
|
|
1214
|
+
const current = tracker.current();
|
|
1215
|
+
if (current && Object.keys(current.models).length > 0) {
|
|
1216
|
+
this.appendSystemLine('');
|
|
1217
|
+
const { lines } = renderCostForSlash({
|
|
1218
|
+
tracker,
|
|
1219
|
+
allSessions: false,
|
|
1220
|
+
windowDays: 30,
|
|
1221
|
+
now: () => this.now(),
|
|
1222
|
+
});
|
|
1223
|
+
for (const line of lines)
|
|
1224
|
+
this.appendSystemLine(line);
|
|
1225
|
+
}
|
|
1226
|
+
}
|
|
1227
|
+
catch {
|
|
1228
|
+
// best-effort — the persisted view is additive; failure never
|
|
1229
|
+
// breaks the in-memory recap above
|
|
1191
1230
|
}
|
|
1192
1231
|
}
|
|
1193
1232
|
/**
|
|
@@ -223,11 +223,17 @@ export function parseSlashCommand(input) {
|
|
|
223
223
|
case 'diff': {
|
|
224
224
|
return { kind: 'diff' };
|
|
225
225
|
}
|
|
226
|
-
case 'cost':
|
|
226
|
+
case 'cost':
|
|
227
|
+
case 'usage': {
|
|
228
|
+
// L19 (2026-05-27): `/usage` is an alias of `/cost` per the cost-
|
|
229
|
+
// command spec. The previous mapping routed `/usage` to the
|
|
230
|
+
// network-backed `/quota` surface, but operators trained on Claude
|
|
231
|
+
// Code expect `/usage` to surface the per-model token breakdown
|
|
232
|
+
// (same shape as `/cost`). `/quota` remains the canonical name
|
|
233
|
+
// for the tier + monthly-cap fetch.
|
|
227
234
|
return { kind: 'cost' };
|
|
228
235
|
}
|
|
229
|
-
case 'quota':
|
|
230
|
-
case 'usage': {
|
|
236
|
+
case 'quota': {
|
|
231
237
|
return { kind: 'quota' };
|
|
232
238
|
}
|
|
233
239
|
case 'status': {
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Leak L31 — Per-command tool retry budget (Claude Code parity).
|
|
3
|
+
*
|
|
4
|
+
* Claude Code limits the number of times the model may retry the SAME
|
|
5
|
+
* tool with the SAME arguments inside a single operator-input cycle.
|
|
6
|
+
* Once the cap is hit, the dispatcher hard-refuses and surfaces a
|
|
7
|
+
* sentinel string telling the model that this exact call has exhausted
|
|
8
|
+
* its retry budget. The model is expected (via system-prompt rule) to
|
|
9
|
+
* either change approach or ask the operator for guidance instead of
|
|
10
|
+
* looping forever on a transient failure.
|
|
11
|
+
*
|
|
12
|
+
* Why per-cycle, not per-session: a retry budget that persists across
|
|
13
|
+
* operator turns would surprise the operator. After the operator says
|
|
14
|
+
* "try again" the model rightly retries; the budget must reset when a
|
|
15
|
+
* fresh brief arrives. The simplest reset boundary is the executor
|
|
16
|
+
* lifetime — `buildExecutor` is called once per `runEngineLoop` and
|
|
17
|
+
* the loop drives exactly one operator-input cycle. Constructing the
|
|
18
|
+
* budget inside `buildExecutor` therefore gives us per-cycle scoping
|
|
19
|
+
* "for free" via closure lifetime; no external clear() call is needed
|
|
20
|
+
* from production callsites. The exported `clear()` exists so tests
|
|
21
|
+
* and a future hook surface (PreToolUse) can introspect the state.
|
|
22
|
+
*
|
|
23
|
+
* Hash design: same tool + same canonical args = same bucket. We
|
|
24
|
+
* canonicalise the args record by sorting object keys (stable across
|
|
25
|
+
* model output ordering) and then sha256 the JSON. The model emits
|
|
26
|
+
* `arguments` as a raw JSON string; we parse, canonicalise, hash. If
|
|
27
|
+
* parse fails we hash the raw string verbatim — that way an
|
|
28
|
+
* unparseable repeat still counts toward the cap (otherwise the model
|
|
29
|
+
* could loop on syntactic noise variants forever).
|
|
30
|
+
*
|
|
31
|
+
* Env overrides:
|
|
32
|
+
* PUGI_RETRY_BUDGET_<TOOLNAME>=<N> — override a single tool's cap.
|
|
33
|
+
* Toolname matches DEFAULT_CAPS
|
|
34
|
+
* keys verbatim, uppercased
|
|
35
|
+
* (PUGI_RETRY_BUDGET_BASH=8).
|
|
36
|
+
* PUGI_RETRY_BUDGET_DEFAULT=<N> — override the fallback cap for
|
|
37
|
+
* any tool not in DEFAULT_CAPS.
|
|
38
|
+
* PUGI_RETRY_BUDGET_DISABLED=1 — warn-only mode. `shouldAllow`
|
|
39
|
+
* still records but always
|
|
40
|
+
* returns `allowed: true`. The
|
|
41
|
+
* count is preserved so
|
|
42
|
+
* diagnostics can still surface
|
|
43
|
+
* the pattern.
|
|
44
|
+
*/
|
|
45
|
+
import { createHash } from 'node:crypto';
|
|
46
|
+
/**
 * Built-in retry caps, keyed by tool name. The `default` key is the cap
 * applied to any tool that has no row of its own.
 *
 *   bash 5 · edit 3 · write 3 · read 10 · search/grep/glob 10 ·
 *   web_fetch 5 · default 5
 *
 * The table is frozen. Per the module notes, operators can override a
 * single tool through a `PUGI_RETRY_BUDGET_<NAME>` environment variable,
 * and resolved caps are bounded to `[1, 1000]`.
 */
export const DEFAULT_CAPS = Object.freeze(Object.fromEntries([
    ['bash', 5],
    ['edit', 3],
    ['write', 3],
    ['read', 10],
    ['search', 10],
    ['grep', 10],
    ['glob', 10],
    ['web_fetch', 5],
    ['default', 5],
]));
|
|
72
|
+
/**
 * Inclusive bounds applied to every resolved retry cap.
 *   - The floor stops an override of 0 from refusing a tool's first call.
 *   - The ceiling stops a very large override from acting as "no limit".
 */
export const MIN_CAP = 1;
export const MAX_CAP = 1000;
|
|
79
|
+
/**
|
|
80
|
+
* Per-cycle retry budget. One instance per `buildExecutor` call.
|
|
81
|
+
*
|
|
82
|
+
* Not thread-safe: the executor is single-threaded by construction
|
|
83
|
+
* (Node event loop + sequential await in dispatcher). If a future
|
|
84
|
+
* executor parallelises tool dispatch it must serialise the budget
|
|
85
|
+
* mutation explicitly.
|
|
86
|
+
*/
|
|
87
|
+
export class RetryBudget {
|
|
88
|
+
counts = new Map();
|
|
89
|
+
capCache = new Map();
|
|
90
|
+
env;
|
|
91
|
+
programmaticCaps;
|
|
92
|
+
constructor(options = {}) {
|
|
93
|
+
this.env = options.env ?? process.env;
|
|
94
|
+
this.programmaticCaps = options.caps ?? {};
|
|
95
|
+
}
|
|
96
|
+
/**
|
|
97
|
+
* Returns true when PUGI_RETRY_BUDGET_DISABLED=1. In disabled mode
|
|
98
|
+
* `shouldAllow` still records attempts but always allows the
|
|
99
|
+
* dispatch — useful for operators triaging a false-positive without
|
|
100
|
+
* a code change.
|
|
101
|
+
*/
|
|
102
|
+
isDisabled() {
|
|
103
|
+
return this.env.PUGI_RETRY_BUDGET_DISABLED === '1';
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* Record one dispatch attempt. Idempotent on the bucket key (tool
|
|
107
|
+
* + argHash). Call this BEFORE the dispatch (or after `shouldAllow`
|
|
108
|
+
* but before `dispatch()` resolves) so a thrown dispatch counts.
|
|
109
|
+
*/
|
|
110
|
+
recordAttempt(toolName, argHash) {
|
|
111
|
+
const key = `${toolName}::${argHash}`;
|
|
112
|
+
const next = (this.counts.get(key) ?? 0) + 1;
|
|
113
|
+
this.counts.set(key, next);
|
|
114
|
+
return next;
|
|
115
|
+
}
|
|
116
|
+
/**
|
|
117
|
+
* Returns the current count for (tool, argHash) WITHOUT mutating.
|
|
118
|
+
*/
|
|
119
|
+
peek(toolName, argHash) {
|
|
120
|
+
return this.counts.get(`${toolName}::${argHash}`) ?? 0;
|
|
121
|
+
}
|
|
122
|
+
/**
|
|
123
|
+
* Resolve the effective cap for a tool.
|
|
124
|
+
*
|
|
125
|
+
* Precedence:
|
|
126
|
+
* 1. PUGI_RETRY_BUDGET_<TOOL_UPPER>=<N> (env)
|
|
127
|
+
* 2. programmaticCaps[toolName] (constructor)
|
|
128
|
+
* 3. DEFAULT_CAPS[toolName] (this module)
|
|
129
|
+
* 4. PUGI_RETRY_BUDGET_DEFAULT=<N> (env fallback)
|
|
130
|
+
* 5. DEFAULT_CAPS.default (final fallback)
|
|
131
|
+
*
|
|
132
|
+
* Bounded by [MIN_CAP, MAX_CAP] post-resolution. Invalid (NaN, ≤0,
|
|
133
|
+
* non-integer) env values are ignored and the next layer wins.
|
|
134
|
+
*/
|
|
135
|
+
capFor(toolName) {
|
|
136
|
+
const cached = this.capCache.get(toolName);
|
|
137
|
+
if (cached !== undefined)
|
|
138
|
+
return cached;
|
|
139
|
+
const envKey = `PUGI_RETRY_BUDGET_${toolName.toUpperCase()}`;
|
|
140
|
+
const envCap = parseCapEnv(this.env[envKey]);
|
|
141
|
+
const programmaticCap = this.programmaticCaps[toolName];
|
|
142
|
+
const defaultCap = DEFAULT_CAPS[toolName];
|
|
143
|
+
const fallbackEnvCap = parseCapEnv(this.env.PUGI_RETRY_BUDGET_DEFAULT);
|
|
144
|
+
// DEFAULT_CAPS.default is hard-coded above; cast keeps the type-
|
|
145
|
+
// narrower happy without leaking `| undefined` through the index
|
|
146
|
+
// access (tsc cannot prove the literal key exists).
|
|
147
|
+
const finalFallback = DEFAULT_CAPS.default;
|
|
148
|
+
let resolved;
|
|
149
|
+
if (envCap !== undefined) {
|
|
150
|
+
resolved = envCap;
|
|
151
|
+
}
|
|
152
|
+
else if (programmaticCap !== undefined) {
|
|
153
|
+
resolved = programmaticCap;
|
|
154
|
+
}
|
|
155
|
+
else if (defaultCap !== undefined) {
|
|
156
|
+
resolved = defaultCap;
|
|
157
|
+
}
|
|
158
|
+
else {
|
|
159
|
+
resolved = fallbackEnvCap ?? finalFallback;
|
|
160
|
+
}
|
|
161
|
+
const bounded = Math.min(MAX_CAP, Math.max(MIN_CAP, resolved));
|
|
162
|
+
this.capCache.set(toolName, bounded);
|
|
163
|
+
return bounded;
|
|
164
|
+
}
|
|
165
|
+
/**
|
|
166
|
+
* Should this dispatch be allowed? Caller passes the current count
|
|
167
|
+
* BEFORE recording — i.e. shouldAllow returns true when count < cap,
|
|
168
|
+
* then recordAttempt fires, bringing count up to cap. The next
|
|
169
|
+
* identical call sees count === cap and is refused.
|
|
170
|
+
*
|
|
171
|
+
* In disabled mode `allowed` is forced to true; `count` and `cap`
|
|
172
|
+
* still reflect reality so logs / diagnostics can spot the pattern.
|
|
173
|
+
*/
|
|
174
|
+
shouldAllow(toolName, argHash) {
|
|
175
|
+
const cap = this.capFor(toolName);
|
|
176
|
+
const count = this.peek(toolName, argHash);
|
|
177
|
+
const disabled = this.isDisabled();
|
|
178
|
+
const allowed = disabled ? true : count < cap;
|
|
179
|
+
return { allowed, count, cap, argHash, disabled };
|
|
180
|
+
}
|
|
181
|
+
/** Reset all state. Used between operator-input cycles when the
|
|
182
|
+
* budget instance is reused (most callers throw the instance away
|
|
183
|
+
* per cycle, so clear() is mostly for tests and hook surfaces). */
|
|
184
|
+
clear() {
|
|
185
|
+
this.counts.clear();
|
|
186
|
+
this.capCache.clear();
|
|
187
|
+
}
|
|
188
|
+
/**
|
|
189
|
+
* Snapshot the current state for diagnostics. Returns a plain
|
|
190
|
+
* object so it round-trips through JSON.stringify cleanly.
|
|
191
|
+
*/
|
|
192
|
+
snapshot() {
|
|
193
|
+
const out = [];
|
|
194
|
+
for (const [key, count] of this.counts) {
|
|
195
|
+
const sep = key.indexOf('::');
|
|
196
|
+
if (sep < 0)
|
|
197
|
+
continue;
|
|
198
|
+
out.push({ tool: key.slice(0, sep), argHash: key.slice(sep + 2), count });
|
|
199
|
+
}
|
|
200
|
+
return out;
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
/**
 * Turn a tool call's raw argument string into a stable bucket key:
 * the hex SHA-256 digest of the canonicalised arguments.
 *
 * @param argsRaw - Raw argument string as produced by the model.
 * @returns Hex-encoded SHA-256 digest.
 */
export function hashArgs(argsRaw) {
    const hasher = createHash('sha256');
    hasher.update(canonicalise(argsRaw));
    return hasher.digest('hex');
}
|
|
213
|
+
/**
 * Produce the canonical text for a raw JSON argument string: parse it,
 * sort keys via `sortKeys`, and re-serialise. If any step throws, the
 * original input is returned with a `raw:` prefix, so a repeated
 * malformed payload still lands in one bucket.
 */
function canonicalise(argsRaw) {
    let canonical;
    try {
        canonical = JSON.stringify(sortKeys(JSON.parse(argsRaw)));
    }
    catch {
        canonical = `raw:${argsRaw}`;
    }
    return canonical;
}
|
|
228
|
+
/**
 * Return a copy of a parsed JSON value in which every object has its
 * keys in sorted order (recursively). Arrays keep their element order;
 * primitives and `null` are returned unchanged.
 *
 * The copy is built with `Object.fromEntries`, which defines each key
 * as an own data property. Plain assignment (`copy[key] = …`) would
 * treat an own `"__proto__"` key — which `JSON.parse` can produce — as
 * a prototype write, silently dropping it from the canonical form and
 * letting different argument payloads collide on the same hash.
 *
 * @param value - Any value produced by `JSON.parse`.
 * @returns The key-sorted equivalent of `value`.
 */
function sortKeys(value) {
    if (value === null || typeof value !== 'object')
        return value;
    if (Array.isArray(value))
        return value.map((item) => sortKeys(item));
    const entries = Object.keys(value)
        .sort()
        .map((key) => [key, sortKeys(value[key])]);
    return Object.fromEntries(entries);
}
|
|
240
|
+
/**
 * Interpret a `PUGI_RETRY_BUDGET_*` env value as a cap.
 *
 * Returns the number when the string converts (via `Number`) to a
 * positive integer, and `undefined` otherwise — including when the
 * variable is unset or empty — so the resolver can fall through to the
 * next precedence layer. No clamping happens here.
 *
 * @param raw - Raw env value, possibly undefined.
 * @returns The positive integer, or `undefined`.
 */
function parseCapEnv(raw) {
    const isBlank = raw === undefined || raw === '';
    const parsed = isBlank ? Number.NaN : Number(raw);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : undefined;
}
|
|
258
|
+
/**
 * Build the fixed-format message handed to the model once a tool's
 * retry budget is used up. The wording is kept stable so downstream
 * consumers can pattern-match on it.
 *
 * @param toolName - Tool whose budget ran out.
 * @param cap - Cap that was in force.
 * @returns The sentinel message.
 */
export function retryBudgetExhaustedSentinel(toolName, cap) {
    const sentinel = `RETRY_BUDGET_EXHAUSTED: ${toolName} exceeded ${cap} attempts with these args. Operator must intervene.`;
    return sentinel;
}
|
|
266
|
+
/**
 * Error type for an exhausted retry budget. Its message is the
 * sentinel string from `retryBudgetExhaustedSentinel`, and it carries
 * the tool name, cap and argument hash that triggered it. Check with
 * `instanceof RetryBudgetExhausted`.
 */
export class RetryBudgetExhausted extends Error {
    toolName;
    cap;
    argHash;
    constructor(toolName, cap, argHash) {
        const sentinel = retryBudgetExhaustedSentinel(toolName, cap);
        super(sentinel);
        Object.assign(this, { toolName, cap, argHash });
        this.name = 'RetryBudgetExhausted';
    }
}
|
|
284
|
+
//# sourceMappingURL=budget.js.map
|
package/dist/runtime/cli.js
CHANGED
|
@@ -32,6 +32,7 @@ import { runStatusCommand, defaultStatusHome, } from './commands/status.js';
|
|
|
32
32
|
import { runUndoCommand } from './commands/undo.js';
|
|
33
33
|
import { runCompactCommand } from './commands/compact.js';
|
|
34
34
|
import { runBudgetCommand } from './commands/budget.js';
|
|
35
|
+
import { runCostCommand } from './commands/cost.js';
|
|
35
36
|
import { runSkillsCommand } from './commands/skills.js';
|
|
36
37
|
import { installDefaultSkills } from '../core/skills/defaults.js';
|
|
37
38
|
import { runAgentsCommand } from './commands/agents.js';
|
|
@@ -75,6 +76,7 @@ const handlers = {
|
|
|
75
76
|
budget: dispatchBudget,
|
|
76
77
|
code: runEngineTask('code'),
|
|
77
78
|
config: dispatchConfig,
|
|
79
|
+
cost: dispatchCost,
|
|
78
80
|
delegate: dispatchDelegate,
|
|
79
81
|
deploy: dispatchDeploy,
|
|
80
82
|
doctor,
|
|
@@ -108,6 +110,10 @@ const handlers = {
|
|
|
108
110
|
sync,
|
|
109
111
|
undo: dispatchUndo,
|
|
110
112
|
compact: dispatchCompact,
|
|
113
|
+
// L19 (2026-05-27): `pugi usage` is an alias of `pugi cost` — same
|
|
114
|
+
// handler, same flags. Operators trained on Claude Code expect either
|
|
115
|
+
// verb to surface the per-model token + USD table.
|
|
116
|
+
usage: dispatchCost,
|
|
111
117
|
version,
|
|
112
118
|
web: dispatchWeb,
|
|
113
119
|
whoami,
|
|
@@ -419,6 +425,19 @@ async function dispatchPermissions(args, flags, _session) {
|
|
|
419
425
|
writeOutput: (text) => writeOutput(flags, { text }, text),
|
|
420
426
|
});
|
|
421
427
|
}
|
|
428
|
+
/**
 * L19 sprint (2026-05-27): handler behind `pugi cost` and its alias
 * `pugi usage` (both verbs map to this function in the handlers table).
 *
 * Runs the cost command against the current working directory and
 * routes its output through `writeOutput` with the caller's flags.
 */
async function dispatchCost(args, flags, _session) {
    const emit = (payload, text) => writeOutput(flags, payload, text);
    const workspaceRoot = process.cwd();
    await runCostCommand(args, { workspaceRoot, writeOutput: emit });
}
|
|
422
441
|
async function dispatchSkills(args, flags, _session) {
|
|
423
442
|
await runSkillsCommand(args, {
|
|
424
443
|
workspaceRoot: process.cwd(),
|
|
@@ -1025,6 +1044,26 @@ const COMMAND_HELP_BODIES = {
|
|
|
1025
1044
|
' pugi config get privacy',
|
|
1026
1045
|
' pugi config set privacy=<mode>',
|
|
1027
1046
|
],
|
|
1047
|
+
cost: [
|
|
1048
|
+
'pugi cost — token + USD breakdown for the current Pugi session.',
|
|
1049
|
+
'',
|
|
1050
|
+
'Reads .pugi/cost.json (persisted via the in-REPL CostTracker) and',
|
|
1051
|
+
'prints a per-model table plus dollar estimate. Alias: pugi usage.',
|
|
1052
|
+
'',
|
|
1053
|
+
'Flags:',
|
|
1054
|
+
' --all-sessions 30-day rolling aggregate across all sessions.',
|
|
1055
|
+
' --window=<days> Override the aggregate window (max 365).',
|
|
1056
|
+
' --reset --yes Clear the current-session counter. History',
|
|
1057
|
+
' is preserved. Requires --yes to confirm.',
|
|
1058
|
+
' --json Emit a structured JSON envelope only.',
|
|
1059
|
+
'',
|
|
1060
|
+
'Examples:',
|
|
1061
|
+
' pugi cost Current session totals.',
|
|
1062
|
+
' pugi cost --all-sessions Past 30 days aggregated.',
|
|
1063
|
+
' pugi cost --all-sessions --window=7',
|
|
1064
|
+
' pugi cost --reset --yes Wipe the session counter.',
|
|
1065
|
+
' pugi usage Alias for pugi cost.',
|
|
1066
|
+
],
|
|
1028
1067
|
config: [
|
|
1029
1068
|
'pugi config — read / write CLI + tenant configuration.',
|
|
1030
1069
|
'',
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `pugi cost` / `pugi usage` command handler — L19 sprint (2026-05-27).
|
|
3
|
+
*
|
|
4
|
+
* Shared backend for the following operator surfaces:
|
|
5
|
+
*
|
|
6
|
+
* - `pugi cost` current session (default)
|
|
7
|
+
* - `pugi cost --all-sessions` 30-day rolling aggregate
|
|
8
|
+
* - `pugi cost --reset --yes` wipe current session counter (operator-only)
|
|
9
|
+
* - `pugi usage` alias of `pugi cost`
|
|
10
|
+
* - `/cost` REPL slash same handler, in-REPL output
|
|
11
|
+
* - `/usage` REPL slash same handler, alias of /cost
|
|
12
|
+
*
|
|
13
|
+
* Why a separate command from the existing `pugi budget`:
|
|
14
|
+
*
|
|
15
|
+
* - `pugi budget` walks `.pugi/events.jsonl` and bills against the
|
|
16
|
+
* event-log heuristic (per-command / per-persona attribution). It
|
|
17
|
+
* is the right surface for "what did this brief / this persona
|
|
18
|
+
* spend?". It does not break down by model and it does not persist
|
|
19
|
+
* a cross-session aggregate.
|
|
20
|
+
*
|
|
21
|
+
* - `pugi cost` (this command) reads the persisted `.pugi/cost.json`
|
|
22
|
+
* written by the `CostTracker`. It is the right surface for "what
|
|
23
|
+
* did this model spend?" and "what did I spend across the last 30
|
|
24
|
+
* days?". Token + USD figures are sourced from the rate card, which
|
|
25
|
+
* distinguishes hosted Claude (per-token billed) from open-weight
|
|
26
|
+
* Qwen / Kimi / DeepSeek (infra cost only).
|
|
27
|
+
*
|
|
28
|
+
* Both commands intentionally coexist — they answer adjacent but distinct
|
|
29
|
+
* operator questions. The L19 spec calls out `/cost` and `/usage` by
|
|
30
|
+
* name; the budget surface is unaffected.
|
|
31
|
+
*/
|
|
32
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
33
|
+
import { resolve } from 'node:path';
|
|
34
|
+
import { createCostTracker, totalTokens, totalUsd, } from '../../core/cost/tracker.js';
|
|
35
|
+
import { buildCostView, renderCostTableText } from '../../tui/cost-table.js';
|
|
36
|
+
/**
 * Turn the raw argument list of `pugi cost` into a flag bundle.
 *
 * Recognised: `--all-sessions`, `--reset`, `--yes` / `-y`, `--json`
 * and `--window=<days>`. The window is read with `parseInt` (base 10)
 * and only applied when it lies in 1..365; otherwise the default of 30
 * stays. Unknown arguments are ignored.
 *
 * @param args - Argument strings following the command name.
 * @returns `{ allSessions, reset, yes, json, windowDays }`.
 */
export function parseCostFlags(args) {
    const WINDOW_PREFIX = '--window=';
    const flags = { allSessions: false, reset: false, yes: false, json: false, windowDays: 30 };
    for (const entry of args) {
        const arg = entry ?? '';
        switch (arg) {
            case '--all-sessions':
                flags.allSessions = true;
                continue;
            case '--reset':
                flags.reset = true;
                continue;
            case '--yes':
            case '-y':
                flags.yes = true;
                continue;
            case '--json':
                flags.json = true;
                continue;
            default:
                break;
        }
        if (!arg.startsWith(WINDOW_PREFIX)) {
            continue;
        }
        const days = Number.parseInt(arg.slice(WINDOW_PREFIX.length), 10);
        const inRange = Number.isFinite(days) && days > 0 && days <= 365;
        if (inRange) {
            flags.windowDays = days;
        }
    }
    return flags;
}
|
|
66
|
+
/**
 * Entry point for `pugi cost` / `pugi usage`.
 *
 * Parses the flags, builds a cost tracker for the workspace, then
 * either handles `--reset` (which needs `--yes`) or renders the cost
 * table for the current session or the multi-session window. All
 * output goes through `ctx.writeOutput(payload, text)`.
 *
 * @param args - Raw command arguments.
 * @param ctx - Reads `workspaceRoot`, `writeOutput`, and optionally
 *   `sessionId`, `now` and `resolveTier`.
 */
export async function runCostCommand(args, ctx) {
    const flags = parseCostFlags(args);
    // Explicit id first, then the event log, then a placeholder.
    const sessionId = ctx.sessionId ?? deriveSessionIdFromEvents(ctx.workspaceRoot) ?? 'no-session';
    const tracker = createCostTracker({
        workspaceRoot: ctx.workspaceRoot,
        sessionIdProvider: () => sessionId,
        now: ctx.now,
    });
    // --reset without --yes only prints the confirmation hint.
    if (flags.reset && !flags.yes) {
        ctx.writeOutput({ command: 'cost', status: 'reset_pending_confirmation' }, 'pugi cost --reset clears the current session counter. Re-run with --yes to confirm.');
        return;
    }
    if (flags.reset) {
        const wiped = tracker.resetCurrent();
        const message = wiped
            ? `Cleared session ${wiped.sessionId} (${Object.keys(wiped.models).length} model(s) wiped).`
            : 'No current session counter to clear.';
        ctx.writeOutput({ command: 'cost', status: 'reset_ok', wiped: wiped ?? null }, message);
        return;
    }
    const clock = ctx.now ?? Date.now;
    let aggregate;
    if (flags.allSessions) {
        aggregate = tracker.aggregateWithin(flags.windowDays);
    }
    else {
        aggregate = tracker.current() ?? emptyAggregate(sessionId, clock);
    }
    let tier = null;
    if (ctx.resolveTier) {
        tier = await safeResolveTier(ctx.resolveTier);
    }
    let heading;
    if (flags.allSessions) {
        heading = `Pugi cost / usage — aggregate (last ${flags.windowDays} days)`;
    }
    else {
        heading = buildSessionHeading(aggregate, clock);
    }
    const view = buildCostView({ aggregate, heading, tier: tier ?? undefined });
    const text = renderCostTableText(view);
    const perModel = view.rows.map((row) => ({
        model: row.model,
        input: row.inputTokens,
        output: row.outputTokens,
        usd: row.usd,
        note: row.note ?? null,
    }));
    const payload = {
        command: flags.allSessions ? 'cost.aggregate' : 'cost.session',
        status: 'ok',
        window: flags.allSessions ? `${flags.windowDays}d` : 'current',
        tokens: {
            input: view.totalInputTokens,
            output: view.totalOutputTokens,
        },
        dollars: view.totalUsd,
        perModel,
        tier: tier ?? null,
    };
    ctx.writeOutput(payload, text);
}
|
|
118
|
+
/**
 * Build the cost view and its text lines for the REPL slash command.
 *
 * Pure rendering: it returns `{ view, lines }` and leaves it to the
 * caller to push the lines wherever they belong. Nothing here touches
 * the Ink component, only the plain-text renderer.
 *
 * @param input - Reads `tracker`, `allSessions`, `windowDays`, `now`
 *   and optional `tier`.
 * @returns `{ view, lines }` where `lines` is the table split on "\n".
 */
export function renderCostForSlash(input) {
    const { tracker, allSessions, windowDays, now } = input;
    let aggregate;
    let heading;
    if (allSessions) {
        aggregate = tracker.aggregateWithin(windowDays);
        heading = `Pugi cost / usage — aggregate (last ${windowDays} days)`;
    }
    else {
        aggregate = tracker.current() ?? emptyAggregate('no-session', now);
        heading = buildSessionHeading(aggregate, now);
    }
    const view = buildCostView({ aggregate, heading, tier: input.tier ?? undefined });
    const lines = renderCostTableText(view).split('\n');
    return { view, lines };
}
|
|
137
|
+
/**
 * Look up a session id in `<workspaceRoot>/.pugi/events.jsonl`.
 *
 * Reads the file, then checks its non-blank lines from last to first
 * and returns the `sessionId` of the first entry found with
 * `type === 'session'`, `name === 'start'` and a string `sessionId`.
 * Lines that fail to parse are skipped. Returns `null` when the file
 * is absent, unreadable, or has no such entry.
 */
function deriveSessionIdFromEvents(workspaceRoot) {
    const eventsPath = resolve(workspaceRoot, '.pugi/events.jsonl');
    if (!existsSync(eventsPath)) {
        return null;
    }
    let entries;
    try {
        entries = readFileSync(eventsPath, 'utf8')
            .split('\n')
            .filter((line) => line.trim().length > 0);
    }
    catch {
        // best-effort; an unreadable events.jsonl is treated like a missing one
        return null;
    }
    // Newest entries sit at the tail, so scan in reverse.
    for (const line of entries.reverse()) {
        try {
            const event = JSON.parse(line);
            const isSessionStart = event.type === 'session' && event.name === 'start';
            if (isSessionStart && typeof event.sessionId === 'string') {
                return event.sessionId;
            }
        }
        catch {
            // partial-write lines are ignored
        }
    }
    return null;
}
|
|
170
|
+
/**
 * Compose the heading line for a single-session cost table.
 *
 * A missing aggregate, or one whose id is the `'no-session'` /
 * `'aggregate'` placeholder, yields the "no active session" heading.
 * Otherwise the heading names the session and, when `startedAt`
 * parses as a date, appends the whole minutes elapsed (never below 0).
 *
 * @param aggregate - Session aggregate (reads `sessionId`, `startedAt`).
 * @param now - Function returning the current time in milliseconds.
 * @returns The heading text.
 */
function buildSessionHeading(aggregate, now) {
    const placeholderIds = ['no-session', 'aggregate'];
    if (!aggregate || placeholderIds.includes(aggregate.sessionId)) {
        return 'Pugi cost / usage — no active session';
    }
    const startedMs = Date.parse(aggregate.startedAt);
    if (Number.isFinite(startedMs)) {
        const wholeMinutes = Math.floor((now() - startedMs) / 60_000);
        const elapsedMin = Math.max(0, wholeMinutes);
        return `Pugi cost / usage — session ${aggregate.sessionId} (${elapsedMin} min)`;
    }
    return `Pugi cost / usage — session ${aggregate.sessionId}`;
}
|
|
181
|
+
/**
 * Create an aggregate with no model entries, stamped with the time
 * reported by `now()` (as an ISO-8601 string).
 *
 * @param sessionId - Id to place on the aggregate.
 * @param now - Function returning the current time in milliseconds.
 * @returns `{ sessionId, startedAt, models: {} }`.
 */
function emptyAggregate(sessionId, now) {
    const startedAt = new Date(now()).toISOString();
    const models = {};
    return { sessionId, startedAt, models };
}
|
|
188
|
+
/**
 * Call the tier resolver and await its result, yielding `null` if it
 * throws or rejects. Tier information is optional decoration on the
 * cost table, so a failure here must not fail the command.
 *
 * @param resolver - Function returning the tier (or a promise of it).
 * @returns The resolved tier, or `null` on failure.
 */
async function safeResolveTier(resolver) {
    let tier = null;
    try {
        tier = await resolver();
    }
    catch {
        // best-effort: keep the null default when resolution fails
    }
    return tier;
}
|
|
196
|
+
// Re-export aggregate helpers so the cli.ts wire-up can read totals
|
|
197
|
+
// without reaching into the tracker module directly.
|
|
198
|
+
export { totalUsd, totalTokens };
|
|
199
|
+
//# sourceMappingURL=cost.js.map
|
package/dist/runtime/version.js
CHANGED
|
@@ -44,7 +44,7 @@ export function sanitizeSemver(raw) {
|
|
|
44
44
|
* during import). When bumping the CLI version BOTH literals must be
|
|
45
45
|
* updated; the release smoke-test (`pack:smoke`) verifies they agree.
|
|
46
46
|
*/
|
|
47
|
-
export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.
|
|
47
|
+
export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.20');
|
|
48
48
|
/**
|
|
49
49
|
* Outbound: the CLI's installed semver. Read at request time by
|
|
50
50
|
* `version-interceptor.ts` and injected on every `fetch` call.
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import { jsx as _jsx, jsxs as _jsxs } from "react/jsx-runtime";
|
|
2
|
+
import { Box, Text } from 'ink';
|
|
3
|
+
import { estimateUsd, formatTokensWithCommas, formatUsd, rateFor } from '../core/cost/rate-card.js';
|
|
4
|
+
/**
|
|
5
|
+
* Column widths chosen to match the L19 spec output. The model column is
|
|
6
|
+
* the widest because slug strings like `qwen3-coder-480b-instruct-fp8`
|
|
7
|
+
* run 29 chars. We pad/truncate inside the renderer so a TUI on an 80-col
|
|
8
|
+
* terminal does not wrap mid-table.
|
|
9
|
+
*/
|
|
10
|
+
const COL_MODEL = 34;
|
|
11
|
+
const COL_IN = 8;
|
|
12
|
+
const COL_OUT = 9;
|
|
13
|
+
const COL_USD = 10;
|
|
14
|
+
/**
 * Ink component that draws one cost report from a prepared view.
 *
 * Layout, top to bottom: heading, rule, blank line, column header,
 * one line per model (or a dimmed placeholder when there are no rows),
 * blank line, token totals, dollar total, and — when `view.tier` is
 * set — a tier line. The output depends only on `view`.
 */
export function CostTable({ view }) {
    const heading = _jsx(Text, { children: view.heading });
    const rule = _jsx(Text, { children: "\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550" });
    const columnHeader = _jsxs(Text, {
        children: [
            pad('MODEL', COL_MODEL),
            padLeft('IN_TOK', COL_IN),
            padLeft('OUT_TOK', COL_OUT),
            padLeft('$ EST', COL_USD),
        ],
    });
    // One line per model, keyed by model slug; a dimmed hint when empty.
    const modelLines = view.rows.length === 0
        ? _jsx(Text, { dimColor: true, children: "No calls recorded yet \u2014 brief a persona to charge the meter." })
        : view.rows.map((row) => _jsxs(Text, {
            children: [
                pad(row.model, COL_MODEL),
                padLeft(formatTokensWithCommas(row.inputTokens), COL_IN),
                padLeft(formatTokensWithCommas(row.outputTokens), COL_OUT),
                padLeft(formatUsd(row.usd), COL_USD),
                row.note ? ` (${row.note})` : '',
            ],
        }, row.model));
    const tokenTotals = _jsxs(Text, {
        children: [
            "Total tokens: ",
            formatTokensWithCommas(view.totalInputTokens + view.totalOutputTokens),
            ' (in: ',
            formatTokensWithCommas(view.totalInputTokens),
            ', out: ',
            formatTokensWithCommas(view.totalOutputTokens),
            ')',
        ],
    });
    const dollarTotal = _jsxs(Text, { children: ["Total dollar estimate: ", formatUsd(view.totalUsd)] });
    const tierLine = view.tier
        ? _jsxs(Text, { children: ["Tier: ", view.tier.tier, view.tier.quotaLine ? ` (${view.tier.quotaLine})` : ''] })
        : null;
    return (_jsxs(Box, {
        flexDirection: "column",
        children: [
            heading,
            rule,
            _jsx(Text, { children: " " }),
            columnHeader,
            modelLines,
            _jsx(Text, { children: " " }),
            tokenTotals,
            dollarTotal,
            tierLine,
        ],
    }));
}
|
|
22
|
+
/**
 * Convert a session aggregate into the view-model used by both the
 * Ink table and the plain-text renderer.
 *
 * Each model entry becomes a row carrying its token counts, the USD
 * estimate from `estimateUsd`, and the rate card's note. Rows are
 * ordered by USD, highest first, with ties broken by total tokens,
 * highest first. Totals are summed over the rows in that order.
 *
 * @param input - Reads `aggregate.models`, `heading` and `tier`.
 * @returns `{ heading, rows, totalInputTokens, totalOutputTokens, totalUsd, tier }`.
 */
export function buildCostView(input) {
    const rows = Object.entries(input.aggregate.models).map(([model, entry]) => ({
        model,
        inputTokens: entry.input,
        outputTokens: entry.output,
        usd: estimateUsd(model, entry.input, entry.output),
        note: rateFor(model).note,
    }));
    const tokensOf = (row) => row.inputTokens + row.outputTokens;
    rows.sort((left, right) => {
        if (right.usd !== left.usd) {
            return right.usd - left.usd;
        }
        return tokensOf(right) - tokensOf(left);
    });
    const totals = rows.reduce((sum, row) => ({
        input: sum.input + row.inputTokens,
        output: sum.output + row.outputTokens,
        usd: sum.usd + row.usd,
    }), { input: 0, output: 0, usd: 0 });
    return {
        heading: input.heading,
        rows,
        totalInputTokens: totals.input,
        totalOutputTokens: totals.output,
        totalUsd: totals.usd,
        tier: input.tier,
    };
}
|
|
66
|
+
/**
 * Render a cost view as plain text (no ANSI, no colour), one string
 * with lines joined by "\n". The layout matches the Ink component:
 * heading, rule, blank, column header, model rows or a placeholder,
 * blank, token totals, dollar total, and an optional tier line.
 *
 * @param view - View-model produced by `buildCostView`.
 * @returns The table as a single newline-joined string.
 */
export function renderCostTableText(view) {
    const columnHeader = [
        pad('MODEL', COL_MODEL),
        padLeft('IN_TOK', COL_IN),
        padLeft('OUT_TOK', COL_OUT),
        padLeft('$ EST', COL_USD),
    ].join('');
    const modelLines = view.rows.length === 0
        ? ['No calls recorded yet — brief a persona to charge the meter.']
        : view.rows.map((row) => [
            pad(row.model, COL_MODEL),
            padLeft(formatTokensWithCommas(row.inputTokens), COL_IN),
            padLeft(formatTokensWithCommas(row.outputTokens), COL_OUT),
            padLeft(formatUsd(row.usd), COL_USD),
            row.note ? ` (${row.note})` : '',
        ].join(''));
    const tierLines = view.tier
        ? [`Tier: ${view.tier.tier}${view.tier.quotaLine ? ` (${view.tier.quotaLine})` : ''}`]
        : [];
    return [
        view.heading,
        '════════════════════════════════════════════════',
        '',
        columnHeader,
        ...modelLines,
        '',
        `Total tokens: ${formatTokensWithCommas(view.totalInputTokens + view.totalOutputTokens)} (in: ${formatTokensWithCommas(view.totalInputTokens)}, out: ${formatTokensWithCommas(view.totalOutputTokens)})`,
        `Total dollar estimate: ${formatUsd(view.totalUsd)}`,
        ...tierLines,
    ].join('\n');
}
|
|
101
|
+
/**
 * Left-align `value` in a cell of `width` characters. Short values are
 * filled with trailing spaces; values at or over the width are cut to
 * `width - 1` characters plus one trailing space, so the next column
 * never touches the text.
 */
function pad(value, width) {
    const fits = value.length < width;
    return fits
        ? value.padEnd(width, ' ')
        : `${value.slice(0, Math.max(0, width - 1))} `;
}
|
|
106
|
+
/**
 * Right-align `value` in a cell of `width` characters.
 *
 * Short values are filled with leading spaces. A value that is as wide
 * as the cell or wider is NOT truncated: these cells hold token counts
 * and dollar figures, and cutting digits off (e.g. `1,234,567` shown as
 * `1,234,56` in an 8-wide column) would display a different number.
 * Instead the full value is returned behind a single separating space,
 * trading column alignment for a correct figure that never fuses with
 * the previous column.
 *
 * @param value - Already-formatted cell text.
 * @param width - Target cell width in characters.
 * @returns The padded (never truncated) cell text.
 */
function padLeft(value, width) {
    if (value.length >= width)
        return ` ${value}`;
    return value.padStart(width, ' ');
}
|
|
111
|
+
//# sourceMappingURL=cost-table.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pugi/cli",
|
|
3
|
-
"version": "0.1.0-beta.
|
|
3
|
+
"version": "0.1.0-beta.20",
|
|
4
4
|
"description": "Pugi CLI - terminal-native software execution system",
|
|
5
5
|
"homepage": "https://pugi.io",
|
|
6
6
|
"repository": {
|
|
@@ -54,7 +54,7 @@
|
|
|
54
54
|
"undici": "^8.3.0",
|
|
55
55
|
"zod": "^3.23.0",
|
|
56
56
|
"@pugi/personas": "0.1.2",
|
|
57
|
-
"@pugi/sdk": "0.1.0-beta.
|
|
57
|
+
"@pugi/sdk": "0.1.0-beta.20"
|
|
58
58
|
},
|
|
59
59
|
"devDependencies": {
|
|
60
60
|
"@types/node": "^22.0.0",
|