@pugi/cli 0.1.0-beta.19 → 0.1.0-beta.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,129 @@
1
+ /**
2
+ * Rate card for the `pugi cost` / `/cost` / `/usage` surface — L19 sprint.
3
+ *
4
+ * Distinct from `core/repl/model-pricing.ts` on purpose:
5
+ *
6
+ * - `model-pricing.ts` powers the TUI cost meter (per-turn flash, status
7
+ * row USD). Its ladder is keyed against the live Anvil model slugs and
8
+ * intentionally inflates an honest worst-case figure via the Sonnet
9
+ * fallback so an operator on a quiet model is never surprised by a
10
+ * bill. It rounds to USD per 1M tokens at runtime.
11
+ *
12
+ * - `rate-card.ts` (this file) powers the persisted `/cost` table the
13
+ * operator reads to plan budget. It distinguishes open-weight models
14
+ * ($0 / $0 — infra cost only) from hosted closed models so the table
15
+ * does not double-charge an operator running a self-hosted Qwen or
16
+ * Kimi behind Pugi. The L19 spec calls these out by name.
17
+ *
18
+ * Both ladders intentionally agree on Anthropic Claude family pricing so
19
+ * the TUI flash and the persisted table cannot disagree on a Claude turn.
20
+ * If they diverge, the `model-pricing.ts` ladder wins for live UI; the
21
+ * rate card here wins for the persisted `.pugi/cost.json` aggregate.
22
+ *
23
+ * Prices are USD per 1,000,000 tokens, sourced from the L19 spec
24
+ * (2026-05-27) which mirrors provider list-price pages as of that date.
25
+ */
26
/**
 * Shared, frozen price tiers (USD per 1,000,000 tokens).
 *
 * Both the exact-match ladder and the family-prefix fallbacks point at
 * these records, so the two tables cannot drift apart. Each record is
 * frozen because `rateFor` hands the very same object to every caller;
 * without the freeze a caller writing `rate.input = 0` would silently
 * re-price every later lookup.
 */
const OPUS_TIER = Object.freeze({ input: 15, output: 75 });
const SONNET_TIER = Object.freeze({ input: 3, output: 15 });
const HAIKU_TIER = Object.freeze({ input: 1, output: 5 });
const OPEN_WEIGHT_TIER = Object.freeze({ input: 0, output: 0, note: 'open-weight' });

/**
 * Exact-match price ladder keyed by model slug. Slugs match the L19 task
 * spec verbatim so a copy-paste from the sprint doc resolves without
 * normalisation.
 */
export const RATES_PER_MTOKEN = Object.freeze({
  // Anthropic Claude family (hosted, billed).
  'claude-opus-4-7': OPUS_TIER,
  'claude-opus-4-6': OPUS_TIER,
  'claude-sonnet-4-6': SONNET_TIER,
  'claude-sonnet-4-5': SONNET_TIER,
  'claude-haiku-4-5-20251001': HAIKU_TIER,
  'claude-haiku-4-5': HAIKU_TIER,
  // Open-weight models — infra cost only, never per-token billed. The
  // note column surfaces the reason so a reader of the JSON envelope
  // does not assume the row is broken.
  'qwen3-coder-480b-instruct-fp8': OPEN_WEIGHT_TIER,
  'kimi-k2.6': OPEN_WEIGHT_TIER,
  'deepseek-v4-pro': OPEN_WEIGHT_TIER,
});

/**
 * Family-prefix fallback — consulted only on an exact-slug miss, in
 * array order (first matching prefix wins). Lets a future model rebind
 * (e.g. `claude-opus-4-8`) price reasonably without a code edit.
 */
const FAMILY_FALLBACKS = Object.freeze([
  Object.freeze(['claude-opus-', OPUS_TIER]),
  Object.freeze(['claude-sonnet-', SONNET_TIER]),
  Object.freeze(['claude-haiku-', HAIKU_TIER]),
  Object.freeze(['qwen', OPEN_WEIGHT_TIER]),
  Object.freeze(['kimi', OPEN_WEIGHT_TIER]),
  Object.freeze(['deepseek', OPEN_WEIGHT_TIER]),
]);

/**
 * Final fallback for unknown slugs. Pinned to Sonnet-tier so an
 * unrecognised hosted model bills "honestly conservative" rather than
 * $0 (which would silently hide cost from the operator).
 */
const DEFAULT_RATE = Object.freeze({
  input: 3,
  output: 15,
  note: 'unknown model — Sonnet-tier estimate',
});
66
/**
 * Look up the rate for a model slug.
 *
 * Resolution order:
 *   1. Exact own-key match in `RATES_PER_MTOKEN`.
 *   2. Family-prefix match in `FAMILY_FALLBACKS` (first hit wins).
 *   3. `DEFAULT_RATE` (Sonnet-tier estimate).
 *
 * Pure, never throws. Non-string and empty inputs resolve to
 * `DEFAULT_RATE`.
 *
 * @param {unknown} model - Model slug as reported by the caller.
 * @returns {{ input: number, output: number, note?: string }} USD per
 *   1,000,000 tokens for input and output.
 */
export function rateFor(model) {
  if (typeof model !== 'string' || model.length === 0) {
    return DEFAULT_RATE;
  }
  // Own-property check on purpose: a bare `RATES_PER_MTOKEN[model]`
  // resolves inherited keys such as `constructor` or `toString` to
  // Object.prototype members, which are truthy but carry no
  // `input`/`output`, so the slug would silently price at $0.
  if (Object.hasOwn(RATES_PER_MTOKEN, model)) {
    return RATES_PER_MTOKEN[model];
  }
  const family = FAMILY_FALLBACKS.find(([prefix]) => model.startsWith(prefix));
  return family ? family[1] : DEFAULT_RATE;
}
89
/**
 * USD cost of one (model, inputTokens, outputTokens) triple.
 *
 * Token counts that are not finite positive numbers are treated as 0,
 * and a result that is not a finite positive number collapses to 0, so
 * a buggy upstream cannot credit a negative cost.
 *
 * @param {unknown} model - Model slug; priced through `rateFor`.
 * @param {number} inputTokens - Prompt-side token count.
 * @param {number} outputTokens - Completion-side token count.
 * @returns {number} Cost in USD, never negative and never NaN.
 */
export function estimateUsd(model, inputTokens, outputTokens) {
  const { input: inputRate, output: outputRate } = rateFor(model);
  const sanitise = (count) => (Number.isFinite(count) && count > 0 ? count : 0);
  const weighted = sanitise(inputTokens) * inputRate + sanitise(outputTokens) * outputRate;
  // Rates are quoted per million tokens.
  const dollars = weighted / 1_000_000;
  if (!Number.isFinite(dollars) || dollars <= 0) {
    return 0;
  }
  return dollars;
}
101
/**
 * Render a USD amount for the `/cost` table.
 *
 *   - `>= 0.01`          → two decimals (`$0.46`).
 *   - `> 0` and `< 0.01` → three decimals (`$0.003`).
 *   - anything that is not a finite positive number → `$0.00`.
 *
 * @param {number} value - Amount in USD.
 * @returns {string} Dollar-prefixed string.
 */
export function formatUsd(value) {
  const isPositiveAmount = Number.isFinite(value) && value > 0;
  if (!isPositiveAmount) {
    return '$0.00';
  }
  // Sub-cent amounts get one extra digit of precision.
  const decimals = value >= 0.01 ? 2 : 3;
  return `$${value.toFixed(decimals)}`;
}
119
/**
 * Render a token count for the `/cost` table with comma thousands
 * separators (`14,300`), rounding down to a whole number first.
 *
 * @param {number} value - Token count.
 * @returns {string} Grouped integer string; `'0'` for anything that is
 *   not a finite positive number.
 */
export function formatTokensWithCommas(value) {
  if (Number.isFinite(value) && value > 0) {
    const wholeTokens = Math.floor(value);
    return wholeTokens.toLocaleString('en-US');
  }
  return '0';
}
129
+ //# sourceMappingURL=rate-card.js.map
@@ -0,0 +1,221 @@
1
+ /**
2
+ * Persisted per-session cost tracker — L19 sprint (2026-05-27).
3
+ *
4
+ * Mission: every Anvil-mediated LLM call goes through `recordCall`, which
5
+ * aggregates per-model token + USD totals and atomically persists them
6
+ * to `.pugi/cost.json` so the operator can read `/cost` across REPL
7
+ * restarts and reconcile a 14-min session that crossed a process boundary.
8
+ *
9
+ * Why a fresh module instead of bolting onto `core/repl/session.ts`?
10
+ *
11
+ * - `session.ts` accumulates in-memory state for the live TUI status
12
+ * row, which is by-design ephemeral and cleared on REPL boot. The
13
+ * operator's "what did I spend across the project?" question needs
14
+ * a durable surface that survives a process restart.
15
+ * - L19 also has to read `--all-sessions` (last 30 days). The natural
16
+ * store for that is a per-workspace history of session aggregates,
17
+ * which is easy with the JSON file pattern below and would be
18
+ * awkward stitched into the REPL reducer.
19
+ *
20
+ * On-disk shape (single JSON file, atomic tmp+rename writes):
21
+ *
22
+ * {
23
+ * "version": 1,
24
+ * "current": { sessionId, startedAt, models: { <slug>: ModelEntry } },
25
+ * "history": [
26
+ * { sessionId, startedAt, endedAt, models: { ... } }
27
+ * ]
28
+ * }
29
+ *
30
+ * History rotation: when `recordCall` is invoked with a sessionId
31
+ * different from `current.sessionId`, the existing `current` row is
32
+ * stamped with `endedAt = now()` and pushed onto `history`, then a new
33
+ * `current` row is initialised. History is capped at 90 entries (the L19
34
+ * `--all-sessions` window is 30 days; 90 gives a generous buffer for
35
+ * operators on >1 session/day cadence without unbounded growth).
36
+ *
37
+ * The tracker is workspace-scoped — every workspace has its own
38
+ * `.pugi/cost.json`. This matches the existing `.pugi/events.jsonl` /
39
+ * `.pugi/index.json` pattern and means a multi-repo operator's costs
40
+ * are billed against the repo they were incurred in.
41
+ */
42
+ import { existsSync, mkdirSync, readFileSync, renameSync, writeFileSync, unlinkSync } from 'node:fs';
43
+ import { dirname, resolve } from 'node:path';
44
+ import { estimateUsd } from './rate-card.js';
45
/** On-disk schema version. Bump if the file shape changes. */
export const COST_FILE_SCHEMA_VERSION = 1;
/** Maximum number of historical sessions persisted in `.pugi/cost.json`. */
export const COST_HISTORY_CAP = 90;

/** Milliseconds in one day, used to turn a day window into a cutoff. */
const MS_PER_DAY = 24 * 60 * 60 * 1000;
/** Largest absolute epoch-ms value a `Date` can represent. */
const MAX_DATE_MS = 8.64e15;

/**
 * Read the own entry stored under `slug`, or a zeroed entry. Own-key
 * only: a slug such as `constructor` must not resolve to an inherited
 * Object.prototype member and poison the totals with NaN.
 */
function readModelEntry(models, slug) {
  const own = Object.hasOwn(models, slug) ? models[slug] : undefined;
  return own ?? { input: 0, output: 0, callCount: 0 };
}

/**
 * Store `entry` as an own enumerable property. `defineProperty` is used
 * instead of assignment so the slug `__proto__` becomes a real key
 * (and is serialised) rather than triggering the prototype setter.
 */
function writeModelEntry(models, slug, entry) {
  Object.defineProperty(models, slug, {
    value: entry,
    writable: true,
    enumerable: true,
    configurable: true,
  });
}

/**
 * Create a cost tracker persisted at `<workspaceRoot>/.pugi/cost.json`.
 *
 * @param {object} opts
 * @param {string} opts.workspaceRoot - Directory that owns `.pugi/`.
 * @param {() => string} opts.sessionIdProvider - Returns the id of the
 *   live session; a falsy id makes `recordCall` a no-op.
 * @param {() => number} [opts.now] - Epoch-ms clock; defaults to `Date.now`.
 * @returns {object} Tracker exposing `recordCall`, `current`, `history`,
 *   `aggregateWithin`, `resetCurrent` and `flush`.
 */
export function createCostTracker(opts) {
  const filePath = resolve(opts.workspaceRoot, '.pugi/cost.json');
  const now = opts.now ?? Date.now;
  const state = readOrInit(filePath);

  /** Return the `current` row for `sessionId`, rotating the old one into history. */
  function ensureCurrent(sessionId) {
    if (state.current && state.current.sessionId === sessionId) {
      return state.current;
    }
    // Session rotation: stamp the outgoing row with endedAt and put it
    // at the front of history (newest first), trimming to the cap.
    if (state.current) {
      const ended = {
        ...state.current,
        endedAt: new Date(now()).toISOString(),
      };
      state.history = [ended, ...state.history].slice(0, COST_HISTORY_CAP);
    }
    state.current = {
      sessionId,
      startedAt: new Date(now()).toISOString(),
      models: {},
    };
    return state.current;
  }

  /** Write the whole state via tmp file + rename; throws if the write fails. */
  function persist() {
    try {
      mkdirSync(dirname(filePath), { recursive: true });
    } catch {
      // best-effort directory create; the write below surfaces the real
      // error if the parent is genuinely unwritable
    }
    const tmp = `${filePath}.tmp`;
    try {
      writeFileSync(tmp, JSON.stringify(state, null, 2), 'utf8');
      renameSync(tmp, filePath);
    } catch (error) {
      // Do not leave a stale or half-written temp file next to the
      // real one; the original error is what the caller needs to see.
      try {
        unlinkSync(tmp);
      } catch {
        // temp file was never created or is already gone
      }
      throw error;
    }
  }

  return {
    /** Add one call's token counts to the current session and persist. */
    recordCall(input) {
      const sessionId = opts.sessionIdProvider();
      if (!sessionId) {
        return;
      }
      const current = ensureCurrent(sessionId);
      const slug = typeof input.model === 'string' && input.model.length > 0 ? input.model : 'unknown';
      // Non-finite or non-positive counts contribute nothing.
      const safeIn = Number.isFinite(input.inputTokens) && input.inputTokens > 0 ? input.inputTokens : 0;
      const safeOut = Number.isFinite(input.outputTokens) && input.outputTokens > 0 ? input.outputTokens : 0;
      const existing = readModelEntry(current.models, slug);
      writeModelEntry(current.models, slug, {
        input: existing.input + safeIn,
        output: existing.output + safeOut,
        callCount: existing.callCount + 1,
      });
      persist();
    },
    /** The live session row, or `null` when none has been started. */
    current() {
      return state.current;
    },
    /** Ended session rows, newest first. */
    history() {
      return state.history;
    },
    /**
     * Sum per-model totals over the current session plus every history
     * row whose `startedAt` falls within the last `withinDays` days.
     * A `withinDays` that is NaN is treated as 0, and the cutoff is
     * clamped to the representable `Date` range, so an unbounded window
     * (`Infinity`) includes all history instead of throwing.
     */
    aggregateWithin(withinDays) {
      const coerced = Number(withinDays);
      const days = Number.isNaN(coerced) ? 0 : coerced;
      const rawCutoff = now() - days * MS_PER_DAY;
      const cutoffMs = Math.min(MAX_DATE_MS, Math.max(-MAX_DATE_MS, rawCutoff));
      const aggregate = {
        sessionId: 'aggregate',
        startedAt: new Date(cutoffMs).toISOString(),
        endedAt: new Date(now()).toISOString(),
        models: {},
      };
      const rows = [];
      // The live session is always included, regardless of the cutoff.
      if (state.current) {
        rows.push(state.current);
      }
      for (const row of state.history) {
        const stamp = Date.parse(row.startedAt);
        if (Number.isFinite(stamp) && stamp >= cutoffMs) {
          rows.push(row);
        }
      }
      for (const row of rows) {
        for (const [slug, entry] of Object.entries(row.models)) {
          const existing = readModelEntry(aggregate.models, slug);
          writeModelEntry(aggregate.models, slug, {
            input: existing.input + entry.input,
            output: existing.output + entry.output,
            callCount: existing.callCount + entry.callCount,
          });
        }
      }
      return aggregate;
    },
    /** Drop the current row (history untouched), persist, return what was dropped. */
    resetCurrent() {
      const wiped = state.current;
      state.current = null;
      persist();
      return wiped;
    },
    /** Force a write of the in-memory state. */
    flush() {
      persist();
    },
  };
}
148
/**
 * Sum the USD cost of every model entry in an aggregate, pricing each
 * entry through `estimateUsd(slug, input, output)`.
 *
 * @param {{ models: Record<string, { input: number, output: number }> }} aggregate
 * @returns {number} Total in USD; 0 for an aggregate with no models.
 */
export function totalUsd(aggregate) {
  return Object.entries(aggregate.models).reduce(
    (runningUsd, [slug, entry]) => runningUsd + estimateUsd(slug, entry.input, entry.output),
    0,
  );
}
160
/**
 * Add up input and output token counts across every model in an
 * aggregate.
 *
 * @param {{ models: Record<string, { input: number, output: number }> }} aggregate
 * @returns {{ input: number, output: number }} Summed token counts.
 */
export function totalTokens(aggregate) {
  const sums = { input: 0, output: 0 };
  Object.values(aggregate.models).forEach((entry) => {
    sums.input += entry.input;
    sums.output += entry.output;
  });
  return sums;
}
173
/**
 * Load the persisted cost state from `filePath`.
 *
 * Returns a fresh empty state when the file is missing, unreadable, not
 * valid JSON, or not a JSON object — a corrupt file never throws out of
 * this function. Within a parsed object, `version` is kept when it is a
 * number, `current` is kept when it passes `isAggregate`, and `history`
 * keeps only the array members that pass `isAggregate`.
 *
 * @param {string} filePath - Absolute path of the cost file.
 * @returns {{ version: number, current: object | null, history: object[] }}
 */
function readOrInit(filePath) {
  const blankState = () => ({ version: COST_FILE_SCHEMA_VERSION, current: null, history: [] });
  if (!existsSync(filePath)) {
    return blankState();
  }
  try {
    const parsed = JSON.parse(readFileSync(filePath, 'utf8'));
    const isRecord = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    if (!isRecord) {
      return blankState();
    }
    const { version, current, history } = parsed;
    return {
      version: typeof version === 'number' ? version : COST_FILE_SCHEMA_VERSION,
      current: isAggregate(current) ? current : null,
      history: Array.isArray(history) ? history.filter((row) => isAggregate(row)) : [],
    };
  } catch {
    // Read or parse failure: start over rather than break the caller.
    return blankState();
  }
}
200
/**
 * True when `value` is a finite, non-negative number — the only shape a
 * persisted token or call counter may take.
 */
function isCounter(value) {
  return typeof value === 'number' && Number.isFinite(value) && value >= 0;
}

/**
 * Structural check for a persisted session row.
 *
 * A row is accepted only when it is a plain (non-array) object with
 * string `sessionId` and `startedAt`, and `models` is a plain
 * (non-array) object whose every entry carries numeric `input`,
 * `output` and `callCount`. Validating the entries here keeps
 * hand-edited or truncated files from feeding `undefined`/`null`
 * counters into the token and USD sums downstream.
 *
 * @param {unknown} v - Candidate value parsed from the cost file.
 * @returns {boolean} Whether `v` can be used as a session aggregate.
 */
function isAggregate(v) {
  if (v === null || typeof v !== 'object' || Array.isArray(v)) {
    return false;
  }
  if (typeof v.sessionId !== 'string' || typeof v.startedAt !== 'string') {
    return false;
  }
  const { models } = v;
  if (models === null || typeof models !== 'object' || Array.isArray(models)) {
    return false;
  }
  return Object.values(models).every(
    (entry) =>
      entry !== null &&
      typeof entry === 'object' &&
      isCounter(entry.input) &&
      isCounter(entry.output) &&
      isCounter(entry.callCount),
  );
}
210
/**
 * Test helper — delete `<workspaceRoot>/.pugi/cost.json` if it exists.
 * Kept off the CostTracker surface: production code must never call
 * this; an operator-facing reset goes through `resetCurrent()`, which
 * leaves history in place.
 *
 * @param {string} workspaceRoot - Workspace whose cost file is removed.
 * @returns {void}
 */
export function _danger_wipeCostFile_forTests(workspaceRoot) {
  const target = resolve(workspaceRoot, '.pugi/cost.json');
  if (!existsSync(target)) {
    return;
  }
  unlinkSync(target);
}
221
+ //# sourceMappingURL=tracker.js.map
@@ -12,6 +12,7 @@ import { buildMcpToolDefs, defaultNonInteractiveMcpPrompt, dispatchMcpTool, MCP_
12
12
  import { buildDenialContext, DENIAL_REMINDER_THRESHOLD, } from '../denial-tracking/state.js';
13
13
  import { stripInternalFields } from './strip-internal-fields.js';
14
14
  import { applyAskAnswer, gate as permissionGate, getToolClass, PermissionDenied, } from '../permissions/index.js';
15
+ import { RetryBudget, RetryBudgetExhausted, hashArgs } from '../retry-budget/index.js';
15
16
  /**
16
17
  * Tool-bridge: turns the abstract tool registry into:
17
18
  * 1. An OpenAI-shaped tools schema for `EngineLoopClient.send`.
@@ -492,6 +493,9 @@ function requireString(obj, key) {
492
493
  }
493
494
  export function buildExecutor(input) {
494
495
  const { kind, ctx, hooks, sessionId, askUserBridge, interactive, allowFetch, allowSearch, agentDispatch, mcpRegistry, permissionMode, permissionAlwaysCache, permissionAsk, } = input;
496
+ // Leak L31: per-cycle budget. Default to a fresh instance scoped to
497
+ // this executor's closure lifetime; tests pass their own.
498
+ const retryBudget = input.retryBudget ?? new RetryBudget();
495
499
  const mcpPrompt = input.mcpPrompt ?? defaultNonInteractiveMcpPrompt;
496
500
  const workspaceRoot = input.workspaceRoot ?? ctx.root;
497
501
  const planMode = kind === 'plan';
@@ -608,6 +612,20 @@ export function buildExecutor(input) {
608
612
  if (ctx.cancellation && ctx.cancellation.isAborted) {
609
613
  throw recordDenial(name, argsForTracking, `OPERATOR_ABORTED: ${name} refused — operator cancelled the dispatch.`);
610
614
  }
615
+ // Leak L31 — per-cycle tool retry budget. Same tool + same canonical
616
+ // args = same bucket. Once the cap is hit we throw a typed sentinel
617
+ // so the model is forced out of a repair loop. We gate AFTER
618
+ // permission (denied calls do not burn budget) and BEFORE PreToolUse
619
+ // hooks (hook-blocked retries DO count — the model still issued the
620
+ // same call). The `recordAttempt` fires unconditionally so warn-only
621
+ // mode (PUGI_RETRY_BUDGET_DISABLED=1) still tracks the pattern for
622
+ // diagnostics.
623
+ const argHash = hashArgs(argsRaw);
624
+ const budgetDecision = retryBudget.shouldAllow(name, argHash);
625
+ retryBudget.recordAttempt(name, argHash);
626
+ if (!budgetDecision.allowed) {
627
+ throw new RetryBudgetExhausted(name, budgetDecision.cap, argHash);
628
+ }
611
629
  // Fire PreToolUse hooks. The match grammar takes the tool name and
612
630
  // (when extractable) the target path. Each new tool dispatch starts a
613
631
  // fresh dedup batch so a hook fires once per dispatch, not once per
@@ -633,7 +633,7 @@ export class ReplSession {
633
633
  return verdict;
634
634
  }
635
635
  case 'cost': {
636
- this.dispatchCost();
636
+ await this.dispatchCost();
637
637
  return verdict;
638
638
  }
639
639
  case 'quota': {
@@ -1166,13 +1166,21 @@ export class ReplSession {
1166
1166
  this.appendSystemLine(`/diff failed: ${this.errorMessage(error)}`);
1167
1167
  }
1168
1168
  }
1169
- dispatchCost() {
1169
+ async dispatchCost() {
1170
1170
  // α7 cost-meter sprint — full breakdown matching the TUI status row
1171
1171
  // footer. The session totals line mirrors the footer format
1172
1172
  // (`↑ <in> ↓ <out> · $X.XX · <elapsed>`) so the operator scans the
1173
1173
  // same numbers in two places. Per-turn list shows the last 5 turns
1174
1174
  // oldest → newest; an empty list renders one system line so the
1175
1175
  // operator knows the surface is wired (`No completed turns yet.`).
1176
+ //
1177
+ // L19 (2026-05-27) — after the in-memory recap, also render the
1178
+ // persisted per-model table from `.pugi/cost.json`. That surface
1179
+ // survives a REPL restart and answers the "what did I spend on
1180
+ // claude-opus vs qwen this week?" question the in-memory recap can
1181
+ // not. Errors loading the file collapse to a single warning line so
1182
+ // the in-memory recap (the older, well-tested surface) is never
1183
+ // gated behind a fresh dependency.
1176
1184
  const { sessionTokensIn, sessionTokensOut, sessionCostUsd, sessionStartedAtEpochMs, recentTurns, agents, } = this.state;
1177
1185
  const active = agents.filter((a) => a.status === 'queued' || a.status === 'thinking').length;
1178
1186
  const elapsedMs = Math.max(0, this.now() - sessionStartedAtEpochMs);
@@ -1181,13 +1189,44 @@ export class ReplSession {
1181
1189
  this.appendSystemLine(`Active dispatches: ${active} of cap.`);
1182
1190
  if (recentTurns.length === 0) {
1183
1191
  this.appendSystemLine('No completed turns yet — brief the workforce to charge the meter.');
1184
- return;
1185
1192
  }
1186
- this.appendSystemLine(`Recent turns (last ${recentTurns.length}):`);
1187
- for (let i = 0; i < recentTurns.length; i += 1) {
1188
- const turn = recentTurns[i];
1189
- const idx = (i + 1).toString().padStart(2, ' ');
1190
- this.appendSystemLine(` ${idx}. ${formatTokens(turn.tokensIn)} ${formatTokens(turn.tokensOut)} · ${formatCostUsd(turn.costUsd)}`);
1193
+ else {
1194
+ this.appendSystemLine(`Recent turns (last ${recentTurns.length}):`);
1195
+ for (let i = 0; i < recentTurns.length; i += 1) {
1196
+ const turn = recentTurns[i];
1197
+ const idx = (i + 1).toString().padStart(2, ' ');
1198
+ this.appendSystemLine(` ${idx}. ↑ ${formatTokens(turn.tokensIn)} ↓ ${formatTokens(turn.tokensOut)} · ${formatCostUsd(turn.costUsd)}`);
1199
+ }
1200
+ }
1201
+ // L19: append the persisted per-model table from .pugi/cost.json.
1202
+ try {
1203
+ const [{ createCostTracker }, { renderCostForSlash }] = await Promise.all([
1204
+ import('../cost/tracker.js'),
1205
+ import('../../runtime/commands/cost.js'),
1206
+ ]);
1207
+ const workspaceRoot = this.options.workspace?.workspaceCwd ?? process.cwd();
1208
+ const sessionId = this.state.sessionId ?? 'no-session';
1209
+ const tracker = createCostTracker({
1210
+ workspaceRoot,
1211
+ sessionIdProvider: () => sessionId,
1212
+ now: () => this.now(),
1213
+ });
1214
+ const current = tracker.current();
1215
+ if (current && Object.keys(current.models).length > 0) {
1216
+ this.appendSystemLine('');
1217
+ const { lines } = renderCostForSlash({
1218
+ tracker,
1219
+ allSessions: false,
1220
+ windowDays: 30,
1221
+ now: () => this.now(),
1222
+ });
1223
+ for (const line of lines)
1224
+ this.appendSystemLine(line);
1225
+ }
1226
+ }
1227
+ catch {
1228
+ // best-effort — the persisted view is additive; failure never
1229
+ // breaks the in-memory recap above
1191
1230
  }
1192
1231
  }
1193
1232
  /**
@@ -223,11 +223,17 @@ export function parseSlashCommand(input) {
223
223
  case 'diff': {
224
224
  return { kind: 'diff' };
225
225
  }
226
- case 'cost': {
226
+ case 'cost':
227
+ case 'usage': {
228
+ // L19 (2026-05-27): `/usage` is an alias of `/cost` per the cost-
229
+ // command spec. The previous mapping routed `/usage` to the
230
+ // network-backed `/quota` surface, but operators trained on Claude
231
+ // Code expect `/usage` to surface the per-model token breakdown
232
+ // (same shape as `/cost`). `/quota` remains the canonical name
233
+ // for the tier + monthly-cap fetch.
227
234
  return { kind: 'cost' };
228
235
  }
229
- case 'quota':
230
- case 'usage': {
236
+ case 'quota': {
231
237
  return { kind: 'quota' };
232
238
  }
233
239
  case 'status': {
@@ -0,0 +1,284 @@
1
+ /**
2
+ * Leak L31 — Per-command tool retry budget (Claude Code parity).
3
+ *
4
+ * Claude Code limits the number of times the model may retry the SAME
5
+ * tool with the SAME arguments inside a single operator-input cycle.
6
+ * Once the cap is hit, the dispatcher hard-refuses and surfaces a
7
+ * sentinel string telling the model that this exact call has exhausted
8
+ * its retry budget. The model is expected (via system-prompt rule) to
9
+ * either change approach or ask the operator for guidance instead of
10
+ * looping forever on a transient failure.
11
+ *
12
+ * Why per-cycle, not per-session: a retry budget that persists across
13
+ * operator turns would surprise the operator. After the operator says
14
+ * "try again" the model rightly retries; the budget must reset when a
15
+ * fresh brief arrives. The simplest reset boundary is the executor
16
+ * lifetime — `buildExecutor` is called once per `runEngineLoop` and
17
+ * the loop drives exactly one operator-input cycle. Constructing the
18
+ * budget inside `buildExecutor` therefore gives us per-cycle scoping
19
+ * "for free" via closure lifetime; no external clear() call is needed
20
+ * from production callsites. The exported `clear()` exists so tests
21
+ * and a future hook surface (PreToolUse) can introspect the state.
22
+ *
23
+ * Hash design: same tool + same canonical args = same bucket. We
24
+ * canonicalise the args record by sorting object keys (stable across
25
+ * model output ordering) and then sha256 the JSON. The model emits
26
+ * `arguments` as a raw JSON string; we parse, canonicalise, hash. If
27
+ * parse fails we hash the raw string verbatim — that way an
28
+ * unparseable repeat still counts toward the cap (otherwise the model
29
+ * could loop on syntactic noise variants forever).
30
+ *
31
+ * Env overrides:
32
+ * PUGI_RETRY_BUDGET_<TOOLNAME>=<N> — override a single tool's cap.
33
+ * Toolname matches DEFAULT_CAPS
34
+ * keys verbatim, uppercased
35
+ * (PUGI_RETRY_BUDGET_BASH=8).
36
+ * PUGI_RETRY_BUDGET_DEFAULT=<N> — override the fallback cap for
37
+ * any tool not in DEFAULT_CAPS.
38
+ * PUGI_RETRY_BUDGET_DISABLED=1 — warn-only mode. Attempts are
39
+ * still recorded, but `shouldAllow`
40
+ * always returns `allowed: true`. The
41
+ * count is preserved so
42
+ * diagnostics can still surface
43
+ * the pattern.
44
+ */
45
+ import { createHash } from 'node:crypto';
46
/**
 * Built-in retry caps, keyed by tool name:
 *
 *   bash               — 5  (most volatile; transient flakes common)
 *   edit / write       — 3  (deterministic; a repeat is a real bug)
 *   read               — 10 (cheap; legitimate re-reads after edits)
 *   search/grep/glob   — 10 (cheap; exploration loop)
 *   web_fetch          — 5  (transient network; not infinite)
 *   default            — 5  (any tool not listed here)
 *
 * Operators override a single tool with `PUGI_RETRY_BUDGET_<NAME>`.
 * Whatever cap wins is clamped into `[MIN_CAP, MAX_CAP]` afterwards, so
 * a typo such as `PUGI_RETRY_BUDGET_BASH=5000000` cannot disable the
 * budget in practice.
 */
export const DEFAULT_CAPS = Object.freeze({
  bash: 5,
  edit: 3,
  write: 3,
  read: 10,
  search: 10,
  grep: 10,
  glob: 10,
  web_fetch: 5,
  default: 5,
});

/**
 * Inclusive bounds applied to every resolved cap:
 *   - the floor keeps a cap of 0 from refusing the very first call;
 *   - the ceiling keeps a huge cap from being an unbounded loop.
 */
export const MIN_CAP = 1;
export const MAX_CAP = 1000;
79
/**
 * Read `table[key]` only when it is an own property holding a usable
 * number. Inherited members (`constructor`, `toString`, …) and NaN
 * would otherwise flow into the clamp below and yield a NaN cap, which
 * makes `count < cap` false forever and refuses every call.
 */
function ownNumericCap(table, key) {
  if (!Object.hasOwn(table, key)) {
    return undefined;
  }
  const value = table[key];
  return typeof value === 'number' && !Number.isNaN(value) ? value : undefined;
}

/**
 * Per-cycle retry budget: counts dispatch attempts per
 * (tool name, argument hash) bucket and reports whether the next
 * attempt is still under that tool's cap.
 *
 * Holds plain in-memory maps with no locking; callers that dispatch
 * tools concurrently must serialise access themselves.
 */
export class RetryBudget {
  /** Attempt counters keyed by `<toolName>::<argHash>`. */
  counts = new Map();
  /** Resolved cap per tool name, memoised by `capFor`. */
  capCache = new Map();
  /** Environment record the `PUGI_RETRY_BUDGET_*` overrides are read from. */
  env;
  /** Caps supplied through the constructor, keyed by tool name. */
  programmaticCaps;

  /**
   * @param {object} [options]
   * @param {Record<string, string | undefined>} [options.env] - Defaults to `process.env`.
   * @param {Record<string, number>} [options.caps] - Per-tool caps; rank below env overrides.
   */
  constructor(options = {}) {
    this.env = options.env ?? process.env;
    this.programmaticCaps = options.caps ?? {};
  }

  /**
   * True when `PUGI_RETRY_BUDGET_DISABLED` is exactly `'1'`. In that
   * mode `shouldAllow` always allows, while counts keep accumulating
   * through `recordAttempt`.
   */
  isDisabled() {
    return this.env.PUGI_RETRY_BUDGET_DISABLED === '1';
  }

  /**
   * Count one dispatch attempt for the (tool, argHash) bucket. Every
   * call increments the bucket by one.
   *
   * @returns {number} The bucket's count after this attempt.
   */
  recordAttempt(toolName, argHash) {
    const key = `${toolName}::${argHash}`;
    const next = (this.counts.get(key) ?? 0) + 1;
    this.counts.set(key, next);
    return next;
  }

  /** Current count for (tool, argHash) without mutating; 0 when unseen. */
  peek(toolName, argHash) {
    return this.counts.get(`${toolName}::${argHash}`) ?? 0;
  }

  /**
   * Resolve the effective cap for a tool.
   *
   * Precedence (first defined layer wins):
   *   1. `PUGI_RETRY_BUDGET_<TOOL_UPPER>` (env)
   *   2. `programmaticCaps[toolName]`     (constructor, own numeric value)
   *   3. `DEFAULT_CAPS[toolName]`         (own key only)
   *   4. `PUGI_RETRY_BUDGET_DEFAULT`      (env fallback)
   *   5. `DEFAULT_CAPS.default`
   *
   * The winner is clamped to `[MIN_CAP, MAX_CAP]` and memoised per tool
   * name until `clear()`.
   *
   * @param {string} toolName
   * @returns {number} Clamped cap.
   */
  capFor(toolName) {
    const cached = this.capCache.get(toolName);
    if (cached !== undefined) {
      return cached;
    }
    const envCap = parseCapEnv(this.env[`PUGI_RETRY_BUDGET_${toolName.toUpperCase()}`]);
    const resolved =
      envCap ??
      ownNumericCap(this.programmaticCaps, toolName) ??
      ownNumericCap(DEFAULT_CAPS, toolName) ??
      parseCapEnv(this.env.PUGI_RETRY_BUDGET_DEFAULT) ??
      DEFAULT_CAPS.default;
    const bounded = Math.min(MAX_CAP, Math.max(MIN_CAP, resolved));
    this.capCache.set(toolName, bounded);
    return bounded;
  }

  /**
   * Decide whether the next dispatch of (tool, argHash) is allowed.
   * Does not record anything: the decision compares the current count
   * with the cap, so the call is allowed while `count < cap`.
   *
   * In disabled mode `allowed` is forced to true; `count` and `cap`
   * still reflect reality so diagnostics can spot the pattern.
   *
   * @returns {{ allowed: boolean, count: number, cap: number, argHash: string, disabled: boolean }}
   */
  shouldAllow(toolName, argHash) {
    const cap = this.capFor(toolName);
    const count = this.peek(toolName, argHash);
    const disabled = this.isDisabled();
    const allowed = disabled ? true : count < cap;
    return { allowed, count, cap, argHash, disabled };
  }

  /** Drop every counter and every memoised cap. */
  clear() {
    this.counts.clear();
    this.capCache.clear();
  }

  /**
   * Snapshot the counters as plain `{ tool, argHash, count }` records.
   * Keys are split on the LAST `::` so a tool name that itself
   * contains `::` is reported intact.
   */
  snapshot() {
    const out = [];
    for (const [key, count] of this.counts) {
      const sep = key.lastIndexOf('::');
      if (sep < 0) {
        continue;
      }
      out.push({ tool: key.slice(0, sep), argHash: key.slice(sep + 2), count });
    }
    return out;
  }
}
203
/**
 * Turn a tool call's arguments into a bucket key: the hex SHA-256 of
 * whatever `canonicalise` returns for them.
 *
 * @param {string} argsRaw - Arguments as emitted by the model.
 * @returns {string} 64-character lowercase hex digest.
 */
export function hashArgs(argsRaw) {
  const hasher = createHash('sha256');
  hasher.update(canonicalise(argsRaw));
  return hasher.digest('hex');
}
213
/**
 * Reduce tool-call arguments to a canonical string.
 *
 *   - A string is parsed as JSON, its object keys are sorted
 *     recursively via `sortKeys`, and the result is re-serialised.
 *   - A value that is already parsed (object, array, number, …) is
 *     canonicalised directly. Coercing it to a string first would turn
 *     every object into `[object Object]` and put unrelated calls in
 *     the same bucket.
 *   - Anything that cannot be serialised (invalid JSON, `undefined`,
 *     cyclic structures) falls back to `raw:` + its string form, so a
 *     malformed repeat still lands in one stable bucket.
 *
 * Never throws.
 *
 * @param {unknown} argsRaw - Raw JSON text, or an already-parsed value.
 * @returns {string} Canonical representation.
 */
function canonicalise(argsRaw) {
  try {
    const value = typeof argsRaw === 'string' ? JSON.parse(argsRaw) : argsRaw;
    const json = JSON.stringify(sortKeys(value));
    // JSON.stringify yields undefined for undefined/functions/symbols.
    if (typeof json === 'string') {
      return json;
    }
  } catch {
    // fall through to the verbatim form below
  }
  // String() rather than template coercion so a Symbol cannot throw.
  return `raw:${String(argsRaw)}`;
}
228
/**
 * Return a copy of `value` in which every object has its keys in
 * sorted order, recursively. Arrays keep their element order;
 * primitives and `null` are returned as-is.
 *
 * The copy is built with `Object.fromEntries`, which defines each key
 * as an own property. Plain assignment would route a `__proto__` key
 * through the prototype setter and drop it from the result, letting
 * two different argument payloads serialise identically.
 *
 * @param {unknown} value - Parsed JSON value.
 * @returns {unknown} Key-sorted copy.
 */
function sortKeys(value) {
  if (value === null || typeof value !== 'object') {
    return value;
  }
  if (Array.isArray(value)) {
    return value.map((item) => sortKeys(item));
  }
  return Object.fromEntries(
    Object.keys(value)
      .sort()
      .map((key) => [key, sortKeys(value[key])]),
  );
}
240
/**
 * Parse one `PUGI_RETRY_BUDGET_*` value.
 *
 * Returns the number when `Number(raw)` is a positive integer, and
 * `undefined` otherwise (unset, empty, zero, negative, fractional,
 * non-numeric) so the caller can fall through to its next layer. No
 * `[MIN_CAP, MAX_CAP]` clamp happens here; the caller clamps once the
 * winning layer is known.
 *
 * @param {string | undefined} raw - Environment variable value.
 * @returns {number | undefined}
 */
function parseCapEnv(raw) {
  const isUnset = raw === undefined || raw === '';
  if (isUnset) {
    return undefined;
  }
  const parsed = Number(raw);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : undefined;
}
258
/**
 * Build the fixed-format message reported when a tool's retry budget
 * is exhausted. The wording is kept stable so consumers can
 * pattern-match on the `RETRY_BUDGET_EXHAUSTED:` prefix.
 *
 * @param {string} toolName - Tool whose budget ran out.
 * @param {number} cap - Cap that was reached.
 * @returns {string} Sentinel message.
 */
export function retryBudgetExhaustedSentinel(toolName, cap) {
  const sentinel = `RETRY_BUDGET_EXHAUSTED: ${toolName} exceeded ${cap} attempts with these args. Operator must intervene.`;
  return sentinel;
}
266
/**
 * Error raised when a (tool, args) bucket has used up its retry cap.
 * Its `message` is the string produced by
 * `retryBudgetExhaustedSentinel(toolName, cap)`; the tool name, cap and
 * argument hash are also exposed as fields. Use
 * `instanceof RetryBudgetExhausted` to detect it.
 */
export class RetryBudgetExhausted extends Error {
  /** Name of the tool that was refused. */
  toolName;
  /** Cap that had been reached. */
  cap;
  /** Hash of the arguments identifying the bucket. */
  argHash;

  /**
   * @param {string} toolName
   * @param {number} cap
   * @param {string} argHash
   */
  constructor(toolName, cap, argHash) {
    super(retryBudgetExhaustedSentinel(toolName, cap));
    Object.assign(this, { name: 'RetryBudgetExhausted', toolName, cap, argHash });
  }
}
284
+ //# sourceMappingURL=budget.js.map
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Leak L31 — Tool retry budget. Public surface.
3
+ */
4
+ export { DEFAULT_CAPS, MIN_CAP, MAX_CAP, RetryBudget, RetryBudgetExhausted, hashArgs, retryBudgetExhaustedSentinel, } from './budget.js';
5
+ //# sourceMappingURL=index.js.map
@@ -32,6 +32,7 @@ import { runStatusCommand, defaultStatusHome, } from './commands/status.js';
32
32
  import { runUndoCommand } from './commands/undo.js';
33
33
  import { runCompactCommand } from './commands/compact.js';
34
34
  import { runBudgetCommand } from './commands/budget.js';
35
+ import { runCostCommand } from './commands/cost.js';
35
36
  import { runSkillsCommand } from './commands/skills.js';
36
37
  import { installDefaultSkills } from '../core/skills/defaults.js';
37
38
  import { runAgentsCommand } from './commands/agents.js';
@@ -75,6 +76,7 @@ const handlers = {
75
76
  budget: dispatchBudget,
76
77
  code: runEngineTask('code'),
77
78
  config: dispatchConfig,
79
+ cost: dispatchCost,
78
80
  delegate: dispatchDelegate,
79
81
  deploy: dispatchDeploy,
80
82
  doctor,
@@ -108,6 +110,10 @@ const handlers = {
108
110
  sync,
109
111
  undo: dispatchUndo,
110
112
  compact: dispatchCompact,
113
+ // L19 (2026-05-27): `pugi usage` is an alias of `pugi cost` — same
114
+ // handler, same flags. Operators trained on Claude Code expect either
115
+ // verb to surface the per-model token + USD table.
116
+ usage: dispatchCost,
111
117
  version,
112
118
  web: dispatchWeb,
113
119
  whoami,
@@ -419,6 +425,19 @@ async function dispatchPermissions(args, flags, _session) {
419
425
  writeOutput: (text) => writeOutput(flags, { text }, text),
420
426
  });
421
427
  }
428
/**
 * Top-level handler behind `pugi cost` and its `pugi usage` alias
 * (L19 sprint, 2026-05-27).
 *
 * Delegates to `runCostCommand`, using the current working directory as
 * the workspace root and routing output through `writeOutput` with the
 * caller's flags.
 *
 * @param {string[]} args - Command-line arguments after the verb.
 * @param {object} flags - Global CLI flags forwarded to `writeOutput`.
 * @param {unknown} _session - Unused.
 */
async function dispatchCost(args, flags, _session) {
    const costContext = {
        workspaceRoot: process.cwd(),
        writeOutput: (payload, text) => writeOutput(flags, payload, text),
    };
    await runCostCommand(args, costContext);
}
422
441
  async function dispatchSkills(args, flags, _session) {
423
442
  await runSkillsCommand(args, {
424
443
  workspaceRoot: process.cwd(),
@@ -1025,6 +1044,26 @@ const COMMAND_HELP_BODIES = {
1025
1044
  ' pugi config get privacy',
1026
1045
  ' pugi config set privacy=<mode>',
1027
1046
  ],
1047
+ cost: [
1048
+ 'pugi cost — token + USD breakdown for the current Pugi session.',
1049
+ '',
1050
+ 'Reads .pugi/cost.json (persisted via the in-REPL CostTracker) and',
1051
+ 'prints a per-model table plus dollar estimate. Alias: pugi usage.',
1052
+ '',
1053
+ 'Flags:',
1054
+ ' --all-sessions 30-day rolling aggregate across all sessions.',
1055
+ ' --window=<days> Override the aggregate window (max 365).',
1056
+ ' --reset --yes Clear the current-session counter. History',
1057
+ ' is preserved. Requires --yes to confirm.',
1058
+ ' --json Emit a structured JSON envelope only.',
1059
+ '',
1060
+ 'Examples:',
1061
+ ' pugi cost Current session totals.',
1062
+ ' pugi cost --all-sessions Past 30 days aggregated.',
1063
+ ' pugi cost --all-sessions --window=7',
1064
+ ' pugi cost --reset --yes Wipe the session counter.',
1065
+ ' pugi usage Alias for pugi cost.',
1066
+ ],
1028
1067
  config: [
1029
1068
  'pugi config — read / write CLI + tenant configuration.',
1030
1069
  '',
@@ -0,0 +1,199 @@
1
+ /**
2
+ * `pugi cost` / `pugi usage` command handler — L19 sprint (2026-05-27).
3
+ *
4
+ * Shared backend for the following operator surfaces:
5
+ *
6
+ * - `pugi cost` current session (default)
7
+ * - `pugi cost --all-sessions` 30-day rolling aggregate
8
+ * - `pugi cost --reset --yes` wipe current session counter (operator-only)
9
+ * - `pugi usage` alias of `pugi cost`
10
+ * - `/cost` REPL slash same handler, in-REPL output
11
+ * - `/usage` REPL slash same handler, alias of /cost
12
+ *
13
+ * Why a separate command from the existing `pugi budget`:
14
+ *
15
+ * - `pugi budget` walks `.pugi/events.jsonl` and bills against the
16
+ * event-log heuristic (per-command / per-persona attribution). It
17
+ * is the right surface for "what did this brief / this persona
18
+ * spend?". It does not break down by model and it does not persist
19
+ * a cross-session aggregate.
20
+ *
21
+ * - `pugi cost` (this command) reads the persisted `.pugi/cost.json`
22
+ * written by the `CostTracker`. It is the right surface for "what
23
+ * did this model spend?" and "what did I spend across the last 30
24
+ * days?". Token + USD figures are sourced from the rate card, which
25
+ * distinguishes hosted Claude (per-token billed) from open-weight
26
+ * Qwen / Kimi / DeepSeek (infra cost only).
27
+ *
28
+ * Both commands intentionally coexist — they answer adjacent but distinct
29
+ * operator questions. The L19 spec calls out `/cost` and `/usage` by
30
+ * name; the budget surface is unaffected.
31
+ */
32
+ import { existsSync, readFileSync } from 'node:fs';
33
+ import { resolve } from 'node:path';
34
+ import { createCostTracker, totalTokens, totalUsd, } from '../../core/cost/tracker.js';
35
+ import { buildCostView, renderCostTableText } from '../../tui/cost-table.js';
36
/**
 * Turn the raw argument list of `pugi cost` into a flag bundle.
 *
 * Recognised tokens: `--all-sessions`, `--reset`, `--yes` / `-y`,
 * `--json`, and `--window=<days>`. The window is only taken when
 * `Number.parseInt` yields a value in the range 1..365; otherwise the
 * default of 30 stays in place. Unknown tokens are ignored.
 *
 * @param {string[]} args - Arguments following the command verb.
 * @returns {{allSessions: boolean, reset: boolean, yes: boolean, json: boolean, windowDays: number}}
 */
export function parseCostFlags(args) {
    const windowPrefix = '--window=';
    const parsed = {
        allSessions: false,
        reset: false,
        yes: false,
        json: false,
        windowDays: 30,
    };
    for (let pos = 0; pos < args.length; pos += 1) {
        const token = args[pos] ?? '';
        switch (token) {
            case '--all-sessions':
                parsed.allSessions = true;
                break;
            case '--reset':
                parsed.reset = true;
                break;
            case '--yes':
            case '-y':
                parsed.yes = true;
                break;
            case '--json':
                parsed.json = true;
                break;
            default: {
                if (!token.startsWith(windowPrefix)) {
                    break;
                }
                const days = Number.parseInt(token.slice(windowPrefix.length), 10);
                const inRange = Number.isFinite(days) && days > 0 && days <= 365;
                if (inRange) {
                    parsed.windowDays = days;
                }
            }
        }
    }
    return parsed;
}
66
/**
 * Entry point for `pugi cost` / `pugi usage`.
 *
 * Parses the flags, resolves a session id (explicit `ctx.sessionId`,
 * then the events log, then the `'no-session'` placeholder), and builds
 * a cost tracker for the workspace. From there it takes one of three
 * paths:
 *
 *  - `--reset` without `--yes`: emits a confirmation prompt and stops.
 *  - `--reset --yes`: clears the current session counter and reports it.
 *  - otherwise: renders either the current session or the rolling
 *    aggregate and emits a structured payload plus the text table.
 *
 * @param {string[]} args - Arguments following the command verb.
 * @param {object} ctx - Needs `workspaceRoot` and `writeOutput(payload, text)`;
 *   may carry `sessionId`, `now`, and an async `resolveTier`.
 * @returns {Promise<void>}
 */
export async function runCostCommand(args, ctx) {
    const { allSessions, reset, yes, windowDays } = parseCostFlags(args);
    const sessionId = ctx.sessionId ?? deriveSessionIdFromEvents(ctx.workspaceRoot) ?? 'no-session';
    const tracker = createCostTracker({
        workspaceRoot: ctx.workspaceRoot,
        sessionIdProvider: () => sessionId,
        now: ctx.now,
    });

    // A reset is only honoured with an explicit confirmation, so a stray
    // `--reset` cannot wipe the meter.
    if (reset && !yes) {
        ctx.writeOutput({ command: 'cost', status: 'reset_pending_confirmation' }, 'pugi cost --reset clears the current session counter. Re-run with --yes to confirm.');
        return;
    }
    if (reset) {
        const wiped = tracker.resetCurrent();
        const summary = wiped
            ? `Cleared session ${wiped.sessionId} (${Object.keys(wiped.models).length} model(s) wiped).`
            : 'No current session counter to clear.';
        ctx.writeOutput({ command: 'cost', status: 'reset_ok', wiped: wiped ?? null }, summary);
        return;
    }

    const clock = ctx.now ?? Date.now;
    let aggregate;
    if (allSessions) {
        aggregate = tracker.aggregateWithin(windowDays);
    }
    else {
        aggregate = tracker.current() ?? emptyAggregate(sessionId, clock);
    }
    const tier = ctx.resolveTier ? await safeResolveTier(ctx.resolveTier) : null;
    const heading = allSessions
        ? `Pugi cost / usage — aggregate (last ${windowDays} days)`
        : buildSessionHeading(aggregate, clock);
    const view = buildCostView({ aggregate, heading, tier: tier ?? undefined });
    const text = renderCostTableText(view);
    const perModel = view.rows.map(({ model, inputTokens, outputTokens, usd, note }) => ({
        model,
        input: inputTokens,
        output: outputTokens,
        usd,
        note: note ?? null,
    }));
    ctx.writeOutput({
        command: allSessions ? 'cost.aggregate' : 'cost.session',
        status: 'ok',
        window: allSessions ? `${windowDays}d` : 'current',
        tokens: {
            input: view.totalInputTokens,
            output: view.totalOutputTokens,
        },
        dollars: view.totalUsd,
        perModel,
        tier: tier ?? null,
    }, text);
}
118
/**
 * Synchronous render helper for the REPL `/cost` / `/usage` slash
 * commands. It produces the view model and its plain-text lines and
 * leaves the job of displaying them to the caller.
 *
 * Kept in this module rather than the Ink module so the slash path does
 * not have to import Ink / React.
 *
 * @param {object} input - Carries `tracker`, `now`, and optionally
 *   `allSessions`, `windowDays`, and `tier`.
 * @returns {{view: object, lines: string[]}} The view model and the text
 *   table split on newlines.
 */
export function renderCostForSlash(input) {
    const { tracker, now, windowDays } = input;
    let aggregate;
    let heading;
    if (input.allSessions) {
        aggregate = tracker.aggregateWithin(windowDays);
        heading = `Pugi cost / usage — aggregate (last ${windowDays} days)`;
    }
    else {
        aggregate = tracker.current() ?? emptyAggregate('no-session', now);
        heading = buildSessionHeading(aggregate, now);
    }
    const view = buildCostView({ aggregate, heading, tier: input.tier ?? undefined });
    const lines = renderCostTableText(view).split('\n');
    return { view, lines };
}
137
/**
 * Best-effort lookup of a session id from `<workspaceRoot>/.pugi/events.jsonl`.
 *
 * The file is read in full, blank lines are dropped, and the remaining
 * lines are examined from last to first. The first line that parses as
 * JSON with `type === 'session'`, `name === 'start'`, and a string
 * `sessionId` wins. Lines that fail to parse (for example a partially
 * written tail) are skipped.
 *
 * @param {string} workspaceRoot - Directory containing `.pugi/`.
 * @returns {string | null} The session id, or `null` when the file is
 *   missing, unreadable, or contains no matching event.
 */
function deriveSessionIdFromEvents(workspaceRoot) {
    const eventsPath = resolve(workspaceRoot, '.pugi/events.jsonl');
    if (!existsSync(eventsPath)) {
        return null;
    }
    let pending;
    try {
        pending = readFileSync(eventsPath, 'utf8')
            .split('\n')
            .filter((entry) => entry.trim().length > 0);
    }
    catch {
        // An unreadable log is treated the same as an absent one.
        return null;
    }
    // Consume from the tail so the newest matching event is found first.
    while (pending.length > 0) {
        const entry = pending.pop();
        try {
            const event = JSON.parse(entry);
            const isSessionStart = event.type === 'session' && event.name === 'start';
            if (isSessionStart && typeof event.sessionId === 'string') {
                return event.sessionId;
            }
        }
        catch {
            // Unparseable or non-object lines are skipped.
        }
    }
    return null;
}
170
/**
 * Compose the heading line for a single-session cost report.
 *
 * A missing aggregate, or one whose id is the `'no-session'` or
 * `'aggregate'` placeholder, produces the "no active session" heading.
 * Otherwise the heading names the session and, when `startedAt` parses
 * as a date, appends the whole minutes elapsed since then (never
 * negative).
 *
 * @param {{sessionId: string, startedAt: string} | null | undefined} aggregate
 * @param {() => number} now - Clock returning epoch milliseconds; only
 *   invoked when `startedAt` is a parseable date.
 * @returns {string} The heading text.
 */
function buildSessionHeading(aggregate, now) {
    const placeholderIds = ['no-session', 'aggregate'];
    if (!aggregate || placeholderIds.includes(aggregate.sessionId)) {
        return 'Pugi cost / usage — no active session';
    }
    const { sessionId, startedAt } = aggregate;
    const startedMs = Date.parse(startedAt);
    if (Number.isFinite(startedMs)) {
        const elapsedMin = Math.max(0, Math.floor((now() - startedMs) / 60_000));
        return `Pugi cost / usage — session ${sessionId} (${elapsedMin} min)`;
    }
    return `Pugi cost / usage — session ${sessionId}`;
}
181
/**
 * Create a placeholder aggregate with no model entries, stamped with the
 * current time from the supplied clock.
 *
 * @param {string} sessionId - Id to record on the aggregate.
 * @param {() => number} now - Clock returning epoch milliseconds.
 * @returns {{sessionId: string, startedAt: string, models: object}}
 */
function emptyAggregate(sessionId, now) {
    const startedAt = new Date(now()).toISOString();
    return { sessionId, startedAt, models: {} };
}
188
/**
 * Call a tier resolver without letting its failure propagate.
 *
 * @param {() => unknown} resolver - Sync or async function producing the
 *   tier information.
 * @returns {Promise<unknown>} The resolved value, or `null` when the
 *   resolver throws or rejects.
 */
async function safeResolveTier(resolver) {
    let tier = null;
    try {
        tier = await resolver();
    }
    catch {
        // Tier lookup is optional; a failure simply means "no tier".
        tier = null;
    }
    return tier;
}
196
+ // Re-export aggregate helpers so the cli.ts wire-up can read totals
197
+ // without reaching into the tracker module directly.
198
+ export { totalUsd, totalTokens };
199
+ //# sourceMappingURL=cost.js.map
@@ -44,7 +44,7 @@ export function sanitizeSemver(raw) {
44
44
  * during import). When bumping the CLI version BOTH literals must be
45
45
  * updated; the release smoke-test (`pack:smoke`) verifies they agree.
46
46
  */
47
- export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.19');
47
+ export const PUGI_CLI_VERSION = sanitizeSemver('0.1.0-beta.20');
48
48
  /**
49
49
  * Outbound: the CLI's installed semver. Read at request time by
50
50
  * `version-interceptor.ts` and injected on every `fetch` call.
@@ -0,0 +1,111 @@
1
+ import { jsx as _jsx, jsxs as _jsxs } from "react/jsx-runtime";
2
+ import { Box, Text } from 'ink';
3
+ import { estimateUsd, formatTokensWithCommas, formatUsd, rateFor } from '../core/cost/rate-card.js';
4
+ /**
5
+ * Column widths chosen to match the L19 spec output. The model column is
6
+ * the widest because slug strings like `qwen3-coder-480b-instruct-fp8`
7
+ * run 29 chars. We pad/truncate inside the renderer so a TUI on an 80-col
8
+ * terminal does not wrap mid-table.
9
+ */
10
+ const COL_MODEL = 34;
11
+ const COL_IN = 8;
12
+ const COL_OUT = 9;
13
+ const COL_USD = 10;
14
/**
 * Ink component that renders one cost report from a prepared view.
 *
 * Layout, top to bottom: heading, rule, blank line, column header, one
 * line per model row (or a dimmed empty-state line when there are no
 * rows), blank line, token totals, dollar total, and an optional tier
 * line. The output depends only on `view`.
 *
 * @param {{view: object}} props - `view` as produced by `buildCostView`.
 */
export function CostTable({ view }) {
    const { rows, tier } = view;
    const blankLine = () => _jsx(Text, { children: " " });

    const headingLine = _jsx(Text, { children: view.heading });
    const ruleLine = _jsx(Text, { children: "\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550" });
    const topSpacer = blankLine();
    const columnHeader = _jsxs(Text, {
        children: [
            pad('MODEL', COL_MODEL),
            padLeft('IN_TOK', COL_IN),
            padLeft('OUT_TOK', COL_OUT),
            padLeft('$ EST', COL_USD),
        ],
    });

    // Each model row is keyed by its model slug.
    const renderRow = (row) => _jsxs(Text, {
        children: [
            pad(row.model, COL_MODEL),
            padLeft(formatTokensWithCommas(row.inputTokens), COL_IN),
            padLeft(formatTokensWithCommas(row.outputTokens), COL_OUT),
            padLeft(formatUsd(row.usd), COL_USD),
            row.note ? ` (${row.note})` : '',
        ],
    }, row.model);
    const body = rows.length === 0
        ? _jsx(Text, { dimColor: true, children: "No calls recorded yet \u2014 brief a persona to charge the meter." })
        : rows.map((row) => renderRow(row));

    const bottomSpacer = blankLine();
    const tokenTotals = _jsxs(Text, {
        children: [
            "Total tokens: ",
            formatTokensWithCommas(view.totalInputTokens + view.totalOutputTokens),
            ' (in: ',
            formatTokensWithCommas(view.totalInputTokens),
            ', out: ',
            formatTokensWithCommas(view.totalOutputTokens),
            ')',
        ],
    });
    const dollarTotal = _jsxs(Text, { children: ["Total dollar estimate: ", formatUsd(view.totalUsd)] });
    const tierLine = tier
        ? _jsxs(Text, { children: ["Tier: ", tier.tier, tier.quotaLine ? ` (${tier.quotaLine})` : ''] })
        : null;

    return (_jsxs(Box, {
        flexDirection: "column",
        children: [
            headingLine,
            ruleLine,
            topSpacer,
            columnHeader,
            body,
            bottomSpacer,
            tokenTotals,
            dollarTotal,
            tierLine,
        ],
    }));
}
22
/**
 * Convert a session aggregate into the view model shared by the Ink
 * table and the plain-text renderer.
 *
 * One row is produced per model in `input.aggregate.models`, priced via
 * `estimateUsd` and annotated with the rate card's `note`. Rows are
 * ordered by USD descending, with ties broken by total tokens
 * descending. Totals are summed over the sorted rows.
 *
 * @param {{aggregate: {models: object}, heading: string, tier?: object}} input
 * @returns {{heading: string, rows: object[], totalInputTokens: number,
 *   totalOutputTokens: number, totalUsd: number, tier: object | undefined}}
 */
export function buildCostView(input) {
    const rows = Object.entries(input.aggregate.models).map(([model, entry]) => {
        const usd = estimateUsd(model, entry.input, entry.output);
        const { note } = rateFor(model);
        return {
            model,
            inputTokens: entry.input,
            outputTokens: entry.output,
            usd,
            note,
        };
    });

    // Most expensive first; equal-cost rows fall back to token volume.
    const tokenVolume = (row) => row.inputTokens + row.outputTokens;
    rows.sort((left, right) => (right.usd !== left.usd
        ? right.usd - left.usd
        : tokenVolume(right) - tokenVolume(left)));

    const totals = rows.reduce((acc, row) => ({
        input: acc.input + row.inputTokens,
        output: acc.output + row.outputTokens,
        usd: acc.usd + row.usd,
    }), { input: 0, output: 0, usd: 0 });

    return {
        heading: input.heading,
        rows,
        totalInputTokens: totals.input,
        totalOutputTokens: totals.output,
        totalUsd: totals.usd,
        tier: input.tier,
    };
}
66
/**
 * Render a cost view as a plain multi-line string with no ANSI codes,
 * following the same line order as the `CostTable` component: heading,
 * rule, blank line, column header, rows (or the empty-state line), blank
 * line, token totals, dollar total, and an optional tier line.
 *
 * @param {object} view - View model as produced by `buildCostView`.
 * @returns {string} The table, lines joined with `\n`.
 */
export function renderCostTableText(view) {
    const columnHeader = [
        pad('MODEL', COL_MODEL),
        padLeft('IN_TOK', COL_IN),
        padLeft('OUT_TOK', COL_OUT),
        padLeft('$ EST', COL_USD),
    ].join('');

    const bodyLines = view.rows.length === 0
        ? ['No calls recorded yet — brief a persona to charge the meter.']
        : view.rows.map((row) => [
            pad(row.model, COL_MODEL),
            padLeft(formatTokensWithCommas(row.inputTokens), COL_IN),
            padLeft(formatTokensWithCommas(row.outputTokens), COL_OUT),
            padLeft(formatUsd(row.usd), COL_USD),
            row.note ? ` (${row.note})` : '',
        ].join(''));

    const output = [
        view.heading,
        '════════════════════════════════════════════════',
        '',
        columnHeader,
        ...bodyLines,
        '',
        `Total tokens: ${formatTokensWithCommas(view.totalInputTokens + view.totalOutputTokens)} (in: ${formatTokensWithCommas(view.totalInputTokens)}, out: ${formatTokensWithCommas(view.totalOutputTokens)})`,
        `Total dollar estimate: ${formatUsd(view.totalUsd)}`,
    ];
    if (view.tier) {
        output.push(`Tier: ${view.tier.tier}${view.tier.quotaLine ? ` (${view.tier.quotaLine})` : ''}`);
    }
    return output.join('\n');
}
101
/**
 * Left-align `value` in a cell of `width` characters.
 *
 * Shorter values are filled with trailing spaces. Values that are as
 * wide as the cell or wider are cut to `width - 1` characters and
 * followed by one space, so the cell always ends in a separator.
 *
 * @param {string} value - Cell text.
 * @param {number} width - Cell width in characters.
 * @returns {string} The padded or truncated cell.
 */
function pad(value, width) {
    const fits = value.length < width;
    if (fits) {
        return value.padEnd(width);
    }
    const keep = Math.max(0, width - 1);
    return `${value.slice(0, keep)} `;
}
106
/**
 * Right-align `value` in a cell of `width` characters.
 *
 * Values that fit are filled with leading spaces. A value wider than the
 * cell is returned in full, prefixed by a single space. It is
 * deliberately NOT truncated: these cells hold token counts and dollar
 * amounts, and cutting "1,234,567" down to "1,234,56" would display a
 * wrong number. An overflowing row is therefore wider than the header
 * but keeps correct, separated figures.
 *
 * @param {string} value - Cell text.
 * @param {number} width - Cell width in characters.
 * @returns {string} The right-aligned cell.
 */
function padLeft(value, width) {
    if (value.length > width) {
        // Overflow: keep every character and one separating space.
        return ` ${value}`;
    }
    return value.padStart(width);
}
111
+ //# sourceMappingURL=cost-table.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pugi/cli",
3
- "version": "0.1.0-beta.19",
3
+ "version": "0.1.0-beta.20",
4
4
  "description": "Pugi CLI - terminal-native software execution system",
5
5
  "homepage": "https://pugi.io",
6
6
  "repository": {
@@ -54,7 +54,7 @@
54
54
  "undici": "^8.3.0",
55
55
  "zod": "^3.23.0",
56
56
  "@pugi/personas": "0.1.2",
57
- "@pugi/sdk": "0.1.0-beta.19"
57
+ "@pugi/sdk": "0.1.0-beta.20"
58
58
  },
59
59
  "devDependencies": {
60
60
  "@types/node": "^22.0.0",